{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3744091355829082, "eval_steps": 250, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.360228389572705e-05, "grad_norm": 101.65216064453125, "learning_rate": 5e-05, "loss": 8.6947, "num_input_tokens_seen": 66716, "step": 1 }, { "epoch": 9.360228389572705e-05, "loss": 8.418586730957031, "loss_ce": 4.6138997077941895, "loss_iou": 1.1171875, "loss_num": 0.3125, "loss_xval": 3.8125, "num_input_tokens_seen": 66716, "step": 1 }, { "epoch": 0.0001872045677914541, "grad_norm": 96.95561981201172, "learning_rate": 5e-05, "loss": 8.4681, "num_input_tokens_seen": 133808, "step": 2 }, { "epoch": 0.0001872045677914541, "loss": 8.591793060302734, "loss_ce": 4.539058685302734, "loss_iou": 1.203125, "loss_num": 0.33203125, "loss_xval": 4.0625, "num_input_tokens_seen": 133808, "step": 2 }, { "epoch": 0.00028080685168718116, "grad_norm": 98.5390853881836, "learning_rate": 5e-05, "loss": 8.838, "num_input_tokens_seen": 198860, "step": 3 }, { "epoch": 0.00028080685168718116, "loss": 8.935352325439453, "loss_ce": 4.909960746765137, "loss_iou": 1.21875, "loss_num": 0.31640625, "loss_xval": 4.03125, "num_input_tokens_seen": 198860, "step": 3 }, { "epoch": 0.0003744091355829082, "grad_norm": 99.13172912597656, "learning_rate": 5e-05, "loss": 8.4022, "num_input_tokens_seen": 265008, "step": 4 }, { "epoch": 0.0003744091355829082, "loss": 8.233530044555664, "loss_ce": 4.821421146392822, "loss_iou": 0.99609375, "loss_num": 0.28515625, "loss_xval": 3.40625, "num_input_tokens_seen": 265008, "step": 4 }, { "epoch": 0.00046801141947863526, "grad_norm": 94.90145111083984, "learning_rate": 5e-05, "loss": 8.5621, "num_input_tokens_seen": 331540, "step": 5 }, { "epoch": 0.00046801141947863526, "loss": 8.46754264831543, "loss_ce": 4.4636359214782715, "loss_iou": 1.2265625, "loss_num": 0.30859375, "loss_xval": 4.0, "num_input_tokens_seen": 331540, "step": 5 }, { "epoch": 0.0005616137033743623, "grad_norm": 95.53054809570312, "learning_rate": 5e-05, "loss": 8.3328, "num_input_tokens_seen": 397776, "step": 6 }, { "epoch": 0.0005616137033743623, "loss": 8.50499153137207, "loss_ce": 4.422959804534912, "loss_iou": 1.21875, "loss_num": 0.328125, "loss_xval": 4.09375, "num_input_tokens_seen": 397776, "step": 6 }, { "epoch": 0.0006552159872700894, "grad_norm": 90.96040344238281, "learning_rate": 5e-05, "loss": 7.9192, "num_input_tokens_seen": 464376, "step": 7 }, { "epoch": 0.0006552159872700894, "loss": 7.958445072174072, "loss_ce": 3.9564919471740723, "loss_iou": 1.21875, "loss_num": 0.314453125, "loss_xval": 4.0, "num_input_tokens_seen": 464376, "step": 7 }, { "epoch": 0.0007488182711658164, "grad_norm": 86.27174377441406, "learning_rate": 5e-05, "loss": 7.4725, "num_input_tokens_seen": 531144, "step": 8 }, { "epoch": 0.0007488182711658164, "loss": 7.436006546020508, "loss_ce": 3.336397171020508, "loss_iou": 1.2421875, "loss_num": 0.322265625, "loss_xval": 4.09375, "num_input_tokens_seen": 531144, "step": 8 }, { "epoch": 0.0008424205550615435, "grad_norm": 77.49860382080078, "learning_rate": 5e-05, "loss": 7.1421, "num_input_tokens_seen": 597284, "step": 9 }, { "epoch": 0.0008424205550615435, "loss": 7.4713544845581055, "loss_ce": 3.3854169845581055, "loss_iou": 1.2890625, "loss_num": 0.30078125, "loss_xval": 4.09375, "num_input_tokens_seen": 597284, "step": 9 }, { "epoch": 0.0009360228389572705, "grad_norm": 82.7765884399414, "learning_rate": 5e-05, "loss": 6.3314, "num_input_tokens_seen": 663084, "step": 10 }, { "epoch": 0.0009360228389572705, "loss": 6.534829139709473, "loss_ce": 2.8512353897094727, "loss_iou": 1.1640625, "loss_num": 0.26953125, "loss_xval": 3.6875, "num_input_tokens_seen": 663084, "step": 10 }, { "epoch": 0.0010296251228529977, "grad_norm": 41.23231887817383, "learning_rate": 5e-05, "loss": 5.5767, "num_input_tokens_seen": 728692, "step": 11 }, { "epoch": 0.0010296251228529977, "loss": 5.544264793395996, "loss_ce": 1.9759057760238647, "loss_iou": 1.0703125, "loss_num": 0.28515625, "loss_xval": 3.5625, "num_input_tokens_seen": 728692, "step": 11 }, { "epoch": 0.0011232274067487246, "grad_norm": 35.95630645751953, "learning_rate": 5e-05, "loss": 5.464, "num_input_tokens_seen": 795364, "step": 12 }, { "epoch": 0.0011232274067487246, "loss": 5.679543972015381, "loss_ce": 1.7693877220153809, "loss_iou": 1.2421875, "loss_num": 0.287109375, "loss_xval": 3.90625, "num_input_tokens_seen": 795364, "step": 12 }, { "epoch": 0.0012168296906444518, "grad_norm": 30.46725845336914, "learning_rate": 5e-05, "loss": 5.1851, "num_input_tokens_seen": 861160, "step": 13 }, { "epoch": 0.0012168296906444518, "loss": 5.258492469787598, "loss_ce": 1.4420864582061768, "loss_iou": 1.2109375, "loss_num": 0.27734375, "loss_xval": 3.8125, "num_input_tokens_seen": 861160, "step": 13 }, { "epoch": 0.0013104319745401787, "grad_norm": 34.04572296142578, "learning_rate": 5e-05, "loss": 4.9258, "num_input_tokens_seen": 926332, "step": 14 }, { "epoch": 0.0013104319745401787, "loss": 4.778914451599121, "loss_ce": 1.314070463180542, "loss_iou": 1.109375, "loss_num": 0.25, "loss_xval": 3.46875, "num_input_tokens_seen": 926332, "step": 14 }, { "epoch": 0.0014040342584359059, "grad_norm": 32.23663330078125, "learning_rate": 5e-05, "loss": 4.3431, "num_input_tokens_seen": 992124, "step": 15 }, { "epoch": 0.0014040342584359059, "loss": 4.308436393737793, "loss_ce": 0.9920302629470825, "loss_iou": 1.0859375, "loss_num": 0.2275390625, "loss_xval": 3.3125, "num_input_tokens_seen": 992124, "step": 15 }, { "epoch": 0.0014976365423316328, "grad_norm": 19.634584426879883, "learning_rate": 5e-05, "loss": 3.8286, "num_input_tokens_seen": 1057580, "step": 16 }, { "epoch": 0.0014976365423316328, "loss": 3.418391704559326, "loss_ce": 0.7045247554779053, "loss_iou": 0.83984375, "loss_num": 0.2060546875, "loss_xval": 2.71875, "num_input_tokens_seen": 1057580, "step": 16 }, { "epoch": 0.00159123882622736, "grad_norm": 26.426013946533203, "learning_rate": 5e-05, "loss": 4.1967, "num_input_tokens_seen": 1123876, "step": 17 }, { "epoch": 0.00159123882622736, "loss": 4.267147064208984, "loss_ce": 0.5249598622322083, "loss_iou": 1.25, "loss_num": 0.248046875, "loss_xval": 3.75, "num_input_tokens_seen": 1123876, "step": 17 }, { "epoch": 0.001684841110123087, "grad_norm": 36.31227493286133, "learning_rate": 5e-05, "loss": 3.6872, "num_input_tokens_seen": 1190144, "step": 18 }, { "epoch": 0.001684841110123087, "loss": 3.7517099380493164, "loss_ce": 0.32592862844467163, "loss_iou": 1.1484375, "loss_num": 0.2255859375, "loss_xval": 3.421875, "num_input_tokens_seen": 1190144, "step": 18 }, { "epoch": 0.001778443394018814, "grad_norm": 20.471094131469727, "learning_rate": 5e-05, "loss": 3.2612, "num_input_tokens_seen": 1255520, "step": 19 }, { "epoch": 0.001778443394018814, "loss": 3.0514767169952393, "loss_ce": 0.12569549679756165, "loss_iou": 0.859375, "loss_num": 0.2421875, "loss_xval": 2.921875, "num_input_tokens_seen": 1255520, "step": 19 }, { "epoch": 0.001872045677914541, "grad_norm": 13.815719604492188, "learning_rate": 5e-05, "loss": 3.565, "num_input_tokens_seen": 1322952, "step": 20 }, { "epoch": 0.001872045677914541, "loss": 3.4919581413269043, "loss_ce": 0.06422379612922668, "loss_iou": 1.1328125, "loss_num": 0.2314453125, "loss_xval": 3.421875, "num_input_tokens_seen": 1322952, "step": 20 }, { "epoch": 0.001965647961810268, "grad_norm": 28.042123794555664, "learning_rate": 5e-05, "loss": 3.384, "num_input_tokens_seen": 1389420, "step": 21 }, { "epoch": 0.001965647961810268, "loss": 3.3590993881225586, "loss_ce": 0.04464637115597725, "loss_iou": 1.109375, "loss_num": 0.220703125, "loss_xval": 3.3125, "num_input_tokens_seen": 1389420, "step": 21 }, { "epoch": 0.0020592502457059954, "grad_norm": 14.250997543334961, "learning_rate": 5e-05, "loss": 3.408, "num_input_tokens_seen": 1455232, "step": 22 }, { "epoch": 0.0020592502457059954, "loss": 3.383174180984497, "loss_ce": 0.05114297196269035, "loss_iou": 1.140625, "loss_num": 0.2119140625, "loss_xval": 3.328125, "num_input_tokens_seen": 1455232, "step": 22 }, { "epoch": 0.002152852529601722, "grad_norm": 11.689615249633789, "learning_rate": 5e-05, "loss": 3.3596, "num_input_tokens_seen": 1521676, "step": 23 }, { "epoch": 0.002152852529601722, "loss": 3.3575775623321533, "loss_ce": 0.043124400079250336, "loss_iou": 1.1484375, "loss_num": 0.205078125, "loss_xval": 3.3125, "num_input_tokens_seen": 1521676, "step": 23 }, { "epoch": 0.0022464548134974493, "grad_norm": 16.82274627685547, "learning_rate": 5e-05, "loss": 3.3032, "num_input_tokens_seen": 1587692, "step": 24 }, { "epoch": 0.0022464548134974493, "loss": 3.3633084297180176, "loss_ce": 0.023464728146791458, "loss_iou": 1.140625, "loss_num": 0.212890625, "loss_xval": 3.34375, "num_input_tokens_seen": 1587692, "step": 24 }, { "epoch": 0.0023400570973931764, "grad_norm": 60.981964111328125, "learning_rate": 5e-05, "loss": 3.1801, "num_input_tokens_seen": 1652968, "step": 25 }, { "epoch": 0.0023400570973931764, "loss": 3.167349100112915, "loss_ce": 0.0384429395198822, "loss_iou": 1.0625, "loss_num": 0.203125, "loss_xval": 3.125, "num_input_tokens_seen": 1652968, "step": 25 }, { "epoch": 0.0024336593812889036, "grad_norm": 45.581478118896484, "learning_rate": 5e-05, "loss": 3.6619, "num_input_tokens_seen": 1719392, "step": 26 }, { "epoch": 0.0024336593812889036, "loss": 3.7349956035614014, "loss_ce": 0.04163616895675659, "loss_iou": 1.203125, "loss_num": 0.259765625, "loss_xval": 3.6875, "num_input_tokens_seen": 1719392, "step": 26 }, { "epoch": 0.0025272616651846303, "grad_norm": 28.024511337280273, "learning_rate": 5e-05, "loss": 3.3445, "num_input_tokens_seen": 1785800, "step": 27 }, { "epoch": 0.0025272616651846303, "loss": 3.184875965118408, "loss_ce": 0.04132130742073059, "loss_iou": 0.9609375, "loss_num": 0.2451171875, "loss_xval": 3.140625, "num_input_tokens_seen": 1785800, "step": 27 }, { "epoch": 0.0026208639490803575, "grad_norm": 13.812726974487305, "learning_rate": 5e-05, "loss": 3.0071, "num_input_tokens_seen": 1851224, "step": 28 }, { "epoch": 0.0026208639490803575, "loss": 3.247553586959839, "loss_ce": 0.03075670264661312, "loss_iou": 1.0703125, "loss_num": 0.21484375, "loss_xval": 3.21875, "num_input_tokens_seen": 1851224, "step": 28 }, { "epoch": 0.0027144662329760846, "grad_norm": 56.087398529052734, "learning_rate": 5e-05, "loss": 3.1415, "num_input_tokens_seen": 1917632, "step": 29 }, { "epoch": 0.0027144662329760846, "loss": 3.1579842567443848, "loss_ce": 0.02517169900238514, "loss_iou": 1.0546875, "loss_num": 0.2060546875, "loss_xval": 3.125, "num_input_tokens_seen": 1917632, "step": 29 }, { "epoch": 0.0028080685168718118, "grad_norm": 44.8372688293457, "learning_rate": 5e-05, "loss": 3.4511, "num_input_tokens_seen": 1984736, "step": 30 }, { "epoch": 0.0028080685168718118, "loss": 3.401815414428711, "loss_ce": 0.02876855805516243, "loss_iou": 1.1328125, "loss_num": 0.2216796875, "loss_xval": 3.375, "num_input_tokens_seen": 1984736, "step": 30 }, { "epoch": 0.002901670800767539, "grad_norm": 33.350791931152344, "learning_rate": 5e-05, "loss": 3.3317, "num_input_tokens_seen": 2051392, "step": 31 }, { "epoch": 0.002901670800767539, "loss": 3.283022403717041, "loss_ce": 0.021303577348589897, "loss_iou": 1.125, "loss_num": 0.2021484375, "loss_xval": 3.265625, "num_input_tokens_seen": 2051392, "step": 31 }, { "epoch": 0.0029952730846632657, "grad_norm": 17.58970069885254, "learning_rate": 5e-05, "loss": 2.9831, "num_input_tokens_seen": 2117932, "step": 32 }, { "epoch": 0.0029952730846632657, "loss": 2.834751605987549, "loss_ce": 0.030063960701227188, "loss_iou": 0.9140625, "loss_num": 0.1943359375, "loss_xval": 2.8125, "num_input_tokens_seen": 2117932, "step": 32 }, { "epoch": 0.003088875368558993, "grad_norm": 24.616716384887695, "learning_rate": 5e-05, "loss": 3.0216, "num_input_tokens_seen": 2184880, "step": 33 }, { "epoch": 0.003088875368558993, "loss": 3.0619702339172363, "loss_ce": 0.01900148205459118, "loss_iou": 1.046875, "loss_num": 0.1884765625, "loss_xval": 3.046875, "num_input_tokens_seen": 2184880, "step": 33 }, { "epoch": 0.00318247765245472, "grad_norm": 86.11531066894531, "learning_rate": 5e-05, "loss": 3.2853, "num_input_tokens_seen": 2251368, "step": 34 }, { "epoch": 0.00318247765245472, "loss": 3.290015697479248, "loss_ce": 0.020484520122408867, "loss_iou": 1.1953125, "loss_num": 0.1767578125, "loss_xval": 3.265625, "num_input_tokens_seen": 2251368, "step": 34 }, { "epoch": 0.003276079936350447, "grad_norm": 28.53223419189453, "learning_rate": 5e-05, "loss": 3.7949, "num_input_tokens_seen": 2318268, "step": 35 }, { "epoch": 0.003276079936350447, "loss": 3.716911792755127, "loss_ce": 0.017692841589450836, "loss_iou": 1.1953125, "loss_num": 0.259765625, "loss_xval": 3.703125, "num_input_tokens_seen": 2318268, "step": 35 }, { "epoch": 0.003369682220246174, "grad_norm": 18.319623947143555, "learning_rate": 5e-05, "loss": 3.6345, "num_input_tokens_seen": 2385824, "step": 36 }, { "epoch": 0.003369682220246174, "loss": 3.726459503173828, "loss_ce": 0.019428269937634468, "loss_iou": 1.1796875, "loss_num": 0.26953125, "loss_xval": 3.703125, "num_input_tokens_seen": 2385824, "step": 36 }, { "epoch": 0.003463284504141901, "grad_norm": 16.41934585571289, "learning_rate": 5e-05, "loss": 3.4501, "num_input_tokens_seen": 2452780, "step": 37 }, { "epoch": 0.003463284504141901, "loss": 3.402754545211792, "loss_ce": 0.021895283833146095, "loss_iou": 1.1171875, "loss_num": 0.23046875, "loss_xval": 3.375, "num_input_tokens_seen": 2452780, "step": 37 }, { "epoch": 0.003556886788037628, "grad_norm": 14.25910472869873, "learning_rate": 5e-05, "loss": 3.3161, "num_input_tokens_seen": 2519612, "step": 38 }, { "epoch": 0.003556886788037628, "loss": 3.2734827995300293, "loss_ce": 0.015670407563447952, "loss_iou": 1.0859375, "loss_num": 0.2177734375, "loss_xval": 3.25, "num_input_tokens_seen": 2519612, "step": 38 }, { "epoch": 0.0036504890719333554, "grad_norm": 15.347772598266602, "learning_rate": 5e-05, "loss": 3.1594, "num_input_tokens_seen": 2585796, "step": 39 }, { "epoch": 0.0036504890719333554, "loss": 3.0453782081604004, "loss_ce": 0.01608125865459442, "loss_iou": 1.03125, "loss_num": 0.193359375, "loss_xval": 3.03125, "num_input_tokens_seen": 2585796, "step": 39 }, { "epoch": 0.003744091355829082, "grad_norm": 19.047100067138672, "learning_rate": 5e-05, "loss": 3.2023, "num_input_tokens_seen": 2652228, "step": 40 }, { "epoch": 0.003744091355829082, "loss": 3.250826835632324, "loss_ce": 0.022311009466648102, "loss_iou": 1.078125, "loss_num": 0.2158203125, "loss_xval": 3.234375, "num_input_tokens_seen": 2652228, "step": 40 }, { "epoch": 0.0038376936397248092, "grad_norm": 34.57749557495117, "learning_rate": 5e-05, "loss": 3.1466, "num_input_tokens_seen": 2718992, "step": 41 }, { "epoch": 0.0038376936397248092, "loss": 3.1191940307617188, "loss_ce": 0.013725237920880318, "loss_iou": 1.1328125, "loss_num": 0.1669921875, "loss_xval": 3.109375, "num_input_tokens_seen": 2718992, "step": 41 }, { "epoch": 0.003931295923620536, "grad_norm": 35.58552551269531, "learning_rate": 5e-05, "loss": 3.2487, "num_input_tokens_seen": 2786188, "step": 42 }, { "epoch": 0.003931295923620536, "loss": 3.334336757659912, "loss_ce": 0.01793038472533226, "loss_iou": 1.125, "loss_num": 0.2119140625, "loss_xval": 3.3125, "num_input_tokens_seen": 2786188, "step": 42 }, { "epoch": 0.0040248982075162636, "grad_norm": 21.095924377441406, "learning_rate": 5e-05, "loss": 2.8285, "num_input_tokens_seen": 2851692, "step": 43 }, { "epoch": 0.0040248982075162636, "loss": 3.0042130947113037, "loss_ce": 0.0198379959911108, "loss_iou": 1.078125, "loss_num": 0.1640625, "loss_xval": 2.984375, "num_input_tokens_seen": 2851692, "step": 43 }, { "epoch": 0.004118500491411991, "grad_norm": 35.09490966796875, "learning_rate": 5e-05, "loss": 3.01, "num_input_tokens_seen": 2916516, "step": 44 }, { "epoch": 0.004118500491411991, "loss": 2.9932496547698975, "loss_ce": 0.020593497902154922, "loss_iou": 1.046875, "loss_num": 0.17578125, "loss_xval": 2.96875, "num_input_tokens_seen": 2916516, "step": 44 }, { "epoch": 0.004212102775307718, "grad_norm": 29.586074829101562, "learning_rate": 5e-05, "loss": 3.2593, "num_input_tokens_seen": 2982764, "step": 45 }, { "epoch": 0.004212102775307718, "loss": 3.2288575172424316, "loss_ce": 0.01401369459927082, "loss_iou": 1.171875, "loss_num": 0.1728515625, "loss_xval": 3.21875, "num_input_tokens_seen": 2982764, "step": 45 }, { "epoch": 0.004305705059203444, "grad_norm": 20.80161476135254, "learning_rate": 5e-05, "loss": 2.7546, "num_input_tokens_seen": 3049572, "step": 46 }, { "epoch": 0.004305705059203444, "loss": 2.786905288696289, "loss_ce": 0.02225703001022339, "loss_iou": 0.92578125, "loss_num": 0.1826171875, "loss_xval": 2.765625, "num_input_tokens_seen": 3049572, "step": 46 }, { "epoch": 0.004399307343099171, "grad_norm": 48.15776443481445, "learning_rate": 5e-05, "loss": 2.8875, "num_input_tokens_seen": 3114936, "step": 47 }, { "epoch": 0.004399307343099171, "loss": 2.9008116722106934, "loss_ce": 0.02434687502682209, "loss_iou": 1.0234375, "loss_num": 0.166015625, "loss_xval": 2.875, "num_input_tokens_seen": 3114936, "step": 47 }, { "epoch": 0.0044929096269948985, "grad_norm": 27.882295608520508, "learning_rate": 5e-05, "loss": 3.3689, "num_input_tokens_seen": 3182172, "step": 48 }, { "epoch": 0.0044929096269948985, "loss": 3.354379653930664, "loss_ce": 0.022348247468471527, "loss_iou": 1.109375, "loss_num": 0.22265625, "loss_xval": 3.328125, "num_input_tokens_seen": 3182172, "step": 48 }, { "epoch": 0.004586511910890626, "grad_norm": 33.86907958984375, "learning_rate": 5e-05, "loss": 3.1029, "num_input_tokens_seen": 3248160, "step": 49 }, { "epoch": 0.004586511910890626, "loss": 3.132784605026245, "loss_ce": 0.025362728163599968, "loss_iou": 1.1015625, "loss_num": 0.1796875, "loss_xval": 3.109375, "num_input_tokens_seen": 3248160, "step": 49 }, { "epoch": 0.004680114194786353, "grad_norm": 20.456836700439453, "learning_rate": 5e-05, "loss": 2.7285, "num_input_tokens_seen": 3313712, "step": 50 }, { "epoch": 0.004680114194786353, "loss": 3.049952507019043, "loss_ce": 0.01284329779446125, "loss_iou": 1.0234375, "loss_num": 0.1962890625, "loss_xval": 3.03125, "num_input_tokens_seen": 3313712, "step": 50 }, { "epoch": 0.00477371647868208, "grad_norm": 35.79520797729492, "learning_rate": 5e-05, "loss": 2.8448, "num_input_tokens_seen": 3379916, "step": 51 }, { "epoch": 0.00477371647868208, "loss": 3.0145621299743652, "loss_ce": 0.014562256634235382, "loss_iou": 1.1015625, "loss_num": 0.1591796875, "loss_xval": 3.0, "num_input_tokens_seen": 3379916, "step": 51 }, { "epoch": 0.004867318762577807, "grad_norm": 15.867273330688477, "learning_rate": 5e-05, "loss": 2.815, "num_input_tokens_seen": 3445848, "step": 52 }, { "epoch": 0.004867318762577807, "loss": 2.692965507507324, "loss_ce": 0.009859994053840637, "loss_iou": 0.8984375, "loss_num": 0.177734375, "loss_xval": 2.6875, "num_input_tokens_seen": 3445848, "step": 52 }, { "epoch": 0.004960921046473534, "grad_norm": 16.5876407623291, "learning_rate": 5e-05, "loss": 2.8533, "num_input_tokens_seen": 3512724, "step": 53 }, { "epoch": 0.004960921046473534, "loss": 2.8537731170654297, "loss_ce": 0.019788645207881927, "loss_iou": 1.0546875, "loss_num": 0.14453125, "loss_xval": 2.828125, "num_input_tokens_seen": 3512724, "step": 53 }, { "epoch": 0.005054523330369261, "grad_norm": 38.60595703125, "learning_rate": 5e-05, "loss": 2.6611, "num_input_tokens_seen": 3578592, "step": 54 }, { "epoch": 0.005054523330369261, "loss": 2.607724189758301, "loss_ce": 0.008114909753203392, "loss_iou": 0.95703125, "loss_num": 0.1376953125, "loss_xval": 2.59375, "num_input_tokens_seen": 3578592, "step": 54 }, { "epoch": 0.005148125614264988, "grad_norm": 52.92810821533203, "learning_rate": 5e-05, "loss": 2.9911, "num_input_tokens_seen": 3644532, "step": 55 }, { "epoch": 0.005148125614264988, "loss": 2.998157501220703, "loss_ce": 0.009876357391476631, "loss_iou": 1.15625, "loss_num": 0.1357421875, "loss_xval": 2.984375, "num_input_tokens_seen": 3644532, "step": 55 }, { "epoch": 0.005241727898160715, "grad_norm": 29.84058952331543, "learning_rate": 5e-05, "loss": 2.8044, "num_input_tokens_seen": 3711204, "step": 56 }, { "epoch": 0.005241727898160715, "loss": 2.824930191040039, "loss_ce": 0.008523866534233093, "loss_iou": 1.046875, "loss_num": 0.1435546875, "loss_xval": 2.8125, "num_input_tokens_seen": 3711204, "step": 56 }, { "epoch": 0.005335330182056442, "grad_norm": 17.782625198364258, "learning_rate": 5e-05, "loss": 2.7295, "num_input_tokens_seen": 3777152, "step": 57 }, { "epoch": 0.005335330182056442, "loss": 2.6248414516448975, "loss_ce": 0.007653830572962761, "loss_iou": 1.0, "loss_num": 0.1240234375, "loss_xval": 2.625, "num_input_tokens_seen": 3777152, "step": 57 }, { "epoch": 0.005428932465952169, "grad_norm": 15.291202545166016, "learning_rate": 5e-05, "loss": 2.6158, "num_input_tokens_seen": 3841644, "step": 58 }, { "epoch": 0.005428932465952169, "loss": 2.5831832885742188, "loss_ce": 0.008964783512055874, "loss_iou": 0.99609375, "loss_num": 0.11669921875, "loss_xval": 2.578125, "num_input_tokens_seen": 3841644, "step": 58 }, { "epoch": 0.005522534749847896, "grad_norm": 62.47341537475586, "learning_rate": 5e-05, "loss": 2.5464, "num_input_tokens_seen": 3907636, "step": 59 }, { "epoch": 0.005522534749847896, "loss": 2.5465049743652344, "loss_ce": 0.005489123519510031, "loss_iou": 1.0390625, "loss_num": 0.09375, "loss_xval": 2.546875, "num_input_tokens_seen": 3907636, "step": 59 }, { "epoch": 0.0056161370337436236, "grad_norm": 19.503198623657227, "learning_rate": 5e-05, "loss": 2.9708, "num_input_tokens_seen": 3973224, "step": 60 }, { "epoch": 0.0056161370337436236, "loss": 2.744785785675049, "loss_ce": 0.015293493866920471, "loss_iou": 1.0, "loss_num": 0.1455078125, "loss_xval": 2.734375, "num_input_tokens_seen": 3973224, "step": 60 }, { "epoch": 0.005709739317639351, "grad_norm": 26.515504837036133, "learning_rate": 5e-05, "loss": 3.1155, "num_input_tokens_seen": 4040240, "step": 61 }, { "epoch": 0.005709739317639351, "loss": 3.1289381980895996, "loss_ce": 0.009797676466405392, "loss_iou": 1.1796875, "loss_num": 0.15234375, "loss_xval": 3.125, "num_input_tokens_seen": 4040240, "step": 61 }, { "epoch": 0.005803341601535078, "grad_norm": 16.495893478393555, "learning_rate": 5e-05, "loss": 2.9449, "num_input_tokens_seen": 4106088, "step": 62 }, { "epoch": 0.005803341601535078, "loss": 3.079580307006836, "loss_ce": 0.011220941320061684, "loss_iou": 1.109375, "loss_num": 0.171875, "loss_xval": 3.0625, "num_input_tokens_seen": 4106088, "step": 62 }, { "epoch": 0.005896943885430804, "grad_norm": 19.985366821289062, "learning_rate": 5e-05, "loss": 2.9913, "num_input_tokens_seen": 4172704, "step": 63 }, { "epoch": 0.005896943885430804, "loss": 2.873556613922119, "loss_ce": 0.010275539010763168, "loss_iou": 1.0625, "loss_num": 0.146484375, "loss_xval": 2.859375, "num_input_tokens_seen": 4172704, "step": 63 }, { "epoch": 0.005990546169326531, "grad_norm": 19.245983123779297, "learning_rate": 5e-05, "loss": 2.6785, "num_input_tokens_seen": 4239912, "step": 64 }, { "epoch": 0.005990546169326531, "loss": 2.6892271041870117, "loss_ce": 0.007586401421576738, "loss_iou": 1.015625, "loss_num": 0.130859375, "loss_xval": 2.6875, "num_input_tokens_seen": 4239912, "step": 64 }, { "epoch": 0.0060841484532222585, "grad_norm": 14.93860149383545, "learning_rate": 5e-05, "loss": 2.3807, "num_input_tokens_seen": 4305944, "step": 65 }, { "epoch": 0.0060841484532222585, "loss": 2.3514838218688965, "loss_ce": 0.006269071251153946, "loss_iou": 0.92578125, "loss_num": 0.09814453125, "loss_xval": 2.34375, "num_input_tokens_seen": 4305944, "step": 65 }, { "epoch": 0.006177750737117986, "grad_norm": 24.663524627685547, "learning_rate": 5e-05, "loss": 2.6203, "num_input_tokens_seen": 4372744, "step": 66 }, { "epoch": 0.006177750737117986, "loss": 2.5763978958129883, "loss_ce": 0.008038531057536602, "loss_iou": 1.0390625, "loss_num": 0.09716796875, "loss_xval": 2.5625, "num_input_tokens_seen": 4372744, "step": 66 }, { "epoch": 0.006271353021013713, "grad_norm": 38.61247253417969, "learning_rate": 5e-05, "loss": 2.8844, "num_input_tokens_seen": 4439252, "step": 67 }, { "epoch": 0.006271353021013713, "loss": 3.074906826019287, "loss_ce": 0.012406734749674797, "loss_iou": 1.234375, "loss_num": 0.1181640625, "loss_xval": 3.0625, "num_input_tokens_seen": 4439252, "step": 67 }, { "epoch": 0.00636495530490944, "grad_norm": 36.13186264038086, "learning_rate": 5e-05, "loss": 2.6886, "num_input_tokens_seen": 4505692, "step": 68 }, { "epoch": 0.00636495530490944, "loss": 2.7200677394866943, "loss_ce": 0.009130253456532955, "loss_iou": 1.0546875, "loss_num": 0.1201171875, "loss_xval": 2.71875, "num_input_tokens_seen": 4505692, "step": 68 }, { "epoch": 0.006458557588805167, "grad_norm": 17.341550827026367, "learning_rate": 5e-05, "loss": 2.6977, "num_input_tokens_seen": 4571836, "step": 69 }, { "epoch": 0.006458557588805167, "loss": 2.6547369956970215, "loss_ce": 0.015088449232280254, "loss_iou": 0.95703125, "loss_num": 0.1455078125, "loss_xval": 2.640625, "num_input_tokens_seen": 4571836, "step": 69 }, { "epoch": 0.006552159872700894, "grad_norm": 11.432044982910156, "learning_rate": 5e-05, "loss": 2.5908, "num_input_tokens_seen": 4638160, "step": 70 }, { "epoch": 0.006552159872700894, "loss": 2.603581428527832, "loss_ce": 0.003971965983510017, "loss_iou": 0.99609375, "loss_num": 0.1220703125, "loss_xval": 2.59375, "num_input_tokens_seen": 4638160, "step": 70 }, { "epoch": 0.006645762156596621, "grad_norm": 12.256217002868652, "learning_rate": 5e-05, "loss": 2.2877, "num_input_tokens_seen": 4703816, "step": 71 }, { "epoch": 0.006645762156596621, "loss": 2.4855265617370605, "loss_ce": 0.010917097330093384, "loss_iou": 0.9765625, "loss_num": 0.10498046875, "loss_xval": 2.46875, "num_input_tokens_seen": 4703816, "step": 71 }, { "epoch": 0.006739364440492348, "grad_norm": 86.1080551147461, "learning_rate": 5e-05, "loss": 2.6883, "num_input_tokens_seen": 4770108, "step": 72 }, { "epoch": 0.006739364440492348, "loss": 2.6870758533477783, "loss_ce": 0.009341409429907799, "loss_iou": 1.1171875, "loss_num": 0.08935546875, "loss_xval": 2.671875, "num_input_tokens_seen": 4770108, "step": 72 }, { "epoch": 0.006832966724388075, "grad_norm": 21.067468643188477, "learning_rate": 5e-05, "loss": 2.9645, "num_input_tokens_seen": 4836600, "step": 73 }, { "epoch": 0.006832966724388075, "loss": 2.9787683486938477, "loss_ce": 0.019783899188041687, "loss_iou": 1.0859375, "loss_num": 0.15625, "loss_xval": 2.953125, "num_input_tokens_seen": 4836600, "step": 73 }, { "epoch": 0.006926569008283802, "grad_norm": 25.35846710205078, "learning_rate": 5e-05, "loss": 2.9524, "num_input_tokens_seen": 4904020, "step": 74 }, { "epoch": 0.006926569008283802, "loss": 3.219608783721924, "loss_ce": 0.05554642528295517, "loss_iou": 1.1171875, "loss_num": 0.1875, "loss_xval": 3.15625, "num_input_tokens_seen": 4904020, "step": 74 }, { "epoch": 0.007020171292179529, "grad_norm": 12.379012107849121, "learning_rate": 5e-05, "loss": 2.727, "num_input_tokens_seen": 4969560, "step": 75 }, { "epoch": 0.007020171292179529, "loss": 2.524005889892578, "loss_ce": 0.008869229815900326, "loss_iou": 0.9140625, "loss_num": 0.13671875, "loss_xval": 2.515625, "num_input_tokens_seen": 4969560, "step": 75 }, { "epoch": 0.007113773576075256, "grad_norm": 20.057796478271484, "learning_rate": 5e-05, "loss": 2.896, "num_input_tokens_seen": 5037228, "step": 76 }, { "epoch": 0.007113773576075256, "loss": 2.935084819793701, "loss_ce": 0.007350385654717684, "loss_iou": 1.09375, "loss_num": 0.1494140625, "loss_xval": 2.921875, "num_input_tokens_seen": 5037228, "step": 76 }, { "epoch": 0.0072073758599709836, "grad_norm": 13.165987014770508, "learning_rate": 5e-05, "loss": 2.9167, "num_input_tokens_seen": 5103536, "step": 77 }, { "epoch": 0.0072073758599709836, "loss": 3.1115217208862305, "loss_ce": 0.00605290150269866, "loss_iou": 1.125, "loss_num": 0.1708984375, "loss_xval": 3.109375, "num_input_tokens_seen": 5103536, "step": 77 }, { "epoch": 0.007300978143866711, "grad_norm": 15.973751068115234, "learning_rate": 5e-05, "loss": 2.8458, "num_input_tokens_seen": 5170608, "step": 78 }, { "epoch": 0.007300978143866711, "loss": 2.7468762397766113, "loss_ce": 0.006641830783337355, "loss_iou": 1.0234375, "loss_num": 0.13671875, "loss_xval": 2.734375, "num_input_tokens_seen": 5170608, "step": 78 }, { "epoch": 0.007394580427762437, "grad_norm": 15.901095390319824, "learning_rate": 5e-05, "loss": 2.8575, "num_input_tokens_seen": 5235860, "step": 79 }, { "epoch": 0.007394580427762437, "loss": 2.915633201599121, "loss_ce": 0.011336389929056168, "loss_iou": 1.109375, "loss_num": 0.13671875, "loss_xval": 2.90625, "num_input_tokens_seen": 5235860, "step": 79 }, { "epoch": 0.007488182711658164, "grad_norm": 12.038989067077637, "learning_rate": 5e-05, "loss": 2.8296, "num_input_tokens_seen": 5302656, "step": 80 }, { "epoch": 0.007488182711658164, "loss": 2.6915206909179688, "loss_ce": 0.013786448165774345, "loss_iou": 0.96484375, "loss_num": 0.1484375, "loss_xval": 2.671875, "num_input_tokens_seen": 5302656, "step": 80 }, { "epoch": 0.007581784995553891, "grad_norm": 11.164108276367188, "learning_rate": 5e-05, "loss": 2.3217, "num_input_tokens_seen": 5368124, "step": 81 }, { "epoch": 0.007581784995553891, "loss": 2.479562997817993, "loss_ce": 0.003000450786203146, "loss_iou": 0.9765625, "loss_num": 0.1044921875, "loss_xval": 2.46875, "num_input_tokens_seen": 5368124, "step": 81 }, { "epoch": 0.0076753872794496185, "grad_norm": 9.867695808410645, "learning_rate": 5e-05, "loss": 2.4559, "num_input_tokens_seen": 5434644, "step": 82 }, { "epoch": 0.0076753872794496185, "loss": 2.4299824237823486, "loss_ce": 0.00505574606359005, "loss_iou": 0.96875, "loss_num": 0.09814453125, "loss_xval": 2.421875, "num_input_tokens_seen": 5434644, "step": 82 }, { "epoch": 0.007768989563345346, "grad_norm": 20.0883846282959, "learning_rate": 5e-05, "loss": 2.5437, "num_input_tokens_seen": 5500948, "step": 83 }, { "epoch": 0.007768989563345346, "loss": 2.514523983001709, "loss_ce": 0.010617696680128574, "loss_iou": 1.0234375, "loss_num": 0.0908203125, "loss_xval": 2.5, "num_input_tokens_seen": 5500948, "step": 83 }, { "epoch": 0.007862591847241073, "grad_norm": 27.041717529296875, "learning_rate": 5e-05, "loss": 3.0171, "num_input_tokens_seen": 5566960, "step": 84 }, { "epoch": 0.007862591847241073, "loss": 3.0104644298553467, "loss_ce": 0.020230058580636978, "loss_iou": 1.21875, "loss_num": 0.11083984375, "loss_xval": 2.984375, "num_input_tokens_seen": 5566960, "step": 84 }, { "epoch": 0.007956194131136799, "grad_norm": 35.098602294921875, "learning_rate": 5e-05, "loss": 2.7361, "num_input_tokens_seen": 5633568, "step": 85 }, { "epoch": 0.007956194131136799, "loss": 2.7287986278533936, "loss_ce": 0.004189261235296726, "loss_iou": 1.140625, "loss_num": 0.08984375, "loss_xval": 2.71875, "num_input_tokens_seen": 5633568, "step": 85 }, { "epoch": 0.008049796415032527, "grad_norm": 14.407003402709961, "learning_rate": 5e-05, "loss": 2.904, "num_input_tokens_seen": 5699680, "step": 86 }, { "epoch": 0.008049796415032527, "loss": 2.864617109298706, "loss_ce": 0.010124976746737957, "loss_iou": 0.98046875, "loss_num": 0.1796875, "loss_xval": 2.859375, "num_input_tokens_seen": 5699680, "step": 86 }, { "epoch": 0.008143398698928253, "grad_norm": 10.01558780670166, "learning_rate": 5e-05, "loss": 2.5973, "num_input_tokens_seen": 5765836, "step": 87 }, { "epoch": 0.008143398698928253, "loss": 2.4706103801727295, "loss_ce": 0.0038135177455842495, "loss_iou": 0.890625, "loss_num": 0.13671875, "loss_xval": 2.46875, "num_input_tokens_seen": 5765836, "step": 87 }, { "epoch": 0.008237000982823981, "grad_norm": 31.901260375976562, "learning_rate": 5e-05, "loss": 2.6865, "num_input_tokens_seen": 5832224, "step": 88 }, { "epoch": 0.008237000982823981, "loss": 2.5909039974212646, "loss_ce": 0.0030133752152323723, "loss_iou": 1.0234375, "loss_num": 0.10791015625, "loss_xval": 2.59375, "num_input_tokens_seen": 5832224, "step": 88 }, { "epoch": 0.008330603266719708, "grad_norm": 29.9424991607666, "learning_rate": 5e-05, "loss": 2.29, "num_input_tokens_seen": 5899164, "step": 89 }, { "epoch": 0.008330603266719708, "loss": 2.2349627017974854, "loss_ce": 0.006935363169759512, "loss_iou": 0.87890625, "loss_num": 0.09423828125, "loss_xval": 2.234375, "num_input_tokens_seen": 5899164, "step": 89 }, { "epoch": 0.008424205550615436, "grad_norm": 30.49257469177246, "learning_rate": 5e-05, "loss": 2.8297, "num_input_tokens_seen": 5966128, "step": 90 }, { "epoch": 0.008424205550615436, "loss": 2.859210252761841, "loss_ce": 0.009600731544196606, "loss_iou": 1.09375, "loss_num": 0.1328125, "loss_xval": 2.84375, "num_input_tokens_seen": 5966128, "step": 90 }, { "epoch": 0.008517807834511162, "grad_norm": 31.240354537963867, "learning_rate": 5e-05, "loss": 2.7538, "num_input_tokens_seen": 6032860, "step": 91 }, { "epoch": 0.008517807834511162, "loss": 2.7331926822662354, "loss_ce": 0.012489484623074532, "loss_iou": 1.078125, "loss_num": 0.11328125, "loss_xval": 2.71875, "num_input_tokens_seen": 6032860, "step": 91 }, { "epoch": 0.008611410118406888, "grad_norm": 27.121564865112305, "learning_rate": 5e-05, "loss": 2.6359, "num_input_tokens_seen": 6098444, "step": 92 }, { "epoch": 0.008611410118406888, "loss": 2.57448148727417, "loss_ce": 0.006122007966041565, "loss_iou": 1.0078125, "loss_num": 0.1103515625, "loss_xval": 2.5625, "num_input_tokens_seen": 6098444, "step": 92 }, { "epoch": 0.008705012402302616, "grad_norm": 13.42473316192627, "learning_rate": 5e-05, "loss": 2.6054, "num_input_tokens_seen": 6164956, "step": 93 }, { "epoch": 0.008705012402302616, "loss": 2.515028715133667, "loss_ce": 0.005263021681457758, "loss_iou": 1.0234375, "loss_num": 0.09326171875, "loss_xval": 2.515625, "num_input_tokens_seen": 6164956, "step": 93 }, { "epoch": 0.008798614686198343, "grad_norm": 19.04734230041504, "learning_rate": 5e-05, "loss": 2.5692, "num_input_tokens_seen": 6231608, "step": 94 }, { "epoch": 0.008798614686198343, "loss": 2.5756468772888184, "loss_ce": 0.005334290210157633, "loss_iou": 1.03125, "loss_num": 0.10302734375, "loss_xval": 2.5625, "num_input_tokens_seen": 6231608, "step": 94 }, { "epoch": 0.00889221697009407, "grad_norm": 20.92302894592285, "learning_rate": 5e-05, "loss": 2.4289, "num_input_tokens_seen": 6297636, "step": 95 }, { "epoch": 0.00889221697009407, "loss": 2.3900952339172363, "loss_ce": 0.007282722741365433, "loss_iou": 0.9765625, "loss_num": 0.08642578125, "loss_xval": 2.375, "num_input_tokens_seen": 6297636, "step": 95 }, { "epoch": 0.008985819253989797, "grad_norm": 20.4478759765625, "learning_rate": 5e-05, "loss": 2.422, "num_input_tokens_seen": 6363796, "step": 96 }, { "epoch": 0.008985819253989797, "loss": 2.25628662109375, "loss_ce": 0.010192908346652985, "loss_iou": 0.8828125, "loss_num": 0.09716796875, "loss_xval": 2.25, "num_input_tokens_seen": 6363796, "step": 96 }, { "epoch": 0.009079421537885525, "grad_norm": 14.334249496459961, "learning_rate": 5e-05, "loss": 2.3518, "num_input_tokens_seen": 6429864, "step": 97 }, { "epoch": 0.009079421537885525, "loss": 2.1521637439727783, "loss_ce": 0.0049469019286334515, "loss_iou": 0.88671875, "loss_num": 0.0751953125, "loss_xval": 2.140625, "num_input_tokens_seen": 6429864, "step": 97 }, { "epoch": 0.009173023821781251, "grad_norm": 31.936813354492188, "learning_rate": 5e-05, "loss": 2.4487, "num_input_tokens_seen": 6496220, "step": 98 }, { "epoch": 0.009173023821781251, "loss": 2.390045642852783, "loss_ce": 0.007233068346977234, "loss_iou": 1.015625, "loss_num": 0.0693359375, "loss_xval": 2.375, "num_input_tokens_seen": 6496220, "step": 98 }, { "epoch": 0.00926662610567698, "grad_norm": 17.394086837768555, "learning_rate": 5e-05, "loss": 2.8472, "num_input_tokens_seen": 6562808, "step": 99 }, { "epoch": 0.00926662610567698, "loss": 2.81095027923584, "loss_ce": 0.008215953595936298, "loss_iou": 1.09375, "loss_num": 0.125, "loss_xval": 2.796875, "num_input_tokens_seen": 6562808, "step": 99 }, { "epoch": 0.009360228389572706, "grad_norm": 10.863395690917969, "learning_rate": 5e-05, "loss": 2.1969, "num_input_tokens_seen": 6627500, "step": 100 }, { "epoch": 0.009360228389572706, "loss": 2.2767670154571533, "loss_ce": 0.004672226030379534, "loss_iou": 0.8984375, "loss_num": 0.095703125, "loss_xval": 2.265625, "num_input_tokens_seen": 6627500, "step": 100 }, { "epoch": 0.009453830673468432, "grad_norm": 46.18409729003906, "learning_rate": 5e-05, "loss": 2.5992, "num_input_tokens_seen": 6693964, "step": 101 }, { "epoch": 0.009453830673468432, "loss": 2.6916022300720215, "loss_ce": 0.008008443750441074, "loss_iou": 1.0546875, "loss_num": 0.1142578125, "loss_xval": 2.6875, "num_input_tokens_seen": 6693964, "step": 101 }, { "epoch": 0.00954743295736416, "grad_norm": 17.2650146484375, "learning_rate": 5e-05, "loss": 2.5906, "num_input_tokens_seen": 6761124, "step": 102 }, { "epoch": 0.00954743295736416, "loss": 2.5785131454467773, "loss_ce": 0.004294442944228649, "loss_iou": 1.09375, "loss_num": 0.07861328125, "loss_xval": 2.578125, "num_input_tokens_seen": 6761124, "step": 102 }, { "epoch": 0.009641035241259886, "grad_norm": 11.22831916809082, "learning_rate": 5e-05, "loss": 2.3741, "num_input_tokens_seen": 6826968, "step": 103 }, { "epoch": 0.009641035241259886, "loss": 2.296377182006836, "loss_ce": 0.005361703224480152, "loss_iou": 0.9296875, "loss_num": 0.0869140625, "loss_xval": 2.296875, "num_input_tokens_seen": 6826968, "step": 103 }, { "epoch": 0.009734637525155614, "grad_norm": 35.63290023803711, "learning_rate": 5e-05, "loss": 2.5937, "num_input_tokens_seen": 6893440, "step": 104 }, { "epoch": 0.009734637525155614, "loss": 2.4925217628479004, "loss_ce": 0.005217037629336119, "loss_iou": 1.0703125, "loss_num": 0.06884765625, "loss_xval": 2.484375, "num_input_tokens_seen": 6893440, "step": 104 }, { "epoch": 0.00982823980905134, "grad_norm": 18.57401466369629, "learning_rate": 5e-05, "loss": 3.1896, "num_input_tokens_seen": 6960672, "step": 105 }, { "epoch": 0.00982823980905134, "loss": 3.2786853313446045, "loss_ce": 0.007201008033007383, "loss_iou": 1.2265625, "loss_num": 0.1640625, "loss_xval": 3.265625, "num_input_tokens_seen": 6960672, "step": 105 }, { "epoch": 0.009921842092947069, "grad_norm": 14.300521850585938, "learning_rate": 5e-05, "loss": 2.83, "num_input_tokens_seen": 7026292, "step": 106 }, { "epoch": 0.009921842092947069, "loss": 2.820474147796631, "loss_ce": 0.004067921079695225, "loss_iou": 1.0390625, "loss_num": 0.146484375, "loss_xval": 2.8125, "num_input_tokens_seen": 7026292, "step": 106 }, { "epoch": 0.010015444376842795, "grad_norm": 7.471635818481445, "learning_rate": 5e-05, "loss": 2.6847, "num_input_tokens_seen": 7092416, "step": 107 }, { "epoch": 0.010015444376842795, "loss": 2.739936113357544, "loss_ce": 0.003608077298849821, "loss_iou": 1.0390625, "loss_num": 0.1328125, "loss_xval": 2.734375, "num_input_tokens_seen": 7092416, "step": 107 }, { "epoch": 0.010109046660738521, "grad_norm": 18.97633934020996, "learning_rate": 5e-05, "loss": 2.5772, "num_input_tokens_seen": 7158496, "step": 108 }, { "epoch": 0.010109046660738521, "loss": 2.6946582794189453, "loss_ce": 0.00520497839897871, "loss_iou": 1.0546875, "loss_num": 0.11474609375, "loss_xval": 2.6875, "num_input_tokens_seen": 7158496, "step": 108 }, { "epoch": 0.01020264894463425, "grad_norm": 7.846622467041016, "learning_rate": 5e-05, "loss": 2.2517, "num_input_tokens_seen": 7224048, "step": 109 }, { "epoch": 0.01020264894463425, "loss": 2.1874606609344482, "loss_ce": 0.003378791268914938, "loss_iou": 0.859375, "loss_num": 0.09228515625, "loss_xval": 2.1875, "num_input_tokens_seen": 7224048, "step": 109 }, { "epoch": 0.010296251228529976, "grad_norm": 6.304441928863525, "learning_rate": 5e-05, "loss": 2.4298, "num_input_tokens_seen": 7289960, "step": 110 }, { "epoch": 0.010296251228529976, "loss": 2.305495262145996, "loss_ce": 0.011550042778253555, "loss_iou": 0.85546875, "loss_num": 0.1162109375, "loss_xval": 2.296875, "num_input_tokens_seen": 7289960, "step": 110 }, { "epoch": 0.010389853512425704, "grad_norm": 7.191917419433594, "learning_rate": 5e-05, "loss": 2.066, "num_input_tokens_seen": 7355564, "step": 111 }, { "epoch": 0.010389853512425704, "loss": 1.8617236614227295, "loss_ce": 0.01125979796051979, "loss_iou": 0.734375, "loss_num": 0.0751953125, "loss_xval": 1.8515625, "num_input_tokens_seen": 7355564, "step": 111 }, { "epoch": 0.01048345579632143, "grad_norm": 20.665714263916016, "learning_rate": 5e-05, "loss": 2.3708, "num_input_tokens_seen": 7421660, "step": 112 }, { "epoch": 0.01048345579632143, "loss": 2.4235143661499023, "loss_ce": 0.014334505423903465, "loss_iou": 0.98828125, "loss_num": 0.0859375, "loss_xval": 2.40625, "num_input_tokens_seen": 7421660, "step": 112 }, { "epoch": 0.010577058080217158, "grad_norm": 15.690848350524902, "learning_rate": 5e-05, "loss": 2.3252, "num_input_tokens_seen": 7488084, "step": 113 }, { "epoch": 0.010577058080217158, "loss": 2.275463581085205, "loss_ce": 0.0030024414882063866, "loss_iou": 0.9609375, "loss_num": 0.0703125, "loss_xval": 2.265625, "num_input_tokens_seen": 7488084, "step": 113 }, { "epoch": 0.010670660364112884, "grad_norm": 42.13125991821289, "learning_rate": 5e-05, "loss": 2.6392, "num_input_tokens_seen": 7555320, "step": 114 }, { "epoch": 0.010670660364112884, "loss": 2.6008431911468506, "loss_ce": 0.007093184161931276, "loss_iou": 1.140625, "loss_num": 0.06396484375, "loss_xval": 2.59375, "num_input_tokens_seen": 7555320, "step": 114 }, { "epoch": 0.010764262648008612, "grad_norm": 11.553654670715332, "learning_rate": 5e-05, "loss": 2.9421, "num_input_tokens_seen": 7622696, "step": 115 }, { "epoch": 0.010764262648008612, "loss": 3.0041770935058594, "loss_ce": 0.006130222696810961, "loss_iou": 1.15625, "loss_num": 0.13671875, "loss_xval": 3.0, "num_input_tokens_seen": 7622696, "step": 115 }, { "epoch": 0.010857864931904338, "grad_norm": 30.39330291748047, "learning_rate": 5e-05, "loss": 2.9718, "num_input_tokens_seen": 7689676, "step": 116 }, { "epoch": 0.010857864931904338, "loss": 3.096498966217041, "loss_ce": 0.008608417585492134, "loss_iou": 1.2109375, "loss_num": 0.1328125, "loss_xval": 3.09375, "num_input_tokens_seen": 7689676, "step": 116 }, { "epoch": 0.010951467215800065, "grad_norm": 9.5101957321167, "learning_rate": 5e-05, "loss": 2.5344, "num_input_tokens_seen": 7755928, "step": 117 }, { "epoch": 0.010951467215800065, "loss": 2.370011806488037, "loss_ce": 0.0052659399807453156, "loss_iou": 0.92578125, "loss_num": 0.10302734375, "loss_xval": 2.359375, "num_input_tokens_seen": 7755928, "step": 117 }, { "epoch": 0.011045069499695793, "grad_norm": 10.531336784362793, "learning_rate": 5e-05, "loss": 2.4916, "num_input_tokens_seen": 7821848, "step": 118 }, { "epoch": 0.011045069499695793, "loss": 2.4239916801452637, "loss_ce": 0.0074876840226352215, "loss_iou": 0.96875, "loss_num": 0.09619140625, "loss_xval": 2.421875, "num_input_tokens_seen": 7821848, "step": 118 }, { "epoch": 0.011138671783591519, "grad_norm": 12.39312744140625, "learning_rate": 5e-05, "loss": 2.2888, "num_input_tokens_seen": 7887764, "step": 119 }, { "epoch": 0.011138671783591519, "loss": 2.2058448791503906, "loss_ce": 0.004673070274293423, "loss_iou": 0.890625, "loss_num": 0.083984375, "loss_xval": 2.203125, "num_input_tokens_seen": 7887764, "step": 119 }, { "epoch": 0.011232274067487247, "grad_norm": 15.486209869384766, "learning_rate": 5e-05, "loss": 2.3216, "num_input_tokens_seen": 7953732, "step": 120 }, { "epoch": 0.011232274067487247, "loss": 2.1480751037597656, "loss_ce": 0.009403292089700699, "loss_iou": 0.88671875, "loss_num": 0.07275390625, "loss_xval": 2.140625, "num_input_tokens_seen": 7953732, "step": 120 }, { "epoch": 0.011325876351382973, "grad_norm": 12.126895904541016, "learning_rate": 5e-05, "loss": 2.3391, "num_input_tokens_seen": 8019224, "step": 121 }, { "epoch": 0.011325876351382973, "loss": 2.291839838027954, "loss_ce": 0.006683724001049995, "loss_iou": 0.97265625, "loss_num": 0.068359375, "loss_xval": 2.28125, "num_input_tokens_seen": 8019224, "step": 121 }, { "epoch": 0.011419478635278701, "grad_norm": 40.47333908081055, "learning_rate": 5e-05, "loss": 2.738, "num_input_tokens_seen": 8086356, "step": 122 }, { "epoch": 0.011419478635278701, "loss": 2.6039280891418457, "loss_ce": 0.0033420585095882416, "loss_iou": 1.171875, "loss_num": 0.052001953125, "loss_xval": 2.59375, "num_input_tokens_seen": 8086356, "step": 122 }, { "epoch": 0.011513080919174428, "grad_norm": 15.583828926086426, "learning_rate": 5e-05, "loss": 3.0616, "num_input_tokens_seen": 8153056, "step": 123 }, { "epoch": 0.011513080919174428, "loss": 2.9486942291259766, "loss_ce": 0.01314749475568533, "loss_iou": 1.0234375, "loss_num": 0.177734375, "loss_xval": 2.9375, "num_input_tokens_seen": 8153056, "step": 123 }, { "epoch": 0.011606683203070156, "grad_norm": 10.330870628356934, "learning_rate": 5e-05, "loss": 2.8413, "num_input_tokens_seen": 8218476, "step": 124 }, { "epoch": 0.011606683203070156, "loss": 2.6871485710144043, "loss_ce": 0.013320403173565865, "loss_iou": 0.9765625, "loss_num": 0.14453125, "loss_xval": 2.671875, "num_input_tokens_seen": 8218476, "step": 124 }, { "epoch": 0.011700285486965882, "grad_norm": 9.994678497314453, "learning_rate": 5e-05, "loss": 2.7807, "num_input_tokens_seen": 8285120, "step": 125 }, { "epoch": 0.011700285486965882, "loss": 2.8563404083251953, "loss_ce": 0.006731036584824324, "loss_iou": 1.0546875, "loss_num": 0.150390625, "loss_xval": 2.84375, "num_input_tokens_seen": 8285120, "step": 125 }, { "epoch": 0.011793887770861608, "grad_norm": 7.227442264556885, "learning_rate": 5e-05, "loss": 2.472, "num_input_tokens_seen": 8351572, "step": 126 }, { "epoch": 0.011793887770861608, "loss": 2.2409982681274414, "loss_ce": 0.0075998734682798386, "loss_iou": 0.83984375, "loss_num": 0.1103515625, "loss_xval": 2.234375, "num_input_tokens_seen": 8351572, "step": 126 }, { "epoch": 0.011887490054757336, "grad_norm": 7.387115478515625, "learning_rate": 5e-05, "loss": 2.6037, "num_input_tokens_seen": 8419100, "step": 127 }, { "epoch": 0.011887490054757336, "loss": 2.6546459197998047, "loss_ce": 0.004255238920450211, "loss_iou": 1.0, "loss_num": 0.130859375, "loss_xval": 2.65625, "num_input_tokens_seen": 8419100, "step": 127 }, { "epoch": 0.011981092338653063, "grad_norm": 6.874162197113037, "learning_rate": 5e-05, "loss": 2.4978, "num_input_tokens_seen": 8486096, "step": 128 }, { "epoch": 0.011981092338653063, "loss": 2.5121519565582275, "loss_ce": 0.008245638571679592, "loss_iou": 0.9921875, "loss_num": 0.103515625, "loss_xval": 2.5, "num_input_tokens_seen": 8486096, "step": 128 }, { "epoch": 0.01207469462254879, "grad_norm": 7.546186923980713, "learning_rate": 5e-05, "loss": 2.4236, "num_input_tokens_seen": 8552400, "step": 129 }, { "epoch": 0.01207469462254879, "loss": 2.4138965606689453, "loss_ce": 0.0017871541203930974, "loss_iou": 0.98046875, "loss_num": 0.0908203125, "loss_xval": 2.40625, "num_input_tokens_seen": 8552400, "step": 129 }, { "epoch": 0.012168296906444517, "grad_norm": 13.933201789855957, "learning_rate": 5e-05, "loss": 2.4146, "num_input_tokens_seen": 8617660, "step": 130 }, { "epoch": 0.012168296906444517, "loss": 2.359180450439453, "loss_ce": 0.006641239859163761, "loss_iou": 0.9765625, "loss_num": 0.080078125, "loss_xval": 2.359375, "num_input_tokens_seen": 8617660, "step": 130 }, { "epoch": 0.012261899190340245, "grad_norm": 13.758872985839844, "learning_rate": 5e-05, "loss": 2.2476, "num_input_tokens_seen": 8684492, "step": 131 }, { "epoch": 0.012261899190340245, "loss": 2.2961173057556152, "loss_ce": 0.006566723342984915, "loss_iou": 0.921875, "loss_num": 0.08935546875, "loss_xval": 2.296875, "num_input_tokens_seen": 8684492, "step": 131 }, { "epoch": 0.012355501474235971, "grad_norm": 34.28789138793945, "learning_rate": 5e-05, "loss": 2.4416, "num_input_tokens_seen": 8750736, "step": 132 }, { "epoch": 0.012355501474235971, "loss": 2.599449634552002, "loss_ce": 0.007652592845261097, "loss_iou": 1.0859375, "loss_num": 0.083984375, "loss_xval": 2.59375, "num_input_tokens_seen": 8750736, "step": 132 }, { "epoch": 0.012449103758131698, "grad_norm": 16.799196243286133, "learning_rate": 5e-05, "loss": 2.6894, "num_input_tokens_seen": 8817252, "step": 133 }, { "epoch": 0.012449103758131698, "loss": 2.734215259552002, "loss_ce": 0.0037463903427124023, "loss_iou": 1.09375, "loss_num": 0.10888671875, "loss_xval": 2.734375, "num_input_tokens_seen": 8817252, "step": 133 }, { "epoch": 0.012542706042027426, "grad_norm": 17.619638442993164, "learning_rate": 5e-05, "loss": 2.0357, "num_input_tokens_seen": 8883412, "step": 134 }, { "epoch": 0.012542706042027426, "loss": 1.9238464832305908, "loss_ce": 0.002459696726873517, "loss_iou": 0.8046875, "loss_num": 0.0625, "loss_xval": 1.921875, "num_input_tokens_seen": 8883412, "step": 134 }, { "epoch": 0.012636308325923152, "grad_norm": 20.350069046020508, "learning_rate": 5e-05, "loss": 2.3698, "num_input_tokens_seen": 8950140, "step": 135 }, { "epoch": 0.012636308325923152, "loss": 2.399346351623535, "loss_ce": 0.005791927687823772, "loss_iou": 1.0234375, "loss_num": 0.07080078125, "loss_xval": 2.390625, "num_input_tokens_seen": 8950140, "step": 135 }, { "epoch": 0.01272991060981888, "grad_norm": 14.986017227172852, "learning_rate": 5e-05, "loss": 2.6098, "num_input_tokens_seen": 9017656, "step": 136 }, { "epoch": 0.01272991060981888, "loss": 2.6524224281311035, "loss_ce": 0.005938143003731966, "loss_iou": 1.078125, "loss_num": 0.099609375, "loss_xval": 2.640625, "num_input_tokens_seen": 9017656, "step": 136 }, { "epoch": 0.012823512893714606, "grad_norm": 17.91053581237793, "learning_rate": 5e-05, "loss": 2.3665, "num_input_tokens_seen": 9083428, "step": 137 }, { "epoch": 0.012823512893714606, "loss": 2.3613765239715576, "loss_ce": 0.00395461730659008, "loss_iou": 0.98828125, "loss_num": 0.0771484375, "loss_xval": 2.359375, "num_input_tokens_seen": 9083428, "step": 137 }, { "epoch": 0.012917115177610334, "grad_norm": 17.348831176757812, "learning_rate": 5e-05, "loss": 2.3659, "num_input_tokens_seen": 9150248, "step": 138 }, { "epoch": 0.012917115177610334, "loss": 2.268336772918701, "loss_ce": 0.008571156300604343, "loss_iou": 0.96875, "loss_num": 0.06494140625, "loss_xval": 2.265625, "num_input_tokens_seen": 9150248, "step": 138 }, { "epoch": 0.01301071746150606, "grad_norm": 16.304168701171875, "learning_rate": 5e-05, "loss": 2.7852, "num_input_tokens_seen": 9216832, "step": 139 }, { "epoch": 0.01301071746150606, "loss": 2.989776849746704, "loss_ce": 0.005401725880801678, "loss_iou": 1.234375, "loss_num": 0.1025390625, "loss_xval": 2.984375, "num_input_tokens_seen": 9216832, "step": 139 }, { "epoch": 0.013104319745401789, "grad_norm": 6.476009368896484, "learning_rate": 5e-05, "loss": 2.5077, "num_input_tokens_seen": 9282932, "step": 140 }, { "epoch": 0.013104319745401789, "loss": 2.4586074352264404, "loss_ce": 0.007435602601617575, "loss_iou": 0.9921875, "loss_num": 0.09326171875, "loss_xval": 2.453125, "num_input_tokens_seen": 9282932, "step": 140 }, { "epoch": 0.013197922029297515, "grad_norm": 20.276348114013672, "learning_rate": 5e-05, "loss": 2.2915, "num_input_tokens_seen": 9349952, "step": 141 }, { "epoch": 0.013197922029297515, "loss": 2.3907690048217773, "loss_ce": 0.006003286689519882, "loss_iou": 0.984375, "loss_num": 0.08349609375, "loss_xval": 2.390625, "num_input_tokens_seen": 9349952, "step": 141 }, { "epoch": 0.013291524313193241, "grad_norm": 19.617143630981445, "learning_rate": 5e-05, "loss": 2.3906, "num_input_tokens_seen": 9415312, "step": 142 }, { "epoch": 0.013291524313193241, "loss": 2.362905740737915, "loss_ce": 0.0054837800562381744, "loss_iou": 0.98046875, "loss_num": 0.0791015625, "loss_xval": 2.359375, "num_input_tokens_seen": 9415312, "step": 142 }, { "epoch": 0.01338512659708897, "grad_norm": 27.583656311035156, "learning_rate": 5e-05, "loss": 2.3203, "num_input_tokens_seen": 9482276, "step": 143 }, { "epoch": 0.01338512659708897, "loss": 2.3812010288238525, "loss_ce": 0.006201109383255243, "loss_iou": 1.0234375, "loss_num": 0.06591796875, "loss_xval": 2.375, "num_input_tokens_seen": 9482276, "step": 143 }, { "epoch": 0.013478728880984696, "grad_norm": 11.42666244506836, "learning_rate": 5e-05, "loss": 2.9603, "num_input_tokens_seen": 9548436, "step": 144 }, { "epoch": 0.013478728880984696, "loss": 3.061565637588501, "loss_ce": 0.010784330777823925, "loss_iou": 1.1875, "loss_num": 0.1337890625, "loss_xval": 3.046875, "num_input_tokens_seen": 9548436, "step": 144 }, { "epoch": 0.013572331164880424, "grad_norm": 13.747142791748047, "learning_rate": 5e-05, "loss": 2.7446, "num_input_tokens_seen": 9615716, "step": 145 }, { "epoch": 0.013572331164880424, "loss": 2.8178625106811523, "loss_ce": 0.007315758150070906, "loss_iou": 1.109375, "loss_num": 0.11962890625, "loss_xval": 2.8125, "num_input_tokens_seen": 9615716, "step": 145 }, { "epoch": 0.01366593344877615, "grad_norm": 8.339811325073242, "learning_rate": 5e-05, "loss": 2.1399, "num_input_tokens_seen": 9681096, "step": 146 }, { "epoch": 0.01366593344877615, "loss": 2.1111319065093994, "loss_ce": 0.008592926897108555, "loss_iou": 0.859375, "loss_num": 0.076171875, "loss_xval": 2.109375, "num_input_tokens_seen": 9681096, "step": 146 }, { "epoch": 0.013759535732671878, "grad_norm": 14.685997009277344, "learning_rate": 5e-05, "loss": 2.4779, "num_input_tokens_seen": 9747872, "step": 147 }, { "epoch": 0.013759535732671878, "loss": 2.411579132080078, "loss_ce": 0.006305626593530178, "loss_iou": 0.98828125, "loss_num": 0.0869140625, "loss_xval": 2.40625, "num_input_tokens_seen": 9747872, "step": 147 }, { "epoch": 0.013853138016567604, "grad_norm": 10.855498313903809, "learning_rate": 5e-05, "loss": 1.8449, "num_input_tokens_seen": 9813032, "step": 148 }, { "epoch": 0.013853138016567604, "loss": 1.9836018085479736, "loss_ce": 0.004109744913876057, "loss_iou": 0.83203125, "loss_num": 0.0634765625, "loss_xval": 1.9765625, "num_input_tokens_seen": 9813032, "step": 148 }, { "epoch": 0.013946740300463332, "grad_norm": 39.248653411865234, "learning_rate": 5e-05, "loss": 2.6402, "num_input_tokens_seen": 9878828, "step": 149 }, { "epoch": 0.013946740300463332, "loss": 2.7219836711883545, "loss_ce": 0.006163286045193672, "loss_iou": 1.171875, "loss_num": 0.07421875, "loss_xval": 2.71875, "num_input_tokens_seen": 9878828, "step": 149 }, { "epoch": 0.014040342584359058, "grad_norm": 10.349116325378418, "learning_rate": 5e-05, "loss": 2.4491, "num_input_tokens_seen": 9944760, "step": 150 }, { "epoch": 0.014040342584359058, "loss": 2.5953550338745117, "loss_ce": 0.006976166274398565, "loss_iou": 0.9765625, "loss_num": 0.126953125, "loss_xval": 2.59375, "num_input_tokens_seen": 9944760, "step": 150 }, { "epoch": 0.014133944868254785, "grad_norm": 16.034276962280273, "learning_rate": 5e-05, "loss": 2.8513, "num_input_tokens_seen": 10011116, "step": 151 }, { "epoch": 0.014133944868254785, "loss": 3.006396770477295, "loss_ce": 0.010302877053618431, "loss_iou": 1.140625, "loss_num": 0.142578125, "loss_xval": 3.0, "num_input_tokens_seen": 10011116, "step": 151 }, { "epoch": 0.014227547152150513, "grad_norm": 10.876625061035156, "learning_rate": 5e-05, "loss": 2.661, "num_input_tokens_seen": 10077456, "step": 152 }, { "epoch": 0.014227547152150513, "loss": 2.6819534301757812, "loss_ce": 0.006172118242830038, "loss_iou": 1.0625, "loss_num": 0.1103515625, "loss_xval": 2.671875, "num_input_tokens_seen": 10077456, "step": 152 }, { "epoch": 0.014321149436046239, "grad_norm": 12.449788093566895, "learning_rate": 5e-05, "loss": 2.538, "num_input_tokens_seen": 10142392, "step": 153 }, { "epoch": 0.014321149436046239, "loss": 2.5689287185668945, "loss_ce": 0.008381940424442291, "loss_iou": 1.03125, "loss_num": 0.09814453125, "loss_xval": 2.5625, "num_input_tokens_seen": 10142392, "step": 153 }, { "epoch": 0.014414751719941967, "grad_norm": 8.446009635925293, "learning_rate": 5e-05, "loss": 2.6359, "num_input_tokens_seen": 10208352, "step": 154 }, { "epoch": 0.014414751719941967, "loss": 2.5929999351501465, "loss_ce": 0.0031562570948153734, "loss_iou": 1.0390625, "loss_num": 0.1025390625, "loss_xval": 2.59375, "num_input_tokens_seen": 10208352, "step": 154 }, { "epoch": 0.014508354003837693, "grad_norm": 10.110790252685547, "learning_rate": 5e-05, "loss": 2.3568, "num_input_tokens_seen": 10274700, "step": 155 }, { "epoch": 0.014508354003837693, "loss": 2.2932240962982178, "loss_ce": 0.0041616931557655334, "loss_iou": 0.9453125, "loss_num": 0.07958984375, "loss_xval": 2.28125, "num_input_tokens_seen": 10274700, "step": 155 }, { "epoch": 0.014601956287733421, "grad_norm": 9.76326847076416, "learning_rate": 5e-05, "loss": 2.4006, "num_input_tokens_seen": 10340968, "step": 156 }, { "epoch": 0.014601956287733421, "loss": 2.31691837310791, "loss_ce": 0.002465251600369811, "loss_iou": 0.98828125, "loss_num": 0.0673828125, "loss_xval": 2.3125, "num_input_tokens_seen": 10340968, "step": 156 }, { "epoch": 0.014695558571629148, "grad_norm": 9.665358543395996, "learning_rate": 5e-05, "loss": 2.3008, "num_input_tokens_seen": 10407364, "step": 157 }, { "epoch": 0.014695558571629148, "loss": 2.257051944732666, "loss_ce": 0.003145768539980054, "loss_iou": 0.94921875, "loss_num": 0.0712890625, "loss_xval": 2.25, "num_input_tokens_seen": 10407364, "step": 157 }, { "epoch": 0.014789160855524874, "grad_norm": 21.795377731323242, "learning_rate": 5e-05, "loss": 2.4966, "num_input_tokens_seen": 10473436, "step": 158 }, { "epoch": 0.014789160855524874, "loss": 2.6553800106048584, "loss_ce": 0.003036240115761757, "loss_iou": 1.1875, "loss_num": 0.055908203125, "loss_xval": 2.65625, "num_input_tokens_seen": 10473436, "step": 158 }, { "epoch": 0.014882763139420602, "grad_norm": 18.09992027282715, "learning_rate": 5e-05, "loss": 2.7492, "num_input_tokens_seen": 10539464, "step": 159 }, { "epoch": 0.014882763139420602, "loss": 2.6252808570861816, "loss_ce": 0.0051638283766806126, "loss_iou": 0.98046875, "loss_num": 0.1328125, "loss_xval": 2.625, "num_input_tokens_seen": 10539464, "step": 159 }, { "epoch": 0.014976365423316328, "grad_norm": 7.850207328796387, "learning_rate": 5e-05, "loss": 2.5368, "num_input_tokens_seen": 10606284, "step": 160 }, { "epoch": 0.014976365423316328, "loss": 2.542900800704956, "loss_ce": 0.007744714617729187, "loss_iou": 1.0, "loss_num": 0.107421875, "loss_xval": 2.53125, "num_input_tokens_seen": 10606284, "step": 160 }, { "epoch": 0.015069967707212056, "grad_norm": 23.272611618041992, "learning_rate": 5e-05, "loss": 2.3709, "num_input_tokens_seen": 10671240, "step": 161 }, { "epoch": 0.015069967707212056, "loss": 2.5069596767425537, "loss_ce": 0.006959730759263039, "loss_iou": 0.9921875, "loss_num": 0.10400390625, "loss_xval": 2.5, "num_input_tokens_seen": 10671240, "step": 161 }, { "epoch": 0.015163569991107783, "grad_norm": 9.053507804870605, "learning_rate": 5e-05, "loss": 2.3128, "num_input_tokens_seen": 10737284, "step": 162 }, { "epoch": 0.015163569991107783, "loss": 2.2194528579711914, "loss_ce": 0.007905061356723309, "loss_iou": 0.89453125, "loss_num": 0.08447265625, "loss_xval": 2.21875, "num_input_tokens_seen": 10737284, "step": 162 }, { "epoch": 0.01525717227500351, "grad_norm": 10.071734428405762, "learning_rate": 5e-05, "loss": 2.2584, "num_input_tokens_seen": 10802660, "step": 163 }, { "epoch": 0.01525717227500351, "loss": 2.2959604263305664, "loss_ce": 0.008850975893437862, "loss_iou": 0.96875, "loss_num": 0.0693359375, "loss_xval": 2.28125, "num_input_tokens_seen": 10802660, "step": 163 }, { "epoch": 0.015350774558899237, "grad_norm": 28.983577728271484, "learning_rate": 5e-05, "loss": 2.4874, "num_input_tokens_seen": 10867884, "step": 164 }, { "epoch": 0.015350774558899237, "loss": 2.6018126010894775, "loss_ce": 0.006109514273703098, "loss_iou": 1.125, "loss_num": 0.068359375, "loss_xval": 2.59375, "num_input_tokens_seen": 10867884, "step": 164 }, { "epoch": 0.015444376842794965, "grad_norm": 13.433588981628418, "learning_rate": 5e-05, "loss": 2.6725, "num_input_tokens_seen": 10934420, "step": 165 }, { "epoch": 0.015444376842794965, "loss": 2.5403361320495605, "loss_ce": 0.008109445683658123, "loss_iou": 0.98046875, "loss_num": 0.11376953125, "loss_xval": 2.53125, "num_input_tokens_seen": 10934420, "step": 165 }, { "epoch": 0.015537979126690691, "grad_norm": 7.735687732696533, "learning_rate": 5e-05, "loss": 2.409, "num_input_tokens_seen": 10999412, "step": 166 }, { "epoch": 0.015537979126690691, "loss": 2.1762490272521973, "loss_ce": 0.008280330337584019, "loss_iou": 0.84375, "loss_num": 0.095703125, "loss_xval": 2.171875, "num_input_tokens_seen": 10999412, "step": 166 }, { "epoch": 0.015631581410586418, "grad_norm": 7.087269306182861, "learning_rate": 5e-05, "loss": 2.2915, "num_input_tokens_seen": 11065368, "step": 167 }, { "epoch": 0.015631581410586418, "loss": 2.211134433746338, "loss_ce": 0.0038590305484831333, "loss_iou": 0.8984375, "loss_num": 0.08154296875, "loss_xval": 2.203125, "num_input_tokens_seen": 11065368, "step": 167 }, { "epoch": 0.015725183694482146, "grad_norm": 6.687474727630615, "learning_rate": 5e-05, "loss": 2.0499, "num_input_tokens_seen": 11131428, "step": 168 }, { "epoch": 0.015725183694482146, "loss": 1.9903085231781006, "loss_ce": 0.003980449866503477, "loss_iou": 0.8046875, "loss_num": 0.07568359375, "loss_xval": 1.984375, "num_input_tokens_seen": 11131428, "step": 168 }, { "epoch": 0.015818785978377874, "grad_norm": 24.095243453979492, "learning_rate": 5e-05, "loss": 2.3104, "num_input_tokens_seen": 11197824, "step": 169 }, { "epoch": 0.015818785978377874, "loss": 2.221169948577881, "loss_ce": 0.004373104777187109, "loss_iou": 0.98046875, "loss_num": 0.05126953125, "loss_xval": 2.21875, "num_input_tokens_seen": 11197824, "step": 169 }, { "epoch": 0.015912388262273598, "grad_norm": 19.663116455078125, "learning_rate": 5e-05, "loss": 2.2558, "num_input_tokens_seen": 11263620, "step": 170 }, { "epoch": 0.015912388262273598, "loss": 2.362255096435547, "loss_ce": 0.0038566177245229483, "loss_iou": 1.0078125, "loss_num": 0.0673828125, "loss_xval": 2.359375, "num_input_tokens_seen": 11263620, "step": 170 }, { "epoch": 0.016005990546169326, "grad_norm": 17.370332717895508, "learning_rate": 5e-05, "loss": 2.2358, "num_input_tokens_seen": 11329808, "step": 171 }, { "epoch": 0.016005990546169326, "loss": 2.3029870986938477, "loss_ce": 0.0022056836169213057, "loss_iou": 1.0078125, "loss_num": 0.05810546875, "loss_xval": 2.296875, "num_input_tokens_seen": 11329808, "step": 171 }, { "epoch": 0.016099592830065054, "grad_norm": 10.88261890411377, "learning_rate": 5e-05, "loss": 2.6163, "num_input_tokens_seen": 11395504, "step": 172 }, { "epoch": 0.016099592830065054, "loss": 2.5592880249023438, "loss_ce": 0.006553507875651121, "loss_iou": 1.0546875, "loss_num": 0.08935546875, "loss_xval": 2.546875, "num_input_tokens_seen": 11395504, "step": 172 }, { "epoch": 0.016193195113960782, "grad_norm": 9.813773155212402, "learning_rate": 5e-05, "loss": 2.2342, "num_input_tokens_seen": 11461240, "step": 173 }, { "epoch": 0.016193195113960782, "loss": 2.2554492950439453, "loss_ce": 0.005449455231428146, "loss_iou": 0.9609375, "loss_num": 0.06640625, "loss_xval": 2.25, "num_input_tokens_seen": 11461240, "step": 173 }, { "epoch": 0.016286797397856507, "grad_norm": 30.811744689941406, "learning_rate": 5e-05, "loss": 2.3332, "num_input_tokens_seen": 11527512, "step": 174 }, { "epoch": 0.016286797397856507, "loss": 2.167771816253662, "loss_ce": 0.004686021711677313, "loss_iou": 0.890625, "loss_num": 0.07568359375, "loss_xval": 2.15625, "num_input_tokens_seen": 11527512, "step": 174 }, { "epoch": 0.016380399681752235, "grad_norm": 10.913748741149902, "learning_rate": 5e-05, "loss": 2.121, "num_input_tokens_seen": 11592812, "step": 175 }, { "epoch": 0.016380399681752235, "loss": 1.9444025754928589, "loss_ce": 0.0037287739105522633, "loss_iou": 0.7578125, "loss_num": 0.08544921875, "loss_xval": 1.9375, "num_input_tokens_seen": 11592812, "step": 175 }, { "epoch": 0.016474001965647963, "grad_norm": 8.592153549194336, "learning_rate": 5e-05, "loss": 2.4228, "num_input_tokens_seen": 11659136, "step": 176 }, { "epoch": 0.016474001965647963, "loss": 2.4883460998535156, "loss_ce": 0.00397090008482337, "loss_iou": 1.015625, "loss_num": 0.08984375, "loss_xval": 2.484375, "num_input_tokens_seen": 11659136, "step": 176 }, { "epoch": 0.016567604249543687, "grad_norm": 12.420134544372559, "learning_rate": 5e-05, "loss": 2.1218, "num_input_tokens_seen": 11724716, "step": 177 }, { "epoch": 0.016567604249543687, "loss": 2.189643144607544, "loss_ce": 0.0021431802306324244, "loss_iou": 0.953125, "loss_num": 0.055419921875, "loss_xval": 2.1875, "num_input_tokens_seen": 11724716, "step": 177 }, { "epoch": 0.016661206533439415, "grad_norm": 16.79232406616211, "learning_rate": 5e-05, "loss": 2.1733, "num_input_tokens_seen": 11790176, "step": 178 }, { "epoch": 0.016661206533439415, "loss": 2.0610435009002686, "loss_ce": 0.0035484700929373503, "loss_iou": 0.89453125, "loss_num": 0.053955078125, "loss_xval": 2.0625, "num_input_tokens_seen": 11790176, "step": 178 }, { "epoch": 0.016754808817335144, "grad_norm": 13.537357330322266, "learning_rate": 5e-05, "loss": 2.1361, "num_input_tokens_seen": 11855320, "step": 179 }, { "epoch": 0.016754808817335144, "loss": 2.492384910583496, "loss_ce": 0.0060567706823349, "loss_iou": 1.0234375, "loss_num": 0.0869140625, "loss_xval": 2.484375, "num_input_tokens_seen": 11855320, "step": 179 }, { "epoch": 0.01684841110123087, "grad_norm": 15.200262069702148, "learning_rate": 5e-05, "loss": 2.504, "num_input_tokens_seen": 11921980, "step": 180 }, { "epoch": 0.01684841110123087, "loss": 2.3923120498657227, "loss_ce": 0.00364036881364882, "loss_iou": 0.99609375, "loss_num": 0.080078125, "loss_xval": 2.390625, "num_input_tokens_seen": 11921980, "step": 180 }, { "epoch": 0.016942013385126596, "grad_norm": 12.651123046875, "learning_rate": 5e-05, "loss": 2.11, "num_input_tokens_seen": 11988024, "step": 181 }, { "epoch": 0.016942013385126596, "loss": 1.9797072410583496, "loss_ce": 0.008760059252381325, "loss_iou": 0.828125, "loss_num": 0.0615234375, "loss_xval": 1.96875, "num_input_tokens_seen": 11988024, "step": 181 }, { "epoch": 0.017035615669022324, "grad_norm": 13.741872787475586, "learning_rate": 5e-05, "loss": 2.6191, "num_input_tokens_seen": 12054344, "step": 182 }, { "epoch": 0.017035615669022324, "loss": 2.6435494422912598, "loss_ce": 0.004877523984760046, "loss_iou": 1.140625, "loss_num": 0.072265625, "loss_xval": 2.640625, "num_input_tokens_seen": 12054344, "step": 182 }, { "epoch": 0.017129217952918052, "grad_norm": 16.598339080810547, "learning_rate": 5e-05, "loss": 2.4599, "num_input_tokens_seen": 12119892, "step": 183 }, { "epoch": 0.017129217952918052, "loss": 2.4329566955566406, "loss_ce": 0.003269173437729478, "loss_iou": 1.0234375, "loss_num": 0.076171875, "loss_xval": 2.4375, "num_input_tokens_seen": 12119892, "step": 183 }, { "epoch": 0.017222820236813777, "grad_norm": 12.140115737915039, "learning_rate": 5e-05, "loss": 2.392, "num_input_tokens_seen": 12185780, "step": 184 }, { "epoch": 0.017222820236813777, "loss": 2.060917377471924, "loss_ce": 0.00354421092197299, "loss_iou": 0.859375, "loss_num": 0.0673828125, "loss_xval": 2.0625, "num_input_tokens_seen": 12185780, "step": 184 }, { "epoch": 0.017316422520709505, "grad_norm": 21.703996658325195, "learning_rate": 5e-05, "loss": 2.3705, "num_input_tokens_seen": 12251676, "step": 185 }, { "epoch": 0.017316422520709505, "loss": 2.2541608810424805, "loss_ce": 0.00611409917473793, "loss_iou": 0.9765625, "loss_num": 0.059814453125, "loss_xval": 2.25, "num_input_tokens_seen": 12251676, "step": 185 }, { "epoch": 0.017410024804605233, "grad_norm": 14.7355375289917, "learning_rate": 5e-05, "loss": 2.3044, "num_input_tokens_seen": 12318336, "step": 186 }, { "epoch": 0.017410024804605233, "loss": 2.4516263008117676, "loss_ce": 0.0043604495003819466, "loss_iou": 1.0390625, "loss_num": 0.07373046875, "loss_xval": 2.453125, "num_input_tokens_seen": 12318336, "step": 186 }, { "epoch": 0.01750362708850096, "grad_norm": 8.447161674499512, "learning_rate": 5e-05, "loss": 2.2574, "num_input_tokens_seen": 12384792, "step": 187 }, { "epoch": 0.01750362708850096, "loss": 2.2445409297943115, "loss_ce": 0.0033300297800451517, "loss_iou": 0.96875, "loss_num": 0.060546875, "loss_xval": 2.234375, "num_input_tokens_seen": 12384792, "step": 187 }, { "epoch": 0.017597229372396685, "grad_norm": 10.398317337036133, "learning_rate": 5e-05, "loss": 2.1611, "num_input_tokens_seen": 12450828, "step": 188 }, { "epoch": 0.017597229372396685, "loss": 2.133500814437866, "loss_ce": 0.006547821685671806, "loss_iou": 0.921875, "loss_num": 0.056640625, "loss_xval": 2.125, "num_input_tokens_seen": 12450828, "step": 188 }, { "epoch": 0.017690831656292413, "grad_norm": 13.35843563079834, "learning_rate": 5e-05, "loss": 2.3758, "num_input_tokens_seen": 12518072, "step": 189 }, { "epoch": 0.017690831656292413, "loss": 2.461061954498291, "loss_ce": 0.011843172833323479, "loss_iou": 1.0234375, "loss_num": 0.0810546875, "loss_xval": 2.453125, "num_input_tokens_seen": 12518072, "step": 189 }, { "epoch": 0.01778443394018814, "grad_norm": 28.607030868530273, "learning_rate": 5e-05, "loss": 2.4175, "num_input_tokens_seen": 12585416, "step": 190 }, { "epoch": 0.01778443394018814, "loss": 2.3904967308044434, "loss_ce": 0.005731300450861454, "loss_iou": 1.0625, "loss_num": 0.05322265625, "loss_xval": 2.390625, "num_input_tokens_seen": 12585416, "step": 190 }, { "epoch": 0.017878036224083866, "grad_norm": 7.866536617279053, "learning_rate": 5e-05, "loss": 2.7323, "num_input_tokens_seen": 12652420, "step": 191 }, { "epoch": 0.017878036224083866, "loss": 2.5774574279785156, "loss_ce": 0.006168528459966183, "loss_iou": 0.97265625, "loss_num": 0.12451171875, "loss_xval": 2.578125, "num_input_tokens_seen": 12652420, "step": 191 }, { "epoch": 0.017971638507979594, "grad_norm": 12.57685375213623, "learning_rate": 5e-05, "loss": 2.746, "num_input_tokens_seen": 12718744, "step": 192 }, { "epoch": 0.017971638507979594, "loss": 2.8357036113739014, "loss_ce": 0.0036723511293530464, "loss_iou": 1.0859375, "loss_num": 0.1328125, "loss_xval": 2.828125, "num_input_tokens_seen": 12718744, "step": 192 }, { "epoch": 0.018065240791875322, "grad_norm": 7.3611531257629395, "learning_rate": 5e-05, "loss": 2.5383, "num_input_tokens_seen": 12785104, "step": 193 }, { "epoch": 0.018065240791875322, "loss": 2.5341057777404785, "loss_ce": 0.005785365588963032, "loss_iou": 0.98828125, "loss_num": 0.109375, "loss_xval": 2.53125, "num_input_tokens_seen": 12785104, "step": 193 }, { "epoch": 0.01815884307577105, "grad_norm": 7.051367282867432, "learning_rate": 5e-05, "loss": 2.3916, "num_input_tokens_seen": 12850648, "step": 194 }, { "epoch": 0.01815884307577105, "loss": 2.2845864295959473, "loss_ce": 0.005289818160235882, "loss_iou": 0.9140625, "loss_num": 0.09033203125, "loss_xval": 2.28125, "num_input_tokens_seen": 12850648, "step": 194 }, { "epoch": 0.018252445359666775, "grad_norm": 7.7706708908081055, "learning_rate": 5e-05, "loss": 2.3577, "num_input_tokens_seen": 12917360, "step": 195 }, { "epoch": 0.018252445359666775, "loss": 2.3785312175750732, "loss_ce": 0.00548445014283061, "loss_iou": 0.9921875, "loss_num": 0.078125, "loss_xval": 2.375, "num_input_tokens_seen": 12917360, "step": 195 }, { "epoch": 0.018346047643562503, "grad_norm": 4.753993511199951, "learning_rate": 5e-05, "loss": 2.375, "num_input_tokens_seen": 12983124, "step": 196 }, { "epoch": 0.018346047643562503, "loss": 2.251105546951294, "loss_ce": 0.004035319201648235, "loss_iou": 0.9375, "loss_num": 0.0751953125, "loss_xval": 2.25, "num_input_tokens_seen": 12983124, "step": 196 }, { "epoch": 0.01843964992745823, "grad_norm": 17.095426559448242, "learning_rate": 5e-05, "loss": 2.101, "num_input_tokens_seen": 13049128, "step": 197 }, { "epoch": 0.01843964992745823, "loss": 1.9639153480529785, "loss_ce": 0.0034661320969462395, "loss_iou": 0.8125, "loss_num": 0.06689453125, "loss_xval": 1.9609375, "num_input_tokens_seen": 13049128, "step": 197 }, { "epoch": 0.01853325221135396, "grad_norm": 10.60446548461914, "learning_rate": 5e-05, "loss": 2.1544, "num_input_tokens_seen": 13115624, "step": 198 }, { "epoch": 0.01853325221135396, "loss": 2.3536925315856934, "loss_ce": 0.0021300031803548336, "loss_iou": 1.0078125, "loss_num": 0.06640625, "loss_xval": 2.34375, "num_input_tokens_seen": 13115624, "step": 198 }, { "epoch": 0.018626854495249683, "grad_norm": 8.845651626586914, "learning_rate": 5e-05, "loss": 1.9546, "num_input_tokens_seen": 13181448, "step": 199 }, { "epoch": 0.018626854495249683, "loss": 2.158360481262207, "loss_ce": 0.002110264729708433, "loss_iou": 0.93359375, "loss_num": 0.057373046875, "loss_xval": 2.15625, "num_input_tokens_seen": 13181448, "step": 199 }, { "epoch": 0.01872045677914541, "grad_norm": 11.761173248291016, "learning_rate": 5e-05, "loss": 2.1373, "num_input_tokens_seen": 13247672, "step": 200 }, { "epoch": 0.01872045677914541, "loss": 2.325364589691162, "loss_ce": 0.0030990494415163994, "loss_iou": 0.9609375, "loss_num": 0.080078125, "loss_xval": 2.328125, "num_input_tokens_seen": 13247672, "step": 200 }, { "epoch": 0.01881405906304114, "grad_norm": 24.597557067871094, "learning_rate": 5e-05, "loss": 2.5439, "num_input_tokens_seen": 13313348, "step": 201 }, { "epoch": 0.01881405906304114, "loss": 2.637383460998535, "loss_ce": 0.004570887889713049, "loss_iou": 1.1640625, "loss_num": 0.0615234375, "loss_xval": 2.625, "num_input_tokens_seen": 13313348, "step": 201 }, { "epoch": 0.018907661346936864, "grad_norm": 9.031757354736328, "learning_rate": 5e-05, "loss": 2.89, "num_input_tokens_seen": 13378600, "step": 202 }, { "epoch": 0.018907661346936864, "loss": 2.8787484169006348, "loss_ce": 0.005701353773474693, "loss_iou": 1.125, "loss_num": 0.1259765625, "loss_xval": 2.875, "num_input_tokens_seen": 13378600, "step": 202 }, { "epoch": 0.019001263630832592, "grad_norm": 7.955211639404297, "learning_rate": 5e-05, "loss": 2.5635, "num_input_tokens_seen": 13444416, "step": 203 }, { "epoch": 0.019001263630832592, "loss": 2.63411283493042, "loss_ce": 0.001300313975661993, "loss_iou": 1.03125, "loss_num": 0.11328125, "loss_xval": 2.625, "num_input_tokens_seen": 13444416, "step": 203 }, { "epoch": 0.01909486591472832, "grad_norm": 8.365921020507812, "learning_rate": 5e-05, "loss": 2.5201, "num_input_tokens_seen": 13510788, "step": 204 }, { "epoch": 0.01909486591472832, "loss": 2.4572911262512207, "loss_ce": 0.00221311766654253, "loss_iou": 0.98046875, "loss_num": 0.09912109375, "loss_xval": 2.453125, "num_input_tokens_seen": 13510788, "step": 204 }, { "epoch": 0.019188468198624048, "grad_norm": 11.965587615966797, "learning_rate": 5e-05, "loss": 2.3749, "num_input_tokens_seen": 13575660, "step": 205 }, { "epoch": 0.019188468198624048, "loss": 2.3484880924224854, "loss_ce": 0.008644196204841137, "loss_iou": 0.95703125, "loss_num": 0.0859375, "loss_xval": 2.34375, "num_input_tokens_seen": 13575660, "step": 205 }, { "epoch": 0.019282070482519773, "grad_norm": 18.333850860595703, "learning_rate": 5e-05, "loss": 2.3765, "num_input_tokens_seen": 13641848, "step": 206 }, { "epoch": 0.019282070482519773, "loss": 2.342848777770996, "loss_ce": 0.0030051928479224443, "loss_iou": 0.99609375, "loss_num": 0.0693359375, "loss_xval": 2.34375, "num_input_tokens_seen": 13641848, "step": 206 }, { "epoch": 0.0193756727664155, "grad_norm": 15.698119163513184, "learning_rate": 5e-05, "loss": 2.769, "num_input_tokens_seen": 13708084, "step": 207 }, { "epoch": 0.0193756727664155, "loss": 2.7347211837768555, "loss_ce": 0.0022994456812739372, "loss_iou": 1.125, "loss_num": 0.0966796875, "loss_xval": 2.734375, "num_input_tokens_seen": 13708084, "step": 207 }, { "epoch": 0.01946927505031123, "grad_norm": 13.12657356262207, "learning_rate": 5e-05, "loss": 2.3812, "num_input_tokens_seen": 13774940, "step": 208 }, { "epoch": 0.01946927505031123, "loss": 2.2628304958343506, "loss_ce": 0.005018184892833233, "loss_iou": 0.953125, "loss_num": 0.0712890625, "loss_xval": 2.25, "num_input_tokens_seen": 13774940, "step": 208 }, { "epoch": 0.019562877334206953, "grad_norm": 11.415872573852539, "learning_rate": 5e-05, "loss": 2.2971, "num_input_tokens_seen": 13841564, "step": 209 }, { "epoch": 0.019562877334206953, "loss": 2.2718019485473633, "loss_ce": 0.004223851952701807, "loss_iou": 0.95703125, "loss_num": 0.07080078125, "loss_xval": 2.265625, "num_input_tokens_seen": 13841564, "step": 209 }, { "epoch": 0.01965647961810268, "grad_norm": 22.843965530395508, "learning_rate": 5e-05, "loss": 2.0982, "num_input_tokens_seen": 13907716, "step": 210 }, { "epoch": 0.01965647961810268, "loss": 2.142209053039551, "loss_ce": 0.008419920690357685, "loss_iou": 0.921875, "loss_num": 0.05908203125, "loss_xval": 2.140625, "num_input_tokens_seen": 13907716, "step": 210 }, { "epoch": 0.01975008190199841, "grad_norm": 13.346732139587402, "learning_rate": 5e-05, "loss": 2.0266, "num_input_tokens_seen": 13974352, "step": 211 }, { "epoch": 0.01975008190199841, "loss": 2.0488786697387695, "loss_ce": 0.006886437069624662, "loss_iou": 0.890625, "loss_num": 0.052001953125, "loss_xval": 2.046875, "num_input_tokens_seen": 13974352, "step": 211 }, { "epoch": 0.019843684185894137, "grad_norm": 13.644083023071289, "learning_rate": 5e-05, "loss": 2.3802, "num_input_tokens_seen": 14040576, "step": 212 }, { "epoch": 0.019843684185894137, "loss": 2.3828635215759277, "loss_ce": 0.005910306237637997, "loss_iou": 1.0, "loss_num": 0.07568359375, "loss_xval": 2.375, "num_input_tokens_seen": 14040576, "step": 212 }, { "epoch": 0.019937286469789862, "grad_norm": 16.804580688476562, "learning_rate": 5e-05, "loss": 2.2239, "num_input_tokens_seen": 14106472, "step": 213 }, { "epoch": 0.019937286469789862, "loss": 2.160003662109375, "loss_ce": 0.005218386184424162, "loss_iou": 0.92578125, "loss_num": 0.060791015625, "loss_xval": 2.15625, "num_input_tokens_seen": 14106472, "step": 213 }, { "epoch": 0.02003088875368559, "grad_norm": 12.16080379486084, "learning_rate": 5e-05, "loss": 2.5757, "num_input_tokens_seen": 14174120, "step": 214 }, { "epoch": 0.02003088875368559, "loss": 2.5228171348571777, "loss_ce": 0.0052390191704034805, "loss_iou": 1.09375, "loss_num": 0.06591796875, "loss_xval": 2.515625, "num_input_tokens_seen": 14174120, "step": 214 }, { "epoch": 0.020124491037581318, "grad_norm": 8.559412956237793, "learning_rate": 5e-05, "loss": 2.2206, "num_input_tokens_seen": 14240428, "step": 215 }, { "epoch": 0.020124491037581318, "loss": 2.2577953338623047, "loss_ce": 0.0038892091251909733, "loss_iou": 0.984375, "loss_num": 0.057861328125, "loss_xval": 2.25, "num_input_tokens_seen": 14240428, "step": 215 }, { "epoch": 0.020218093321477042, "grad_norm": 12.233105659484863, "learning_rate": 5e-05, "loss": 2.1681, "num_input_tokens_seen": 14306032, "step": 216 }, { "epoch": 0.020218093321477042, "loss": 2.202512502670288, "loss_ce": 0.005735145881772041, "loss_iou": 0.890625, "loss_num": 0.08251953125, "loss_xval": 2.203125, "num_input_tokens_seen": 14306032, "step": 216 }, { "epoch": 0.02031169560537277, "grad_norm": 28.35647201538086, "learning_rate": 5e-05, "loss": 2.464, "num_input_tokens_seen": 14372164, "step": 217 }, { "epoch": 0.02031169560537277, "loss": 2.3463313579559326, "loss_ce": 0.0025814578402787447, "loss_iou": 1.0234375, "loss_num": 0.06005859375, "loss_xval": 2.34375, "num_input_tokens_seen": 14372164, "step": 217 }, { "epoch": 0.0204052978892685, "grad_norm": 9.605964660644531, "learning_rate": 5e-05, "loss": 2.7749, "num_input_tokens_seen": 14438152, "step": 218 }, { "epoch": 0.0204052978892685, "loss": 2.80424165725708, "loss_ce": 0.0015071036759763956, "loss_iou": 1.0859375, "loss_num": 0.125, "loss_xval": 2.796875, "num_input_tokens_seen": 14438152, "step": 218 }, { "epoch": 0.020498900173164226, "grad_norm": 5.129776477813721, "learning_rate": 5e-05, "loss": 2.5083, "num_input_tokens_seen": 14503776, "step": 219 }, { "epoch": 0.020498900173164226, "loss": 2.6203508377075195, "loss_ce": 0.005116347223520279, "loss_iou": 1.0546875, "loss_num": 0.1025390625, "loss_xval": 2.609375, "num_input_tokens_seen": 14503776, "step": 219 }, { "epoch": 0.02059250245705995, "grad_norm": 7.953933238983154, "learning_rate": 5e-05, "loss": 2.394, "num_input_tokens_seen": 14570076, "step": 220 }, { "epoch": 0.02059250245705995, "loss": 2.503218650817871, "loss_ce": 0.007124858908355236, "loss_iou": 0.98828125, "loss_num": 0.10400390625, "loss_xval": 2.5, "num_input_tokens_seen": 14570076, "step": 220 }, { "epoch": 0.02068610474095568, "grad_norm": 46.45488739013672, "learning_rate": 5e-05, "loss": 2.3549, "num_input_tokens_seen": 14636444, "step": 221 }, { "epoch": 0.02068610474095568, "loss": 2.2976410388946533, "loss_ce": 0.00271901348605752, "loss_iou": 0.953125, "loss_num": 0.07861328125, "loss_xval": 2.296875, "num_input_tokens_seen": 14636444, "step": 221 }, { "epoch": 0.020779707024851407, "grad_norm": 20.381505966186523, "learning_rate": 5e-05, "loss": 2.2723, "num_input_tokens_seen": 14703504, "step": 222 }, { "epoch": 0.020779707024851407, "loss": 2.2910914421081543, "loss_ce": 0.002028927905485034, "loss_iou": 0.9921875, "loss_num": 0.0615234375, "loss_xval": 2.28125, "num_input_tokens_seen": 14703504, "step": 222 }, { "epoch": 0.020873309308747135, "grad_norm": 4.785074710845947, "learning_rate": 5e-05, "loss": 2.3778, "num_input_tokens_seen": 14770508, "step": 223 }, { "epoch": 0.020873309308747135, "loss": 2.339602470397949, "loss_ce": 0.004641698207706213, "loss_iou": 0.9765625, "loss_num": 0.07666015625, "loss_xval": 2.328125, "num_input_tokens_seen": 14770508, "step": 223 }, { "epoch": 0.02096691159264286, "grad_norm": 9.842256546020508, "learning_rate": 5e-05, "loss": 2.1408, "num_input_tokens_seen": 14836768, "step": 224 }, { "epoch": 0.02096691159264286, "loss": 2.0448293685913086, "loss_ce": 0.00405783299356699, "loss_iou": 0.8515625, "loss_num": 0.06689453125, "loss_xval": 2.046875, "num_input_tokens_seen": 14836768, "step": 224 }, { "epoch": 0.021060513876538588, "grad_norm": 9.726057052612305, "learning_rate": 5e-05, "loss": 2.2071, "num_input_tokens_seen": 14901788, "step": 225 }, { "epoch": 0.021060513876538588, "loss": 2.148120164871216, "loss_ce": 0.0035889961291104555, "loss_iou": 0.91015625, "loss_num": 0.06396484375, "loss_xval": 2.140625, "num_input_tokens_seen": 14901788, "step": 225 }, { "epoch": 0.021154116160434316, "grad_norm": 29.221086502075195, "learning_rate": 5e-05, "loss": 2.1879, "num_input_tokens_seen": 14967428, "step": 226 }, { "epoch": 0.021154116160434316, "loss": 2.3783063888549805, "loss_ce": 0.005259726196527481, "loss_iou": 1.0546875, "loss_num": 0.0537109375, "loss_xval": 2.375, "num_input_tokens_seen": 14967428, "step": 226 }, { "epoch": 0.02124771844433004, "grad_norm": 24.138704299926758, "learning_rate": 5e-05, "loss": 2.9276, "num_input_tokens_seen": 15034212, "step": 227 }, { "epoch": 0.02124771844433004, "loss": 2.9417781829833984, "loss_ce": 0.002325114095583558, "loss_iou": 1.1328125, "loss_num": 0.1357421875, "loss_xval": 2.9375, "num_input_tokens_seen": 15034212, "step": 227 }, { "epoch": 0.02134132072822577, "grad_norm": 4.556021690368652, "learning_rate": 5e-05, "loss": 2.7162, "num_input_tokens_seen": 15099904, "step": 228 }, { "epoch": 0.02134132072822577, "loss": 2.652596950531006, "loss_ce": 0.013925191015005112, "loss_iou": 0.984375, "loss_num": 0.1337890625, "loss_xval": 2.640625, "num_input_tokens_seen": 15099904, "step": 228 }, { "epoch": 0.021434923012121496, "grad_norm": 11.66744613647461, "learning_rate": 5e-05, "loss": 2.5639, "num_input_tokens_seen": 15166456, "step": 229 }, { "epoch": 0.021434923012121496, "loss": 2.5610897541046143, "loss_ce": 0.0064022233709692955, "loss_iou": 1.0078125, "loss_num": 0.107421875, "loss_xval": 2.5625, "num_input_tokens_seen": 15166456, "step": 229 }, { "epoch": 0.021528525296017224, "grad_norm": 3.8487696647644043, "learning_rate": 5e-05, "loss": 2.5557, "num_input_tokens_seen": 15232448, "step": 230 }, { "epoch": 0.021528525296017224, "loss": 2.4851675033569336, "loss_ce": 0.01446419395506382, "loss_iou": 1.0, "loss_num": 0.0947265625, "loss_xval": 2.46875, "num_input_tokens_seen": 15232448, "step": 230 }, { "epoch": 0.02162212757991295, "grad_norm": 6.852122783660889, "learning_rate": 5e-05, "loss": 2.4145, "num_input_tokens_seen": 15299076, "step": 231 }, { "epoch": 0.02162212757991295, "loss": 2.2217366695404053, "loss_ce": 0.006648796610534191, "loss_iou": 0.87109375, "loss_num": 0.09375, "loss_xval": 2.21875, "num_input_tokens_seen": 15299076, "step": 231 }, { "epoch": 0.021715729863808677, "grad_norm": 10.094616889953613, "learning_rate": 5e-05, "loss": 2.3538, "num_input_tokens_seen": 15365272, "step": 232 }, { "epoch": 0.021715729863808677, "loss": 2.340567111968994, "loss_ce": 0.006582723464816809, "loss_iou": 0.96484375, "loss_num": 0.080078125, "loss_xval": 2.328125, "num_input_tokens_seen": 15365272, "step": 232 }, { "epoch": 0.021809332147704405, "grad_norm": 14.189200401306152, "learning_rate": 5e-05, "loss": 2.4126, "num_input_tokens_seen": 15431180, "step": 233 }, { "epoch": 0.021809332147704405, "loss": 2.4369239807128906, "loss_ce": 0.0013770000077784061, "loss_iou": 1.015625, "loss_num": 0.08056640625, "loss_xval": 2.4375, "num_input_tokens_seen": 15431180, "step": 233 }, { "epoch": 0.02190293443160013, "grad_norm": 16.224164962768555, "learning_rate": 5e-05, "loss": 2.6631, "num_input_tokens_seen": 15498748, "step": 234 }, { "epoch": 0.02190293443160013, "loss": 2.6548266410827637, "loss_ce": 0.004436132963746786, "loss_iou": 1.1328125, "loss_num": 0.076171875, "loss_xval": 2.65625, "num_input_tokens_seen": 15498748, "step": 234 }, { "epoch": 0.021996536715495858, "grad_norm": 9.499267578125, "learning_rate": 5e-05, "loss": 2.5154, "num_input_tokens_seen": 15565392, "step": 235 }, { "epoch": 0.021996536715495858, "loss": 2.5411789417266846, "loss_ce": 0.0021164612844586372, "loss_iou": 1.015625, "loss_num": 0.1015625, "loss_xval": 2.53125, "num_input_tokens_seen": 15565392, "step": 235 }, { "epoch": 0.022090138999391586, "grad_norm": 5.71075963973999, "learning_rate": 5e-05, "loss": 2.2016, "num_input_tokens_seen": 15632800, "step": 236 }, { "epoch": 0.022090138999391586, "loss": 2.220017910003662, "loss_ce": 0.0012678343337029219, "loss_iou": 0.9453125, "loss_num": 0.06640625, "loss_xval": 2.21875, "num_input_tokens_seen": 15632800, "step": 236 }, { "epoch": 0.022183741283287314, "grad_norm": 11.757972717285156, "learning_rate": 5e-05, "loss": 2.3164, "num_input_tokens_seen": 15698656, "step": 237 }, { "epoch": 0.022183741283287314, "loss": 2.343291997909546, "loss_ce": 0.003448127768933773, "loss_iou": 1.0078125, "loss_num": 0.06298828125, "loss_xval": 2.34375, "num_input_tokens_seen": 15698656, "step": 237 }, { "epoch": 0.022277343567183038, "grad_norm": 8.407330513000488, "learning_rate": 5e-05, "loss": 2.3152, "num_input_tokens_seen": 15764320, "step": 238 }, { "epoch": 0.022277343567183038, "loss": 2.46926212310791, "loss_ce": 0.00832437165081501, "loss_iou": 1.0234375, "loss_num": 0.08154296875, "loss_xval": 2.46875, "num_input_tokens_seen": 15764320, "step": 238 }, { "epoch": 0.022370945851078766, "grad_norm": 10.095173835754395, "learning_rate": 5e-05, "loss": 2.2786, "num_input_tokens_seen": 15830768, "step": 239 }, { "epoch": 0.022370945851078766, "loss": 2.2201032638549805, "loss_ce": 0.008189301937818527, "loss_iou": 0.9296875, "loss_num": 0.0712890625, "loss_xval": 2.21875, "num_input_tokens_seen": 15830768, "step": 239 }, { "epoch": 0.022464548134974494, "grad_norm": 31.006059646606445, "learning_rate": 5e-05, "loss": 2.4645, "num_input_tokens_seen": 15896844, "step": 240 }, { "epoch": 0.022464548134974494, "loss": 2.4931678771972656, "loss_ce": 0.010745798237621784, "loss_iou": 1.09375, "loss_num": 0.060302734375, "loss_xval": 2.484375, "num_input_tokens_seen": 15896844, "step": 240 }, { "epoch": 0.02255815041887022, "grad_norm": 25.16346549987793, "learning_rate": 5e-05, "loss": 2.8696, "num_input_tokens_seen": 15964128, "step": 241 }, { "epoch": 0.02255815041887022, "loss": 2.869755268096924, "loss_ce": 0.005497458856552839, "loss_iou": 1.09375, "loss_num": 0.1337890625, "loss_xval": 2.859375, "num_input_tokens_seen": 15964128, "step": 241 }, { "epoch": 0.022651752702765947, "grad_norm": 5.70390510559082, "learning_rate": 5e-05, "loss": 2.7806, "num_input_tokens_seen": 16030220, "step": 242 }, { "epoch": 0.022651752702765947, "loss": 2.78690242767334, "loss_ce": 0.0036993513349443674, "loss_iou": 1.109375, "loss_num": 0.111328125, "loss_xval": 2.78125, "num_input_tokens_seen": 16030220, "step": 242 }, { "epoch": 0.022745354986661675, "grad_norm": 4.852331638336182, "learning_rate": 5e-05, "loss": 2.7649, "num_input_tokens_seen": 16097076, "step": 243 }, { "epoch": 0.022745354986661675, "loss": 2.715427875518799, "loss_ce": 0.00449032848700881, "loss_iou": 1.0859375, "loss_num": 0.10791015625, "loss_xval": 2.71875, "num_input_tokens_seen": 16097076, "step": 243 }, { "epoch": 0.022838957270557403, "grad_norm": 8.202338218688965, "learning_rate": 5e-05, "loss": 2.4259, "num_input_tokens_seen": 16163904, "step": 244 }, { "epoch": 0.022838957270557403, "loss": 2.4780466556549072, "loss_ce": 0.00539036188274622, "loss_iou": 1.0, "loss_num": 0.095703125, "loss_xval": 2.46875, "num_input_tokens_seen": 16163904, "step": 244 }, { "epoch": 0.022932559554453127, "grad_norm": 27.955408096313477, "learning_rate": 5e-05, "loss": 2.5161, "num_input_tokens_seen": 16230508, "step": 245 }, { "epoch": 0.022932559554453127, "loss": 2.4997286796569824, "loss_ce": 0.009494267404079437, "loss_iou": 1.046875, "loss_num": 0.07861328125, "loss_xval": 2.484375, "num_input_tokens_seen": 16230508, "step": 245 }, { "epoch": 0.023026161838348855, "grad_norm": 6.299352645874023, "learning_rate": 5e-05, "loss": 2.3895, "num_input_tokens_seen": 16296232, "step": 246 }, { "epoch": 0.023026161838348855, "loss": 2.376498222351074, "loss_ce": 0.0036955017130821943, "loss_iou": 0.96875, "loss_num": 0.087890625, "loss_xval": 2.375, "num_input_tokens_seen": 16296232, "step": 246 }, { "epoch": 0.023119764122244583, "grad_norm": 47.97476577758789, "learning_rate": 5e-05, "loss": 2.2768, "num_input_tokens_seen": 16364056, "step": 247 }, { "epoch": 0.023119764122244583, "loss": 2.3292150497436523, "loss_ce": 0.004019611515104771, "loss_iou": 0.98046875, "loss_num": 0.07177734375, "loss_xval": 2.328125, "num_input_tokens_seen": 16364056, "step": 247 }, { "epoch": 0.02321336640614031, "grad_norm": 12.00546932220459, "learning_rate": 5e-05, "loss": 2.109, "num_input_tokens_seen": 16429512, "step": 248 }, { "epoch": 0.02321336640614031, "loss": 2.0103962421417236, "loss_ce": 0.0033161009196192026, "loss_iou": 0.84375, "loss_num": 0.0634765625, "loss_xval": 2.0, "num_input_tokens_seen": 16429512, "step": 248 }, { "epoch": 0.023306968690036036, "grad_norm": 13.863018989562988, "learning_rate": 5e-05, "loss": 2.293, "num_input_tokens_seen": 16497392, "step": 249 }, { "epoch": 0.023306968690036036, "loss": 2.2520527839660645, "loss_ce": 0.0020527015440165997, "loss_iou": 1.0, "loss_num": 0.048828125, "loss_xval": 2.25, "num_input_tokens_seen": 16497392, "step": 249 }, { "epoch": 0.023400570973931764, "grad_norm": 12.705256462097168, "learning_rate": 5e-05, "loss": 2.6155, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023400570973931764, "eval_seeclick_CIoU": -0.10535179451107979, "eval_seeclick_GIoU": -0.1347958855330944, "eval_seeclick_IoU": 0.08255543187260628, "eval_seeclick_MAE_all": 0.19345863908529282, "eval_seeclick_MAE_h": 0.22437167167663574, "eval_seeclick_MAE_w": 0.17006926238536835, "eval_seeclick_MAE_x_boxes": 0.2265409231185913, "eval_seeclick_MAE_y_boxes": 0.16204872727394104, "eval_seeclick_NUM_probability": 0.9990282654762268, "eval_seeclick_inside_bbox": 0.14479167014360428, "eval_seeclick_loss": 3.3097572326660156, "eval_seeclick_loss_ce": 0.04021553136408329, "eval_seeclick_loss_iou": 1.138916015625, "eval_seeclick_loss_num": 0.20947265625, "eval_seeclick_loss_xval": 3.326171875, "eval_seeclick_runtime": 65.061, "eval_seeclick_samples_per_second": 0.722, "eval_seeclick_steps_per_second": 0.031, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023400570973931764, "eval_icons_CIoU": -0.126484464854002, "eval_icons_GIoU": -0.08203662559390068, "eval_icons_IoU": 0.012108168564736843, "eval_icons_MAE_all": 0.2220921739935875, "eval_icons_MAE_h": 0.2853976711630821, "eval_icons_MAE_w": 0.15436376631259918, "eval_icons_MAE_x_boxes": 0.15638123452663422, "eval_icons_MAE_y_boxes": 0.1470625400543213, "eval_icons_NUM_probability": 0.9994451999664307, "eval_icons_inside_bbox": 0.04340277798473835, "eval_icons_loss": 3.319797992706299, "eval_icons_loss_ce": 0.002282801433466375, "eval_icons_loss_iou": 1.099853515625, "eval_icons_loss_num": 0.230224609375, "eval_icons_loss_xval": 3.3525390625, "eval_icons_runtime": 71.6355, "eval_icons_samples_per_second": 0.698, "eval_icons_steps_per_second": 0.028, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023400570973931764, "eval_screenspot_CIoU": -0.010043622304995855, "eval_screenspot_GIoU": -0.021931735177834828, "eval_screenspot_IoU": 0.08838420361280441, "eval_screenspot_MAE_all": 0.17027866343657175, "eval_screenspot_MAE_h": 0.19475771486759186, "eval_screenspot_MAE_w": 0.1290009394288063, "eval_screenspot_MAE_x_boxes": 0.16163779298464456, "eval_screenspot_MAE_y_boxes": 0.11051993568738301, "eval_screenspot_NUM_probability": 0.9991798202196757, "eval_screenspot_inside_bbox": 0.20499999821186066, "eval_screenspot_loss": 2.9312689304351807, "eval_screenspot_loss_ce": 0.012030040534834066, "eval_screenspot_loss_iou": 1.0362955729166667, "eval_screenspot_loss_num": 0.17974853515625, "eval_screenspot_loss_xval": 2.9694010416666665, "eval_screenspot_runtime": 120.7494, "eval_screenspot_samples_per_second": 0.737, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023400570973931764, "eval_compot_CIoU": -0.059945203363895416, "eval_compot_GIoU": -0.0336238956078887, "eval_compot_IoU": 0.021148216910660267, "eval_compot_MAE_all": 0.1428975909948349, "eval_compot_MAE_h": 0.17755433917045593, "eval_compot_MAE_w": 0.10511288791894913, "eval_compot_MAE_x_boxes": 0.10315332189202309, "eval_compot_MAE_y_boxes": 0.09815165773034096, "eval_compot_NUM_probability": 0.9983721375465393, "eval_compot_inside_bbox": 0.0711805559694767, "eval_compot_loss": 2.7843072414398193, "eval_compot_loss_ce": 0.011173189617693424, "eval_compot_loss_iou": 1.02001953125, "eval_compot_loss_num": 0.146759033203125, "eval_compot_loss_xval": 2.7734375, "eval_compot_runtime": 66.2387, "eval_compot_samples_per_second": 0.755, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023400570973931764, "eval_custom_ui_MAE_all": 0.12166675180196762, "eval_custom_ui_MAE_x": 0.11208774521946907, "eval_custom_ui_MAE_y": 0.13124577701091766, "eval_custom_ui_NUM_probability": 0.999786376953125, "eval_custom_ui_loss": 0.6037883162498474, "eval_custom_ui_loss_ce": 0.017134987749159336, "eval_custom_ui_loss_num": 0.11712646484375, "eval_custom_ui_loss_xval": 0.585693359375, "eval_custom_ui_runtime": 53.7774, "eval_custom_ui_samples_per_second": 0.93, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023400570973931764, "loss": 0.5946378707885742, "loss_ce": 0.018710199743509293, "loss_iou": 0.0, "loss_num": 0.115234375, "loss_xval": 0.57421875, "num_input_tokens_seen": 16563512, "step": 250 }, { "epoch": 0.023494173257827492, "grad_norm": 6.045687198638916, "learning_rate": 5e-05, "loss": 1.8794, "num_input_tokens_seen": 16630008, "step": 251 }, { "epoch": 0.023494173257827492, "loss": 1.9556176662445068, "loss_ce": 0.004201718606054783, "loss_iou": 0.79296875, "loss_num": 0.07373046875, "loss_xval": 1.953125, "num_input_tokens_seen": 16630008, "step": 251 }, { "epoch": 0.023587775541723217, "grad_norm": 11.652396202087402, "learning_rate": 5e-05, "loss": 2.1469, "num_input_tokens_seen": 16696696, "step": 252 }, { "epoch": 0.023587775541723217, "loss": 2.2387962341308594, "loss_ce": 0.011257155798375607, "loss_iou": 0.9453125, "loss_num": 0.06787109375, "loss_xval": 2.234375, "num_input_tokens_seen": 16696696, "step": 252 }, { "epoch": 0.023681377825618945, "grad_norm": 9.669051170349121, "learning_rate": 5e-05, "loss": 2.4593, "num_input_tokens_seen": 16763824, "step": 253 }, { "epoch": 0.023681377825618945, "loss": 2.461796760559082, "loss_ce": 0.006718586664646864, "loss_iou": 1.03125, "loss_num": 0.07763671875, "loss_xval": 2.453125, "num_input_tokens_seen": 16763824, "step": 253 }, { "epoch": 0.023774980109514673, "grad_norm": 6.617365837097168, "learning_rate": 5e-05, "loss": 2.3379, "num_input_tokens_seen": 16830448, "step": 254 }, { "epoch": 0.023774980109514673, "loss": 2.5503010749816895, "loss_ce": 0.005379147361963987, "loss_iou": 1.0234375, "loss_num": 0.0986328125, "loss_xval": 2.546875, "num_input_tokens_seen": 16830448, "step": 254 }, { "epoch": 0.0238685823934104, "grad_norm": 12.18752670288086, "learning_rate": 5e-05, "loss": 2.3084, "num_input_tokens_seen": 16896776, "step": 255 }, { "epoch": 0.0238685823934104, "loss": 2.2040317058563232, "loss_ce": 0.005789526738226414, "loss_iou": 0.9609375, "loss_num": 0.0556640625, "loss_xval": 2.203125, "num_input_tokens_seen": 16896776, "step": 255 }, { "epoch": 0.023962184677306125, "grad_norm": 8.434796333312988, "learning_rate": 5e-05, "loss": 2.4458, "num_input_tokens_seen": 16964024, "step": 256 }, { "epoch": 0.023962184677306125, "loss": 2.423982620239258, "loss_ce": 0.006013859994709492, "loss_iou": 0.98046875, "loss_num": 0.09130859375, "loss_xval": 2.421875, "num_input_tokens_seen": 16964024, "step": 256 }, { "epoch": 0.024055786961201853, "grad_norm": 25.34197235107422, "learning_rate": 5e-05, "loss": 1.9667, "num_input_tokens_seen": 17030448, "step": 257 }, { "epoch": 0.024055786961201853, "loss": 2.267993927001953, "loss_ce": 0.005298535339534283, "loss_iou": 0.9609375, "loss_num": 0.0693359375, "loss_xval": 2.265625, "num_input_tokens_seen": 17030448, "step": 257 }, { "epoch": 0.02414938924509758, "grad_norm": 20.773479461669922, "learning_rate": 5e-05, "loss": 2.4686, "num_input_tokens_seen": 17097300, "step": 258 }, { "epoch": 0.02414938924509758, "loss": 2.5461835861206055, "loss_ce": 0.006144314538687468, "loss_iou": 1.0625, "loss_num": 0.08203125, "loss_xval": 2.546875, "num_input_tokens_seen": 17097300, "step": 258 }, { "epoch": 0.024242991528993306, "grad_norm": 13.068894386291504, "learning_rate": 5e-05, "loss": 2.4266, "num_input_tokens_seen": 17163384, "step": 259 }, { "epoch": 0.024242991528993306, "loss": 2.4044408798217773, "loss_ce": 0.004050274379551411, "loss_iou": 1.0, "loss_num": 0.0791015625, "loss_xval": 2.40625, "num_input_tokens_seen": 17163384, "step": 259 }, { "epoch": 0.024336593812889034, "grad_norm": 8.591397285461426, "learning_rate": 5e-05, "loss": 2.0286, "num_input_tokens_seen": 17229192, "step": 260 }, { "epoch": 0.024336593812889034, "loss": 2.1226484775543213, "loss_ce": 0.0064375754445791245, "loss_iou": 0.8828125, "loss_num": 0.0703125, "loss_xval": 2.109375, "num_input_tokens_seen": 17229192, "step": 260 }, { "epoch": 0.024430196096784762, "grad_norm": 21.167705535888672, "learning_rate": 5e-05, "loss": 2.0875, "num_input_tokens_seen": 17295184, "step": 261 }, { "epoch": 0.024430196096784762, "loss": 2.235891342163086, "loss_ce": 0.005422517191618681, "loss_iou": 0.9609375, "loss_num": 0.0615234375, "loss_xval": 2.234375, "num_input_tokens_seen": 17295184, "step": 261 }, { "epoch": 0.02452379838068049, "grad_norm": 34.061092376708984, "learning_rate": 5e-05, "loss": 2.2078, "num_input_tokens_seen": 17361696, "step": 262 }, { "epoch": 0.02452379838068049, "loss": 2.2449076175689697, "loss_ce": 0.004673250950872898, "loss_iou": 0.9765625, "loss_num": 0.056396484375, "loss_xval": 2.234375, "num_input_tokens_seen": 17361696, "step": 262 }, { "epoch": 0.024617400664576215, "grad_norm": 10.199859619140625, "learning_rate": 5e-05, "loss": 2.5873, "num_input_tokens_seen": 17428492, "step": 263 }, { "epoch": 0.024617400664576215, "loss": 2.6498115062713623, "loss_ce": 0.005280329845845699, "loss_iou": 1.109375, "loss_num": 0.08642578125, "loss_xval": 2.640625, "num_input_tokens_seen": 17428492, "step": 263 }, { "epoch": 0.024711002948471943, "grad_norm": 68.19354248046875, "learning_rate": 5e-05, "loss": 2.1439, "num_input_tokens_seen": 17495072, "step": 264 }, { "epoch": 0.024711002948471943, "loss": 1.9357881546020508, "loss_ce": 0.005612347275018692, "loss_iou": 0.8203125, "loss_num": 0.056640625, "loss_xval": 1.9296875, "num_input_tokens_seen": 17495072, "step": 264 }, { "epoch": 0.02480460523236767, "grad_norm": 21.633960723876953, "learning_rate": 5e-05, "loss": 1.9389, "num_input_tokens_seen": 17560868, "step": 265 }, { "epoch": 0.02480460523236767, "loss": 2.2004802227020264, "loss_ce": 0.005167837254703045, "loss_iou": 0.96875, "loss_num": 0.051025390625, "loss_xval": 2.1875, "num_input_tokens_seen": 17560868, "step": 265 }, { "epoch": 0.024898207516263395, "grad_norm": 13.91876220703125, "learning_rate": 5e-05, "loss": 2.563, "num_input_tokens_seen": 17626452, "step": 266 }, { "epoch": 0.024898207516263395, "loss": 2.716935157775879, "loss_ce": 0.0020913132466375828, "loss_iou": 1.09375, "loss_num": 0.1064453125, "loss_xval": 2.71875, "num_input_tokens_seen": 17626452, "step": 266 }, { "epoch": 0.024991809800159123, "grad_norm": 6.295694828033447, "learning_rate": 5e-05, "loss": 2.582, "num_input_tokens_seen": 17693108, "step": 267 }, { "epoch": 0.024991809800159123, "loss": 2.455904483795166, "loss_ce": 0.002779476810246706, "loss_iou": 1.015625, "loss_num": 0.08447265625, "loss_xval": 2.453125, "num_input_tokens_seen": 17693108, "step": 267 }, { "epoch": 0.02508541208405485, "grad_norm": 7.04426383972168, "learning_rate": 5e-05, "loss": 2.3078, "num_input_tokens_seen": 17759324, "step": 268 }, { "epoch": 0.02508541208405485, "loss": 2.317918300628662, "loss_ce": 0.005906580947339535, "loss_iou": 0.9140625, "loss_num": 0.09716796875, "loss_xval": 2.3125, "num_input_tokens_seen": 17759324, "step": 268 }, { "epoch": 0.02517901436795058, "grad_norm": 13.625930786132812, "learning_rate": 5e-05, "loss": 2.2721, "num_input_tokens_seen": 17826488, "step": 269 }, { "epoch": 0.02517901436795058, "loss": 2.192628860473633, "loss_ce": 0.003175870981067419, "loss_iou": 0.9453125, "loss_num": 0.05908203125, "loss_xval": 2.1875, "num_input_tokens_seen": 17826488, "step": 269 }, { "epoch": 0.025272616651846304, "grad_norm": 12.6675443649292, "learning_rate": 5e-05, "loss": 2.3457, "num_input_tokens_seen": 17892828, "step": 270 }, { "epoch": 0.025272616651846304, "loss": 2.5291335582733154, "loss_ce": 0.0017897638026624918, "loss_iou": 1.0703125, "loss_num": 0.076171875, "loss_xval": 2.53125, "num_input_tokens_seen": 17892828, "step": 270 }, { "epoch": 0.025366218935742032, "grad_norm": 15.294829368591309, "learning_rate": 5e-05, "loss": 1.9474, "num_input_tokens_seen": 17959500, "step": 271 }, { "epoch": 0.025366218935742032, "loss": 1.862996220588684, "loss_ce": 0.0025225295685231686, "loss_iou": 0.7890625, "loss_num": 0.0576171875, "loss_xval": 1.859375, "num_input_tokens_seen": 17959500, "step": 271 }, { "epoch": 0.02545982121963776, "grad_norm": 11.249760627746582, "learning_rate": 5e-05, "loss": 2.192, "num_input_tokens_seen": 18025824, "step": 272 }, { "epoch": 0.02545982121963776, "loss": 2.178846836090088, "loss_ce": 0.002089033368974924, "loss_iou": 0.9453125, "loss_num": 0.056884765625, "loss_xval": 2.171875, "num_input_tokens_seen": 18025824, "step": 272 }, { "epoch": 0.025553423503533488, "grad_norm": 8.352343559265137, "learning_rate": 5e-05, "loss": 2.4752, "num_input_tokens_seen": 18092556, "step": 273 }, { "epoch": 0.025553423503533488, "loss": 2.6020169258117676, "loss_ce": 0.006313713733106852, "loss_iou": 1.046875, "loss_num": 0.099609375, "loss_xval": 2.59375, "num_input_tokens_seen": 18092556, "step": 273 }, { "epoch": 0.025647025787429212, "grad_norm": 6.692131042480469, "learning_rate": 5e-05, "loss": 2.2705, "num_input_tokens_seen": 18158372, "step": 274 }, { "epoch": 0.025647025787429212, "loss": 2.384133815765381, "loss_ce": 0.0071808514185249805, "loss_iou": 0.9765625, "loss_num": 0.08447265625, "loss_xval": 2.375, "num_input_tokens_seen": 18158372, "step": 274 }, { "epoch": 0.02574062807132494, "grad_norm": 8.52961254119873, "learning_rate": 5e-05, "loss": 2.0383, "num_input_tokens_seen": 18223328, "step": 275 }, { "epoch": 0.02574062807132494, "loss": 1.9305155277252197, "loss_ce": 0.00473435502499342, "loss_iou": 0.83203125, "loss_num": 0.052001953125, "loss_xval": 1.921875, "num_input_tokens_seen": 18223328, "step": 275 }, { "epoch": 0.02583423035522067, "grad_norm": 29.442432403564453, "learning_rate": 5e-05, "loss": 2.4488, "num_input_tokens_seen": 18288760, "step": 276 }, { "epoch": 0.02583423035522067, "loss": 2.27962589263916, "loss_ce": 0.0032587742898613214, "loss_iou": 1.0234375, "loss_num": 0.045654296875, "loss_xval": 2.28125, "num_input_tokens_seen": 18288760, "step": 276 }, { "epoch": 0.025927832639116393, "grad_norm": 8.034605026245117, "learning_rate": 5e-05, "loss": 2.8575, "num_input_tokens_seen": 18355144, "step": 277 }, { "epoch": 0.025927832639116393, "loss": 2.8461673259735107, "loss_ce": 0.0063237641006708145, "loss_iou": 1.125, "loss_num": 0.11962890625, "loss_xval": 2.84375, "num_input_tokens_seen": 18355144, "step": 277 }, { "epoch": 0.02602143492301212, "grad_norm": 5.831605434417725, "learning_rate": 5e-05, "loss": 2.653, "num_input_tokens_seen": 18422816, "step": 278 }, { "epoch": 0.02602143492301212, "loss": 2.581786632537842, "loss_ce": 0.0036615575663745403, "loss_iou": 1.015625, "loss_num": 0.10791015625, "loss_xval": 2.578125, "num_input_tokens_seen": 18422816, "step": 278 }, { "epoch": 0.02611503720690785, "grad_norm": 6.519211292266846, "learning_rate": 5e-05, "loss": 2.2615, "num_input_tokens_seen": 18488748, "step": 279 }, { "epoch": 0.02611503720690785, "loss": 2.423325538635254, "loss_ce": 0.004380151629447937, "loss_iou": 0.9765625, "loss_num": 0.09375, "loss_xval": 2.421875, "num_input_tokens_seen": 18488748, "step": 279 }, { "epoch": 0.026208639490803577, "grad_norm": 8.102645874023438, "learning_rate": 5e-05, "loss": 2.273, "num_input_tokens_seen": 18554808, "step": 280 }, { "epoch": 0.026208639490803577, "loss": 2.238867998123169, "loss_ce": 0.006446137558668852, "loss_iou": 0.9375, "loss_num": 0.0712890625, "loss_xval": 2.234375, "num_input_tokens_seen": 18554808, "step": 280 }, { "epoch": 0.026302241774699302, "grad_norm": 7.74899959564209, "learning_rate": 5e-05, "loss": 2.0421, "num_input_tokens_seen": 18621248, "step": 281 }, { "epoch": 0.026302241774699302, "loss": 1.935258388519287, "loss_ce": 0.015092317946255207, "loss_iou": 0.80078125, "loss_num": 0.064453125, "loss_xval": 1.921875, "num_input_tokens_seen": 18621248, "step": 281 }, { "epoch": 0.02639584405859503, "grad_norm": 30.35383415222168, "learning_rate": 5e-05, "loss": 2.1578, "num_input_tokens_seen": 18687824, "step": 282 }, { "epoch": 0.02639584405859503, "loss": 2.1037988662719727, "loss_ce": 0.01102550607174635, "loss_iou": 0.890625, "loss_num": 0.0615234375, "loss_xval": 2.09375, "num_input_tokens_seen": 18687824, "step": 282 }, { "epoch": 0.026489446342490758, "grad_norm": 9.503483772277832, "learning_rate": 5e-05, "loss": 2.6332, "num_input_tokens_seen": 18753680, "step": 283 }, { "epoch": 0.026489446342490758, "loss": 2.821577787399292, "loss_ce": 0.003218400292098522, "loss_iou": 1.1875, "loss_num": 0.08837890625, "loss_xval": 2.8125, "num_input_tokens_seen": 18753680, "step": 283 }, { "epoch": 0.026583048626386482, "grad_norm": 10.875184059143066, "learning_rate": 5e-05, "loss": 2.2969, "num_input_tokens_seen": 18819844, "step": 284 }, { "epoch": 0.026583048626386482, "loss": 2.3365728855133057, "loss_ce": 0.005518095567822456, "loss_iou": 0.984375, "loss_num": 0.072265625, "loss_xval": 2.328125, "num_input_tokens_seen": 18819844, "step": 284 }, { "epoch": 0.02667665091028221, "grad_norm": 9.263823509216309, "learning_rate": 5e-05, "loss": 1.9969, "num_input_tokens_seen": 18885644, "step": 285 }, { "epoch": 0.02667665091028221, "loss": 1.907745122909546, "loss_ce": 0.0014951556222513318, "loss_iou": 0.8203125, "loss_num": 0.05224609375, "loss_xval": 1.90625, "num_input_tokens_seen": 18885644, "step": 285 }, { "epoch": 0.02677025319417794, "grad_norm": 29.129364013671875, "learning_rate": 5e-05, "loss": 2.6109, "num_input_tokens_seen": 18950844, "step": 286 }, { "epoch": 0.02677025319417794, "loss": 2.4338810443878174, "loss_ce": 0.003705211216583848, "loss_iou": 1.0859375, "loss_num": 0.052001953125, "loss_xval": 2.4375, "num_input_tokens_seen": 18950844, "step": 286 }, { "epoch": 0.026863855478073666, "grad_norm": 17.17812728881836, "learning_rate": 5e-05, "loss": 2.8428, "num_input_tokens_seen": 19016132, "step": 287 }, { "epoch": 0.026863855478073666, "loss": 3.132232189178467, "loss_ce": 0.005278948228806257, "loss_iou": 1.2421875, "loss_num": 0.12890625, "loss_xval": 3.125, "num_input_tokens_seen": 19016132, "step": 287 }, { "epoch": 0.02695745776196939, "grad_norm": 8.610562324523926, "learning_rate": 5e-05, "loss": 2.2594, "num_input_tokens_seen": 19081500, "step": 288 }, { "epoch": 0.02695745776196939, "loss": 2.117274761199951, "loss_ce": 0.007167115341871977, "loss_iou": 0.81640625, "loss_num": 0.0947265625, "loss_xval": 2.109375, "num_input_tokens_seen": 19081500, "step": 288 }, { "epoch": 0.02705106004586512, "grad_norm": 6.186338901519775, "learning_rate": 5e-05, "loss": 2.6048, "num_input_tokens_seen": 19147244, "step": 289 }, { "epoch": 0.02705106004586512, "loss": 2.6463186740875244, "loss_ce": 0.007646896876394749, "loss_iou": 1.0625, "loss_num": 0.10205078125, "loss_xval": 2.640625, "num_input_tokens_seen": 19147244, "step": 289 }, { "epoch": 0.027144662329760847, "grad_norm": 9.07485294342041, "learning_rate": 5e-05, "loss": 2.3572, "num_input_tokens_seen": 19215032, "step": 290 }, { "epoch": 0.027144662329760847, "loss": 2.259942054748535, "loss_ce": 0.0060356780886650085, "loss_iou": 0.95703125, "loss_num": 0.06884765625, "loss_xval": 2.25, "num_input_tokens_seen": 19215032, "step": 290 }, { "epoch": 0.02723826461365657, "grad_norm": 8.642754554748535, "learning_rate": 5e-05, "loss": 2.4579, "num_input_tokens_seen": 19280924, "step": 291 }, { "epoch": 0.02723826461365657, "loss": 2.3687572479248047, "loss_ce": 0.0054758149199187756, "loss_iou": 1.0, "loss_num": 0.072265625, "loss_xval": 2.359375, "num_input_tokens_seen": 19280924, "step": 291 }, { "epoch": 0.0273318668975523, "grad_norm": 7.2403483390808105, "learning_rate": 5e-05, "loss": 2.2344, "num_input_tokens_seen": 19346720, "step": 292 }, { "epoch": 0.0273318668975523, "loss": 2.137115478515625, "loss_ce": 0.007232709787786007, "loss_iou": 0.91015625, "loss_num": 0.061767578125, "loss_xval": 2.125, "num_input_tokens_seen": 19346720, "step": 292 }, { "epoch": 0.027425469181448028, "grad_norm": 8.314435005187988, "learning_rate": 5e-05, "loss": 2.0899, "num_input_tokens_seen": 19412088, "step": 293 }, { "epoch": 0.027425469181448028, "loss": 1.943402886390686, "loss_ce": 0.0049263592809438705, "loss_iou": 0.82421875, "loss_num": 0.0576171875, "loss_xval": 1.9375, "num_input_tokens_seen": 19412088, "step": 293 }, { "epoch": 0.027519071465343756, "grad_norm": 9.556326866149902, "learning_rate": 5e-05, "loss": 2.2337, "num_input_tokens_seen": 19479208, "step": 294 }, { "epoch": 0.027519071465343756, "loss": 2.3005897998809814, "loss_ce": 0.0037147465627640486, "loss_iou": 0.98046875, "loss_num": 0.06640625, "loss_xval": 2.296875, "num_input_tokens_seen": 19479208, "step": 294 }, { "epoch": 0.02761267374923948, "grad_norm": 13.136784553527832, "learning_rate": 5e-05, "loss": 2.1323, "num_input_tokens_seen": 19544596, "step": 295 }, { "epoch": 0.02761267374923948, "loss": 2.2036406993865967, "loss_ce": 0.010281315073370934, "loss_iou": 0.95703125, "loss_num": 0.05615234375, "loss_xval": 2.1875, "num_input_tokens_seen": 19544596, "step": 295 }, { "epoch": 0.02770627603313521, "grad_norm": 5.934866428375244, "learning_rate": 5e-05, "loss": 2.0263, "num_input_tokens_seen": 19610172, "step": 296 }, { "epoch": 0.02770627603313521, "loss": 2.067943572998047, "loss_ce": 0.006664199288934469, "loss_iou": 0.859375, "loss_num": 0.068359375, "loss_xval": 2.0625, "num_input_tokens_seen": 19610172, "step": 296 }, { "epoch": 0.027799878317030936, "grad_norm": 22.314382553100586, "learning_rate": 5e-05, "loss": 1.9041, "num_input_tokens_seen": 19675376, "step": 297 }, { "epoch": 0.027799878317030936, "loss": 1.9297223091125488, "loss_ce": 0.0033306607510894537, "loss_iou": 0.81640625, "loss_num": 0.05859375, "loss_xval": 1.9296875, "num_input_tokens_seen": 19675376, "step": 297 }, { "epoch": 0.027893480600926664, "grad_norm": 13.681904792785645, "learning_rate": 5e-05, "loss": 2.1424, "num_input_tokens_seen": 19742156, "step": 298 }, { "epoch": 0.027893480600926664, "loss": 2.1528525352478027, "loss_ce": 0.007344689220190048, "loss_iou": 0.9375, "loss_num": 0.054931640625, "loss_xval": 2.140625, "num_input_tokens_seen": 19742156, "step": 298 }, { "epoch": 0.02798708288482239, "grad_norm": 14.090749740600586, "learning_rate": 5e-05, "loss": 2.1478, "num_input_tokens_seen": 19807300, "step": 299 }, { "epoch": 0.02798708288482239, "loss": 2.121377944946289, "loss_ce": 0.0027258479967713356, "loss_iou": 0.89453125, "loss_num": 0.06689453125, "loss_xval": 2.125, "num_input_tokens_seen": 19807300, "step": 299 }, { "epoch": 0.028080685168718117, "grad_norm": 12.29017448425293, "learning_rate": 5e-05, "loss": 2.1001, "num_input_tokens_seen": 19872908, "step": 300 }, { "epoch": 0.028080685168718117, "loss": 2.1516480445861816, "loss_ce": 0.00321069173514843, "loss_iou": 0.93359375, "loss_num": 0.05712890625, "loss_xval": 2.15625, "num_input_tokens_seen": 19872908, "step": 300 }, { "epoch": 0.028174287452613845, "grad_norm": 11.03828239440918, "learning_rate": 5e-05, "loss": 2.3709, "num_input_tokens_seen": 19939296, "step": 301 }, { "epoch": 0.028174287452613845, "loss": 2.3694803714752197, "loss_ce": 0.007175634615123272, "loss_iou": 0.98046875, "loss_num": 0.0791015625, "loss_xval": 2.359375, "num_input_tokens_seen": 19939296, "step": 301 }, { "epoch": 0.02826788973650957, "grad_norm": 18.373350143432617, "learning_rate": 5e-05, "loss": 2.1843, "num_input_tokens_seen": 20005232, "step": 302 }, { "epoch": 0.02826788973650957, "loss": 2.271916389465332, "loss_ce": 0.005314883776009083, "loss_iou": 0.98046875, "loss_num": 0.061279296875, "loss_xval": 2.265625, "num_input_tokens_seen": 20005232, "step": 302 }, { "epoch": 0.028361492020405298, "grad_norm": 10.74294376373291, "learning_rate": 5e-05, "loss": 2.1218, "num_input_tokens_seen": 20071088, "step": 303 }, { "epoch": 0.028361492020405298, "loss": 2.2172842025756836, "loss_ce": 0.007323308382183313, "loss_iou": 0.95703125, "loss_num": 0.05908203125, "loss_xval": 2.203125, "num_input_tokens_seen": 20071088, "step": 303 }, { "epoch": 0.028455094304301026, "grad_norm": 15.19969367980957, "learning_rate": 5e-05, "loss": 1.9467, "num_input_tokens_seen": 20137280, "step": 304 }, { "epoch": 0.028455094304301026, "loss": 2.124724864959717, "loss_ce": 0.007537480443716049, "loss_iou": 0.9140625, "loss_num": 0.058349609375, "loss_xval": 2.125, "num_input_tokens_seen": 20137280, "step": 304 }, { "epoch": 0.028548696588196754, "grad_norm": 15.501870155334473, "learning_rate": 5e-05, "loss": 2.361, "num_input_tokens_seen": 20203344, "step": 305 }, { "epoch": 0.028548696588196754, "loss": 2.3150863647460938, "loss_ce": 0.010398984886705875, "loss_iou": 0.984375, "loss_num": 0.06689453125, "loss_xval": 2.3125, "num_input_tokens_seen": 20203344, "step": 305 }, { "epoch": 0.028642298872092478, "grad_norm": 9.604381561279297, "learning_rate": 5e-05, "loss": 1.8154, "num_input_tokens_seen": 20269392, "step": 306 }, { "epoch": 0.028642298872092478, "loss": 1.6880543231964111, "loss_ce": 0.003728260286152363, "loss_iou": 0.7265625, "loss_num": 0.045654296875, "loss_xval": 1.6875, "num_input_tokens_seen": 20269392, "step": 306 }, { "epoch": 0.028735901155988206, "grad_norm": 9.203229904174805, "learning_rate": 5e-05, "loss": 2.072, "num_input_tokens_seen": 20336624, "step": 307 }, { "epoch": 0.028735901155988206, "loss": 2.2043728828430176, "loss_ce": 0.003201248124241829, "loss_iou": 0.9609375, "loss_num": 0.05517578125, "loss_xval": 2.203125, "num_input_tokens_seen": 20336624, "step": 307 }, { "epoch": 0.028829503439883934, "grad_norm": 10.39322566986084, "learning_rate": 5e-05, "loss": 2.0674, "num_input_tokens_seen": 20402832, "step": 308 }, { "epoch": 0.028829503439883934, "loss": 1.8517069816589355, "loss_ce": 0.006492123007774353, "loss_iou": 0.7890625, "loss_num": 0.05322265625, "loss_xval": 1.84375, "num_input_tokens_seen": 20402832, "step": 308 }, { "epoch": 0.02892310572377966, "grad_norm": 12.747912406921387, "learning_rate": 5e-05, "loss": 1.9345, "num_input_tokens_seen": 20468660, "step": 309 }, { "epoch": 0.02892310572377966, "loss": 1.903911828994751, "loss_ce": 0.0054743001237511635, "loss_iou": 0.8359375, "loss_num": 0.04541015625, "loss_xval": 1.8984375, "num_input_tokens_seen": 20468660, "step": 309 }, { "epoch": 0.029016708007675387, "grad_norm": 14.974900245666504, "learning_rate": 5e-05, "loss": 2.5532, "num_input_tokens_seen": 20535584, "step": 310 }, { "epoch": 0.029016708007675387, "loss": 2.6041457653045654, "loss_ce": 0.0035598722752183676, "loss_iou": 1.109375, "loss_num": 0.07470703125, "loss_xval": 2.59375, "num_input_tokens_seen": 20535584, "step": 310 }, { "epoch": 0.029110310291571115, "grad_norm": 6.866947650909424, "learning_rate": 5e-05, "loss": 2.3236, "num_input_tokens_seen": 20602124, "step": 311 }, { "epoch": 0.029110310291571115, "loss": 2.388014316558838, "loss_ce": 0.007154986262321472, "loss_iou": 1.0, "loss_num": 0.0751953125, "loss_xval": 2.375, "num_input_tokens_seen": 20602124, "step": 311 }, { "epoch": 0.029203912575466843, "grad_norm": 16.738218307495117, "learning_rate": 5e-05, "loss": 2.1105, "num_input_tokens_seen": 20668156, "step": 312 }, { "epoch": 0.029203912575466843, "loss": 2.0506856441497803, "loss_ce": 0.0062519097700715065, "loss_iou": 0.859375, "loss_num": 0.0654296875, "loss_xval": 2.046875, "num_input_tokens_seen": 20668156, "step": 312 }, { "epoch": 0.029297514859362567, "grad_norm": 41.55394744873047, "learning_rate": 5e-05, "loss": 2.0664, "num_input_tokens_seen": 20734828, "step": 313 }, { "epoch": 0.029297514859362567, "loss": 1.9767587184906006, "loss_ce": 0.005078917369246483, "loss_iou": 0.85546875, "loss_num": 0.05322265625, "loss_xval": 1.96875, "num_input_tokens_seen": 20734828, "step": 313 }, { "epoch": 0.029391117143258295, "grad_norm": 26.242605209350586, "learning_rate": 5e-05, "loss": 2.488, "num_input_tokens_seen": 20801332, "step": 314 }, { "epoch": 0.029391117143258295, "loss": 2.4511401653289795, "loss_ce": 0.0038744183257222176, "loss_iou": 1.09375, "loss_num": 0.053466796875, "loss_xval": 2.453125, "num_input_tokens_seen": 20801332, "step": 314 }, { "epoch": 0.029484719427154023, "grad_norm": 7.956735610961914, "learning_rate": 5e-05, "loss": 2.3767, "num_input_tokens_seen": 20867284, "step": 315 }, { "epoch": 0.029484719427154023, "loss": 2.5748186111450195, "loss_ce": 0.0064592985436320305, "loss_iou": 1.046875, "loss_num": 0.0927734375, "loss_xval": 2.5625, "num_input_tokens_seen": 20867284, "step": 315 }, { "epoch": 0.029578321711049748, "grad_norm": 5.501648426055908, "learning_rate": 5e-05, "loss": 2.4083, "num_input_tokens_seen": 20932536, "step": 316 }, { "epoch": 0.029578321711049748, "loss": 2.3189499378204346, "loss_ce": 0.002543692011386156, "loss_iou": 0.953125, "loss_num": 0.08203125, "loss_xval": 2.3125, "num_input_tokens_seen": 20932536, "step": 316 }, { "epoch": 0.029671923994945476, "grad_norm": 5.204927444458008, "learning_rate": 5e-05, "loss": 2.2079, "num_input_tokens_seen": 20997788, "step": 317 }, { "epoch": 0.029671923994945476, "loss": 2.251739263534546, "loss_ce": 0.0017392246518284082, "loss_iou": 0.90625, "loss_num": 0.0869140625, "loss_xval": 2.25, "num_input_tokens_seen": 20997788, "step": 317 }, { "epoch": 0.029765526278841204, "grad_norm": 13.988932609558105, "learning_rate": 5e-05, "loss": 2.1701, "num_input_tokens_seen": 21063624, "step": 318 }, { "epoch": 0.029765526278841204, "loss": 2.1111536026000977, "loss_ce": 0.007637982256710529, "loss_iou": 0.91015625, "loss_num": 0.057861328125, "loss_xval": 2.109375, "num_input_tokens_seen": 21063624, "step": 318 }, { "epoch": 0.029859128562736932, "grad_norm": 41.570011138916016, "learning_rate": 5e-05, "loss": 2.1344, "num_input_tokens_seen": 21129904, "step": 319 }, { "epoch": 0.029859128562736932, "loss": 2.3227148056030273, "loss_ce": 0.005332026164978743, "loss_iou": 1.0078125, "loss_num": 0.060546875, "loss_xval": 2.3125, "num_input_tokens_seen": 21129904, "step": 319 }, { "epoch": 0.029952730846632657, "grad_norm": 20.925189971923828, "learning_rate": 5e-05, "loss": 2.1157, "num_input_tokens_seen": 21195652, "step": 320 }, { "epoch": 0.029952730846632657, "loss": 2.256359815597534, "loss_ce": 0.006359857507050037, "loss_iou": 0.9375, "loss_num": 0.07470703125, "loss_xval": 2.25, "num_input_tokens_seen": 21195652, "step": 320 }, { "epoch": 0.030046333130528385, "grad_norm": 14.465229034423828, "learning_rate": 5e-05, "loss": 2.0414, "num_input_tokens_seen": 21261820, "step": 321 }, { "epoch": 0.030046333130528385, "loss": 2.1699764728546143, "loss_ce": 0.002129678148776293, "loss_iou": 0.89453125, "loss_num": 0.0751953125, "loss_xval": 2.171875, "num_input_tokens_seen": 21261820, "step": 321 }, { "epoch": 0.030139935414424113, "grad_norm": 16.589031219482422, "learning_rate": 5e-05, "loss": 2.0181, "num_input_tokens_seen": 21326652, "step": 322 }, { "epoch": 0.030139935414424113, "loss": 1.9930992126464844, "loss_ce": 0.004818132147192955, "loss_iou": 0.8359375, "loss_num": 0.06396484375, "loss_xval": 1.984375, "num_input_tokens_seen": 21326652, "step": 322 }, { "epoch": 0.030233537698319837, "grad_norm": 8.369630813598633, "learning_rate": 5e-05, "loss": 2.4247, "num_input_tokens_seen": 21392536, "step": 323 }, { "epoch": 0.030233537698319837, "loss": 2.5111265182495117, "loss_ce": 0.007220389321446419, "loss_iou": 1.0390625, "loss_num": 0.0859375, "loss_xval": 2.5, "num_input_tokens_seen": 21392536, "step": 323 }, { "epoch": 0.030327139982215565, "grad_norm": 13.11899471282959, "learning_rate": 5e-05, "loss": 1.9538, "num_input_tokens_seen": 21459492, "step": 324 }, { "epoch": 0.030327139982215565, "loss": 1.806440830230713, "loss_ce": 0.006636073812842369, "loss_iou": 0.77734375, "loss_num": 0.049560546875, "loss_xval": 1.796875, "num_input_tokens_seen": 21459492, "step": 324 }, { "epoch": 0.030420742266111293, "grad_norm": 16.149150848388672, "learning_rate": 5e-05, "loss": 1.9541, "num_input_tokens_seen": 21525064, "step": 325 }, { "epoch": 0.030420742266111293, "loss": 2.129866123199463, "loss_ce": 0.006819196045398712, "loss_iou": 0.9375, "loss_num": 0.049560546875, "loss_xval": 2.125, "num_input_tokens_seen": 21525064, "step": 325 }, { "epoch": 0.03051434455000702, "grad_norm": 14.965729713439941, "learning_rate": 5e-05, "loss": 2.3008, "num_input_tokens_seen": 21590260, "step": 326 }, { "epoch": 0.03051434455000702, "loss": 2.1891860961914062, "loss_ce": 0.0026626817416399717, "loss_iou": 0.9609375, "loss_num": 0.05322265625, "loss_xval": 2.1875, "num_input_tokens_seen": 21590260, "step": 326 }, { "epoch": 0.030607946833902746, "grad_norm": 135.52027893066406, "learning_rate": 5e-05, "loss": 2.1703, "num_input_tokens_seen": 21657200, "step": 327 }, { "epoch": 0.030607946833902746, "loss": 2.159827947616577, "loss_ce": 0.0026013727765530348, "loss_iou": 0.921875, "loss_num": 0.0625, "loss_xval": 2.15625, "num_input_tokens_seen": 21657200, "step": 327 }, { "epoch": 0.030701549117798474, "grad_norm": 10.350919723510742, "learning_rate": 5e-05, "loss": 2.3369, "num_input_tokens_seen": 21724264, "step": 328 }, { "epoch": 0.030701549117798474, "loss": 2.3039512634277344, "loss_ce": 0.0031697875820100307, "loss_iou": 0.9609375, "loss_num": 0.0751953125, "loss_xval": 2.296875, "num_input_tokens_seen": 21724264, "step": 328 }, { "epoch": 0.030795151401694202, "grad_norm": 16.0537052154541, "learning_rate": 5e-05, "loss": 1.9921, "num_input_tokens_seen": 21790456, "step": 329 }, { "epoch": 0.030795151401694202, "loss": 2.126025438308716, "loss_ce": 0.0020019442308694124, "loss_iou": 0.890625, "loss_num": 0.06884765625, "loss_xval": 2.125, "num_input_tokens_seen": 21790456, "step": 329 }, { "epoch": 0.03088875368558993, "grad_norm": 17.134672164916992, "learning_rate": 5e-05, "loss": 2.2526, "num_input_tokens_seen": 21856772, "step": 330 }, { "epoch": 0.03088875368558993, "loss": 2.4109668731689453, "loss_ce": 0.004716916009783745, "loss_iou": 1.0234375, "loss_num": 0.072265625, "loss_xval": 2.40625, "num_input_tokens_seen": 21856772, "step": 330 }, { "epoch": 0.030982355969485655, "grad_norm": 19.04180335998535, "learning_rate": 5e-05, "loss": 2.216, "num_input_tokens_seen": 21922996, "step": 331 }, { "epoch": 0.030982355969485655, "loss": 2.4084181785583496, "loss_ce": 0.0021683042868971825, "loss_iou": 1.046875, "loss_num": 0.0625, "loss_xval": 2.40625, "num_input_tokens_seen": 21922996, "step": 331 }, { "epoch": 0.031075958253381383, "grad_norm": 8.35339641571045, "learning_rate": 5e-05, "loss": 2.229, "num_input_tokens_seen": 21989376, "step": 332 }, { "epoch": 0.031075958253381383, "loss": 2.103503465652466, "loss_ce": 0.005847356282174587, "loss_iou": 0.86328125, "loss_num": 0.07421875, "loss_xval": 2.09375, "num_input_tokens_seen": 21989376, "step": 332 }, { "epoch": 0.03116956053727711, "grad_norm": 11.053943634033203, "learning_rate": 5e-05, "loss": 1.9455, "num_input_tokens_seen": 22054748, "step": 333 }, { "epoch": 0.03116956053727711, "loss": 1.8045395612716675, "loss_ce": 0.003758274018764496, "loss_iou": 0.76953125, "loss_num": 0.05224609375, "loss_xval": 1.796875, "num_input_tokens_seen": 22054748, "step": 333 }, { "epoch": 0.031263162821172835, "grad_norm": 12.617913246154785, "learning_rate": 5e-05, "loss": 2.0766, "num_input_tokens_seen": 22121664, "step": 334 }, { "epoch": 0.031263162821172835, "loss": 1.9510293006896973, "loss_ce": 0.004740268923342228, "loss_iou": 0.85546875, "loss_num": 0.04736328125, "loss_xval": 1.9453125, "num_input_tokens_seen": 22121664, "step": 334 }, { "epoch": 0.03135676510506857, "grad_norm": 11.487972259521484, "learning_rate": 5e-05, "loss": 1.9995, "num_input_tokens_seen": 22187308, "step": 335 }, { "epoch": 0.03135676510506857, "loss": 1.9693429470062256, "loss_ce": 0.00547579862177372, "loss_iou": 0.8203125, "loss_num": 0.06494140625, "loss_xval": 1.9609375, "num_input_tokens_seen": 22187308, "step": 335 }, { "epoch": 0.03145036738896429, "grad_norm": 25.225971221923828, "learning_rate": 5e-05, "loss": 2.189, "num_input_tokens_seen": 22254452, "step": 336 }, { "epoch": 0.03145036738896429, "loss": 2.1077098846435547, "loss_ce": 0.004194378387182951, "loss_iou": 0.9296875, "loss_num": 0.048583984375, "loss_xval": 2.109375, "num_input_tokens_seen": 22254452, "step": 336 }, { "epoch": 0.031543969672860016, "grad_norm": 8.82065486907959, "learning_rate": 5e-05, "loss": 2.6442, "num_input_tokens_seen": 22320172, "step": 337 }, { "epoch": 0.031543969672860016, "loss": 2.5134079456329346, "loss_ce": 0.007548556663095951, "loss_iou": 1.046875, "loss_num": 0.0830078125, "loss_xval": 2.5, "num_input_tokens_seen": 22320172, "step": 337 }, { "epoch": 0.03163757195675575, "grad_norm": 9.14218807220459, "learning_rate": 5e-05, "loss": 2.1566, "num_input_tokens_seen": 22385548, "step": 338 }, { "epoch": 0.03163757195675575, "loss": 1.9683382511138916, "loss_ce": 0.004959251265972853, "loss_iou": 0.7734375, "loss_num": 0.0830078125, "loss_xval": 1.9609375, "num_input_tokens_seen": 22385548, "step": 338 }, { "epoch": 0.03173117424065147, "grad_norm": 7.104374408721924, "learning_rate": 5e-05, "loss": 2.0937, "num_input_tokens_seen": 22451460, "step": 339 }, { "epoch": 0.03173117424065147, "loss": 2.0578629970550537, "loss_ce": 0.0021988661028444767, "loss_iou": 0.875, "loss_num": 0.060546875, "loss_xval": 2.0625, "num_input_tokens_seen": 22451460, "step": 339 }, { "epoch": 0.031824776524547196, "grad_norm": 7.342142105102539, "learning_rate": 5e-05, "loss": 1.9958, "num_input_tokens_seen": 22519000, "step": 340 }, { "epoch": 0.031824776524547196, "loss": 2.2122642993927, "loss_ce": 0.007186007685959339, "loss_iou": 0.92578125, "loss_num": 0.06982421875, "loss_xval": 2.203125, "num_input_tokens_seen": 22519000, "step": 340 }, { "epoch": 0.03191837880844293, "grad_norm": 15.019513130187988, "learning_rate": 5e-05, "loss": 2.0717, "num_input_tokens_seen": 22584336, "step": 341 }, { "epoch": 0.03191837880844293, "loss": 2.1923344135284424, "loss_ce": 0.006787460297346115, "loss_iou": 0.8984375, "loss_num": 0.078125, "loss_xval": 2.1875, "num_input_tokens_seen": 22584336, "step": 341 }, { "epoch": 0.03201198109233865, "grad_norm": 13.888160705566406, "learning_rate": 5e-05, "loss": 2.2871, "num_input_tokens_seen": 22650512, "step": 342 }, { "epoch": 0.03201198109233865, "loss": 2.2013368606567383, "loss_ce": 0.006024563685059547, "loss_iou": 0.94921875, "loss_num": 0.06005859375, "loss_xval": 2.1875, "num_input_tokens_seen": 22650512, "step": 342 }, { "epoch": 0.03210558337623438, "grad_norm": 51.5828971862793, "learning_rate": 5e-05, "loss": 2.2569, "num_input_tokens_seen": 22717600, "step": 343 }, { "epoch": 0.03210558337623438, "loss": 2.141033411026001, "loss_ce": 0.007244350388646126, "loss_iou": 0.921875, "loss_num": 0.0576171875, "loss_xval": 2.140625, "num_input_tokens_seen": 22717600, "step": 343 }, { "epoch": 0.03219918566013011, "grad_norm": 19.39960479736328, "learning_rate": 5e-05, "loss": 2.0903, "num_input_tokens_seen": 22784816, "step": 344 }, { "epoch": 0.03219918566013011, "loss": 2.307943820953369, "loss_ce": 0.0071627311408519745, "loss_iou": 0.9609375, "loss_num": 0.07470703125, "loss_xval": 2.296875, "num_input_tokens_seen": 22784816, "step": 344 }, { "epoch": 0.03229278794402583, "grad_norm": 8.64476203918457, "learning_rate": 5e-05, "loss": 1.9282, "num_input_tokens_seen": 22850788, "step": 345 }, { "epoch": 0.03229278794402583, "loss": 1.9108871221542358, "loss_ce": 0.009031591936945915, "loss_iou": 0.828125, "loss_num": 0.050048828125, "loss_xval": 1.8984375, "num_input_tokens_seen": 22850788, "step": 345 }, { "epoch": 0.032386390227921565, "grad_norm": 13.204330444335938, "learning_rate": 5e-05, "loss": 1.729, "num_input_tokens_seen": 22918332, "step": 346 }, { "epoch": 0.032386390227921565, "loss": 1.6758954524993896, "loss_ce": 0.006461820099502802, "loss_iou": 0.70703125, "loss_num": 0.05126953125, "loss_xval": 1.671875, "num_input_tokens_seen": 22918332, "step": 346 }, { "epoch": 0.03247999251181729, "grad_norm": 34.31864929199219, "learning_rate": 5e-05, "loss": 2.4913, "num_input_tokens_seen": 22984840, "step": 347 }, { "epoch": 0.03247999251181729, "loss": 2.3904194831848145, "loss_ce": 0.003700823523104191, "loss_iou": 1.0703125, "loss_num": 0.049560546875, "loss_xval": 2.390625, "num_input_tokens_seen": 22984840, "step": 347 }, { "epoch": 0.032573594795713014, "grad_norm": 7.036735534667969, "learning_rate": 5e-05, "loss": 2.7401, "num_input_tokens_seen": 23051540, "step": 348 }, { "epoch": 0.032573594795713014, "loss": 2.7596395015716553, "loss_ce": 0.007686318829655647, "loss_iou": 1.125, "loss_num": 0.1005859375, "loss_xval": 2.75, "num_input_tokens_seen": 23051540, "step": 348 }, { "epoch": 0.032667197079608745, "grad_norm": 6.597587585449219, "learning_rate": 5e-05, "loss": 2.674, "num_input_tokens_seen": 23117384, "step": 349 }, { "epoch": 0.032667197079608745, "loss": 2.6734397411346436, "loss_ce": 0.007424092385917902, "loss_iou": 1.0625, "loss_num": 0.10986328125, "loss_xval": 2.671875, "num_input_tokens_seen": 23117384, "step": 349 }, { "epoch": 0.03276079936350447, "grad_norm": 9.384273529052734, "learning_rate": 5e-05, "loss": 2.3646, "num_input_tokens_seen": 23183040, "step": 350 }, { "epoch": 0.03276079936350447, "loss": 2.3336098194122314, "loss_ce": 0.005484943278133869, "loss_iou": 0.953125, "loss_num": 0.083984375, "loss_xval": 2.328125, "num_input_tokens_seen": 23183040, "step": 350 }, { "epoch": 0.032854401647400194, "grad_norm": 25.62833023071289, "learning_rate": 5e-05, "loss": 2.3526, "num_input_tokens_seen": 23249424, "step": 351 }, { "epoch": 0.032854401647400194, "loss": 2.3824148178100586, "loss_ce": 0.003508599940687418, "loss_iou": 0.96484375, "loss_num": 0.09033203125, "loss_xval": 2.375, "num_input_tokens_seen": 23249424, "step": 351 }, { "epoch": 0.032948003931295926, "grad_norm": 11.983054161071777, "learning_rate": 5e-05, "loss": 2.177, "num_input_tokens_seen": 23315636, "step": 352 }, { "epoch": 0.032948003931295926, "loss": 2.2440249919891357, "loss_ce": 0.005743754096329212, "loss_iou": 0.953125, "loss_num": 0.06689453125, "loss_xval": 2.234375, "num_input_tokens_seen": 23315636, "step": 352 }, { "epoch": 0.03304160621519165, "grad_norm": 218.51168823242188, "learning_rate": 5e-05, "loss": 1.9974, "num_input_tokens_seen": 23382552, "step": 353 }, { "epoch": 0.03304160621519165, "loss": 1.7904752492904663, "loss_ce": 0.0036100444849580526, "loss_iou": 0.765625, "loss_num": 0.050537109375, "loss_xval": 1.7890625, "num_input_tokens_seen": 23382552, "step": 353 }, { "epoch": 0.033135208499087375, "grad_norm": 10.059981346130371, "learning_rate": 5e-05, "loss": 2.1556, "num_input_tokens_seen": 23447920, "step": 354 }, { "epoch": 0.033135208499087375, "loss": 1.9498950242996216, "loss_ce": 0.0026294696144759655, "loss_iou": 0.84765625, "loss_num": 0.05078125, "loss_xval": 1.9453125, "num_input_tokens_seen": 23447920, "step": 354 }, { "epoch": 0.033228810782983106, "grad_norm": 23.80320167541504, "learning_rate": 5e-05, "loss": 2.1835, "num_input_tokens_seen": 23514608, "step": 355 }, { "epoch": 0.033228810782983106, "loss": 2.131493091583252, "loss_ce": 0.013817189261317253, "loss_iou": 0.86328125, "loss_num": 0.07861328125, "loss_xval": 2.125, "num_input_tokens_seen": 23514608, "step": 355 }, { "epoch": 0.03332241306687883, "grad_norm": 12.59532356262207, "learning_rate": 5e-05, "loss": 2.2421, "num_input_tokens_seen": 23581888, "step": 356 }, { "epoch": 0.03332241306687883, "loss": 2.2068097591400146, "loss_ce": 0.0036847481969743967, "loss_iou": 0.9375, "loss_num": 0.0654296875, "loss_xval": 2.203125, "num_input_tokens_seen": 23581888, "step": 356 }, { "epoch": 0.033416015350774556, "grad_norm": 45.76629638671875, "learning_rate": 5e-05, "loss": 2.0625, "num_input_tokens_seen": 23648560, "step": 357 }, { "epoch": 0.033416015350774556, "loss": 2.1402082443237305, "loss_ce": 0.0044661033898591995, "loss_iou": 0.91796875, "loss_num": 0.059326171875, "loss_xval": 2.140625, "num_input_tokens_seen": 23648560, "step": 357 }, { "epoch": 0.03350961763467029, "grad_norm": 12.574625968933105, "learning_rate": 5e-05, "loss": 2.0878, "num_input_tokens_seen": 23714048, "step": 358 }, { "epoch": 0.03350961763467029, "loss": 2.0568721294403076, "loss_ce": 0.0031611050944775343, "loss_iou": 0.890625, "loss_num": 0.05419921875, "loss_xval": 2.046875, "num_input_tokens_seen": 23714048, "step": 358 }, { "epoch": 0.03360321991856601, "grad_norm": 29.129892349243164, "learning_rate": 5e-05, "loss": 2.2022, "num_input_tokens_seen": 23780536, "step": 359 }, { "epoch": 0.03360321991856601, "loss": 2.2413330078125, "loss_ce": 0.006957824341952801, "loss_iou": 1.0078125, "loss_num": 0.0439453125, "loss_xval": 2.234375, "num_input_tokens_seen": 23780536, "step": 359 }, { "epoch": 0.03369682220246174, "grad_norm": 10.787083625793457, "learning_rate": 5e-05, "loss": 2.3958, "num_input_tokens_seen": 23846064, "step": 360 }, { "epoch": 0.03369682220246174, "loss": 2.3584518432617188, "loss_ce": 0.004936321172863245, "loss_iou": 0.9609375, "loss_num": 0.08544921875, "loss_xval": 2.359375, "num_input_tokens_seen": 23846064, "step": 360 }, { "epoch": 0.03379042448635747, "grad_norm": 26.4855899810791, "learning_rate": 5e-05, "loss": 2.231, "num_input_tokens_seen": 23912776, "step": 361 }, { "epoch": 0.03379042448635747, "loss": 2.137748956680298, "loss_ce": 0.004936504643410444, "loss_iou": 0.90234375, "loss_num": 0.0654296875, "loss_xval": 2.125, "num_input_tokens_seen": 23912776, "step": 361 }, { "epoch": 0.03388402677025319, "grad_norm": 10.368152618408203, "learning_rate": 5e-05, "loss": 1.9874, "num_input_tokens_seen": 23978032, "step": 362 }, { "epoch": 0.03388402677025319, "loss": 1.9442696571350098, "loss_ce": 0.002741375006735325, "loss_iou": 0.8046875, "loss_num": 0.06640625, "loss_xval": 1.9453125, "num_input_tokens_seen": 23978032, "step": 362 }, { "epoch": 0.033977629054148924, "grad_norm": 21.596967697143555, "learning_rate": 5e-05, "loss": 2.0939, "num_input_tokens_seen": 24043472, "step": 363 }, { "epoch": 0.033977629054148924, "loss": 1.9262769222259521, "loss_ce": 0.001472302945330739, "loss_iou": 0.8359375, "loss_num": 0.05078125, "loss_xval": 1.921875, "num_input_tokens_seen": 24043472, "step": 363 }, { "epoch": 0.03407123133804465, "grad_norm": 10.161048889160156, "learning_rate": 5e-05, "loss": 2.0024, "num_input_tokens_seen": 24109644, "step": 364 }, { "epoch": 0.03407123133804465, "loss": 1.8891668319702148, "loss_ce": 0.0029362887144088745, "loss_iou": 0.8125, "loss_num": 0.05224609375, "loss_xval": 1.8828125, "num_input_tokens_seen": 24109644, "step": 364 }, { "epoch": 0.03416483362194037, "grad_norm": 27.79102325439453, "learning_rate": 5e-05, "loss": 2.0082, "num_input_tokens_seen": 24176184, "step": 365 }, { "epoch": 0.03416483362194037, "loss": 2.2154271602630615, "loss_ce": 0.0035131131298840046, "loss_iou": 0.984375, "loss_num": 0.049072265625, "loss_xval": 2.21875, "num_input_tokens_seen": 24176184, "step": 365 }, { "epoch": 0.034258435905836104, "grad_norm": 10.18643569946289, "learning_rate": 5e-05, "loss": 2.6185, "num_input_tokens_seen": 24241780, "step": 366 }, { "epoch": 0.034258435905836104, "loss": 2.62292218208313, "loss_ce": 0.007687842007726431, "loss_iou": 1.078125, "loss_num": 0.09033203125, "loss_xval": 2.609375, "num_input_tokens_seen": 24241780, "step": 366 }, { "epoch": 0.03435203818973183, "grad_norm": 9.291022300720215, "learning_rate": 5e-05, "loss": 2.2128, "num_input_tokens_seen": 24308588, "step": 367 }, { "epoch": 0.03435203818973183, "loss": 2.3625073432922363, "loss_ce": 0.005085327662527561, "loss_iou": 0.97265625, "loss_num": 0.08203125, "loss_xval": 2.359375, "num_input_tokens_seen": 24308588, "step": 367 }, { "epoch": 0.03444564047362755, "grad_norm": 12.33276081085205, "learning_rate": 5e-05, "loss": 2.1762, "num_input_tokens_seen": 24374416, "step": 368 }, { "epoch": 0.03444564047362755, "loss": 2.2196903228759766, "loss_ce": 0.006799739319831133, "loss_iou": 0.921875, "loss_num": 0.0732421875, "loss_xval": 2.21875, "num_input_tokens_seen": 24374416, "step": 368 }, { "epoch": 0.034539242757523285, "grad_norm": 12.163825988769531, "learning_rate": 5e-05, "loss": 2.0329, "num_input_tokens_seen": 24441288, "step": 369 }, { "epoch": 0.034539242757523285, "loss": 2.129265069961548, "loss_ce": 0.005241577047854662, "loss_iou": 0.8828125, "loss_num": 0.07275390625, "loss_xval": 2.125, "num_input_tokens_seen": 24441288, "step": 369 }, { "epoch": 0.03463284504141901, "grad_norm": 19.660593032836914, "learning_rate": 5e-05, "loss": 1.9867, "num_input_tokens_seen": 24505952, "step": 370 }, { "epoch": 0.03463284504141901, "loss": 1.8676186800003052, "loss_ce": 0.007633232045918703, "loss_iou": 0.828125, "loss_num": 0.040771484375, "loss_xval": 1.859375, "num_input_tokens_seen": 24505952, "step": 370 }, { "epoch": 0.03472644732531474, "grad_norm": 8.436841011047363, "learning_rate": 5e-05, "loss": 2.3642, "num_input_tokens_seen": 24572172, "step": 371 }, { "epoch": 0.03472644732531474, "loss": 2.509275436401367, "loss_ce": 0.005369182676076889, "loss_iou": 1.0703125, "loss_num": 0.07373046875, "loss_xval": 2.5, "num_input_tokens_seen": 24572172, "step": 371 }, { "epoch": 0.034820049609210466, "grad_norm": 17.772293090820312, "learning_rate": 5e-05, "loss": 2.1125, "num_input_tokens_seen": 24639288, "step": 372 }, { "epoch": 0.034820049609210466, "loss": 2.1627039909362793, "loss_ce": 0.005477518774569035, "loss_iou": 0.91796875, "loss_num": 0.06396484375, "loss_xval": 2.15625, "num_input_tokens_seen": 24639288, "step": 372 }, { "epoch": 0.03491365189310619, "grad_norm": 12.384817123413086, "learning_rate": 5e-05, "loss": 2.0017, "num_input_tokens_seen": 24706620, "step": 373 }, { "epoch": 0.03491365189310619, "loss": 1.9600626230239868, "loss_ce": 0.004984484985470772, "loss_iou": 0.8515625, "loss_num": 0.05126953125, "loss_xval": 1.953125, "num_input_tokens_seen": 24706620, "step": 373 }, { "epoch": 0.03500725417700192, "grad_norm": 39.35420227050781, "learning_rate": 5e-05, "loss": 2.3049, "num_input_tokens_seen": 24773152, "step": 374 }, { "epoch": 0.03500725417700192, "loss": 2.4104323387145996, "loss_ce": 0.008088554255664349, "loss_iou": 1.015625, "loss_num": 0.0732421875, "loss_xval": 2.40625, "num_input_tokens_seen": 24773152, "step": 374 }, { "epoch": 0.035100856460897646, "grad_norm": 29.24478530883789, "learning_rate": 5e-05, "loss": 1.9513, "num_input_tokens_seen": 24839316, "step": 375 }, { "epoch": 0.035100856460897646, "loss": 2.052703380584717, "loss_ce": 0.0038751561660319567, "loss_iou": 0.890625, "loss_num": 0.05322265625, "loss_xval": 2.046875, "num_input_tokens_seen": 24839316, "step": 375 }, { "epoch": 0.03519445874479337, "grad_norm": 10.423100471496582, "learning_rate": 5e-05, "loss": 1.9495, "num_input_tokens_seen": 24905412, "step": 376 }, { "epoch": 0.03519445874479337, "loss": 2.009066104888916, "loss_ce": 0.0012535951100289822, "loss_iou": 0.84375, "loss_num": 0.0634765625, "loss_xval": 2.0, "num_input_tokens_seen": 24905412, "step": 376 }, { "epoch": 0.0352880610286891, "grad_norm": 21.374744415283203, "learning_rate": 5e-05, "loss": 2.0672, "num_input_tokens_seen": 24971560, "step": 377 }, { "epoch": 0.0352880610286891, "loss": 2.2741591930389404, "loss_ce": 0.002674894407391548, "loss_iou": 1.0078125, "loss_num": 0.049560546875, "loss_xval": 2.265625, "num_input_tokens_seen": 24971560, "step": 377 }, { "epoch": 0.03538166331258483, "grad_norm": 7.7313361167907715, "learning_rate": 5e-05, "loss": 2.3296, "num_input_tokens_seen": 25037460, "step": 378 }, { "epoch": 0.03538166331258483, "loss": 2.5008437633514404, "loss_ce": 0.0067031835205852985, "loss_iou": 1.0078125, "loss_num": 0.0947265625, "loss_xval": 2.5, "num_input_tokens_seen": 25037460, "step": 378 }, { "epoch": 0.03547526559648055, "grad_norm": 65.93770599365234, "learning_rate": 5e-05, "loss": 2.2821, "num_input_tokens_seen": 25103504, "step": 379 }, { "epoch": 0.03547526559648055, "loss": 2.2385544776916504, "loss_ce": 0.00417946046218276, "loss_iou": 0.9453125, "loss_num": 0.06787109375, "loss_xval": 2.234375, "num_input_tokens_seen": 25103504, "step": 379 }, { "epoch": 0.03556886788037628, "grad_norm": 12.77132797241211, "learning_rate": 5e-05, "loss": 1.9231, "num_input_tokens_seen": 25169852, "step": 380 }, { "epoch": 0.03556886788037628, "loss": 1.8846288919448853, "loss_ce": 0.004746079444885254, "loss_iou": 0.7890625, "loss_num": 0.0595703125, "loss_xval": 1.8828125, "num_input_tokens_seen": 25169852, "step": 380 }, { "epoch": 0.03566247016427201, "grad_norm": 22.883155822753906, "learning_rate": 5e-05, "loss": 2.1241, "num_input_tokens_seen": 25236536, "step": 381 }, { "epoch": 0.03566247016427201, "loss": 2.1614749431610107, "loss_ce": 0.004248403944075108, "loss_iou": 0.9296875, "loss_num": 0.06005859375, "loss_xval": 2.15625, "num_input_tokens_seen": 25236536, "step": 381 }, { "epoch": 0.03575607244816773, "grad_norm": 19.95392608642578, "learning_rate": 5e-05, "loss": 2.5629, "num_input_tokens_seen": 25302256, "step": 382 }, { "epoch": 0.03575607244816773, "loss": 2.630160331726074, "loss_ce": 0.0032072500325739384, "loss_iou": 1.1328125, "loss_num": 0.07177734375, "loss_xval": 2.625, "num_input_tokens_seen": 25302256, "step": 382 }, { "epoch": 0.03584967473206346, "grad_norm": 9.674190521240234, "learning_rate": 5e-05, "loss": 2.3442, "num_input_tokens_seen": 25366992, "step": 383 }, { "epoch": 0.03584967473206346, "loss": 2.3065168857574463, "loss_ce": 0.0018293661996722221, "loss_iou": 0.9453125, "loss_num": 0.08349609375, "loss_xval": 2.3125, "num_input_tokens_seen": 25366992, "step": 383 }, { "epoch": 0.03594327701595919, "grad_norm": 19.20514488220215, "learning_rate": 5e-05, "loss": 1.8755, "num_input_tokens_seen": 25432540, "step": 384 }, { "epoch": 0.03594327701595919, "loss": 1.8492238521575928, "loss_ce": 0.003520740196108818, "loss_iou": 0.796875, "loss_num": 0.050048828125, "loss_xval": 1.84375, "num_input_tokens_seen": 25432540, "step": 384 }, { "epoch": 0.03603687929985492, "grad_norm": 20.697628021240234, "learning_rate": 5e-05, "loss": 1.9476, "num_input_tokens_seen": 25498652, "step": 385 }, { "epoch": 0.03603687929985492, "loss": 2.1511905193328857, "loss_ce": 0.016424886882305145, "loss_iou": 0.921875, "loss_num": 0.0576171875, "loss_xval": 2.140625, "num_input_tokens_seen": 25498652, "step": 385 }, { "epoch": 0.036130481583750644, "grad_norm": 13.417241096496582, "learning_rate": 5e-05, "loss": 2.2559, "num_input_tokens_seen": 25565424, "step": 386 }, { "epoch": 0.036130481583750644, "loss": 2.297234535217285, "loss_ce": 0.004265897441655397, "loss_iou": 0.9765625, "loss_num": 0.0673828125, "loss_xval": 2.296875, "num_input_tokens_seen": 25565424, "step": 386 }, { "epoch": 0.03622408386764637, "grad_norm": 15.320602416992188, "learning_rate": 5e-05, "loss": 1.7418, "num_input_tokens_seen": 25632460, "step": 387 }, { "epoch": 0.03622408386764637, "loss": 1.7443631887435913, "loss_ce": 0.00412881001830101, "loss_iou": 0.765625, "loss_num": 0.041259765625, "loss_xval": 1.7421875, "num_input_tokens_seen": 25632460, "step": 387 }, { "epoch": 0.0363176861515421, "grad_norm": 21.39435386657715, "learning_rate": 5e-05, "loss": 2.3779, "num_input_tokens_seen": 25698924, "step": 388 }, { "epoch": 0.0363176861515421, "loss": 2.4170782566070557, "loss_ce": 0.004968872293829918, "loss_iou": 1.078125, "loss_num": 0.05224609375, "loss_xval": 2.40625, "num_input_tokens_seen": 25698924, "step": 388 }, { "epoch": 0.036411288435437825, "grad_norm": 11.618865966796875, "learning_rate": 5e-05, "loss": 2.3603, "num_input_tokens_seen": 25765596, "step": 389 }, { "epoch": 0.036411288435437825, "loss": 2.6621227264404297, "loss_ce": 0.003919581882655621, "loss_iou": 1.0546875, "loss_num": 0.109375, "loss_xval": 2.65625, "num_input_tokens_seen": 25765596, "step": 389 }, { "epoch": 0.03650489071933355, "grad_norm": 13.078133583068848, "learning_rate": 5e-05, "loss": 2.1917, "num_input_tokens_seen": 25832992, "step": 390 }, { "epoch": 0.03650489071933355, "loss": 2.1810710430145264, "loss_ce": 0.006266321986913681, "loss_iou": 0.92578125, "loss_num": 0.06396484375, "loss_xval": 2.171875, "num_input_tokens_seen": 25832992, "step": 390 }, { "epoch": 0.03659849300322928, "grad_norm": 11.813979148864746, "learning_rate": 5e-05, "loss": 1.9176, "num_input_tokens_seen": 25898552, "step": 391 }, { "epoch": 0.03659849300322928, "loss": 1.9757747650146484, "loss_ce": 0.005071574356406927, "loss_iou": 0.80078125, "loss_num": 0.07421875, "loss_xval": 1.96875, "num_input_tokens_seen": 25898552, "step": 391 }, { "epoch": 0.036692095287125005, "grad_norm": 25.330154418945312, "learning_rate": 5e-05, "loss": 1.9453, "num_input_tokens_seen": 25965580, "step": 392 }, { "epoch": 0.036692095287125005, "loss": 2.0348823070526123, "loss_ce": 0.006561874412000179, "loss_iou": 0.90625, "loss_num": 0.04248046875, "loss_xval": 2.03125, "num_input_tokens_seen": 25965580, "step": 392 }, { "epoch": 0.03678569757102073, "grad_norm": 9.008604049682617, "learning_rate": 5e-05, "loss": 2.5008, "num_input_tokens_seen": 26031208, "step": 393 }, { "epoch": 0.03678569757102073, "loss": 2.6614603996276855, "loss_ce": 0.003257141914218664, "loss_iou": 1.1015625, "loss_num": 0.091796875, "loss_xval": 2.65625, "num_input_tokens_seen": 26031208, "step": 393 }, { "epoch": 0.03687929985491646, "grad_norm": 14.616659164428711, "learning_rate": 5e-05, "loss": 2.2705, "num_input_tokens_seen": 26096588, "step": 394 }, { "epoch": 0.03687929985491646, "loss": 2.3706624507904053, "loss_ce": 0.0015218132175505161, "loss_iou": 1.0078125, "loss_num": 0.0693359375, "loss_xval": 2.375, "num_input_tokens_seen": 26096588, "step": 394 }, { "epoch": 0.036972902138812186, "grad_norm": 8.542869567871094, "learning_rate": 5e-05, "loss": 2.0632, "num_input_tokens_seen": 26163324, "step": 395 }, { "epoch": 0.036972902138812186, "loss": 2.0058720111846924, "loss_ce": 0.001965888310223818, "loss_iou": 0.875, "loss_num": 0.049560546875, "loss_xval": 2.0, "num_input_tokens_seen": 26163324, "step": 395 }, { "epoch": 0.03706650442270792, "grad_norm": 13.70307445526123, "learning_rate": 5e-05, "loss": 2.1156, "num_input_tokens_seen": 26229608, "step": 396 }, { "epoch": 0.03706650442270792, "loss": 2.2452762126922607, "loss_ce": 0.006018448621034622, "loss_iou": 0.9765625, "loss_num": 0.057861328125, "loss_xval": 2.234375, "num_input_tokens_seen": 26229608, "step": 396 }, { "epoch": 0.03716010670660364, "grad_norm": 68.48944091796875, "learning_rate": 5e-05, "loss": 1.8305, "num_input_tokens_seen": 26294672, "step": 397 }, { "epoch": 0.03716010670660364, "loss": 1.8785256147384644, "loss_ce": 0.0054787942208349705, "loss_iou": 0.77734375, "loss_num": 0.06396484375, "loss_xval": 1.875, "num_input_tokens_seen": 26294672, "step": 397 }, { "epoch": 0.037253708990499367, "grad_norm": 15.26705551147461, "learning_rate": 5e-05, "loss": 2.0057, "num_input_tokens_seen": 26361136, "step": 398 }, { "epoch": 0.037253708990499367, "loss": 2.0479254722595215, "loss_ce": 0.010816069319844246, "loss_iou": 0.89453125, "loss_num": 0.050048828125, "loss_xval": 2.03125, "num_input_tokens_seen": 26361136, "step": 398 }, { "epoch": 0.0373473112743951, "grad_norm": 11.96884822845459, "learning_rate": 5e-05, "loss": 2.2064, "num_input_tokens_seen": 26428544, "step": 399 }, { "epoch": 0.0373473112743951, "loss": 2.3468685150146484, "loss_ce": 0.0070247529074549675, "loss_iou": 1.015625, "loss_num": 0.0634765625, "loss_xval": 2.34375, "num_input_tokens_seen": 26428544, "step": 399 }, { "epoch": 0.03744091355829082, "grad_norm": 13.7611722946167, "learning_rate": 5e-05, "loss": 2.1004, "num_input_tokens_seen": 26495640, "step": 400 }, { "epoch": 0.03744091355829082, "loss": 2.0225229263305664, "loss_ce": 0.007874608039855957, "loss_iou": 0.875, "loss_num": 0.052490234375, "loss_xval": 2.015625, "num_input_tokens_seen": 26495640, "step": 400 }, { "epoch": 0.03753451584218655, "grad_norm": 83.84553527832031, "learning_rate": 5e-05, "loss": 1.7903, "num_input_tokens_seen": 26561068, "step": 401 }, { "epoch": 0.03753451584218655, "loss": 1.8230633735656738, "loss_ce": 0.003727492643520236, "loss_iou": 0.8046875, "loss_num": 0.04248046875, "loss_xval": 1.8203125, "num_input_tokens_seen": 26561068, "step": 401 }, { "epoch": 0.03762811812608228, "grad_norm": 19.10430145263672, "learning_rate": 5e-05, "loss": 1.9353, "num_input_tokens_seen": 26626836, "step": 402 }, { "epoch": 0.03762811812608228, "loss": 1.8824224472045898, "loss_ce": 0.00174605508800596, "loss_iou": 0.81640625, "loss_num": 0.050048828125, "loss_xval": 1.8828125, "num_input_tokens_seen": 26626836, "step": 402 }, { "epoch": 0.037721720409978, "grad_norm": 22.448139190673828, "learning_rate": 5e-05, "loss": 1.8249, "num_input_tokens_seen": 26693028, "step": 403 }, { "epoch": 0.037721720409978, "loss": 1.768705129623413, "loss_ce": 0.0043009137734770775, "loss_iou": 0.75390625, "loss_num": 0.05126953125, "loss_xval": 1.765625, "num_input_tokens_seen": 26693028, "step": 403 }, { "epoch": 0.03781532269387373, "grad_norm": 10.823797225952148, "learning_rate": 5e-05, "loss": 2.4316, "num_input_tokens_seen": 26759472, "step": 404 }, { "epoch": 0.03781532269387373, "loss": 2.3532376289367676, "loss_ce": 0.008022695779800415, "loss_iou": 0.9375, "loss_num": 0.09423828125, "loss_xval": 2.34375, "num_input_tokens_seen": 26759472, "step": 404 }, { "epoch": 0.03790892497776946, "grad_norm": 14.573873519897461, "learning_rate": 5e-05, "loss": 1.831, "num_input_tokens_seen": 26825124, "step": 405 }, { "epoch": 0.03790892497776946, "loss": 1.9456989765167236, "loss_ce": 0.0074664149433374405, "loss_iou": 0.8046875, "loss_num": 0.0654296875, "loss_xval": 1.9375, "num_input_tokens_seen": 26825124, "step": 405 }, { "epoch": 0.038002527261665184, "grad_norm": 9.012618064880371, "learning_rate": 5e-05, "loss": 2.1544, "num_input_tokens_seen": 26891976, "step": 406 }, { "epoch": 0.038002527261665184, "loss": 2.2486791610717773, "loss_ce": 0.007468043826520443, "loss_iou": 0.9453125, "loss_num": 0.0712890625, "loss_xval": 2.234375, "num_input_tokens_seen": 26891976, "step": 406 }, { "epoch": 0.03809612954556091, "grad_norm": 18.2127742767334, "learning_rate": 5e-05, "loss": 2.1004, "num_input_tokens_seen": 26957404, "step": 407 }, { "epoch": 0.03809612954556091, "loss": 2.109196186065674, "loss_ce": 0.0047041852958500385, "loss_iou": 0.8828125, "loss_num": 0.0673828125, "loss_xval": 2.109375, "num_input_tokens_seen": 26957404, "step": 407 }, { "epoch": 0.03818973182945664, "grad_norm": 16.539289474487305, "learning_rate": 5e-05, "loss": 2.113, "num_input_tokens_seen": 27022952, "step": 408 }, { "epoch": 0.03818973182945664, "loss": 2.0526235103607178, "loss_ce": 0.004039513412863016, "loss_iou": 0.8515625, "loss_num": 0.06982421875, "loss_xval": 2.046875, "num_input_tokens_seen": 27022952, "step": 408 }, { "epoch": 0.038283334113352364, "grad_norm": 9.839387893676758, "learning_rate": 5e-05, "loss": 2.0395, "num_input_tokens_seen": 27089496, "step": 409 }, { "epoch": 0.038283334113352364, "loss": 1.8633708953857422, "loss_ce": 0.003019391791895032, "loss_iou": 0.80859375, "loss_num": 0.048828125, "loss_xval": 1.859375, "num_input_tokens_seen": 27089496, "step": 409 }, { "epoch": 0.038376936397248096, "grad_norm": 17.634851455688477, "learning_rate": 5e-05, "loss": 1.6128, "num_input_tokens_seen": 27155404, "step": 410 }, { "epoch": 0.038376936397248096, "loss": 1.4755079746246338, "loss_ce": 0.0012646487448364496, "loss_iou": 0.63671875, "loss_num": 0.04052734375, "loss_xval": 1.4765625, "num_input_tokens_seen": 27155404, "step": 410 }, { "epoch": 0.03847053868114382, "grad_norm": 11.373767852783203, "learning_rate": 5e-05, "loss": 2.2504, "num_input_tokens_seen": 27222132, "step": 411 }, { "epoch": 0.03847053868114382, "loss": 2.19195556640625, "loss_ce": 0.005432123318314552, "loss_iou": 0.953125, "loss_num": 0.056640625, "loss_xval": 2.1875, "num_input_tokens_seen": 27222132, "step": 411 }, { "epoch": 0.038564140965039545, "grad_norm": 12.374197959899902, "learning_rate": 5e-05, "loss": 2.0242, "num_input_tokens_seen": 27287484, "step": 412 }, { "epoch": 0.038564140965039545, "loss": 2.068218469619751, "loss_ce": 0.005718581844121218, "loss_iou": 0.8515625, "loss_num": 0.072265625, "loss_xval": 2.0625, "num_input_tokens_seen": 27287484, "step": 412 }, { "epoch": 0.038657743248935277, "grad_norm": 21.252538681030273, "learning_rate": 5e-05, "loss": 1.8521, "num_input_tokens_seen": 27353856, "step": 413 }, { "epoch": 0.038657743248935277, "loss": 1.5962870121002197, "loss_ce": 0.00571082066744566, "loss_iou": 0.68359375, "loss_num": 0.044677734375, "loss_xval": 1.59375, "num_input_tokens_seen": 27353856, "step": 413 }, { "epoch": 0.038751345532831, "grad_norm": 10.730375289916992, "learning_rate": 5e-05, "loss": 2.4089, "num_input_tokens_seen": 27419808, "step": 414 }, { "epoch": 0.038751345532831, "loss": 2.4442930221557617, "loss_ce": 0.0028866769280284643, "loss_iou": 1.015625, "loss_num": 0.0830078125, "loss_xval": 2.4375, "num_input_tokens_seen": 27419808, "step": 414 }, { "epoch": 0.038844947816726726, "grad_norm": 14.683554649353027, "learning_rate": 5e-05, "loss": 2.0207, "num_input_tokens_seen": 27486244, "step": 415 }, { "epoch": 0.038844947816726726, "loss": 1.9977325201034546, "loss_ce": 0.01042783074080944, "loss_iou": 0.8828125, "loss_num": 0.04345703125, "loss_xval": 1.984375, "num_input_tokens_seen": 27486244, "step": 415 }, { "epoch": 0.03893855010062246, "grad_norm": 16.708295822143555, "learning_rate": 5e-05, "loss": 1.8847, "num_input_tokens_seen": 27552460, "step": 416 }, { "epoch": 0.03893855010062246, "loss": 1.726832389831543, "loss_ce": 0.0036878606770187616, "loss_iou": 0.73046875, "loss_num": 0.0517578125, "loss_xval": 1.7265625, "num_input_tokens_seen": 27552460, "step": 416 }, { "epoch": 0.03903215238451818, "grad_norm": 18.461673736572266, "learning_rate": 5e-05, "loss": 2.2866, "num_input_tokens_seen": 27618188, "step": 417 }, { "epoch": 0.03903215238451818, "loss": 2.2025556564331055, "loss_ce": 0.0033370940946042538, "loss_iou": 0.9375, "loss_num": 0.064453125, "loss_xval": 2.203125, "num_input_tokens_seen": 27618188, "step": 417 }, { "epoch": 0.039125754668413906, "grad_norm": 17.931636810302734, "learning_rate": 5e-05, "loss": 1.8517, "num_input_tokens_seen": 27685044, "step": 418 }, { "epoch": 0.039125754668413906, "loss": 1.9908952713012695, "loss_ce": 0.004567192401736975, "loss_iou": 0.875, "loss_num": 0.046630859375, "loss_xval": 1.984375, "num_input_tokens_seen": 27685044, "step": 418 }, { "epoch": 0.03921935695230964, "grad_norm": 23.1167049407959, "learning_rate": 5e-05, "loss": 2.1168, "num_input_tokens_seen": 27750588, "step": 419 }, { "epoch": 0.03921935695230964, "loss": 2.1894702911376953, "loss_ce": 0.004900074098259211, "loss_iou": 0.94140625, "loss_num": 0.059326171875, "loss_xval": 2.1875, "num_input_tokens_seen": 27750588, "step": 419 }, { "epoch": 0.03931295923620536, "grad_norm": 15.535375595092773, "learning_rate": 5e-05, "loss": 2.5563, "num_input_tokens_seen": 27817228, "step": 420 }, { "epoch": 0.03931295923620536, "loss": 2.63668155670166, "loss_ce": 0.0077751874923706055, "loss_iou": 1.078125, "loss_num": 0.09423828125, "loss_xval": 2.625, "num_input_tokens_seen": 27817228, "step": 420 }, { "epoch": 0.039406561520101094, "grad_norm": 10.00205135345459, "learning_rate": 5e-05, "loss": 2.0777, "num_input_tokens_seen": 27883472, "step": 421 }, { "epoch": 0.039406561520101094, "loss": 2.1320810317993164, "loss_ce": 0.0031747817993164062, "loss_iou": 0.9140625, "loss_num": 0.059814453125, "loss_xval": 2.125, "num_input_tokens_seen": 27883472, "step": 421 }, { "epoch": 0.03950016380399682, "grad_norm": 14.320682525634766, "learning_rate": 5e-05, "loss": 2.0225, "num_input_tokens_seen": 27949516, "step": 422 }, { "epoch": 0.03950016380399682, "loss": 1.870326042175293, "loss_ce": 0.0065565044060349464, "loss_iou": 0.7265625, "loss_num": 0.08203125, "loss_xval": 1.8671875, "num_input_tokens_seen": 27949516, "step": 422 }, { "epoch": 0.03959376608789254, "grad_norm": 21.98247718811035, "learning_rate": 5e-05, "loss": 1.9695, "num_input_tokens_seen": 28014724, "step": 423 }, { "epoch": 0.03959376608789254, "loss": 2.2499985694885254, "loss_ce": 0.004881343804299831, "loss_iou": 0.9453125, "loss_num": 0.0712890625, "loss_xval": 2.25, "num_input_tokens_seen": 28014724, "step": 423 }, { "epoch": 0.039687368371788274, "grad_norm": 329.8367614746094, "learning_rate": 5e-05, "loss": 2.349, "num_input_tokens_seen": 28081584, "step": 424 }, { "epoch": 0.039687368371788274, "loss": 2.358642578125, "loss_ce": 0.0031738688703626394, "loss_iou": 0.9921875, "loss_num": 0.07470703125, "loss_xval": 2.359375, "num_input_tokens_seen": 28081584, "step": 424 }, { "epoch": 0.039780970655684, "grad_norm": 10.797112464904785, "learning_rate": 5e-05, "loss": 2.1071, "num_input_tokens_seen": 28147424, "step": 425 }, { "epoch": 0.039780970655684, "loss": 2.0887904167175293, "loss_ce": 0.006759242154657841, "loss_iou": 0.87890625, "loss_num": 0.064453125, "loss_xval": 2.078125, "num_input_tokens_seen": 28147424, "step": 425 }, { "epoch": 0.039874572939579724, "grad_norm": 10.683431625366211, "learning_rate": 5e-05, "loss": 1.9732, "num_input_tokens_seen": 28214152, "step": 426 }, { "epoch": 0.039874572939579724, "loss": 1.9705636501312256, "loss_ce": 0.0037668293807655573, "loss_iou": 0.83984375, "loss_num": 0.057373046875, "loss_xval": 1.96875, "num_input_tokens_seen": 28214152, "step": 426 }, { "epoch": 0.039968175223475455, "grad_norm": 30.997591018676758, "learning_rate": 5e-05, "loss": 1.9126, "num_input_tokens_seen": 28280776, "step": 427 }, { "epoch": 0.039968175223475455, "loss": 1.9151159524917603, "loss_ce": 0.004959717858582735, "loss_iou": 0.796875, "loss_num": 0.06396484375, "loss_xval": 1.90625, "num_input_tokens_seen": 28280776, "step": 427 }, { "epoch": 0.04006177750737118, "grad_norm": 20.698631286621094, "learning_rate": 5e-05, "loss": 2.2508, "num_input_tokens_seen": 28346732, "step": 428 }, { "epoch": 0.04006177750737118, "loss": 2.416332244873047, "loss_ce": 0.004222821444272995, "loss_iou": 1.0234375, "loss_num": 0.072265625, "loss_xval": 2.40625, "num_input_tokens_seen": 28346732, "step": 428 }, { "epoch": 0.040155379791266904, "grad_norm": 458.5082092285156, "learning_rate": 5e-05, "loss": 1.7608, "num_input_tokens_seen": 28412388, "step": 429 }, { "epoch": 0.040155379791266904, "loss": 1.812030553817749, "loss_ce": 0.001483736908994615, "loss_iou": 0.7734375, "loss_num": 0.05322265625, "loss_xval": 1.8125, "num_input_tokens_seen": 28412388, "step": 429 }, { "epoch": 0.040248982075162636, "grad_norm": 16.114900588989258, "learning_rate": 5e-05, "loss": 1.9463, "num_input_tokens_seen": 28478760, "step": 430 }, { "epoch": 0.040248982075162636, "loss": 1.9977428913116455, "loss_ce": 0.0026255943812429905, "loss_iou": 0.875, "loss_num": 0.04931640625, "loss_xval": 1.9921875, "num_input_tokens_seen": 28478760, "step": 430 }, { "epoch": 0.04034258435905836, "grad_norm": 13.54692554473877, "learning_rate": 5e-05, "loss": 2.1163, "num_input_tokens_seen": 28545792, "step": 431 }, { "epoch": 0.04034258435905836, "loss": 2.117304801940918, "loss_ce": 0.005976623855531216, "loss_iou": 0.89453125, "loss_num": 0.0654296875, "loss_xval": 2.109375, "num_input_tokens_seen": 28545792, "step": 431 }, { "epoch": 0.040436186642954085, "grad_norm": 19.964017868041992, "learning_rate": 5e-05, "loss": 1.8118, "num_input_tokens_seen": 28611936, "step": 432 }, { "epoch": 0.040436186642954085, "loss": 1.8202134370803833, "loss_ce": 0.004295459948480129, "loss_iou": 0.7265625, "loss_num": 0.07177734375, "loss_xval": 1.8125, "num_input_tokens_seen": 28611936, "step": 432 }, { "epoch": 0.040529788926849816, "grad_norm": 13.176482200622559, "learning_rate": 5e-05, "loss": 2.2294, "num_input_tokens_seen": 28678992, "step": 433 }, { "epoch": 0.040529788926849816, "loss": 2.1980347633361816, "loss_ce": 0.005651961546391249, "loss_iou": 0.921875, "loss_num": 0.0693359375, "loss_xval": 2.1875, "num_input_tokens_seen": 28678992, "step": 433 }, { "epoch": 0.04062339121074554, "grad_norm": 19.492185592651367, "learning_rate": 5e-05, "loss": 1.88, "num_input_tokens_seen": 28745540, "step": 434 }, { "epoch": 0.04062339121074554, "loss": 1.765675663948059, "loss_ce": 0.0029803107026964426, "loss_iou": 0.7109375, "loss_num": 0.0673828125, "loss_xval": 1.765625, "num_input_tokens_seen": 28745540, "step": 434 }, { "epoch": 0.04071699349464127, "grad_norm": 10.830259323120117, "learning_rate": 5e-05, "loss": 1.8285, "num_input_tokens_seen": 28812080, "step": 435 }, { "epoch": 0.04071699349464127, "loss": 1.8863437175750732, "loss_ce": 0.011343682184815407, "loss_iou": 0.80859375, "loss_num": 0.051513671875, "loss_xval": 1.875, "num_input_tokens_seen": 28812080, "step": 435 }, { "epoch": 0.040810595778537, "grad_norm": 19.71128273010254, "learning_rate": 5e-05, "loss": 2.28, "num_input_tokens_seen": 28879532, "step": 436 }, { "epoch": 0.040810595778537, "loss": 2.296851396560669, "loss_ce": 0.007788877934217453, "loss_iou": 0.9921875, "loss_num": 0.061767578125, "loss_xval": 2.28125, "num_input_tokens_seen": 28879532, "step": 436 }, { "epoch": 0.04090419806243272, "grad_norm": 35.30091094970703, "learning_rate": 5e-05, "loss": 2.5773, "num_input_tokens_seen": 28946524, "step": 437 }, { "epoch": 0.04090419806243272, "loss": 2.2610809803009033, "loss_ce": 0.002291942248120904, "loss_iou": 0.9609375, "loss_num": 0.0673828125, "loss_xval": 2.265625, "num_input_tokens_seen": 28946524, "step": 437 }, { "epoch": 0.04099780034632845, "grad_norm": 14.956010818481445, "learning_rate": 5e-05, "loss": 2.1915, "num_input_tokens_seen": 29014132, "step": 438 }, { "epoch": 0.04099780034632845, "loss": 2.039963483810425, "loss_ce": 0.0057838065549731255, "loss_iou": 0.8515625, "loss_num": 0.06640625, "loss_xval": 2.03125, "num_input_tokens_seen": 29014132, "step": 438 }, { "epoch": 0.04109140263022418, "grad_norm": 10.634124755859375, "learning_rate": 5e-05, "loss": 1.9611, "num_input_tokens_seen": 29081008, "step": 439 }, { "epoch": 0.04109140263022418, "loss": 2.0453054904937744, "loss_ce": 0.005754796788096428, "loss_iou": 0.828125, "loss_num": 0.0771484375, "loss_xval": 2.046875, "num_input_tokens_seen": 29081008, "step": 439 }, { "epoch": 0.0411850049141199, "grad_norm": 12.619905471801758, "learning_rate": 5e-05, "loss": 1.8783, "num_input_tokens_seen": 29148312, "step": 440 }, { "epoch": 0.0411850049141199, "loss": 1.8701744079589844, "loss_ce": 0.004940034355968237, "loss_iou": 0.8203125, "loss_num": 0.04541015625, "loss_xval": 1.8671875, "num_input_tokens_seen": 29148312, "step": 440 }, { "epoch": 0.041278607198015634, "grad_norm": 13.43472957611084, "learning_rate": 5e-05, "loss": 2.1071, "num_input_tokens_seen": 29213756, "step": 441 }, { "epoch": 0.041278607198015634, "loss": 2.1046857833862305, "loss_ce": 0.002390938112512231, "loss_iou": 0.86328125, "loss_num": 0.0751953125, "loss_xval": 2.109375, "num_input_tokens_seen": 29213756, "step": 441 }, { "epoch": 0.04137220948191136, "grad_norm": 15.941155433654785, "learning_rate": 5e-05, "loss": 1.8279, "num_input_tokens_seen": 29279772, "step": 442 }, { "epoch": 0.04137220948191136, "loss": 1.9232045412063599, "loss_ce": 0.0052358005195856094, "loss_iou": 0.81640625, "loss_num": 0.057861328125, "loss_xval": 1.921875, "num_input_tokens_seen": 29279772, "step": 442 }, { "epoch": 0.04146581176580708, "grad_norm": 28.43421745300293, "learning_rate": 5e-05, "loss": 2.2101, "num_input_tokens_seen": 29346800, "step": 443 }, { "epoch": 0.04146581176580708, "loss": 2.252653121948242, "loss_ce": 0.006559483706951141, "loss_iou": 1.015625, "loss_num": 0.041259765625, "loss_xval": 2.25, "num_input_tokens_seen": 29346800, "step": 443 }, { "epoch": 0.041559414049702814, "grad_norm": 13.325878143310547, "learning_rate": 5e-05, "loss": 2.6031, "num_input_tokens_seen": 29413804, "step": 444 }, { "epoch": 0.041559414049702814, "loss": 2.45267915725708, "loss_ce": 0.005413481034338474, "loss_iou": 1.0078125, "loss_num": 0.08642578125, "loss_xval": 2.453125, "num_input_tokens_seen": 29413804, "step": 444 }, { "epoch": 0.04165301633359854, "grad_norm": 9.816612243652344, "learning_rate": 5e-05, "loss": 2.0703, "num_input_tokens_seen": 29479516, "step": 445 }, { "epoch": 0.04165301633359854, "loss": 1.9830279350280762, "loss_ce": 0.007441938854753971, "loss_iou": 0.76953125, "loss_num": 0.087890625, "loss_xval": 1.9765625, "num_input_tokens_seen": 29479516, "step": 445 }, { "epoch": 0.04174661861749427, "grad_norm": 15.803251266479492, "learning_rate": 5e-05, "loss": 2.0669, "num_input_tokens_seen": 29546352, "step": 446 }, { "epoch": 0.04174661861749427, "loss": 2.098733901977539, "loss_ce": 0.006936904042959213, "loss_iou": 0.87890625, "loss_num": 0.06689453125, "loss_xval": 2.09375, "num_input_tokens_seen": 29546352, "step": 446 }, { "epoch": 0.041840220901389995, "grad_norm": 22.669797897338867, "learning_rate": 5e-05, "loss": 2.1419, "num_input_tokens_seen": 29613220, "step": 447 }, { "epoch": 0.041840220901389995, "loss": 2.2310471534729004, "loss_ce": 0.005461185239255428, "loss_iou": 0.95703125, "loss_num": 0.0625, "loss_xval": 2.21875, "num_input_tokens_seen": 29613220, "step": 447 }, { "epoch": 0.04193382318528572, "grad_norm": 102.99943542480469, "learning_rate": 5e-05, "loss": 2.2529, "num_input_tokens_seen": 29679504, "step": 448 }, { "epoch": 0.04193382318528572, "loss": 2.233792543411255, "loss_ce": 0.0023472788743674755, "loss_iou": 0.94140625, "loss_num": 0.06982421875, "loss_xval": 2.234375, "num_input_tokens_seen": 29679504, "step": 448 }, { "epoch": 0.04202742546918145, "grad_norm": 13.938831329345703, "learning_rate": 5e-05, "loss": 2.0361, "num_input_tokens_seen": 29746832, "step": 449 }, { "epoch": 0.04202742546918145, "loss": 2.0519959926605225, "loss_ce": 0.005120845511555672, "loss_iou": 0.875, "loss_num": 0.058837890625, "loss_xval": 2.046875, "num_input_tokens_seen": 29746832, "step": 449 }, { "epoch": 0.042121027753077175, "grad_norm": 17.788707733154297, "learning_rate": 5e-05, "loss": 2.2705, "num_input_tokens_seen": 29812948, "step": 450 }, { "epoch": 0.042121027753077175, "loss": 2.2611751556396484, "loss_ce": 0.003362696385011077, "loss_iou": 0.9453125, "loss_num": 0.072265625, "loss_xval": 2.25, "num_input_tokens_seen": 29812948, "step": 450 }, { "epoch": 0.0422146300369729, "grad_norm": 14.109766006469727, "learning_rate": 5e-05, "loss": 2.3016, "num_input_tokens_seen": 29878596, "step": 451 }, { "epoch": 0.0422146300369729, "loss": 2.1837148666381836, "loss_ce": 0.0030509615316987038, "loss_iou": 0.94140625, "loss_num": 0.060546875, "loss_xval": 2.1875, "num_input_tokens_seen": 29878596, "step": 451 }, { "epoch": 0.04230823232086863, "grad_norm": 17.93331527709961, "learning_rate": 5e-05, "loss": 1.7842, "num_input_tokens_seen": 29945328, "step": 452 }, { "epoch": 0.04230823232086863, "loss": 1.9210729598999023, "loss_ce": 0.003104252042248845, "loss_iou": 0.85546875, "loss_num": 0.0419921875, "loss_xval": 1.921875, "num_input_tokens_seen": 29945328, "step": 452 }, { "epoch": 0.042401834604764356, "grad_norm": 19.0549373626709, "learning_rate": 5e-05, "loss": 1.7662, "num_input_tokens_seen": 30011836, "step": 453 }, { "epoch": 0.042401834604764356, "loss": 1.575378179550171, "loss_ce": 0.005798084661364555, "loss_iou": 0.6640625, "loss_num": 0.047607421875, "loss_xval": 1.5703125, "num_input_tokens_seen": 30011836, "step": 453 }, { "epoch": 0.04249543688866008, "grad_norm": 17.29096221923828, "learning_rate": 5e-05, "loss": 2.1239, "num_input_tokens_seen": 30077988, "step": 454 }, { "epoch": 0.04249543688866008, "loss": 1.9790494441986084, "loss_ce": 0.002486966550350189, "loss_iou": 0.8515625, "loss_num": 0.0537109375, "loss_xval": 1.9765625, "num_input_tokens_seen": 30077988, "step": 454 }, { "epoch": 0.04258903917255581, "grad_norm": 20.69436264038086, "learning_rate": 5e-05, "loss": 1.9971, "num_input_tokens_seen": 30143916, "step": 455 }, { "epoch": 0.04258903917255581, "loss": 1.8437339067459106, "loss_ce": 0.005843220744282007, "loss_iou": 0.75, "loss_num": 0.0673828125, "loss_xval": 1.8359375, "num_input_tokens_seen": 30143916, "step": 455 }, { "epoch": 0.04268264145645154, "grad_norm": 113.77708435058594, "learning_rate": 5e-05, "loss": 1.7839, "num_input_tokens_seen": 30209432, "step": 456 }, { "epoch": 0.04268264145645154, "loss": 1.7519501447677612, "loss_ce": 0.008541999384760857, "loss_iou": 0.671875, "loss_num": 0.0791015625, "loss_xval": 1.7421875, "num_input_tokens_seen": 30209432, "step": 456 }, { "epoch": 0.04277624374034726, "grad_norm": 13.520759582519531, "learning_rate": 5e-05, "loss": 1.9484, "num_input_tokens_seen": 30275356, "step": 457 }, { "epoch": 0.04277624374034726, "loss": 1.990700364112854, "loss_ce": 0.005348884034901857, "loss_iou": 0.84765625, "loss_num": 0.0576171875, "loss_xval": 1.984375, "num_input_tokens_seen": 30275356, "step": 457 }, { "epoch": 0.04286984602424299, "grad_norm": 19.00589370727539, "learning_rate": 5e-05, "loss": 1.731, "num_input_tokens_seen": 30341892, "step": 458 }, { "epoch": 0.04286984602424299, "loss": 1.6080878973007202, "loss_ce": 0.0018257052870467305, "loss_iou": 0.703125, "loss_num": 0.039306640625, "loss_xval": 1.609375, "num_input_tokens_seen": 30341892, "step": 458 }, { "epoch": 0.04296344830813872, "grad_norm": 12.291425704956055, "learning_rate": 5e-05, "loss": 2.3168, "num_input_tokens_seen": 30408404, "step": 459 }, { "epoch": 0.04296344830813872, "loss": 2.277008056640625, "loss_ce": 0.0055235326290130615, "loss_iou": 0.95703125, "loss_num": 0.07177734375, "loss_xval": 2.265625, "num_input_tokens_seen": 30408404, "step": 459 }, { "epoch": 0.04305705059203445, "grad_norm": 31.746538162231445, "learning_rate": 5e-05, "loss": 2.1224, "num_input_tokens_seen": 30474776, "step": 460 }, { "epoch": 0.04305705059203445, "loss": 2.1912577152252197, "loss_ce": 0.0027812570333480835, "loss_iou": 0.9140625, "loss_num": 0.07373046875, "loss_xval": 2.1875, "num_input_tokens_seen": 30474776, "step": 460 }, { "epoch": 0.04315065287593017, "grad_norm": 12.762518882751465, "learning_rate": 5e-05, "loss": 1.9152, "num_input_tokens_seen": 30540476, "step": 461 }, { "epoch": 0.04315065287593017, "loss": 1.9707589149475098, "loss_ce": 0.0049386098980903625, "loss_iou": 0.84375, "loss_num": 0.055908203125, "loss_xval": 1.96875, "num_input_tokens_seen": 30540476, "step": 461 }, { "epoch": 0.0432442551598259, "grad_norm": 15.633072853088379, "learning_rate": 5e-05, "loss": 2.0971, "num_input_tokens_seen": 30607604, "step": 462 }, { "epoch": 0.0432442551598259, "loss": 2.1641714572906494, "loss_ce": 0.005968406796455383, "loss_iou": 0.9375, "loss_num": 0.057373046875, "loss_xval": 2.15625, "num_input_tokens_seen": 30607604, "step": 462 }, { "epoch": 0.04333785744372163, "grad_norm": 16.27423858642578, "learning_rate": 5e-05, "loss": 1.9722, "num_input_tokens_seen": 30673660, "step": 463 }, { "epoch": 0.04333785744372163, "loss": 1.743199110031128, "loss_ce": 0.002476481255143881, "loss_iou": 0.734375, "loss_num": 0.053955078125, "loss_xval": 1.7421875, "num_input_tokens_seen": 30673660, "step": 463 }, { "epoch": 0.043431459727617354, "grad_norm": 23.577835083007812, "learning_rate": 5e-05, "loss": 2.155, "num_input_tokens_seen": 30741112, "step": 464 }, { "epoch": 0.043431459727617354, "loss": 2.149505138397217, "loss_ce": 0.0049738697707653046, "loss_iou": 0.9140625, "loss_num": 0.0634765625, "loss_xval": 2.140625, "num_input_tokens_seen": 30741112, "step": 464 }, { "epoch": 0.04352506201151308, "grad_norm": 9.723146438598633, "learning_rate": 5e-05, "loss": 1.7155, "num_input_tokens_seen": 30806992, "step": 465 }, { "epoch": 0.04352506201151308, "loss": 1.694063663482666, "loss_ce": 0.004122295416891575, "loss_iou": 0.703125, "loss_num": 0.0556640625, "loss_xval": 1.6875, "num_input_tokens_seen": 30806992, "step": 465 }, { "epoch": 0.04361866429540881, "grad_norm": 12.820768356323242, "learning_rate": 5e-05, "loss": 2.1488, "num_input_tokens_seen": 30872864, "step": 466 }, { "epoch": 0.04361866429540881, "loss": 2.2483158111572266, "loss_ce": 0.007104871328920126, "loss_iou": 0.9609375, "loss_num": 0.06396484375, "loss_xval": 2.234375, "num_input_tokens_seen": 30872864, "step": 466 }, { "epoch": 0.043712266579304535, "grad_norm": 23.971820831298828, "learning_rate": 5e-05, "loss": 2.2016, "num_input_tokens_seen": 30938752, "step": 467 }, { "epoch": 0.043712266579304535, "loss": 2.1699142456054688, "loss_ce": 0.00585184246301651, "loss_iou": 0.921875, "loss_num": 0.06298828125, "loss_xval": 2.15625, "num_input_tokens_seen": 30938752, "step": 467 }, { "epoch": 0.04380586886320026, "grad_norm": 7.219677448272705, "learning_rate": 5e-05, "loss": 2.2216, "num_input_tokens_seen": 31004860, "step": 468 }, { "epoch": 0.04380586886320026, "loss": 2.246121883392334, "loss_ce": 0.007840558886528015, "loss_iou": 0.9375, "loss_num": 0.07177734375, "loss_xval": 2.234375, "num_input_tokens_seen": 31004860, "step": 468 }, { "epoch": 0.04389947114709599, "grad_norm": 9.824893951416016, "learning_rate": 5e-05, "loss": 2.1411, "num_input_tokens_seen": 31070460, "step": 469 }, { "epoch": 0.04389947114709599, "loss": 2.093827724456787, "loss_ce": 0.003007214516401291, "loss_iou": 0.875, "loss_num": 0.06884765625, "loss_xval": 2.09375, "num_input_tokens_seen": 31070460, "step": 469 }, { "epoch": 0.043993073430991715, "grad_norm": 13.35690689086914, "learning_rate": 5e-05, "loss": 2.1013, "num_input_tokens_seen": 31136604, "step": 470 }, { "epoch": 0.043993073430991715, "loss": 2.2541399002075195, "loss_ce": 0.009999187663197517, "loss_iou": 0.9453125, "loss_num": 0.07177734375, "loss_xval": 2.25, "num_input_tokens_seen": 31136604, "step": 470 }, { "epoch": 0.04408667571488745, "grad_norm": 10.634860038757324, "learning_rate": 5e-05, "loss": 1.8709, "num_input_tokens_seen": 31202396, "step": 471 }, { "epoch": 0.04408667571488745, "loss": 1.7519464492797852, "loss_ce": 0.0038995440118014812, "loss_iou": 0.7578125, "loss_num": 0.046630859375, "loss_xval": 1.75, "num_input_tokens_seen": 31202396, "step": 471 }, { "epoch": 0.04418027799878317, "grad_norm": 24.335004806518555, "learning_rate": 5e-05, "loss": 2.0886, "num_input_tokens_seen": 31269104, "step": 472 }, { "epoch": 0.04418027799878317, "loss": 2.0880887508392334, "loss_ce": 0.006057538092136383, "loss_iou": 0.890625, "loss_num": 0.059814453125, "loss_xval": 2.078125, "num_input_tokens_seen": 31269104, "step": 472 }, { "epoch": 0.044273880282678896, "grad_norm": 25.776592254638672, "learning_rate": 5e-05, "loss": 2.0023, "num_input_tokens_seen": 31335244, "step": 473 }, { "epoch": 0.044273880282678896, "loss": 1.954072117805481, "loss_ce": 0.0029002828523516655, "loss_iou": 0.86328125, "loss_num": 0.044677734375, "loss_xval": 1.953125, "num_input_tokens_seen": 31335244, "step": 473 }, { "epoch": 0.04436748256657463, "grad_norm": 9.409109115600586, "learning_rate": 5e-05, "loss": 2.4935, "num_input_tokens_seen": 31401428, "step": 474 }, { "epoch": 0.04436748256657463, "loss": 2.5019659996032715, "loss_ce": 0.0019660217221826315, "loss_iou": 1.0625, "loss_num": 0.0771484375, "loss_xval": 2.5, "num_input_tokens_seen": 31401428, "step": 474 }, { "epoch": 0.04446108485047035, "grad_norm": 16.741493225097656, "learning_rate": 5e-05, "loss": 2.2775, "num_input_tokens_seen": 31468948, "step": 475 }, { "epoch": 0.04446108485047035, "loss": 2.27400541305542, "loss_ce": 0.005450926721096039, "loss_iou": 0.94921875, "loss_num": 0.07421875, "loss_xval": 2.265625, "num_input_tokens_seen": 31468948, "step": 475 }, { "epoch": 0.044554687134366076, "grad_norm": 8.939950942993164, "learning_rate": 5e-05, "loss": 2.0622, "num_input_tokens_seen": 31536260, "step": 476 }, { "epoch": 0.044554687134366076, "loss": 2.106278419494629, "loss_ce": 0.006669181399047375, "loss_iou": 0.87109375, "loss_num": 0.07080078125, "loss_xval": 2.09375, "num_input_tokens_seen": 31536260, "step": 476 }, { "epoch": 0.04464828941826181, "grad_norm": 20.194087982177734, "learning_rate": 5e-05, "loss": 1.9375, "num_input_tokens_seen": 31602536, "step": 477 }, { "epoch": 0.04464828941826181, "loss": 1.848040223121643, "loss_ce": 0.007219946011900902, "loss_iou": 0.7734375, "loss_num": 0.05810546875, "loss_xval": 1.84375, "num_input_tokens_seen": 31602536, "step": 477 }, { "epoch": 0.04474189170215753, "grad_norm": 12.22110652923584, "learning_rate": 5e-05, "loss": 2.287, "num_input_tokens_seen": 31668696, "step": 478 }, { "epoch": 0.04474189170215753, "loss": 2.145697593688965, "loss_ce": 0.004096093587577343, "loss_iou": 0.91796875, "loss_num": 0.061279296875, "loss_xval": 2.140625, "num_input_tokens_seen": 31668696, "step": 478 }, { "epoch": 0.04483549398605326, "grad_norm": 12.94542407989502, "learning_rate": 5e-05, "loss": 2.0049, "num_input_tokens_seen": 31734556, "step": 479 }, { "epoch": 0.04483549398605326, "loss": 1.8130786418914795, "loss_ce": 0.007536758668720722, "loss_iou": 0.765625, "loss_num": 0.055908203125, "loss_xval": 1.8046875, "num_input_tokens_seen": 31734556, "step": 479 }, { "epoch": 0.04492909626994899, "grad_norm": 20.849462509155273, "learning_rate": 5e-05, "loss": 2.0953, "num_input_tokens_seen": 31801108, "step": 480 }, { "epoch": 0.04492909626994899, "loss": 2.0138297080993652, "loss_ce": 0.004063955508172512, "loss_iou": 0.83984375, "loss_num": 0.06640625, "loss_xval": 2.015625, "num_input_tokens_seen": 31801108, "step": 480 }, { "epoch": 0.04502269855384471, "grad_norm": 21.251911163330078, "learning_rate": 5e-05, "loss": 2.0417, "num_input_tokens_seen": 31868384, "step": 481 }, { "epoch": 0.04502269855384471, "loss": 2.1381373405456543, "loss_ce": 0.0014183730818331242, "loss_iou": 0.875, "loss_num": 0.0771484375, "loss_xval": 2.140625, "num_input_tokens_seen": 31868384, "step": 481 }, { "epoch": 0.04511630083774044, "grad_norm": 23.895750045776367, "learning_rate": 5e-05, "loss": 2.0814, "num_input_tokens_seen": 31934348, "step": 482 }, { "epoch": 0.04511630083774044, "loss": 2.180201530456543, "loss_ce": 0.008936937898397446, "loss_iou": 0.8984375, "loss_num": 0.07421875, "loss_xval": 2.171875, "num_input_tokens_seen": 31934348, "step": 482 }, { "epoch": 0.04520990312163617, "grad_norm": 13.015569686889648, "learning_rate": 5e-05, "loss": 2.3738, "num_input_tokens_seen": 31999700, "step": 483 }, { "epoch": 0.04520990312163617, "loss": 2.3209519386291504, "loss_ce": 0.007475219201296568, "loss_iou": 0.91796875, "loss_num": 0.09521484375, "loss_xval": 2.3125, "num_input_tokens_seen": 31999700, "step": 483 }, { "epoch": 0.045303505405531894, "grad_norm": 35.630584716796875, "learning_rate": 5e-05, "loss": 2.3771, "num_input_tokens_seen": 32065160, "step": 484 }, { "epoch": 0.045303505405531894, "loss": 2.546733856201172, "loss_ce": 0.005718107335269451, "loss_iou": 1.0078125, "loss_num": 0.10595703125, "loss_xval": 2.546875, "num_input_tokens_seen": 32065160, "step": 484 }, { "epoch": 0.045397107689427625, "grad_norm": 212.5460205078125, "learning_rate": 5e-05, "loss": 2.0635, "num_input_tokens_seen": 32131324, "step": 485 }, { "epoch": 0.045397107689427625, "loss": 1.9449450969696045, "loss_ce": 0.00939822755753994, "loss_iou": 0.75390625, "loss_num": 0.0859375, "loss_xval": 1.9375, "num_input_tokens_seen": 32131324, "step": 485 }, { "epoch": 0.04549070997332335, "grad_norm": 14.1449556350708, "learning_rate": 5e-05, "loss": 1.8435, "num_input_tokens_seen": 32196900, "step": 486 }, { "epoch": 0.04549070997332335, "loss": 1.8770546913146973, "loss_ce": 0.005228529218584299, "loss_iou": 0.77734375, "loss_num": 0.0625, "loss_xval": 1.875, "num_input_tokens_seen": 32196900, "step": 486 }, { "epoch": 0.045584312257219074, "grad_norm": 12.691750526428223, "learning_rate": 5e-05, "loss": 2.0293, "num_input_tokens_seen": 32262384, "step": 487 }, { "epoch": 0.045584312257219074, "loss": 2.024717330932617, "loss_ce": 0.006162787787616253, "loss_iou": 0.8203125, "loss_num": 0.0751953125, "loss_xval": 2.015625, "num_input_tokens_seen": 32262384, "step": 487 }, { "epoch": 0.045677914541114806, "grad_norm": 15.173015594482422, "learning_rate": 5e-05, "loss": 1.8394, "num_input_tokens_seen": 32328628, "step": 488 }, { "epoch": 0.045677914541114806, "loss": 1.748635172843933, "loss_ce": 0.01023173052817583, "loss_iou": 0.734375, "loss_num": 0.0537109375, "loss_xval": 1.7421875, "num_input_tokens_seen": 32328628, "step": 488 }, { "epoch": 0.04577151682501053, "grad_norm": 19.01584243774414, "learning_rate": 5e-05, "loss": 2.1774, "num_input_tokens_seen": 32395556, "step": 489 }, { "epoch": 0.04577151682501053, "loss": 2.153399705886841, "loss_ce": 0.006915212143212557, "loss_iou": 0.89453125, "loss_num": 0.0712890625, "loss_xval": 2.140625, "num_input_tokens_seen": 32395556, "step": 489 }, { "epoch": 0.045865119108906255, "grad_norm": 11.831512451171875, "learning_rate": 5e-05, "loss": 2.1523, "num_input_tokens_seen": 32461724, "step": 490 }, { "epoch": 0.045865119108906255, "loss": 2.3882484436035156, "loss_ce": 0.0025062626227736473, "loss_iou": 0.98828125, "loss_num": 0.0810546875, "loss_xval": 2.390625, "num_input_tokens_seen": 32461724, "step": 490 }, { "epoch": 0.045958721392801986, "grad_norm": 15.672270774841309, "learning_rate": 5e-05, "loss": 1.829, "num_input_tokens_seen": 32527456, "step": 491 }, { "epoch": 0.045958721392801986, "loss": 1.8705118894577026, "loss_ce": 0.005277492105960846, "loss_iou": 0.78125, "loss_num": 0.060791015625, "loss_xval": 1.8671875, "num_input_tokens_seen": 32527456, "step": 491 }, { "epoch": 0.04605232367669771, "grad_norm": 21.85772705078125, "learning_rate": 5e-05, "loss": 2.075, "num_input_tokens_seen": 32595248, "step": 492 }, { "epoch": 0.04605232367669771, "loss": 1.899980068206787, "loss_ce": 0.006425441242754459, "loss_iou": 0.828125, "loss_num": 0.047119140625, "loss_xval": 1.890625, "num_input_tokens_seen": 32595248, "step": 492 }, { "epoch": 0.046145925960593435, "grad_norm": 21.907089233398438, "learning_rate": 5e-05, "loss": 2.0562, "num_input_tokens_seen": 32661588, "step": 493 }, { "epoch": 0.046145925960593435, "loss": 2.0572381019592285, "loss_ce": 0.007433369755744934, "loss_iou": 0.84765625, "loss_num": 0.07080078125, "loss_xval": 2.046875, "num_input_tokens_seen": 32661588, "step": 493 }, { "epoch": 0.04623952824448917, "grad_norm": 13.75629997253418, "learning_rate": 5e-05, "loss": 2.0423, "num_input_tokens_seen": 32727640, "step": 494 }, { "epoch": 0.04623952824448917, "loss": 2.1492695808410645, "loss_ce": 0.0027852682396769524, "loss_iou": 0.9140625, "loss_num": 0.0625, "loss_xval": 2.140625, "num_input_tokens_seen": 32727640, "step": 494 }, { "epoch": 0.04633313052838489, "grad_norm": 29.714677810668945, "learning_rate": 5e-05, "loss": 2.2401, "num_input_tokens_seen": 32792568, "step": 495 }, { "epoch": 0.04633313052838489, "loss": 2.181577444076538, "loss_ce": 0.004819741006940603, "loss_iou": 0.921875, "loss_num": 0.06689453125, "loss_xval": 2.171875, "num_input_tokens_seen": 32792568, "step": 495 }, { "epoch": 0.04642673281228062, "grad_norm": 19.96394920349121, "learning_rate": 5e-05, "loss": 2.0922, "num_input_tokens_seen": 32859600, "step": 496 }, { "epoch": 0.04642673281228062, "loss": 2.0059657096862793, "loss_ce": 0.003035774687305093, "loss_iou": 0.828125, "loss_num": 0.068359375, "loss_xval": 2.0, "num_input_tokens_seen": 32859600, "step": 496 }, { "epoch": 0.04652033509617635, "grad_norm": 21.029605865478516, "learning_rate": 5e-05, "loss": 2.185, "num_input_tokens_seen": 32924776, "step": 497 }, { "epoch": 0.04652033509617635, "loss": 2.2143352031707764, "loss_ce": 0.006327410228550434, "loss_iou": 0.95703125, "loss_num": 0.0595703125, "loss_xval": 2.203125, "num_input_tokens_seen": 32924776, "step": 497 }, { "epoch": 0.04661393738007207, "grad_norm": 14.5270414352417, "learning_rate": 5e-05, "loss": 1.9225, "num_input_tokens_seen": 32991020, "step": 498 }, { "epoch": 0.04661393738007207, "loss": 2.088118076324463, "loss_ce": 0.00901655200868845, "loss_iou": 0.88671875, "loss_num": 0.06103515625, "loss_xval": 2.078125, "num_input_tokens_seen": 32991020, "step": 498 }, { "epoch": 0.046707539663967804, "grad_norm": 13.652289390563965, "learning_rate": 5e-05, "loss": 1.8995, "num_input_tokens_seen": 33056636, "step": 499 }, { "epoch": 0.046707539663967804, "loss": 1.8197847604751587, "loss_ce": 0.009237861260771751, "loss_iou": 0.7578125, "loss_num": 0.05908203125, "loss_xval": 1.8125, "num_input_tokens_seen": 33056636, "step": 499 }, { "epoch": 0.04680114194786353, "grad_norm": 14.034210205078125, "learning_rate": 5e-05, "loss": 1.7604, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04680114194786353, "eval_seeclick_CIoU": 0.010791782289743423, "eval_seeclick_GIoU": -0.034255435690283775, "eval_seeclick_IoU": 0.1710895374417305, "eval_seeclick_MAE_all": 0.15610893070697784, "eval_seeclick_MAE_h": 0.14781776815652847, "eval_seeclick_MAE_w": 0.14510643482208252, "eval_seeclick_MAE_x_boxes": 0.22438225895166397, "eval_seeclick_MAE_y_boxes": 0.14794477075338364, "eval_seeclick_NUM_probability": 0.9976383745670319, "eval_seeclick_inside_bbox": 0.20937500149011612, "eval_seeclick_loss": 2.8477022647857666, "eval_seeclick_loss_ce": 0.014594110660254955, "eval_seeclick_loss_iou": 1.03955078125, "eval_seeclick_loss_num": 0.1512451171875, "eval_seeclick_loss_xval": 2.8349609375, "eval_seeclick_runtime": 61.9813, "eval_seeclick_samples_per_second": 0.758, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04680114194786353, "eval_icons_CIoU": -0.18846502900123596, "eval_icons_GIoU": -0.17556016892194748, "eval_icons_IoU": 0.010889915749430656, "eval_icons_MAE_all": 0.21332991123199463, "eval_icons_MAE_h": 0.21519098430871964, "eval_icons_MAE_w": 0.25561511516571045, "eval_icons_MAE_x_boxes": 0.1543479636311531, "eval_icons_MAE_y_boxes": 0.12868855893611908, "eval_icons_NUM_probability": 0.9995498061180115, "eval_icons_inside_bbox": 0.05902777798473835, "eval_icons_loss": 3.4127511978149414, "eval_icons_loss_ce": 0.0002681785117601976, "eval_icons_loss_iou": 1.17431640625, "eval_icons_loss_num": 0.21893310546875, "eval_icons_loss_xval": 3.4443359375, "eval_icons_runtime": 64.3223, "eval_icons_samples_per_second": 0.777, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04680114194786353, "eval_screenspot_CIoU": -0.0015938772509495418, "eval_screenspot_GIoU": -0.02167066124578317, "eval_screenspot_IoU": 0.16240592549244562, "eval_screenspot_MAE_all": 0.1384421413143476, "eval_screenspot_MAE_h": 0.11245785405238469, "eval_screenspot_MAE_w": 0.16129297018051147, "eval_screenspot_MAE_x_boxes": 0.1720647563536962, "eval_screenspot_MAE_y_boxes": 0.09524397552013397, "eval_screenspot_NUM_probability": 0.9993804494539896, "eval_screenspot_inside_bbox": 0.420416663090388, "eval_screenspot_loss": 2.786745309829712, "eval_screenspot_loss_ce": 0.011604713276028633, "eval_screenspot_loss_iou": 1.0431315104166667, "eval_screenspot_loss_num": 0.14615885416666666, "eval_screenspot_loss_xval": 2.8170572916666665, "eval_screenspot_runtime": 128.2233, "eval_screenspot_samples_per_second": 0.694, "eval_screenspot_steps_per_second": 0.023, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04680114194786353, "eval_compot_CIoU": -0.07110186293721199, "eval_compot_GIoU": -0.06726318597793579, "eval_compot_IoU": 0.08910071477293968, "eval_compot_MAE_all": 0.10658146440982819, "eval_compot_MAE_h": 0.08906470611691475, "eval_compot_MAE_w": 0.11970876902341843, "eval_compot_MAE_x_boxes": 0.11287350952625275, "eval_compot_MAE_y_boxes": 0.09081846103072166, "eval_compot_NUM_probability": 0.9994225800037384, "eval_compot_inside_bbox": 0.2326388955116272, "eval_compot_loss": 2.6506786346435547, "eval_compot_loss_ce": 0.004752482753247023, "eval_compot_loss_iou": 1.05419921875, "eval_compot_loss_num": 0.1040191650390625, "eval_compot_loss_xval": 2.62841796875, "eval_compot_runtime": 74.8159, "eval_compot_samples_per_second": 0.668, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04680114194786353, "eval_custom_ui_MAE_all": 0.16714157909154892, "eval_custom_ui_MAE_x": 0.12132188677787781, "eval_custom_ui_MAE_y": 0.21296124905347824, "eval_custom_ui_NUM_probability": 0.996900349855423, "eval_custom_ui_loss": 0.8348149061203003, "eval_custom_ui_loss_ce": 0.017060188576579094, "eval_custom_ui_loss_num": 0.163848876953125, "eval_custom_ui_loss_xval": 0.819091796875, "eval_custom_ui_runtime": 50.998, "eval_custom_ui_samples_per_second": 0.98, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04680114194786353, "loss": 0.8233832120895386, "loss_ce": 0.018695715814828873, "loss_iou": 0.0, "loss_num": 0.1611328125, "loss_xval": 0.8046875, "num_input_tokens_seen": 33122848, "step": 500 }, { "epoch": 0.04689474423175925, "grad_norm": 13.490910530090332, "learning_rate": 5e-05, "loss": 2.1551, "num_input_tokens_seen": 33189200, "step": 501 }, { "epoch": 0.04689474423175925, "loss": 2.0641427040100098, "loss_ce": 0.008478588424623013, "loss_iou": 0.8828125, "loss_num": 0.0576171875, "loss_xval": 2.0625, "num_input_tokens_seen": 33189200, "step": 501 }, { "epoch": 0.046988346515654984, "grad_norm": 15.705336570739746, "learning_rate": 5e-05, "loss": 1.9723, "num_input_tokens_seen": 33255100, "step": 502 }, { "epoch": 0.046988346515654984, "loss": 1.782179832458496, "loss_ce": 0.007155276834964752, "loss_iou": 0.7421875, "loss_num": 0.058349609375, "loss_xval": 1.7734375, "num_input_tokens_seen": 33255100, "step": 502 }, { "epoch": 0.04708194879955071, "grad_norm": 60.570655822753906, "learning_rate": 5e-05, "loss": 2.112, "num_input_tokens_seen": 33321280, "step": 503 }, { "epoch": 0.04708194879955071, "loss": 2.0688400268554688, "loss_ce": 0.008293201215565205, "loss_iou": 0.890625, "loss_num": 0.054931640625, "loss_xval": 2.0625, "num_input_tokens_seen": 33321280, "step": 503 }, { "epoch": 0.04717555108344643, "grad_norm": 16.53862762451172, "learning_rate": 5e-05, "loss": 2.4633, "num_input_tokens_seen": 33386876, "step": 504 }, { "epoch": 0.04717555108344643, "loss": 2.282855987548828, "loss_ce": 0.008686106652021408, "loss_iou": 0.8828125, "loss_num": 0.10205078125, "loss_xval": 2.28125, "num_input_tokens_seen": 33386876, "step": 504 }, { "epoch": 0.047269153367342165, "grad_norm": 20.333419799804688, "learning_rate": 5e-05, "loss": 2.1628, "num_input_tokens_seen": 33454072, "step": 505 }, { "epoch": 0.047269153367342165, "loss": 2.0465087890625, "loss_ce": 0.003540055826306343, "loss_iou": 0.88671875, "loss_num": 0.05322265625, "loss_xval": 2.046875, "num_input_tokens_seen": 33454072, "step": 505 }, { "epoch": 0.04736275565123789, "grad_norm": 30.461502075195312, "learning_rate": 5e-05, "loss": 1.7795, "num_input_tokens_seen": 33520064, "step": 506 }, { "epoch": 0.04736275565123789, "loss": 2.113797664642334, "loss_ce": 0.005399085581302643, "loss_iou": 0.8984375, "loss_num": 0.062255859375, "loss_xval": 2.109375, "num_input_tokens_seen": 33520064, "step": 506 }, { "epoch": 0.047456357935133614, "grad_norm": 19.000221252441406, "learning_rate": 5e-05, "loss": 2.1664, "num_input_tokens_seen": 33586816, "step": 507 }, { "epoch": 0.047456357935133614, "loss": 2.2077221870422363, "loss_ce": 0.008503537625074387, "loss_iou": 0.91796875, "loss_num": 0.07373046875, "loss_xval": 2.203125, "num_input_tokens_seen": 33586816, "step": 507 }, { "epoch": 0.047549960219029345, "grad_norm": 21.011343002319336, "learning_rate": 5e-05, "loss": 2.1269, "num_input_tokens_seen": 33652068, "step": 508 }, { "epoch": 0.047549960219029345, "loss": 2.2789878845214844, "loss_ce": 0.007503424771130085, "loss_iou": 0.92578125, "loss_num": 0.08447265625, "loss_xval": 2.265625, "num_input_tokens_seen": 33652068, "step": 508 }, { "epoch": 0.04764356250292507, "grad_norm": 47.962913513183594, "learning_rate": 5e-05, "loss": 2.1327, "num_input_tokens_seen": 33717020, "step": 509 }, { "epoch": 0.04764356250292507, "loss": 2.3316895961761475, "loss_ce": 0.007470785174518824, "loss_iou": 0.96875, "loss_num": 0.07666015625, "loss_xval": 2.328125, "num_input_tokens_seen": 33717020, "step": 509 }, { "epoch": 0.0477371647868208, "grad_norm": 11.047908782958984, "learning_rate": 5e-05, "loss": 2.0486, "num_input_tokens_seen": 33783980, "step": 510 }, { "epoch": 0.0477371647868208, "loss": 2.0446388721466064, "loss_ce": 0.00362326018512249, "loss_iou": 0.8515625, "loss_num": 0.06884765625, "loss_xval": 2.046875, "num_input_tokens_seen": 33783980, "step": 510 }, { "epoch": 0.047830767070716526, "grad_norm": 29.023284912109375, "learning_rate": 5e-05, "loss": 1.9636, "num_input_tokens_seen": 33850420, "step": 511 }, { "epoch": 0.047830767070716526, "loss": 1.8442463874816895, "loss_ce": 0.010872384533286095, "loss_iou": 0.76953125, "loss_num": 0.057861328125, "loss_xval": 1.8359375, "num_input_tokens_seen": 33850420, "step": 511 }, { "epoch": 0.04792436935461225, "grad_norm": 11.395902633666992, "learning_rate": 5e-05, "loss": 2.2705, "num_input_tokens_seen": 33916568, "step": 512 }, { "epoch": 0.04792436935461225, "loss": 2.2476541996002197, "loss_ce": 0.007419949397444725, "loss_iou": 0.9296875, "loss_num": 0.076171875, "loss_xval": 2.234375, "num_input_tokens_seen": 33916568, "step": 512 }, { "epoch": 0.04801797163850798, "grad_norm": 14.564925193786621, "learning_rate": 5e-05, "loss": 2.3737, "num_input_tokens_seen": 33981932, "step": 513 }, { "epoch": 0.04801797163850798, "loss": 2.4528675079345703, "loss_ce": 0.011461199261248112, "loss_iou": 0.99609375, "loss_num": 0.08935546875, "loss_xval": 2.4375, "num_input_tokens_seen": 33981932, "step": 513 }, { "epoch": 0.04811157392240371, "grad_norm": 26.9398193359375, "learning_rate": 5e-05, "loss": 1.8815, "num_input_tokens_seen": 34048428, "step": 514 }, { "epoch": 0.04811157392240371, "loss": 1.838984489440918, "loss_ce": 0.006953159347176552, "loss_iou": 0.77734375, "loss_num": 0.05517578125, "loss_xval": 1.828125, "num_input_tokens_seen": 34048428, "step": 514 }, { "epoch": 0.04820517620629943, "grad_norm": 50.43394088745117, "learning_rate": 5e-05, "loss": 2.2906, "num_input_tokens_seen": 34114968, "step": 515 }, { "epoch": 0.04820517620629943, "loss": 2.0050594806671143, "loss_ce": 0.006036060396581888, "loss_iou": 0.86328125, "loss_num": 0.05419921875, "loss_xval": 2.0, "num_input_tokens_seen": 34114968, "step": 515 }, { "epoch": 0.04829877849019516, "grad_norm": 11.556628227233887, "learning_rate": 5e-05, "loss": 1.8818, "num_input_tokens_seen": 34181736, "step": 516 }, { "epoch": 0.04829877849019516, "loss": 1.8759956359863281, "loss_ce": 0.00587841309607029, "loss_iou": 0.796875, "loss_num": 0.055419921875, "loss_xval": 1.8671875, "num_input_tokens_seen": 34181736, "step": 516 }, { "epoch": 0.04839238077409089, "grad_norm": 18.406572341918945, "learning_rate": 5e-05, "loss": 1.9696, "num_input_tokens_seen": 34247268, "step": 517 }, { "epoch": 0.04839238077409089, "loss": 2.1460466384887695, "loss_ce": 0.0112810879945755, "loss_iou": 0.828125, "loss_num": 0.09619140625, "loss_xval": 2.140625, "num_input_tokens_seen": 34247268, "step": 517 }, { "epoch": 0.04848598305798661, "grad_norm": 13.858189582824707, "learning_rate": 5e-05, "loss": 2.0945, "num_input_tokens_seen": 34313260, "step": 518 }, { "epoch": 0.04848598305798661, "loss": 2.029033899307251, "loss_ce": 0.0016901454655453563, "loss_iou": 0.8359375, "loss_num": 0.0712890625, "loss_xval": 2.03125, "num_input_tokens_seen": 34313260, "step": 518 }, { "epoch": 0.04857958534188234, "grad_norm": 24.578332901000977, "learning_rate": 5e-05, "loss": 2.0907, "num_input_tokens_seen": 34379356, "step": 519 }, { "epoch": 0.04857958534188234, "loss": 2.06921124458313, "loss_ce": 0.010617459192872047, "loss_iou": 0.859375, "loss_num": 0.0673828125, "loss_xval": 2.0625, "num_input_tokens_seen": 34379356, "step": 519 }, { "epoch": 0.04867318762577807, "grad_norm": 15.262187004089355, "learning_rate": 5e-05, "loss": 2.2961, "num_input_tokens_seen": 34445688, "step": 520 }, { "epoch": 0.04867318762577807, "loss": 2.418579578399658, "loss_ce": 0.004517007619142532, "loss_iou": 0.97265625, "loss_num": 0.09326171875, "loss_xval": 2.40625, "num_input_tokens_seen": 34445688, "step": 520 }, { "epoch": 0.0487667899096738, "grad_norm": 50.165802001953125, "learning_rate": 5e-05, "loss": 1.9634, "num_input_tokens_seen": 34512316, "step": 521 }, { "epoch": 0.0487667899096738, "loss": 2.0381336212158203, "loss_ce": 0.005907262675464153, "loss_iou": 0.84765625, "loss_num": 0.068359375, "loss_xval": 2.03125, "num_input_tokens_seen": 34512316, "step": 521 }, { "epoch": 0.048860392193569524, "grad_norm": 35.08852005004883, "learning_rate": 5e-05, "loss": 2.096, "num_input_tokens_seen": 34579448, "step": 522 }, { "epoch": 0.048860392193569524, "loss": 2.0467257499694824, "loss_ce": 0.005710337311029434, "loss_iou": 0.84765625, "loss_num": 0.06884765625, "loss_xval": 2.046875, "num_input_tokens_seen": 34579448, "step": 522 }, { "epoch": 0.04895399447746525, "grad_norm": 11.056382179260254, "learning_rate": 5e-05, "loss": 2.4524, "num_input_tokens_seen": 34645752, "step": 523 }, { "epoch": 0.04895399447746525, "loss": 2.5007433891296387, "loss_ce": 0.006602696608752012, "loss_iou": 1.0390625, "loss_num": 0.08251953125, "loss_xval": 2.5, "num_input_tokens_seen": 34645752, "step": 523 }, { "epoch": 0.04904759676136098, "grad_norm": 8.489731788635254, "learning_rate": 5e-05, "loss": 2.5202, "num_input_tokens_seen": 34711724, "step": 524 }, { "epoch": 0.04904759676136098, "loss": 2.4595420360565186, "loss_ce": 0.008370120078325272, "loss_iou": 1.0390625, "loss_num": 0.07373046875, "loss_xval": 2.453125, "num_input_tokens_seen": 34711724, "step": 524 }, { "epoch": 0.049141199045256705, "grad_norm": 15.339666366577148, "learning_rate": 5e-05, "loss": 2.3298, "num_input_tokens_seen": 34778112, "step": 525 }, { "epoch": 0.049141199045256705, "loss": 2.4460458755493164, "loss_ce": 0.0065928734838962555, "loss_iou": 1.0078125, "loss_num": 0.08447265625, "loss_xval": 2.4375, "num_input_tokens_seen": 34778112, "step": 525 }, { "epoch": 0.04923480132915243, "grad_norm": 13.85021686553955, "learning_rate": 5e-05, "loss": 2.0732, "num_input_tokens_seen": 34845392, "step": 526 }, { "epoch": 0.04923480132915243, "loss": 2.011549234390259, "loss_ce": 0.006666369736194611, "loss_iou": 0.890625, "loss_num": 0.044189453125, "loss_xval": 2.0, "num_input_tokens_seen": 34845392, "step": 526 }, { "epoch": 0.04932840361304816, "grad_norm": 19.560449600219727, "learning_rate": 5e-05, "loss": 1.7883, "num_input_tokens_seen": 34911200, "step": 527 }, { "epoch": 0.04932840361304816, "loss": 1.6927893161773682, "loss_ce": 0.004434742033481598, "loss_iou": 0.68359375, "loss_num": 0.064453125, "loss_xval": 1.6875, "num_input_tokens_seen": 34911200, "step": 527 }, { "epoch": 0.049422005896943885, "grad_norm": 32.109779357910156, "learning_rate": 5e-05, "loss": 1.9413, "num_input_tokens_seen": 34977464, "step": 528 }, { "epoch": 0.049422005896943885, "loss": 1.9414345026016235, "loss_ce": 0.0019814781844615936, "loss_iou": 0.81640625, "loss_num": 0.062255859375, "loss_xval": 1.9375, "num_input_tokens_seen": 34977464, "step": 528 }, { "epoch": 0.04951560818083961, "grad_norm": 12.003063201904297, "learning_rate": 5e-05, "loss": 2.1411, "num_input_tokens_seen": 35043968, "step": 529 }, { "epoch": 0.04951560818083961, "loss": 2.186443328857422, "loss_ce": 0.004802762530744076, "loss_iou": 0.9375, "loss_num": 0.061767578125, "loss_xval": 2.1875, "num_input_tokens_seen": 35043968, "step": 529 }, { "epoch": 0.04960921046473534, "grad_norm": 13.4691743850708, "learning_rate": 5e-05, "loss": 2.0753, "num_input_tokens_seen": 35110340, "step": 530 }, { "epoch": 0.04960921046473534, "loss": 2.179971218109131, "loss_ce": 0.008096389472484589, "loss_iou": 0.890625, "loss_num": 0.07861328125, "loss_xval": 2.171875, "num_input_tokens_seen": 35110340, "step": 530 }, { "epoch": 0.049702812748631066, "grad_norm": 23.27361488342285, "learning_rate": 5e-05, "loss": 1.9764, "num_input_tokens_seen": 35175964, "step": 531 }, { "epoch": 0.049702812748631066, "loss": 2.1452090740203857, "loss_ce": 0.006537298671901226, "loss_iou": 0.93359375, "loss_num": 0.0537109375, "loss_xval": 2.140625, "num_input_tokens_seen": 35175964, "step": 531 }, { "epoch": 0.04979641503252679, "grad_norm": 12.105748176574707, "learning_rate": 5e-05, "loss": 1.9665, "num_input_tokens_seen": 35241672, "step": 532 }, { "epoch": 0.04979641503252679, "loss": 1.637925386428833, "loss_ce": 0.006455630529671907, "loss_iou": 0.67578125, "loss_num": 0.05615234375, "loss_xval": 1.6328125, "num_input_tokens_seen": 35241672, "step": 532 }, { "epoch": 0.04989001731642252, "grad_norm": 15.676896095275879, "learning_rate": 5e-05, "loss": 2.0951, "num_input_tokens_seen": 35307804, "step": 533 }, { "epoch": 0.04989001731642252, "loss": 1.8774535655975342, "loss_ce": 0.008312873542308807, "loss_iou": 0.7578125, "loss_num": 0.0712890625, "loss_xval": 1.8671875, "num_input_tokens_seen": 35307804, "step": 533 }, { "epoch": 0.049983619600318246, "grad_norm": 20.40330696105957, "learning_rate": 5e-05, "loss": 1.9254, "num_input_tokens_seen": 35373448, "step": 534 }, { "epoch": 0.049983619600318246, "loss": 2.0857203006744385, "loss_ce": 0.0075952280312776566, "loss_iou": 0.84765625, "loss_num": 0.0771484375, "loss_xval": 2.078125, "num_input_tokens_seen": 35373448, "step": 534 }, { "epoch": 0.05007722188421398, "grad_norm": 10.411561965942383, "learning_rate": 5e-05, "loss": 2.1476, "num_input_tokens_seen": 35439568, "step": 535 }, { "epoch": 0.05007722188421398, "loss": 2.1036837100982666, "loss_ce": 0.008102689869701862, "loss_iou": 0.86328125, "loss_num": 0.07373046875, "loss_xval": 2.09375, "num_input_tokens_seen": 35439568, "step": 535 }, { "epoch": 0.0501708241681097, "grad_norm": 10.970884323120117, "learning_rate": 5e-05, "loss": 1.7214, "num_input_tokens_seen": 35504736, "step": 536 }, { "epoch": 0.0501708241681097, "loss": 1.7694377899169922, "loss_ce": 0.007230841089040041, "loss_iou": 0.6953125, "loss_num": 0.07470703125, "loss_xval": 1.765625, "num_input_tokens_seen": 35504736, "step": 536 }, { "epoch": 0.05026442645200543, "grad_norm": 18.94941520690918, "learning_rate": 5e-05, "loss": 1.9199, "num_input_tokens_seen": 35571456, "step": 537 }, { "epoch": 0.05026442645200543, "loss": 1.9069634675979614, "loss_ce": 0.00461967196315527, "loss_iou": 0.8125, "loss_num": 0.055419921875, "loss_xval": 1.90625, "num_input_tokens_seen": 35571456, "step": 537 }, { "epoch": 0.05035802873590116, "grad_norm": 19.750707626342773, "learning_rate": 5e-05, "loss": 2.3119, "num_input_tokens_seen": 35638668, "step": 538 }, { "epoch": 0.05035802873590116, "loss": 2.2893307209014893, "loss_ce": 0.004174430388957262, "loss_iou": 1.0, "loss_num": 0.058349609375, "loss_xval": 2.28125, "num_input_tokens_seen": 35638668, "step": 538 }, { "epoch": 0.05045163101979688, "grad_norm": 49.66666793823242, "learning_rate": 5e-05, "loss": 2.0795, "num_input_tokens_seen": 35705296, "step": 539 }, { "epoch": 0.05045163101979688, "loss": 2.171050548553467, "loss_ce": 0.009917769581079483, "loss_iou": 0.90625, "loss_num": 0.0693359375, "loss_xval": 2.15625, "num_input_tokens_seen": 35705296, "step": 539 }, { "epoch": 0.05054523330369261, "grad_norm": 18.106250762939453, "learning_rate": 5e-05, "loss": 2.0437, "num_input_tokens_seen": 35771804, "step": 540 }, { "epoch": 0.05054523330369261, "loss": 2.156437397003174, "loss_ce": 0.006046803202480078, "loss_iou": 0.84765625, "loss_num": 0.09130859375, "loss_xval": 2.15625, "num_input_tokens_seen": 35771804, "step": 540 }, { "epoch": 0.05063883558758834, "grad_norm": 14.011467933654785, "learning_rate": 5e-05, "loss": 2.1245, "num_input_tokens_seen": 35838284, "step": 541 }, { "epoch": 0.05063883558758834, "loss": 2.0323169231414795, "loss_ce": 0.007902797311544418, "loss_iou": 0.796875, "loss_num": 0.0859375, "loss_xval": 2.03125, "num_input_tokens_seen": 35838284, "step": 541 }, { "epoch": 0.050732437871484064, "grad_norm": 41.99628829956055, "learning_rate": 5e-05, "loss": 1.9095, "num_input_tokens_seen": 35905368, "step": 542 }, { "epoch": 0.050732437871484064, "loss": 2.227463722229004, "loss_ce": 0.008713909424841404, "loss_iou": 0.9375, "loss_num": 0.068359375, "loss_xval": 2.21875, "num_input_tokens_seen": 35905368, "step": 542 }, { "epoch": 0.05082604015537979, "grad_norm": 22.376237869262695, "learning_rate": 5e-05, "loss": 1.9594, "num_input_tokens_seen": 35972180, "step": 543 }, { "epoch": 0.05082604015537979, "loss": 1.9037367105484009, "loss_ce": 0.005299141630530357, "loss_iou": 0.80859375, "loss_num": 0.0556640625, "loss_xval": 1.8984375, "num_input_tokens_seen": 35972180, "step": 543 }, { "epoch": 0.05091964243927552, "grad_norm": 15.99255657196045, "learning_rate": 5e-05, "loss": 2.2139, "num_input_tokens_seen": 36037984, "step": 544 }, { "epoch": 0.05091964243927552, "loss": 2.317139148712158, "loss_ce": 0.007568897679448128, "loss_iou": 0.94140625, "loss_num": 0.08544921875, "loss_xval": 2.3125, "num_input_tokens_seen": 36037984, "step": 544 }, { "epoch": 0.051013244723171244, "grad_norm": 10.45749282836914, "learning_rate": 5e-05, "loss": 1.9645, "num_input_tokens_seen": 36105256, "step": 545 }, { "epoch": 0.051013244723171244, "loss": 1.761710524559021, "loss_ce": 0.005851097404956818, "loss_iou": 0.75390625, "loss_num": 0.050048828125, "loss_xval": 1.7578125, "num_input_tokens_seen": 36105256, "step": 545 }, { "epoch": 0.051106847007066976, "grad_norm": 22.613332748413086, "learning_rate": 5e-05, "loss": 1.6618, "num_input_tokens_seen": 36170560, "step": 546 }, { "epoch": 0.051106847007066976, "loss": 1.6365282535552979, "loss_ce": 0.006645415909588337, "loss_iou": 0.6796875, "loss_num": 0.0537109375, "loss_xval": 1.6328125, "num_input_tokens_seen": 36170560, "step": 546 }, { "epoch": 0.0512004492909627, "grad_norm": 74.29531860351562, "learning_rate": 5e-05, "loss": 2.3012, "num_input_tokens_seen": 36237660, "step": 547 }, { "epoch": 0.0512004492909627, "loss": 2.348698854446411, "loss_ce": 0.00787852331995964, "loss_iou": 0.984375, "loss_num": 0.07373046875, "loss_xval": 2.34375, "num_input_tokens_seen": 36237660, "step": 547 }, { "epoch": 0.051294051574858425, "grad_norm": 19.30687141418457, "learning_rate": 5e-05, "loss": 2.1144, "num_input_tokens_seen": 36304992, "step": 548 }, { "epoch": 0.051294051574858425, "loss": 2.1274046897888184, "loss_ce": 0.004357866011559963, "loss_iou": 0.89453125, "loss_num": 0.06640625, "loss_xval": 2.125, "num_input_tokens_seen": 36304992, "step": 548 }, { "epoch": 0.051387653858754156, "grad_norm": 47.999019622802734, "learning_rate": 5e-05, "loss": 2.1222, "num_input_tokens_seen": 36370964, "step": 549 }, { "epoch": 0.051387653858754156, "loss": 2.293400764465332, "loss_ce": 0.00336168403737247, "loss_iou": 0.96484375, "loss_num": 0.072265625, "loss_xval": 2.296875, "num_input_tokens_seen": 36370964, "step": 549 }, { "epoch": 0.05148125614264988, "grad_norm": 12.98134994506836, "learning_rate": 5e-05, "loss": 2.174, "num_input_tokens_seen": 36436144, "step": 550 }, { "epoch": 0.05148125614264988, "loss": 2.130186080932617, "loss_ce": 0.007139342837035656, "loss_iou": 0.859375, "loss_num": 0.07958984375, "loss_xval": 2.125, "num_input_tokens_seen": 36436144, "step": 550 }, { "epoch": 0.051574858426545606, "grad_norm": 29.445837020874023, "learning_rate": 5e-05, "loss": 1.8647, "num_input_tokens_seen": 36502144, "step": 551 }, { "epoch": 0.051574858426545606, "loss": 1.7941217422485352, "loss_ce": 0.006035890430212021, "loss_iou": 0.7578125, "loss_num": 0.054443359375, "loss_xval": 1.7890625, "num_input_tokens_seen": 36502144, "step": 551 }, { "epoch": 0.05166846071044134, "grad_norm": 15.311250686645508, "learning_rate": 5e-05, "loss": 1.752, "num_input_tokens_seen": 36568284, "step": 552 }, { "epoch": 0.05166846071044134, "loss": 1.6321521997451782, "loss_ce": 0.0071521904319524765, "loss_iou": 0.62890625, "loss_num": 0.0732421875, "loss_xval": 1.625, "num_input_tokens_seen": 36568284, "step": 552 }, { "epoch": 0.05176206299433706, "grad_norm": 24.853679656982422, "learning_rate": 5e-05, "loss": 2.1207, "num_input_tokens_seen": 36635384, "step": 553 }, { "epoch": 0.05176206299433706, "loss": 1.975627064704895, "loss_ce": 0.009806775487959385, "loss_iou": 0.83203125, "loss_num": 0.060546875, "loss_xval": 1.96875, "num_input_tokens_seen": 36635384, "step": 553 }, { "epoch": 0.051855665278232786, "grad_norm": 14.415247917175293, "learning_rate": 5e-05, "loss": 1.9491, "num_input_tokens_seen": 36702264, "step": 554 }, { "epoch": 0.051855665278232786, "loss": 2.1889944076538086, "loss_ce": 0.007353785447776318, "loss_iou": 0.92578125, "loss_num": 0.06591796875, "loss_xval": 2.1875, "num_input_tokens_seen": 36702264, "step": 554 }, { "epoch": 0.05194926756212852, "grad_norm": 13.853069305419922, "learning_rate": 5e-05, "loss": 2.0479, "num_input_tokens_seen": 36768768, "step": 555 }, { "epoch": 0.05194926756212852, "loss": 1.9762237071990967, "loss_ce": 0.005520680919289589, "loss_iou": 0.81640625, "loss_num": 0.06787109375, "loss_xval": 1.96875, "num_input_tokens_seen": 36768768, "step": 555 }, { "epoch": 0.05204286984602424, "grad_norm": 8.91629695892334, "learning_rate": 5e-05, "loss": 1.8298, "num_input_tokens_seen": 36834424, "step": 556 }, { "epoch": 0.05204286984602424, "loss": 1.9549953937530518, "loss_ce": 0.003823609557002783, "loss_iou": 0.8046875, "loss_num": 0.068359375, "loss_xval": 1.953125, "num_input_tokens_seen": 36834424, "step": 556 }, { "epoch": 0.05213647212991997, "grad_norm": 15.065163612365723, "learning_rate": 5e-05, "loss": 1.8397, "num_input_tokens_seen": 36901232, "step": 557 }, { "epoch": 0.05213647212991997, "loss": 1.8255698680877686, "loss_ce": 0.008187083527445793, "loss_iou": 0.69921875, "loss_num": 0.0830078125, "loss_xval": 1.8203125, "num_input_tokens_seen": 36901232, "step": 557 }, { "epoch": 0.0522300744138157, "grad_norm": 84.12255859375, "learning_rate": 5e-05, "loss": 2.0238, "num_input_tokens_seen": 36967616, "step": 558 }, { "epoch": 0.0522300744138157, "loss": 2.149418354034424, "loss_ce": 0.009281693957746029, "loss_iou": 0.8671875, "loss_num": 0.08203125, "loss_xval": 2.140625, "num_input_tokens_seen": 36967616, "step": 558 }, { "epoch": 0.05232367669771142, "grad_norm": 11.452712059020996, "learning_rate": 5e-05, "loss": 2.4079, "num_input_tokens_seen": 37034176, "step": 559 }, { "epoch": 0.05232367669771142, "loss": 2.4366798400878906, "loss_ce": 0.006015540100634098, "loss_iou": 1.0078125, "loss_num": 0.08154296875, "loss_xval": 2.4375, "num_input_tokens_seen": 37034176, "step": 559 }, { "epoch": 0.052417278981607154, "grad_norm": 12.024419784545898, "learning_rate": 5e-05, "loss": 2.1908, "num_input_tokens_seen": 37099836, "step": 560 }, { "epoch": 0.052417278981607154, "loss": 2.415208578109741, "loss_ce": 0.007005504798144102, "loss_iou": 0.98046875, "loss_num": 0.08935546875, "loss_xval": 2.40625, "num_input_tokens_seen": 37099836, "step": 560 }, { "epoch": 0.05251088126550288, "grad_norm": 18.38412094116211, "learning_rate": 5e-05, "loss": 2.0019, "num_input_tokens_seen": 37166192, "step": 561 }, { "epoch": 0.05251088126550288, "loss": 2.0711967945098877, "loss_ce": 0.006743601523339748, "loss_iou": 0.84765625, "loss_num": 0.0732421875, "loss_xval": 2.0625, "num_input_tokens_seen": 37166192, "step": 561 }, { "epoch": 0.052604483549398603, "grad_norm": 15.95351505279541, "learning_rate": 5e-05, "loss": 1.943, "num_input_tokens_seen": 37232400, "step": 562 }, { "epoch": 0.052604483549398603, "loss": 1.9301117658615112, "loss_ce": 0.0053070904687047005, "loss_iou": 0.7734375, "loss_num": 0.07568359375, "loss_xval": 1.921875, "num_input_tokens_seen": 37232400, "step": 562 }, { "epoch": 0.052698085833294335, "grad_norm": 21.236230850219727, "learning_rate": 5e-05, "loss": 2.115, "num_input_tokens_seen": 37300176, "step": 563 }, { "epoch": 0.052698085833294335, "loss": 2.063028335571289, "loss_ce": 0.009317463263869286, "loss_iou": 0.85546875, "loss_num": 0.06884765625, "loss_xval": 2.046875, "num_input_tokens_seen": 37300176, "step": 563 }, { "epoch": 0.05279168811719006, "grad_norm": 22.604427337646484, "learning_rate": 5e-05, "loss": 1.7916, "num_input_tokens_seen": 37365768, "step": 564 }, { "epoch": 0.05279168811719006, "loss": 1.671072006225586, "loss_ce": 0.01677520014345646, "loss_iou": 0.640625, "loss_num": 0.0751953125, "loss_xval": 1.65625, "num_input_tokens_seen": 37365768, "step": 564 }, { "epoch": 0.052885290401085784, "grad_norm": 13.016477584838867, "learning_rate": 5e-05, "loss": 1.7893, "num_input_tokens_seen": 37432448, "step": 565 }, { "epoch": 0.052885290401085784, "loss": 1.833286166191101, "loss_ce": 0.00516126211732626, "loss_iou": 0.796875, "loss_num": 0.047607421875, "loss_xval": 1.828125, "num_input_tokens_seen": 37432448, "step": 565 }, { "epoch": 0.052978892684981516, "grad_norm": 61.25498962402344, "learning_rate": 5e-05, "loss": 1.6456, "num_input_tokens_seen": 37499932, "step": 566 }, { "epoch": 0.052978892684981516, "loss": 1.5449293851852417, "loss_ce": 0.00977318175137043, "loss_iou": 0.66015625, "loss_num": 0.04296875, "loss_xval": 1.53125, "num_input_tokens_seen": 37499932, "step": 566 }, { "epoch": 0.05307249496887724, "grad_norm": 27.738588333129883, "learning_rate": 5e-05, "loss": 1.9402, "num_input_tokens_seen": 37565112, "step": 567 }, { "epoch": 0.05307249496887724, "loss": 1.886468768119812, "loss_ce": 0.0070742275565862656, "loss_iou": 0.7109375, "loss_num": 0.09130859375, "loss_xval": 1.8828125, "num_input_tokens_seen": 37565112, "step": 567 }, { "epoch": 0.053166097252772965, "grad_norm": 18.664432525634766, "learning_rate": 5e-05, "loss": 1.83, "num_input_tokens_seen": 37630628, "step": 568 }, { "epoch": 0.053166097252772965, "loss": 1.7699365615844727, "loss_ce": 0.0038233078084886074, "loss_iou": 0.7265625, "loss_num": 0.06298828125, "loss_xval": 1.765625, "num_input_tokens_seen": 37630628, "step": 568 }, { "epoch": 0.053259699536668696, "grad_norm": 31.280494689941406, "learning_rate": 5e-05, "loss": 2.035, "num_input_tokens_seen": 37695876, "step": 569 }, { "epoch": 0.053259699536668696, "loss": 1.9658114910125732, "loss_ce": 0.004874029662460089, "loss_iou": 0.859375, "loss_num": 0.04833984375, "loss_xval": 1.9609375, "num_input_tokens_seen": 37695876, "step": 569 }, { "epoch": 0.05335330182056442, "grad_norm": 12.460577011108398, "learning_rate": 5e-05, "loss": 2.2548, "num_input_tokens_seen": 37761960, "step": 570 }, { "epoch": 0.05335330182056442, "loss": 2.080930709838867, "loss_ce": 0.007688452955335379, "loss_iou": 0.828125, "loss_num": 0.08349609375, "loss_xval": 2.078125, "num_input_tokens_seen": 37761960, "step": 570 }, { "epoch": 0.05344690410446015, "grad_norm": 24.75887680053711, "learning_rate": 5e-05, "loss": 2.1912, "num_input_tokens_seen": 37828200, "step": 571 }, { "epoch": 0.05344690410446015, "loss": 2.2149462699890137, "loss_ce": 0.006938466802239418, "loss_iou": 0.8671875, "loss_num": 0.09521484375, "loss_xval": 2.203125, "num_input_tokens_seen": 37828200, "step": 571 }, { "epoch": 0.05354050638835588, "grad_norm": 15.288697242736816, "learning_rate": 5e-05, "loss": 1.7935, "num_input_tokens_seen": 37894312, "step": 572 }, { "epoch": 0.05354050638835588, "loss": 1.7205119132995605, "loss_ce": 0.005424095317721367, "loss_iou": 0.71484375, "loss_num": 0.057373046875, "loss_xval": 1.71875, "num_input_tokens_seen": 37894312, "step": 572 }, { "epoch": 0.0536341086722516, "grad_norm": 16.898698806762695, "learning_rate": 5e-05, "loss": 1.9252, "num_input_tokens_seen": 37962084, "step": 573 }, { "epoch": 0.0536341086722516, "loss": 2.0075607299804688, "loss_ce": 0.0026777826715260744, "loss_iou": 0.88671875, "loss_num": 0.0458984375, "loss_xval": 2.0, "num_input_tokens_seen": 37962084, "step": 573 }, { "epoch": 0.05372771095614733, "grad_norm": 29.335344314575195, "learning_rate": 5e-05, "loss": 1.9544, "num_input_tokens_seen": 38028544, "step": 574 }, { "epoch": 0.05372771095614733, "loss": 1.9636216163635254, "loss_ce": 0.004637310281395912, "loss_iou": 0.8359375, "loss_num": 0.05810546875, "loss_xval": 1.9609375, "num_input_tokens_seen": 38028544, "step": 574 }, { "epoch": 0.05382131324004306, "grad_norm": 13.343282699584961, "learning_rate": 5e-05, "loss": 2.1594, "num_input_tokens_seen": 38093708, "step": 575 }, { "epoch": 0.05382131324004306, "loss": 2.0714712142944336, "loss_ce": 0.008971377275884151, "loss_iou": 0.828125, "loss_num": 0.0810546875, "loss_xval": 2.0625, "num_input_tokens_seen": 38093708, "step": 575 }, { "epoch": 0.05391491552393878, "grad_norm": 28.67262840270996, "learning_rate": 5e-05, "loss": 1.8941, "num_input_tokens_seen": 38159792, "step": 576 }, { "epoch": 0.05391491552393878, "loss": 1.9878249168395996, "loss_ce": 0.006379695143550634, "loss_iou": 0.796875, "loss_num": 0.07861328125, "loss_xval": 1.984375, "num_input_tokens_seen": 38159792, "step": 576 }, { "epoch": 0.054008517807834514, "grad_norm": 15.381333351135254, "learning_rate": 5e-05, "loss": 1.9251, "num_input_tokens_seen": 38226032, "step": 577 }, { "epoch": 0.054008517807834514, "loss": 1.8523845672607422, "loss_ce": 0.006681526079773903, "loss_iou": 0.76171875, "loss_num": 0.064453125, "loss_xval": 1.84375, "num_input_tokens_seen": 38226032, "step": 577 }, { "epoch": 0.05410212009173024, "grad_norm": 26.613903045654297, "learning_rate": 5e-05, "loss": 2.0962, "num_input_tokens_seen": 38292808, "step": 578 }, { "epoch": 0.05410212009173024, "loss": 2.060774803161621, "loss_ce": 0.007063917815685272, "loss_iou": 0.88671875, "loss_num": 0.05615234375, "loss_xval": 2.046875, "num_input_tokens_seen": 38292808, "step": 578 }, { "epoch": 0.05419572237562596, "grad_norm": 21.56264877319336, "learning_rate": 5e-05, "loss": 2.2777, "num_input_tokens_seen": 38359224, "step": 579 }, { "epoch": 0.05419572237562596, "loss": 2.1975607872009277, "loss_ce": 0.007131216116249561, "loss_iou": 0.9140625, "loss_num": 0.07177734375, "loss_xval": 2.1875, "num_input_tokens_seen": 38359224, "step": 579 }, { "epoch": 0.054289324659521694, "grad_norm": 12.033671379089355, "learning_rate": 5e-05, "loss": 2.2074, "num_input_tokens_seen": 38425232, "step": 580 }, { "epoch": 0.054289324659521694, "loss": 2.2009353637695312, "loss_ce": 0.007575890514999628, "loss_iou": 0.921875, "loss_num": 0.0703125, "loss_xval": 2.1875, "num_input_tokens_seen": 38425232, "step": 580 }, { "epoch": 0.05438292694341742, "grad_norm": 16.650014877319336, "learning_rate": 5e-05, "loss": 1.7616, "num_input_tokens_seen": 38491244, "step": 581 }, { "epoch": 0.05438292694341742, "loss": 1.8445103168487549, "loss_ce": 0.004666535183787346, "loss_iou": 0.76953125, "loss_num": 0.059814453125, "loss_xval": 1.84375, "num_input_tokens_seen": 38491244, "step": 581 }, { "epoch": 0.05447652922731314, "grad_norm": 32.606571197509766, "learning_rate": 5e-05, "loss": 1.7856, "num_input_tokens_seen": 38557004, "step": 582 }, { "epoch": 0.05447652922731314, "loss": 1.910442590713501, "loss_ce": 0.004680905491113663, "loss_iou": 0.734375, "loss_num": 0.087890625, "loss_xval": 1.90625, "num_input_tokens_seen": 38557004, "step": 582 }, { "epoch": 0.054570131511208875, "grad_norm": 17.818336486816406, "learning_rate": 5e-05, "loss": 1.8917, "num_input_tokens_seen": 38624396, "step": 583 }, { "epoch": 0.054570131511208875, "loss": 1.8677129745483398, "loss_ce": 0.005408269818872213, "loss_iou": 0.796875, "loss_num": 0.053955078125, "loss_xval": 1.859375, "num_input_tokens_seen": 38624396, "step": 583 }, { "epoch": 0.0546637337951046, "grad_norm": 20.405603408813477, "learning_rate": 5e-05, "loss": 2.127, "num_input_tokens_seen": 38690540, "step": 584 }, { "epoch": 0.0546637337951046, "loss": 1.979243278503418, "loss_ce": 0.009516758844256401, "loss_iou": 0.8359375, "loss_num": 0.059814453125, "loss_xval": 1.96875, "num_input_tokens_seen": 38690540, "step": 584 }, { "epoch": 0.05475733607900033, "grad_norm": 26.222898483276367, "learning_rate": 5e-05, "loss": 2.591, "num_input_tokens_seen": 38756172, "step": 585 }, { "epoch": 0.05475733607900033, "loss": 2.757906913757324, "loss_ce": 0.007906868122518063, "loss_iou": 1.1328125, "loss_num": 0.09765625, "loss_xval": 2.75, "num_input_tokens_seen": 38756172, "step": 585 }, { "epoch": 0.054850938362896055, "grad_norm": 11.419878005981445, "learning_rate": 5e-05, "loss": 1.9198, "num_input_tokens_seen": 38823404, "step": 586 }, { "epoch": 0.054850938362896055, "loss": 1.9623419046401978, "loss_ce": 0.00531068816781044, "loss_iou": 0.80859375, "loss_num": 0.0673828125, "loss_xval": 1.953125, "num_input_tokens_seen": 38823404, "step": 586 }, { "epoch": 0.05494454064679178, "grad_norm": 28.394208908081055, "learning_rate": 5e-05, "loss": 1.7159, "num_input_tokens_seen": 38890444, "step": 587 }, { "epoch": 0.05494454064679178, "loss": 1.6631174087524414, "loss_ce": 0.005402637645602226, "loss_iou": 0.72265625, "loss_num": 0.04248046875, "loss_xval": 1.65625, "num_input_tokens_seen": 38890444, "step": 587 }, { "epoch": 0.05503814293068751, "grad_norm": 18.359874725341797, "learning_rate": 5e-05, "loss": 2.1832, "num_input_tokens_seen": 38957780, "step": 588 }, { "epoch": 0.05503814293068751, "loss": 2.311920642852783, "loss_ce": 0.005279937759041786, "loss_iou": 0.9375, "loss_num": 0.08642578125, "loss_xval": 2.3125, "num_input_tokens_seen": 38957780, "step": 588 }, { "epoch": 0.055131745214583236, "grad_norm": 11.638762474060059, "learning_rate": 5e-05, "loss": 2.0758, "num_input_tokens_seen": 39025052, "step": 589 }, { "epoch": 0.055131745214583236, "loss": 2.085104465484619, "loss_ce": 0.00697963684797287, "loss_iou": 0.859375, "loss_num": 0.07080078125, "loss_xval": 2.078125, "num_input_tokens_seen": 39025052, "step": 589 }, { "epoch": 0.05522534749847896, "grad_norm": 14.214030265808105, "learning_rate": 5e-05, "loss": 1.7909, "num_input_tokens_seen": 39091696, "step": 590 }, { "epoch": 0.05522534749847896, "loss": 1.8609280586242676, "loss_ce": 0.0035061007365584373, "loss_iou": 0.78515625, "loss_num": 0.0576171875, "loss_xval": 1.859375, "num_input_tokens_seen": 39091696, "step": 590 }, { "epoch": 0.05531894978237469, "grad_norm": 51.06943130493164, "learning_rate": 5e-05, "loss": 2.0519, "num_input_tokens_seen": 39158200, "step": 591 }, { "epoch": 0.05531894978237469, "loss": 2.1332483291625977, "loss_ce": 0.0053186360746622086, "loss_iou": 0.90625, "loss_num": 0.06396484375, "loss_xval": 2.125, "num_input_tokens_seen": 39158200, "step": 591 }, { "epoch": 0.05541255206627042, "grad_norm": 41.73386764526367, "learning_rate": 5e-05, "loss": 2.1648, "num_input_tokens_seen": 39223556, "step": 592 }, { "epoch": 0.05541255206627042, "loss": 2.313202381134033, "loss_ce": 0.006561813876032829, "loss_iou": 0.984375, "loss_num": 0.06787109375, "loss_xval": 2.3125, "num_input_tokens_seen": 39223556, "step": 592 }, { "epoch": 0.05550615435016614, "grad_norm": 14.052983283996582, "learning_rate": 5e-05, "loss": 2.2862, "num_input_tokens_seen": 39289240, "step": 593 }, { "epoch": 0.05550615435016614, "loss": 2.210081100463867, "loss_ce": 0.005491130985319614, "loss_iou": 0.8984375, "loss_num": 0.08203125, "loss_xval": 2.203125, "num_input_tokens_seen": 39289240, "step": 593 }, { "epoch": 0.05559975663406187, "grad_norm": 21.231733322143555, "learning_rate": 5e-05, "loss": 2.2757, "num_input_tokens_seen": 39356272, "step": 594 }, { "epoch": 0.05559975663406187, "loss": 2.239682674407959, "loss_ce": 0.006284385919570923, "loss_iou": 0.921875, "loss_num": 0.07861328125, "loss_xval": 2.234375, "num_input_tokens_seen": 39356272, "step": 594 }, { "epoch": 0.0556933589179576, "grad_norm": 11.525460243225098, "learning_rate": 5e-05, "loss": 2.1444, "num_input_tokens_seen": 39423392, "step": 595 }, { "epoch": 0.0556933589179576, "loss": 2.2538256645202637, "loss_ce": 0.005778972990810871, "loss_iou": 0.90234375, "loss_num": 0.08837890625, "loss_xval": 2.25, "num_input_tokens_seen": 39423392, "step": 595 }, { "epoch": 0.05578696120185333, "grad_norm": 10.383962631225586, "learning_rate": 5e-05, "loss": 1.8663, "num_input_tokens_seen": 39490420, "step": 596 }, { "epoch": 0.05578696120185333, "loss": 1.938031792640686, "loss_ce": 0.008344260044395924, "loss_iou": 0.82421875, "loss_num": 0.05615234375, "loss_xval": 1.9296875, "num_input_tokens_seen": 39490420, "step": 596 }, { "epoch": 0.05588056348574905, "grad_norm": 16.740312576293945, "learning_rate": 5e-05, "loss": 1.7252, "num_input_tokens_seen": 39556180, "step": 597 }, { "epoch": 0.05588056348574905, "loss": 1.7972288131713867, "loss_ce": 0.005236555356532335, "loss_iou": 0.73046875, "loss_num": 0.0654296875, "loss_xval": 1.7890625, "num_input_tokens_seen": 39556180, "step": 597 }, { "epoch": 0.05597416576964478, "grad_norm": 13.414369583129883, "learning_rate": 5e-05, "loss": 2.1265, "num_input_tokens_seen": 39622388, "step": 598 }, { "epoch": 0.05597416576964478, "loss": 1.9705634117126465, "loss_ce": 0.004743036814033985, "loss_iou": 0.859375, "loss_num": 0.04931640625, "loss_xval": 1.96875, "num_input_tokens_seen": 39622388, "step": 598 }, { "epoch": 0.05606776805354051, "grad_norm": 25.500898361206055, "learning_rate": 5e-05, "loss": 1.7969, "num_input_tokens_seen": 39687900, "step": 599 }, { "epoch": 0.05606776805354051, "loss": 1.7924058437347412, "loss_ce": 0.009202754124999046, "loss_iou": 0.734375, "loss_num": 0.062255859375, "loss_xval": 1.78125, "num_input_tokens_seen": 39687900, "step": 599 }, { "epoch": 0.056161370337436234, "grad_norm": 18.349210739135742, "learning_rate": 5e-05, "loss": 1.9016, "num_input_tokens_seen": 39754580, "step": 600 }, { "epoch": 0.056161370337436234, "loss": 2.0565357208251953, "loss_ce": 0.004777735099196434, "loss_iou": 0.8515625, "loss_num": 0.06982421875, "loss_xval": 2.046875, "num_input_tokens_seen": 39754580, "step": 600 }, { "epoch": 0.05625497262133196, "grad_norm": 14.774345397949219, "learning_rate": 5e-05, "loss": 2.0181, "num_input_tokens_seen": 39820300, "step": 601 }, { "epoch": 0.05625497262133196, "loss": 1.978788137435913, "loss_ce": 0.010038059204816818, "loss_iou": 0.859375, "loss_num": 0.04931640625, "loss_xval": 1.96875, "num_input_tokens_seen": 39820300, "step": 601 }, { "epoch": 0.05634857490522769, "grad_norm": 15.486387252807617, "learning_rate": 5e-05, "loss": 1.7805, "num_input_tokens_seen": 39886928, "step": 602 }, { "epoch": 0.05634857490522769, "loss": 1.693159580230713, "loss_ce": 0.00395065825432539, "loss_iou": 0.70703125, "loss_num": 0.055419921875, "loss_xval": 1.6875, "num_input_tokens_seen": 39886928, "step": 602 }, { "epoch": 0.056442177189123414, "grad_norm": 41.67619705200195, "learning_rate": 5e-05, "loss": 1.9223, "num_input_tokens_seen": 39953608, "step": 603 }, { "epoch": 0.056442177189123414, "loss": 1.941701054573059, "loss_ce": 0.0032246247865259647, "loss_iou": 0.84765625, "loss_num": 0.049072265625, "loss_xval": 1.9375, "num_input_tokens_seen": 39953608, "step": 603 }, { "epoch": 0.05653577947301914, "grad_norm": 10.798108100891113, "learning_rate": 5e-05, "loss": 2.2574, "num_input_tokens_seen": 40019960, "step": 604 }, { "epoch": 0.05653577947301914, "loss": 2.2095208168029785, "loss_ce": 0.01420820876955986, "loss_iou": 0.91015625, "loss_num": 0.07470703125, "loss_xval": 2.1875, "num_input_tokens_seen": 40019960, "step": 604 }, { "epoch": 0.05662938175691487, "grad_norm": 13.647797584533691, "learning_rate": 5e-05, "loss": 2.004, "num_input_tokens_seen": 40086084, "step": 605 }, { "epoch": 0.05662938175691487, "loss": 2.0922958850860596, "loss_ce": 0.004405309911817312, "loss_iou": 0.88671875, "loss_num": 0.0625, "loss_xval": 2.09375, "num_input_tokens_seen": 40086084, "step": 605 }, { "epoch": 0.056722984040810595, "grad_norm": 12.498695373535156, "learning_rate": 5e-05, "loss": 1.7111, "num_input_tokens_seen": 40152652, "step": 606 }, { "epoch": 0.056722984040810595, "loss": 1.5924022197723389, "loss_ce": 0.0028025759384036064, "loss_iou": 0.66796875, "loss_num": 0.051025390625, "loss_xval": 1.5859375, "num_input_tokens_seen": 40152652, "step": 606 }, { "epoch": 0.05681658632470632, "grad_norm": 20.511404037475586, "learning_rate": 5e-05, "loss": 1.8864, "num_input_tokens_seen": 40218728, "step": 607 }, { "epoch": 0.05681658632470632, "loss": 1.9277386665344238, "loss_ce": 0.0039105541072785854, "loss_iou": 0.83203125, "loss_num": 0.052734375, "loss_xval": 1.921875, "num_input_tokens_seen": 40218728, "step": 607 }, { "epoch": 0.05691018860860205, "grad_norm": 13.575655937194824, "learning_rate": 5e-05, "loss": 1.6843, "num_input_tokens_seen": 40284284, "step": 608 }, { "epoch": 0.05691018860860205, "loss": 1.7808220386505127, "loss_ce": 0.005614531692117453, "loss_iou": 0.76953125, "loss_num": 0.047119140625, "loss_xval": 1.7734375, "num_input_tokens_seen": 40284284, "step": 608 }, { "epoch": 0.057003790892497776, "grad_norm": 39.97224807739258, "learning_rate": 5e-05, "loss": 2.0374, "num_input_tokens_seen": 40349908, "step": 609 }, { "epoch": 0.057003790892497776, "loss": 2.0007576942443848, "loss_ce": 0.005640394054353237, "loss_iou": 0.8359375, "loss_num": 0.064453125, "loss_xval": 1.9921875, "num_input_tokens_seen": 40349908, "step": 609 }, { "epoch": 0.05709739317639351, "grad_norm": 17.985727310180664, "learning_rate": 5e-05, "loss": 2.2559, "num_input_tokens_seen": 40415248, "step": 610 }, { "epoch": 0.05709739317639351, "loss": 2.138763427734375, "loss_ce": 0.00790414959192276, "loss_iou": 0.92578125, "loss_num": 0.055419921875, "loss_xval": 2.125, "num_input_tokens_seen": 40415248, "step": 610 }, { "epoch": 0.05719099546028923, "grad_norm": 15.21214485168457, "learning_rate": 5e-05, "loss": 2.0555, "num_input_tokens_seen": 40480984, "step": 611 }, { "epoch": 0.05719099546028923, "loss": 2.0109291076660156, "loss_ce": 0.006046364549547434, "loss_iou": 0.828125, "loss_num": 0.06982421875, "loss_xval": 2.0, "num_input_tokens_seen": 40480984, "step": 611 }, { "epoch": 0.057284597744184956, "grad_norm": 72.22217559814453, "learning_rate": 5e-05, "loss": 1.9259, "num_input_tokens_seen": 40547572, "step": 612 }, { "epoch": 0.057284597744184956, "loss": 1.910017490386963, "loss_ce": 0.005720725283026695, "loss_iou": 0.8359375, "loss_num": 0.046875, "loss_xval": 1.90625, "num_input_tokens_seen": 40547572, "step": 612 }, { "epoch": 0.05737820002808069, "grad_norm": 14.137224197387695, "learning_rate": 5e-05, "loss": 2.3107, "num_input_tokens_seen": 40614432, "step": 613 }, { "epoch": 0.05737820002808069, "loss": 2.463726043701172, "loss_ce": 0.004741773474961519, "loss_iou": 1.03125, "loss_num": 0.08056640625, "loss_xval": 2.453125, "num_input_tokens_seen": 40614432, "step": 613 }, { "epoch": 0.05747180231197641, "grad_norm": 8.538642883300781, "learning_rate": 5e-05, "loss": 2.183, "num_input_tokens_seen": 40680396, "step": 614 }, { "epoch": 0.05747180231197641, "loss": 2.1091527938842773, "loss_ce": 0.00392822315916419, "loss_iou": 0.828125, "loss_num": 0.0908203125, "loss_xval": 2.109375, "num_input_tokens_seen": 40680396, "step": 614 }, { "epoch": 0.05756540459587214, "grad_norm": 15.769079208374023, "learning_rate": 5e-05, "loss": 1.6501, "num_input_tokens_seen": 40747076, "step": 615 }, { "epoch": 0.05756540459587214, "loss": 1.5182501077651978, "loss_ce": 0.007507950533181429, "loss_iou": 0.6171875, "loss_num": 0.05517578125, "loss_xval": 1.5078125, "num_input_tokens_seen": 40747076, "step": 615 }, { "epoch": 0.05765900687976787, "grad_norm": 16.978391647338867, "learning_rate": 5e-05, "loss": 1.9324, "num_input_tokens_seen": 40812900, "step": 616 }, { "epoch": 0.05765900687976787, "loss": 1.905989646911621, "loss_ce": 0.0021810298785567284, "loss_iou": 0.76953125, "loss_num": 0.07275390625, "loss_xval": 1.90625, "num_input_tokens_seen": 40812900, "step": 616 }, { "epoch": 0.05775260916366359, "grad_norm": 21.10886573791504, "learning_rate": 5e-05, "loss": 1.8902, "num_input_tokens_seen": 40879000, "step": 617 }, { "epoch": 0.05775260916366359, "loss": 1.7256581783294678, "loss_ce": 0.009837780147790909, "loss_iou": 0.734375, "loss_num": 0.049072265625, "loss_xval": 1.71875, "num_input_tokens_seen": 40879000, "step": 617 }, { "epoch": 0.05784621144755932, "grad_norm": 41.98634338378906, "learning_rate": 5e-05, "loss": 2.1092, "num_input_tokens_seen": 40945376, "step": 618 }, { "epoch": 0.05784621144755932, "loss": 2.1062259674072266, "loss_ce": 0.003686754498630762, "loss_iou": 0.859375, "loss_num": 0.076171875, "loss_xval": 2.109375, "num_input_tokens_seen": 40945376, "step": 618 }, { "epoch": 0.05793981373145505, "grad_norm": 37.540283203125, "learning_rate": 5e-05, "loss": 2.1491, "num_input_tokens_seen": 41012208, "step": 619 }, { "epoch": 0.05793981373145505, "loss": 2.153731346130371, "loss_ce": 0.006270356476306915, "loss_iou": 0.93359375, "loss_num": 0.056640625, "loss_xval": 2.140625, "num_input_tokens_seen": 41012208, "step": 619 }, { "epoch": 0.058033416015350774, "grad_norm": 13.815699577331543, "learning_rate": 5e-05, "loss": 2.2309, "num_input_tokens_seen": 41078144, "step": 620 }, { "epoch": 0.058033416015350774, "loss": 2.1575770378112793, "loss_ce": 0.0071863653138279915, "loss_iou": 0.9140625, "loss_num": 0.06494140625, "loss_xval": 2.15625, "num_input_tokens_seen": 41078144, "step": 620 }, { "epoch": 0.0581270182992465, "grad_norm": 12.515998840332031, "learning_rate": 5e-05, "loss": 1.9803, "num_input_tokens_seen": 41145216, "step": 621 }, { "epoch": 0.0581270182992465, "loss": 2.062647819519043, "loss_ce": 0.003077602479606867, "loss_iou": 0.8984375, "loss_num": 0.05322265625, "loss_xval": 2.0625, "num_input_tokens_seen": 41145216, "step": 621 }, { "epoch": 0.05822062058314223, "grad_norm": 36.62022018432617, "learning_rate": 5e-05, "loss": 1.8181, "num_input_tokens_seen": 41211472, "step": 622 }, { "epoch": 0.05822062058314223, "loss": 1.7689921855926514, "loss_ce": 0.006296772044152021, "loss_iou": 0.734375, "loss_num": 0.058837890625, "loss_xval": 1.765625, "num_input_tokens_seen": 41211472, "step": 622 }, { "epoch": 0.058314222867037954, "grad_norm": 27.370723724365234, "learning_rate": 5e-05, "loss": 1.8636, "num_input_tokens_seen": 41279088, "step": 623 }, { "epoch": 0.058314222867037954, "loss": 1.9225337505340576, "loss_ce": 0.004564869683235884, "loss_iou": 0.828125, "loss_num": 0.05224609375, "loss_xval": 1.921875, "num_input_tokens_seen": 41279088, "step": 623 }, { "epoch": 0.058407825150933686, "grad_norm": 25.868284225463867, "learning_rate": 5e-05, "loss": 2.2106, "num_input_tokens_seen": 41344980, "step": 624 }, { "epoch": 0.058407825150933686, "loss": 2.377711296081543, "loss_ce": 0.005641047842800617, "loss_iou": 0.9921875, "loss_num": 0.07861328125, "loss_xval": 2.375, "num_input_tokens_seen": 41344980, "step": 624 }, { "epoch": 0.05850142743482941, "grad_norm": 13.196389198303223, "learning_rate": 5e-05, "loss": 1.9674, "num_input_tokens_seen": 41411676, "step": 625 }, { "epoch": 0.05850142743482941, "loss": 1.997706413269043, "loss_ce": 0.0025892811827361584, "loss_iou": 0.8515625, "loss_num": 0.05908203125, "loss_xval": 1.9921875, "num_input_tokens_seen": 41411676, "step": 625 }, { "epoch": 0.058595029718725135, "grad_norm": 9.63979721069336, "learning_rate": 5e-05, "loss": 1.7643, "num_input_tokens_seen": 41477900, "step": 626 }, { "epoch": 0.058595029718725135, "loss": 1.5383410453796387, "loss_ce": 0.003428956028074026, "loss_iou": 0.65625, "loss_num": 0.044921875, "loss_xval": 1.53125, "num_input_tokens_seen": 41477900, "step": 626 }, { "epoch": 0.058688632002620866, "grad_norm": 18.528329849243164, "learning_rate": 5e-05, "loss": 1.583, "num_input_tokens_seen": 41544016, "step": 627 }, { "epoch": 0.058688632002620866, "loss": 1.6905781030654907, "loss_ce": 0.007960943505167961, "loss_iou": 0.703125, "loss_num": 0.0556640625, "loss_xval": 1.6796875, "num_input_tokens_seen": 41544016, "step": 627 }, { "epoch": 0.05878223428651659, "grad_norm": 23.660940170288086, "learning_rate": 5e-05, "loss": 2.2274, "num_input_tokens_seen": 41610424, "step": 628 }, { "epoch": 0.05878223428651659, "loss": 2.3859333992004395, "loss_ce": 0.007027099374681711, "loss_iou": 1.0546875, "loss_num": 0.054931640625, "loss_xval": 2.375, "num_input_tokens_seen": 41610424, "step": 628 }, { "epoch": 0.058875836570412315, "grad_norm": 12.01035213470459, "learning_rate": 5e-05, "loss": 1.9972, "num_input_tokens_seen": 41675756, "step": 629 }, { "epoch": 0.058875836570412315, "loss": 1.9726507663726807, "loss_ce": 0.004877309314906597, "loss_iou": 0.84765625, "loss_num": 0.054931640625, "loss_xval": 1.96875, "num_input_tokens_seen": 41675756, "step": 629 }, { "epoch": 0.05896943885430805, "grad_norm": 12.94049072265625, "learning_rate": 5e-05, "loss": 1.9517, "num_input_tokens_seen": 41742104, "step": 630 }, { "epoch": 0.05896943885430805, "loss": 1.667283296585083, "loss_ce": 0.004197373986244202, "loss_iou": 0.72265625, "loss_num": 0.044189453125, "loss_xval": 1.6640625, "num_input_tokens_seen": 41742104, "step": 630 }, { "epoch": 0.05906304113820377, "grad_norm": 11.238037109375, "learning_rate": 5e-05, "loss": 1.8699, "num_input_tokens_seen": 41807904, "step": 631 }, { "epoch": 0.05906304113820377, "loss": 1.8402600288391113, "loss_ce": 0.006275675259530544, "loss_iou": 0.8125, "loss_num": 0.0419921875, "loss_xval": 1.8359375, "num_input_tokens_seen": 41807904, "step": 631 }, { "epoch": 0.059156643422099496, "grad_norm": 25.944297790527344, "learning_rate": 5e-05, "loss": 1.9746, "num_input_tokens_seen": 41874464, "step": 632 }, { "epoch": 0.059156643422099496, "loss": 2.068049669265747, "loss_ce": 0.004573073238134384, "loss_iou": 0.8828125, "loss_num": 0.059814453125, "loss_xval": 2.0625, "num_input_tokens_seen": 41874464, "step": 632 }, { "epoch": 0.05925024570599523, "grad_norm": 16.457990646362305, "learning_rate": 5e-05, "loss": 2.1437, "num_input_tokens_seen": 41941224, "step": 633 }, { "epoch": 0.05925024570599523, "loss": 2.3428397178649902, "loss_ce": 0.006902330555021763, "loss_iou": 1.0, "loss_num": 0.06787109375, "loss_xval": 2.34375, "num_input_tokens_seen": 41941224, "step": 633 }, { "epoch": 0.05934384798989095, "grad_norm": 12.140624046325684, "learning_rate": 5e-05, "loss": 1.9177, "num_input_tokens_seen": 42007972, "step": 634 }, { "epoch": 0.05934384798989095, "loss": 1.873486042022705, "loss_ce": 0.003368981881067157, "loss_iou": 0.7734375, "loss_num": 0.06494140625, "loss_xval": 1.8671875, "num_input_tokens_seen": 42007972, "step": 634 }, { "epoch": 0.059437450273786684, "grad_norm": 27.34258270263672, "learning_rate": 5e-05, "loss": 1.9138, "num_input_tokens_seen": 42074660, "step": 635 }, { "epoch": 0.059437450273786684, "loss": 1.9723542928695679, "loss_ce": 0.010440289974212646, "loss_iou": 0.8046875, "loss_num": 0.07080078125, "loss_xval": 1.9609375, "num_input_tokens_seen": 42074660, "step": 635 }, { "epoch": 0.05953105255768241, "grad_norm": 11.294136047363281, "learning_rate": 5e-05, "loss": 2.4134, "num_input_tokens_seen": 42141196, "step": 636 }, { "epoch": 0.05953105255768241, "loss": 2.4120917320251465, "loss_ce": 0.005841867998242378, "loss_iou": 1.0390625, "loss_num": 0.06689453125, "loss_xval": 2.40625, "num_input_tokens_seen": 42141196, "step": 636 }, { "epoch": 0.05962465484157813, "grad_norm": 12.519861221313477, "learning_rate": 5e-05, "loss": 1.9338, "num_input_tokens_seen": 42207028, "step": 637 }, { "epoch": 0.05962465484157813, "loss": 1.930042028427124, "loss_ce": 0.0037725483998656273, "loss_iou": 0.7734375, "loss_num": 0.076171875, "loss_xval": 1.9296875, "num_input_tokens_seen": 42207028, "step": 637 }, { "epoch": 0.059718257125473864, "grad_norm": 14.694958686828613, "learning_rate": 5e-05, "loss": 1.8074, "num_input_tokens_seen": 42273756, "step": 638 }, { "epoch": 0.059718257125473864, "loss": 1.8845767974853516, "loss_ce": 0.0037173698656260967, "loss_iou": 0.7890625, "loss_num": 0.060546875, "loss_xval": 1.8828125, "num_input_tokens_seen": 42273756, "step": 638 }, { "epoch": 0.05981185940936959, "grad_norm": 30.116161346435547, "learning_rate": 5e-05, "loss": 1.8737, "num_input_tokens_seen": 42339516, "step": 639 }, { "epoch": 0.05981185940936959, "loss": 1.8496448993682861, "loss_ce": 0.0029652677476406097, "loss_iou": 0.7890625, "loss_num": 0.05419921875, "loss_xval": 1.84375, "num_input_tokens_seen": 42339516, "step": 639 }, { "epoch": 0.05990546169326531, "grad_norm": 12.257732391357422, "learning_rate": 5e-05, "loss": 2.1148, "num_input_tokens_seen": 42406456, "step": 640 }, { "epoch": 0.05990546169326531, "loss": 2.2025976181030273, "loss_ce": 0.0024022057186812162, "loss_iou": 0.9375, "loss_num": 0.0654296875, "loss_xval": 2.203125, "num_input_tokens_seen": 42406456, "step": 640 }, { "epoch": 0.059999063977161045, "grad_norm": 10.195981979370117, "learning_rate": 5e-05, "loss": 2.0939, "num_input_tokens_seen": 42473056, "step": 641 }, { "epoch": 0.059999063977161045, "loss": 2.048806667327881, "loss_ce": 0.005837919190526009, "loss_iou": 0.82421875, "loss_num": 0.07958984375, "loss_xval": 2.046875, "num_input_tokens_seen": 42473056, "step": 641 }, { "epoch": 0.06009266626105677, "grad_norm": 21.322961807250977, "learning_rate": 5e-05, "loss": 1.8452, "num_input_tokens_seen": 42539812, "step": 642 }, { "epoch": 0.06009266626105677, "loss": 1.949399709701538, "loss_ce": 0.004087238572537899, "loss_iou": 0.79296875, "loss_num": 0.07275390625, "loss_xval": 1.9453125, "num_input_tokens_seen": 42539812, "step": 642 }, { "epoch": 0.060186268544952494, "grad_norm": 63.77423095703125, "learning_rate": 5e-05, "loss": 1.7791, "num_input_tokens_seen": 42607164, "step": 643 }, { "epoch": 0.060186268544952494, "loss": 1.7822136878967285, "loss_ce": 0.0029168049804866314, "loss_iou": 0.7734375, "loss_num": 0.047119140625, "loss_xval": 1.78125, "num_input_tokens_seen": 42607164, "step": 643 }, { "epoch": 0.060279870828848225, "grad_norm": 19.314645767211914, "learning_rate": 5e-05, "loss": 1.8867, "num_input_tokens_seen": 42671928, "step": 644 }, { "epoch": 0.060279870828848225, "loss": 1.986924171447754, "loss_ce": 0.010361703112721443, "loss_iou": 0.8359375, "loss_num": 0.06005859375, "loss_xval": 1.9765625, "num_input_tokens_seen": 42671928, "step": 644 }, { "epoch": 0.06037347311274395, "grad_norm": 9.597721099853516, "learning_rate": 5e-05, "loss": 1.5429, "num_input_tokens_seen": 42736592, "step": 645 }, { "epoch": 0.06037347311274395, "loss": 1.4693222045898438, "loss_ce": 0.004478428978472948, "loss_iou": 0.58203125, "loss_num": 0.060791015625, "loss_xval": 1.46875, "num_input_tokens_seen": 42736592, "step": 645 }, { "epoch": 0.060467075396639675, "grad_norm": 8.470300674438477, "learning_rate": 5e-05, "loss": 1.6664, "num_input_tokens_seen": 42802048, "step": 646 }, { "epoch": 0.060467075396639675, "loss": 1.5450406074523926, "loss_ce": 0.008907753974199295, "loss_iou": 0.60546875, "loss_num": 0.0654296875, "loss_xval": 1.5390625, "num_input_tokens_seen": 42802048, "step": 646 }, { "epoch": 0.060560677680535406, "grad_norm": 25.71698570251465, "learning_rate": 5e-05, "loss": 1.6715, "num_input_tokens_seen": 42868104, "step": 647 }, { "epoch": 0.060560677680535406, "loss": 1.5138490200042725, "loss_ce": 0.007501414977014065, "loss_iou": 0.640625, "loss_num": 0.0458984375, "loss_xval": 1.5078125, "num_input_tokens_seen": 42868104, "step": 647 }, { "epoch": 0.06065427996443113, "grad_norm": 18.959789276123047, "learning_rate": 5e-05, "loss": 1.5311, "num_input_tokens_seen": 42933812, "step": 648 }, { "epoch": 0.06065427996443113, "loss": 1.1889536380767822, "loss_ce": 0.004017080180346966, "loss_iou": 0.5078125, "loss_num": 0.033447265625, "loss_xval": 1.1875, "num_input_tokens_seen": 42933812, "step": 648 }, { "epoch": 0.06074788224832686, "grad_norm": 22.170230865478516, "learning_rate": 5e-05, "loss": 1.8908, "num_input_tokens_seen": 43001100, "step": 649 }, { "epoch": 0.06074788224832686, "loss": 1.8376333713531494, "loss_ce": 0.0026724031195044518, "loss_iou": 0.8125, "loss_num": 0.0419921875, "loss_xval": 1.8359375, "num_input_tokens_seen": 43001100, "step": 649 }, { "epoch": 0.06084148453222259, "grad_norm": 46.66634750366211, "learning_rate": 5e-05, "loss": 2.23, "num_input_tokens_seen": 43068388, "step": 650 }, { "epoch": 0.06084148453222259, "loss": 2.2623658180236816, "loss_ce": 0.005530000198632479, "loss_iou": 0.92578125, "loss_num": 0.08203125, "loss_xval": 2.25, "num_input_tokens_seen": 43068388, "step": 650 }, { "epoch": 0.06093508681611831, "grad_norm": 18.600927352905273, "learning_rate": 5e-05, "loss": 1.8137, "num_input_tokens_seen": 43135532, "step": 651 }, { "epoch": 0.06093508681611831, "loss": 1.8081564903259277, "loss_ce": 0.006398716941475868, "loss_iou": 0.7734375, "loss_num": 0.050048828125, "loss_xval": 1.8046875, "num_input_tokens_seen": 43135532, "step": 651 }, { "epoch": 0.06102868910001404, "grad_norm": 21.627056121826172, "learning_rate": 5e-05, "loss": 1.7722, "num_input_tokens_seen": 43201156, "step": 652 }, { "epoch": 0.06102868910001404, "loss": 1.8300975561141968, "loss_ce": 0.002949172630906105, "loss_iou": 0.75, "loss_num": 0.0654296875, "loss_xval": 1.828125, "num_input_tokens_seen": 43201156, "step": 652 }, { "epoch": 0.06112229138390977, "grad_norm": 18.532445907592773, "learning_rate": 5e-05, "loss": 1.7842, "num_input_tokens_seen": 43267520, "step": 653 }, { "epoch": 0.06112229138390977, "loss": 1.8301780223846436, "loss_ce": 0.00498274527490139, "loss_iou": 0.80078125, "loss_num": 0.04443359375, "loss_xval": 1.828125, "num_input_tokens_seen": 43267520, "step": 653 }, { "epoch": 0.06121589366780549, "grad_norm": 26.2631893157959, "learning_rate": 5e-05, "loss": 1.7561, "num_input_tokens_seen": 43334640, "step": 654 }, { "epoch": 0.06121589366780549, "loss": 1.7084355354309082, "loss_ce": 0.004333991091698408, "loss_iou": 0.75, "loss_num": 0.041748046875, "loss_xval": 1.703125, "num_input_tokens_seen": 43334640, "step": 654 }, { "epoch": 0.06130949595170122, "grad_norm": 18.518526077270508, "learning_rate": 5e-05, "loss": 2.4008, "num_input_tokens_seen": 43400440, "step": 655 }, { "epoch": 0.06130949595170122, "loss": 2.390713691711426, "loss_ce": 0.004971439018845558, "loss_iou": 1.03125, "loss_num": 0.06396484375, "loss_xval": 2.390625, "num_input_tokens_seen": 43400440, "step": 655 }, { "epoch": 0.06140309823559695, "grad_norm": 11.645074844360352, "learning_rate": 5e-05, "loss": 2.0933, "num_input_tokens_seen": 43466484, "step": 656 }, { "epoch": 0.06140309823559695, "loss": 2.1642441749572754, "loss_ce": 0.017759662121534348, "loss_iou": 0.88671875, "loss_num": 0.07421875, "loss_xval": 2.140625, "num_input_tokens_seen": 43466484, "step": 656 }, { "epoch": 0.06149670051949267, "grad_norm": 11.377114295959473, "learning_rate": 5e-05, "loss": 1.7413, "num_input_tokens_seen": 43532876, "step": 657 }, { "epoch": 0.06149670051949267, "loss": 1.8291823863983154, "loss_ce": 0.007893272675573826, "loss_iou": 0.76953125, "loss_num": 0.05615234375, "loss_xval": 1.8203125, "num_input_tokens_seen": 43532876, "step": 657 }, { "epoch": 0.061590302803388404, "grad_norm": 19.197315216064453, "learning_rate": 5e-05, "loss": 1.8288, "num_input_tokens_seen": 43598872, "step": 658 }, { "epoch": 0.061590302803388404, "loss": 1.8198094367980957, "loss_ce": 0.00828595645725727, "loss_iou": 0.75, "loss_num": 0.0625, "loss_xval": 1.8125, "num_input_tokens_seen": 43598872, "step": 658 }, { "epoch": 0.06168390508728413, "grad_norm": 19.02873420715332, "learning_rate": 5e-05, "loss": 2.044, "num_input_tokens_seen": 43665696, "step": 659 }, { "epoch": 0.06168390508728413, "loss": 2.211949586868286, "loss_ce": 0.006871475838124752, "loss_iou": 0.9140625, "loss_num": 0.07568359375, "loss_xval": 2.203125, "num_input_tokens_seen": 43665696, "step": 659 }, { "epoch": 0.06177750737117986, "grad_norm": 11.751163482666016, "learning_rate": 5e-05, "loss": 1.9957, "num_input_tokens_seen": 43731168, "step": 660 }, { "epoch": 0.06177750737117986, "loss": 1.927664041519165, "loss_ce": 0.006765660829842091, "loss_iou": 0.78125, "loss_num": 0.0712890625, "loss_xval": 1.921875, "num_input_tokens_seen": 43731168, "step": 660 }, { "epoch": 0.061871109655075585, "grad_norm": 13.9652099609375, "learning_rate": 5e-05, "loss": 1.7296, "num_input_tokens_seen": 43797268, "step": 661 }, { "epoch": 0.061871109655075585, "loss": 1.8963115215301514, "loss_ce": 0.0066630351357162, "loss_iou": 0.78515625, "loss_num": 0.06396484375, "loss_xval": 1.890625, "num_input_tokens_seen": 43797268, "step": 661 }, { "epoch": 0.06196471193897131, "grad_norm": 21.194671630859375, "learning_rate": 5e-05, "loss": 1.7519, "num_input_tokens_seen": 43864256, "step": 662 }, { "epoch": 0.06196471193897131, "loss": 1.8214362859725952, "loss_ce": 0.006006590090692043, "loss_iou": 0.78125, "loss_num": 0.05029296875, "loss_xval": 1.8125, "num_input_tokens_seen": 43864256, "step": 662 }, { "epoch": 0.06205831422286704, "grad_norm": 23.50273323059082, "learning_rate": 5e-05, "loss": 1.9001, "num_input_tokens_seen": 43930312, "step": 663 }, { "epoch": 0.06205831422286704, "loss": 2.0349676609039307, "loss_ce": 0.0017646612832322717, "loss_iou": 0.91015625, "loss_num": 0.043212890625, "loss_xval": 2.03125, "num_input_tokens_seen": 43930312, "step": 663 }, { "epoch": 0.062151916506762765, "grad_norm": 16.14592933654785, "learning_rate": 5e-05, "loss": 1.8992, "num_input_tokens_seen": 43996108, "step": 664 }, { "epoch": 0.062151916506762765, "loss": 1.7679128646850586, "loss_ce": 0.006194200832396746, "loss_iou": 0.76953125, "loss_num": 0.0439453125, "loss_xval": 1.765625, "num_input_tokens_seen": 43996108, "step": 664 }, { "epoch": 0.06224551879065849, "grad_norm": 18.69837188720703, "learning_rate": 5e-05, "loss": 1.7899, "num_input_tokens_seen": 44062072, "step": 665 }, { "epoch": 0.06224551879065849, "loss": 1.9077510833740234, "loss_ce": 0.009313540533185005, "loss_iou": 0.79296875, "loss_num": 0.06298828125, "loss_xval": 1.8984375, "num_input_tokens_seen": 44062072, "step": 665 }, { "epoch": 0.06233912107455422, "grad_norm": 24.879642486572266, "learning_rate": 5e-05, "loss": 2.2473, "num_input_tokens_seen": 44129180, "step": 666 }, { "epoch": 0.06233912107455422, "loss": 2.0507569313049316, "loss_ce": 0.008764711208641529, "loss_iou": 0.91015625, "loss_num": 0.044677734375, "loss_xval": 2.046875, "num_input_tokens_seen": 44129180, "step": 666 }, { "epoch": 0.062432723358449946, "grad_norm": 13.696084976196289, "learning_rate": 5e-05, "loss": 1.8711, "num_input_tokens_seen": 44193736, "step": 667 }, { "epoch": 0.062432723358449946, "loss": 1.863105297088623, "loss_ce": 0.0056832898408174515, "loss_iou": 0.8046875, "loss_num": 0.050537109375, "loss_xval": 1.859375, "num_input_tokens_seen": 44193736, "step": 667 }, { "epoch": 0.06252632564234567, "grad_norm": 13.253811836242676, "learning_rate": 5e-05, "loss": 1.8573, "num_input_tokens_seen": 44259736, "step": 668 }, { "epoch": 0.06252632564234567, "loss": 1.827341914176941, "loss_ce": 0.004099718295037746, "loss_iou": 0.77734375, "loss_num": 0.05419921875, "loss_xval": 1.8203125, "num_input_tokens_seen": 44259736, "step": 668 }, { "epoch": 0.0626199279262414, "grad_norm": 65.46825408935547, "learning_rate": 5e-05, "loss": 1.6073, "num_input_tokens_seen": 44326656, "step": 669 }, { "epoch": 0.0626199279262414, "loss": 1.829795002937317, "loss_ce": 0.0036230827681720257, "loss_iou": 0.8203125, "loss_num": 0.0380859375, "loss_xval": 1.828125, "num_input_tokens_seen": 44326656, "step": 669 }, { "epoch": 0.06271353021013713, "grad_norm": 15.645062446594238, "learning_rate": 5e-05, "loss": 1.7469, "num_input_tokens_seen": 44392964, "step": 670 }, { "epoch": 0.06271353021013713, "loss": 1.6751956939697266, "loss_ce": 0.009180095978081226, "loss_iou": 0.6875, "loss_num": 0.057861328125, "loss_xval": 1.6640625, "num_input_tokens_seen": 44392964, "step": 670 }, { "epoch": 0.06280713249403286, "grad_norm": 21.835399627685547, "learning_rate": 5e-05, "loss": 1.7665, "num_input_tokens_seen": 44457860, "step": 671 }, { "epoch": 0.06280713249403286, "loss": 1.8617095947265625, "loss_ce": 0.002334624994546175, "loss_iou": 0.796875, "loss_num": 0.053466796875, "loss_xval": 1.859375, "num_input_tokens_seen": 44457860, "step": 671 }, { "epoch": 0.06290073477792858, "grad_norm": 17.84896469116211, "learning_rate": 5e-05, "loss": 2.0174, "num_input_tokens_seen": 44524384, "step": 672 }, { "epoch": 0.06290073477792858, "loss": 1.902989387512207, "loss_ce": 0.0035754013806581497, "loss_iou": 0.8046875, "loss_num": 0.05712890625, "loss_xval": 1.8984375, "num_input_tokens_seen": 44524384, "step": 672 }, { "epoch": 0.06299433706182431, "grad_norm": 18.115175247192383, "learning_rate": 5e-05, "loss": 1.5983, "num_input_tokens_seen": 44590172, "step": 673 }, { "epoch": 0.06299433706182431, "loss": 1.6607317924499512, "loss_ce": 0.0035052087623625994, "loss_iou": 0.74609375, "loss_num": 0.03271484375, "loss_xval": 1.65625, "num_input_tokens_seen": 44590172, "step": 673 }, { "epoch": 0.06308793934572003, "grad_norm": 21.669931411743164, "learning_rate": 5e-05, "loss": 2.185, "num_input_tokens_seen": 44655864, "step": 674 }, { "epoch": 0.06308793934572003, "loss": 2.141312599182129, "loss_ce": 0.0026406331453472376, "loss_iou": 0.8671875, "loss_num": 0.07958984375, "loss_xval": 2.140625, "num_input_tokens_seen": 44655864, "step": 674 }, { "epoch": 0.06318154162961576, "grad_norm": 10.55396842956543, "learning_rate": 5e-05, "loss": 1.7654, "num_input_tokens_seen": 44722456, "step": 675 }, { "epoch": 0.06318154162961576, "loss": 1.8805890083312988, "loss_ce": 0.00461235037073493, "loss_iou": 0.84375, "loss_num": 0.037353515625, "loss_xval": 1.875, "num_input_tokens_seen": 44722456, "step": 675 }, { "epoch": 0.0632751439135115, "grad_norm": 13.018916130065918, "learning_rate": 5e-05, "loss": 1.6273, "num_input_tokens_seen": 44789424, "step": 676 }, { "epoch": 0.0632751439135115, "loss": 1.4644622802734375, "loss_ce": 0.004989564418792725, "loss_iou": 0.62890625, "loss_num": 0.039794921875, "loss_xval": 1.4609375, "num_input_tokens_seen": 44789424, "step": 676 }, { "epoch": 0.06336874619740722, "grad_norm": 24.016740798950195, "learning_rate": 5e-05, "loss": 1.7852, "num_input_tokens_seen": 44855256, "step": 677 }, { "epoch": 0.06336874619740722, "loss": 1.588794469833374, "loss_ce": 0.007739695720374584, "loss_iou": 0.61328125, "loss_num": 0.0703125, "loss_xval": 1.578125, "num_input_tokens_seen": 44855256, "step": 677 }, { "epoch": 0.06346234848130294, "grad_norm": 27.151763916015625, "learning_rate": 5e-05, "loss": 1.7278, "num_input_tokens_seen": 44921840, "step": 678 }, { "epoch": 0.06346234848130294, "loss": 1.693800687789917, "loss_ce": 0.006300638429820538, "loss_iou": 0.74609375, "loss_num": 0.038818359375, "loss_xval": 1.6875, "num_input_tokens_seen": 44921840, "step": 678 }, { "epoch": 0.06355595076519867, "grad_norm": 10.849607467651367, "learning_rate": 5e-05, "loss": 2.1003, "num_input_tokens_seen": 44987480, "step": 679 }, { "epoch": 0.06355595076519867, "loss": 2.2084901332855225, "loss_ce": 0.0024354765191674232, "loss_iou": 0.97265625, "loss_num": 0.052490234375, "loss_xval": 2.203125, "num_input_tokens_seen": 44987480, "step": 679 }, { "epoch": 0.06364955304909439, "grad_norm": 13.273422241210938, "learning_rate": 5e-05, "loss": 1.7229, "num_input_tokens_seen": 45053144, "step": 680 }, { "epoch": 0.06364955304909439, "loss": 1.7986541986465454, "loss_ce": 0.0056854551658034325, "loss_iou": 0.76171875, "loss_num": 0.053466796875, "loss_xval": 1.796875, "num_input_tokens_seen": 45053144, "step": 680 }, { "epoch": 0.06374315533299013, "grad_norm": 20.408994674682617, "learning_rate": 5e-05, "loss": 1.6493, "num_input_tokens_seen": 45119624, "step": 681 }, { "epoch": 0.06374315533299013, "loss": 1.6919260025024414, "loss_ce": 0.007355560548603535, "loss_iou": 0.72265625, "loss_num": 0.047607421875, "loss_xval": 1.6875, "num_input_tokens_seen": 45119624, "step": 681 }, { "epoch": 0.06383675761688586, "grad_norm": 13.204854011535645, "learning_rate": 5e-05, "loss": 1.5412, "num_input_tokens_seen": 45186636, "step": 682 }, { "epoch": 0.06383675761688586, "loss": 1.6893914937973022, "loss_ce": 0.005797786638140678, "loss_iou": 0.72265625, "loss_num": 0.04736328125, "loss_xval": 1.6875, "num_input_tokens_seen": 45186636, "step": 682 }, { "epoch": 0.06393035990078158, "grad_norm": 20.79660415649414, "learning_rate": 5e-05, "loss": 2.1258, "num_input_tokens_seen": 45251808, "step": 683 }, { "epoch": 0.06393035990078158, "loss": 2.0212926864624023, "loss_ce": 0.005667629651725292, "loss_iou": 0.83984375, "loss_num": 0.06689453125, "loss_xval": 2.015625, "num_input_tokens_seen": 45251808, "step": 683 }, { "epoch": 0.0640239621846773, "grad_norm": 16.694738388061523, "learning_rate": 5e-05, "loss": 1.8483, "num_input_tokens_seen": 45318916, "step": 684 }, { "epoch": 0.0640239621846773, "loss": 1.895155429840088, "loss_ce": 0.0055069150403141975, "loss_iou": 0.83984375, "loss_num": 0.042236328125, "loss_xval": 1.890625, "num_input_tokens_seen": 45318916, "step": 684 }, { "epoch": 0.06411756446857303, "grad_norm": 14.720990180969238, "learning_rate": 5e-05, "loss": 1.6624, "num_input_tokens_seen": 45385816, "step": 685 }, { "epoch": 0.06411756446857303, "loss": 1.6102463006973267, "loss_ce": 0.004777542781084776, "loss_iou": 0.71875, "loss_num": 0.034423828125, "loss_xval": 1.609375, "num_input_tokens_seen": 45385816, "step": 685 }, { "epoch": 0.06421116675246875, "grad_norm": 22.114540100097656, "learning_rate": 5e-05, "loss": 1.9662, "num_input_tokens_seen": 45452036, "step": 686 }, { "epoch": 0.06421116675246875, "loss": 1.892499566078186, "loss_ce": 0.007733910344541073, "loss_iou": 0.8359375, "loss_num": 0.0419921875, "loss_xval": 1.8828125, "num_input_tokens_seen": 45452036, "step": 686 }, { "epoch": 0.06430476903636449, "grad_norm": 22.834501266479492, "learning_rate": 5e-05, "loss": 1.6495, "num_input_tokens_seen": 45517740, "step": 687 }, { "epoch": 0.06430476903636449, "loss": 1.5179729461669922, "loss_ce": 0.002347860485315323, "loss_iou": 0.65234375, "loss_num": 0.04248046875, "loss_xval": 1.515625, "num_input_tokens_seen": 45517740, "step": 687 }, { "epoch": 0.06439837132026022, "grad_norm": 14.208182334899902, "learning_rate": 5e-05, "loss": 2.1564, "num_input_tokens_seen": 45584060, "step": 688 }, { "epoch": 0.06439837132026022, "loss": 2.085690975189209, "loss_ce": 0.007565838750451803, "loss_iou": 0.8828125, "loss_num": 0.06298828125, "loss_xval": 2.078125, "num_input_tokens_seen": 45584060, "step": 688 }, { "epoch": 0.06449197360415594, "grad_norm": 16.036930084228516, "learning_rate": 5e-05, "loss": 1.8611, "num_input_tokens_seen": 45649572, "step": 689 }, { "epoch": 0.06449197360415594, "loss": 2.0573134422302246, "loss_ce": 0.004579117055982351, "loss_iou": 0.859375, "loss_num": 0.06689453125, "loss_xval": 2.046875, "num_input_tokens_seen": 45649572, "step": 689 }, { "epoch": 0.06458557588805167, "grad_norm": 31.936649322509766, "learning_rate": 5e-05, "loss": 1.8204, "num_input_tokens_seen": 45713996, "step": 690 }, { "epoch": 0.06458557588805167, "loss": 1.8594334125518799, "loss_ce": 0.0029881575610488653, "loss_iou": 0.78125, "loss_num": 0.057861328125, "loss_xval": 1.859375, "num_input_tokens_seen": 45713996, "step": 690 }, { "epoch": 0.06467917817194739, "grad_norm": 12.208847045898438, "learning_rate": 5e-05, "loss": 2.1596, "num_input_tokens_seen": 45780704, "step": 691 }, { "epoch": 0.06467917817194739, "loss": 2.1293883323669434, "loss_ce": 0.008294601924717426, "loss_iou": 0.953125, "loss_num": 0.042236328125, "loss_xval": 2.125, "num_input_tokens_seen": 45780704, "step": 691 }, { "epoch": 0.06477278045584313, "grad_norm": 13.552273750305176, "learning_rate": 5e-05, "loss": 1.9639, "num_input_tokens_seen": 45846124, "step": 692 }, { "epoch": 0.06477278045584313, "loss": 1.8553943634033203, "loss_ce": 0.007738055661320686, "loss_iou": 0.78125, "loss_num": 0.057861328125, "loss_xval": 1.84375, "num_input_tokens_seen": 45846124, "step": 692 }, { "epoch": 0.06486638273973885, "grad_norm": 10.535679817199707, "learning_rate": 5e-05, "loss": 1.9231, "num_input_tokens_seen": 45912244, "step": 693 }, { "epoch": 0.06486638273973885, "loss": 1.6916513442993164, "loss_ce": 0.002198208589106798, "loss_iou": 0.73828125, "loss_num": 0.041748046875, "loss_xval": 1.6875, "num_input_tokens_seen": 45912244, "step": 693 }, { "epoch": 0.06495998502363458, "grad_norm": 13.686797142028809, "learning_rate": 5e-05, "loss": 1.84, "num_input_tokens_seen": 45978900, "step": 694 }, { "epoch": 0.06495998502363458, "loss": 1.8427501916885376, "loss_ce": 0.003883079392835498, "loss_iou": 0.79296875, "loss_num": 0.050537109375, "loss_xval": 1.8359375, "num_input_tokens_seen": 45978900, "step": 694 }, { "epoch": 0.0650535873075303, "grad_norm": 24.21961784362793, "learning_rate": 5e-05, "loss": 1.6559, "num_input_tokens_seen": 46043936, "step": 695 }, { "epoch": 0.0650535873075303, "loss": 1.4779677391052246, "loss_ce": 0.0028701634146273136, "loss_iou": 0.6171875, "loss_num": 0.0478515625, "loss_xval": 1.4765625, "num_input_tokens_seen": 46043936, "step": 695 }, { "epoch": 0.06514718959142603, "grad_norm": 18.41437339782715, "learning_rate": 5e-05, "loss": 1.8008, "num_input_tokens_seen": 46110496, "step": 696 }, { "epoch": 0.06514718959142603, "loss": 1.6277146339416504, "loss_ce": 0.0012497918214648962, "loss_iou": 0.73046875, "loss_num": 0.032470703125, "loss_xval": 1.625, "num_input_tokens_seen": 46110496, "step": 696 }, { "epoch": 0.06524079187532175, "grad_norm": 16.649349212646484, "learning_rate": 5e-05, "loss": 1.7843, "num_input_tokens_seen": 46176756, "step": 697 }, { "epoch": 0.06524079187532175, "loss": 1.8919517993927002, "loss_ce": 0.0032798913307487965, "loss_iou": 0.81640625, "loss_num": 0.05126953125, "loss_xval": 1.890625, "num_input_tokens_seen": 46176756, "step": 697 }, { "epoch": 0.06533439415921749, "grad_norm": 38.61091613769531, "learning_rate": 5e-05, "loss": 1.7536, "num_input_tokens_seen": 46242792, "step": 698 }, { "epoch": 0.06533439415921749, "loss": 1.642912745475769, "loss_ce": 0.004729137755930424, "loss_iou": 0.73046875, "loss_num": 0.034912109375, "loss_xval": 1.640625, "num_input_tokens_seen": 46242792, "step": 698 }, { "epoch": 0.06542799644311321, "grad_norm": 58.10789489746094, "learning_rate": 5e-05, "loss": 1.853, "num_input_tokens_seen": 46307004, "step": 699 }, { "epoch": 0.06542799644311321, "loss": 2.120300531387329, "loss_ce": 0.006042658817023039, "loss_iou": 0.86328125, "loss_num": 0.0771484375, "loss_xval": 2.109375, "num_input_tokens_seen": 46307004, "step": 699 }, { "epoch": 0.06552159872700894, "grad_norm": 20.29197120666504, "learning_rate": 5e-05, "loss": 2.0745, "num_input_tokens_seen": 46373032, "step": 700 }, { "epoch": 0.06552159872700894, "loss": 2.0715878009796143, "loss_ce": 0.004205000586807728, "loss_iou": 0.8984375, "loss_num": 0.05419921875, "loss_xval": 2.0625, "num_input_tokens_seen": 46373032, "step": 700 }, { "epoch": 0.06561520101090466, "grad_norm": 12.449002265930176, "learning_rate": 5e-05, "loss": 1.587, "num_input_tokens_seen": 46438588, "step": 701 }, { "epoch": 0.06561520101090466, "loss": 1.4610304832458496, "loss_ce": 0.003511058399453759, "loss_iou": 0.6484375, "loss_num": 0.031982421875, "loss_xval": 1.4609375, "num_input_tokens_seen": 46438588, "step": 701 }, { "epoch": 0.06570880329480039, "grad_norm": 10.84778881072998, "learning_rate": 5e-05, "loss": 1.6071, "num_input_tokens_seen": 46505752, "step": 702 }, { "epoch": 0.06570880329480039, "loss": 1.400804877281189, "loss_ce": 0.004320591688156128, "loss_iou": 0.625, "loss_num": 0.030029296875, "loss_xval": 1.3984375, "num_input_tokens_seen": 46505752, "step": 702 }, { "epoch": 0.06580240557869613, "grad_norm": 24.643667221069336, "learning_rate": 5e-05, "loss": 1.6514, "num_input_tokens_seen": 46571916, "step": 703 }, { "epoch": 0.06580240557869613, "loss": 1.572337031364441, "loss_ce": 0.01179010234773159, "loss_iou": 0.68359375, "loss_num": 0.03857421875, "loss_xval": 1.5625, "num_input_tokens_seen": 46571916, "step": 703 }, { "epoch": 0.06589600786259185, "grad_norm": 15.583794593811035, "learning_rate": 5e-05, "loss": 2.0572, "num_input_tokens_seen": 46638960, "step": 704 }, { "epoch": 0.06589600786259185, "loss": 1.9588515758514404, "loss_ce": 0.012562518008053303, "loss_iou": 0.8671875, "loss_num": 0.0419921875, "loss_xval": 1.9453125, "num_input_tokens_seen": 46638960, "step": 704 }, { "epoch": 0.06598961014648758, "grad_norm": 10.499127388000488, "learning_rate": 5e-05, "loss": 1.8242, "num_input_tokens_seen": 46704448, "step": 705 }, { "epoch": 0.06598961014648758, "loss": 1.8935816287994385, "loss_ce": 0.003933214582502842, "loss_iou": 0.7734375, "loss_num": 0.068359375, "loss_xval": 1.890625, "num_input_tokens_seen": 46704448, "step": 705 }, { "epoch": 0.0660832124303833, "grad_norm": 24.402984619140625, "learning_rate": 5e-05, "loss": 1.6786, "num_input_tokens_seen": 46770648, "step": 706 }, { "epoch": 0.0660832124303833, "loss": 1.7661579847335815, "loss_ce": 0.009383074007928371, "loss_iou": 0.7265625, "loss_num": 0.0615234375, "loss_xval": 1.7578125, "num_input_tokens_seen": 46770648, "step": 706 }, { "epoch": 0.06617681471427903, "grad_norm": 16.042818069458008, "learning_rate": 5e-05, "loss": 1.7377, "num_input_tokens_seen": 46836960, "step": 707 }, { "epoch": 0.06617681471427903, "loss": 1.7936598062515259, "loss_ce": 0.0055738468654453754, "loss_iou": 0.78125, "loss_num": 0.044921875, "loss_xval": 1.7890625, "num_input_tokens_seen": 46836960, "step": 707 }, { "epoch": 0.06627041699817475, "grad_norm": 35.5173454284668, "learning_rate": 5e-05, "loss": 1.8091, "num_input_tokens_seen": 46903948, "step": 708 }, { "epoch": 0.06627041699817475, "loss": 2.017773151397705, "loss_ce": 0.004101386293768883, "loss_iou": 0.83984375, "loss_num": 0.06640625, "loss_xval": 2.015625, "num_input_tokens_seen": 46903948, "step": 708 }, { "epoch": 0.06636401928207049, "grad_norm": 18.368568420410156, "learning_rate": 5e-05, "loss": 1.7415, "num_input_tokens_seen": 46969168, "step": 709 }, { "epoch": 0.06636401928207049, "loss": 1.5899379253387451, "loss_ce": 0.0038172572385519743, "loss_iou": 0.68359375, "loss_num": 0.043212890625, "loss_xval": 1.5859375, "num_input_tokens_seen": 46969168, "step": 709 }, { "epoch": 0.06645762156596621, "grad_norm": 34.08951950073242, "learning_rate": 5e-05, "loss": 1.7784, "num_input_tokens_seen": 47035368, "step": 710 }, { "epoch": 0.06645762156596621, "loss": 1.9146358966827393, "loss_ce": 0.0035031517036259174, "loss_iou": 0.84765625, "loss_num": 0.04345703125, "loss_xval": 1.9140625, "num_input_tokens_seen": 47035368, "step": 710 }, { "epoch": 0.06655122384986194, "grad_norm": 12.061261177062988, "learning_rate": 5e-05, "loss": 2.0622, "num_input_tokens_seen": 47102132, "step": 711 }, { "epoch": 0.06655122384986194, "loss": 1.9517844915390015, "loss_ce": 0.0040306393057107925, "loss_iou": 0.8203125, "loss_num": 0.060791015625, "loss_xval": 1.9453125, "num_input_tokens_seen": 47102132, "step": 711 }, { "epoch": 0.06664482613375766, "grad_norm": 25.1570987701416, "learning_rate": 5e-05, "loss": 1.7394, "num_input_tokens_seen": 47167796, "step": 712 }, { "epoch": 0.06664482613375766, "loss": 1.977304458618164, "loss_ce": 0.00464824540540576, "loss_iou": 0.82421875, "loss_num": 0.06591796875, "loss_xval": 1.96875, "num_input_tokens_seen": 47167796, "step": 712 }, { "epoch": 0.06673842841765339, "grad_norm": 25.616376876831055, "learning_rate": 5e-05, "loss": 1.3528, "num_input_tokens_seen": 47233524, "step": 713 }, { "epoch": 0.06673842841765339, "loss": 1.328101634979248, "loss_ce": 0.010718777775764465, "loss_iou": 0.51171875, "loss_num": 0.058349609375, "loss_xval": 1.3203125, "num_input_tokens_seen": 47233524, "step": 713 }, { "epoch": 0.06683203070154911, "grad_norm": 14.906170845031738, "learning_rate": 5e-05, "loss": 1.6391, "num_input_tokens_seen": 47300436, "step": 714 }, { "epoch": 0.06683203070154911, "loss": 1.5979814529418945, "loss_ce": 0.004231479484587908, "loss_iou": 0.7109375, "loss_num": 0.034912109375, "loss_xval": 1.59375, "num_input_tokens_seen": 47300436, "step": 714 }, { "epoch": 0.06692563298544485, "grad_norm": 30.154172897338867, "learning_rate": 5e-05, "loss": 1.8866, "num_input_tokens_seen": 47366896, "step": 715 }, { "epoch": 0.06692563298544485, "loss": 2.0287551879882812, "loss_ce": 0.003364487085491419, "loss_iou": 0.875, "loss_num": 0.05419921875, "loss_xval": 2.03125, "num_input_tokens_seen": 47366896, "step": 715 }, { "epoch": 0.06701923526934057, "grad_norm": 21.635082244873047, "learning_rate": 5e-05, "loss": 1.8791, "num_input_tokens_seen": 47433372, "step": 716 }, { "epoch": 0.06701923526934057, "loss": 1.9303206205368042, "loss_ce": 0.004539405927062035, "loss_iou": 0.8046875, "loss_num": 0.06298828125, "loss_xval": 1.921875, "num_input_tokens_seen": 47433372, "step": 716 }, { "epoch": 0.0671128375532363, "grad_norm": 26.324674606323242, "learning_rate": 5e-05, "loss": 1.6057, "num_input_tokens_seen": 47499168, "step": 717 }, { "epoch": 0.0671128375532363, "loss": 1.7501418590545654, "loss_ce": 0.005024688318371773, "loss_iou": 0.76953125, "loss_num": 0.04150390625, "loss_xval": 1.7421875, "num_input_tokens_seen": 47499168, "step": 717 }, { "epoch": 0.06720643983713202, "grad_norm": 10.50040054321289, "learning_rate": 5e-05, "loss": 2.0523, "num_input_tokens_seen": 47564628, "step": 718 }, { "epoch": 0.06720643983713202, "loss": 2.2206830978393555, "loss_ce": 0.00876903161406517, "loss_iou": 0.9765625, "loss_num": 0.05126953125, "loss_xval": 2.21875, "num_input_tokens_seen": 47564628, "step": 718 }, { "epoch": 0.06730004212102775, "grad_norm": 31.882604598999023, "learning_rate": 5e-05, "loss": 1.8316, "num_input_tokens_seen": 47631796, "step": 719 }, { "epoch": 0.06730004212102775, "loss": 1.78173828125, "loss_ce": 0.006347662769258022, "loss_iou": 0.74609375, "loss_num": 0.056640625, "loss_xval": 1.7734375, "num_input_tokens_seen": 47631796, "step": 719 }, { "epoch": 0.06739364440492349, "grad_norm": 15.604974746704102, "learning_rate": 5e-05, "loss": 1.6485, "num_input_tokens_seen": 47697144, "step": 720 }, { "epoch": 0.06739364440492349, "loss": 1.6074495315551758, "loss_ce": 0.008633635938167572, "loss_iou": 0.6640625, "loss_num": 0.053955078125, "loss_xval": 1.6015625, "num_input_tokens_seen": 47697144, "step": 720 }, { "epoch": 0.06748724668881921, "grad_norm": 12.568963050842285, "learning_rate": 5e-05, "loss": 1.5024, "num_input_tokens_seen": 47763272, "step": 721 }, { "epoch": 0.06748724668881921, "loss": 1.5843994617462158, "loss_ce": 0.002368141431361437, "loss_iou": 0.71875, "loss_num": 0.0289306640625, "loss_xval": 1.578125, "num_input_tokens_seen": 47763272, "step": 721 }, { "epoch": 0.06758084897271494, "grad_norm": 27.118867874145508, "learning_rate": 5e-05, "loss": 1.7845, "num_input_tokens_seen": 47828356, "step": 722 }, { "epoch": 0.06758084897271494, "loss": 1.8423880338668823, "loss_ce": 0.0059622423723340034, "loss_iou": 0.78515625, "loss_num": 0.05322265625, "loss_xval": 1.8359375, "num_input_tokens_seen": 47828356, "step": 722 }, { "epoch": 0.06767445125661066, "grad_norm": 17.231006622314453, "learning_rate": 5e-05, "loss": 2.0219, "num_input_tokens_seen": 47895416, "step": 723 }, { "epoch": 0.06767445125661066, "loss": 2.1317527294158936, "loss_ce": 0.001869881758466363, "loss_iou": 0.921875, "loss_num": 0.05712890625, "loss_xval": 2.125, "num_input_tokens_seen": 47895416, "step": 723 }, { "epoch": 0.06776805354050638, "grad_norm": 15.999128341674805, "learning_rate": 5e-05, "loss": 1.883, "num_input_tokens_seen": 47961972, "step": 724 }, { "epoch": 0.06776805354050638, "loss": 1.8662493228912354, "loss_ce": 0.006874297745525837, "loss_iou": 0.78125, "loss_num": 0.05908203125, "loss_xval": 1.859375, "num_input_tokens_seen": 47961972, "step": 724 }, { "epoch": 0.06786165582440211, "grad_norm": 15.403785705566406, "learning_rate": 5e-05, "loss": 1.5873, "num_input_tokens_seen": 48028404, "step": 725 }, { "epoch": 0.06786165582440211, "loss": 1.6128623485565186, "loss_ce": 0.0064169904217123985, "loss_iou": 0.65625, "loss_num": 0.058837890625, "loss_xval": 1.609375, "num_input_tokens_seen": 48028404, "step": 725 }, { "epoch": 0.06795525810829785, "grad_norm": 25.324594497680664, "learning_rate": 5e-05, "loss": 1.5274, "num_input_tokens_seen": 48093988, "step": 726 }, { "epoch": 0.06795525810829785, "loss": 1.6561145782470703, "loss_ce": 0.006700488738715649, "loss_iou": 0.71484375, "loss_num": 0.044189453125, "loss_xval": 1.6484375, "num_input_tokens_seen": 48093988, "step": 726 }, { "epoch": 0.06804886039219357, "grad_norm": 32.763973236083984, "learning_rate": 5e-05, "loss": 1.8417, "num_input_tokens_seen": 48159828, "step": 727 }, { "epoch": 0.06804886039219357, "loss": 1.7738572359085083, "loss_ce": 0.005302562844008207, "loss_iou": 0.7265625, "loss_num": 0.0625, "loss_xval": 1.765625, "num_input_tokens_seen": 48159828, "step": 727 }, { "epoch": 0.0681424626760893, "grad_norm": 21.48606300354004, "learning_rate": 5e-05, "loss": 1.8958, "num_input_tokens_seen": 48226332, "step": 728 }, { "epoch": 0.0681424626760893, "loss": 1.9721516370773315, "loss_ce": 0.0024250440765172243, "loss_iou": 0.828125, "loss_num": 0.0625, "loss_xval": 1.96875, "num_input_tokens_seen": 48226332, "step": 728 }, { "epoch": 0.06823606495998502, "grad_norm": 18.606369018554688, "learning_rate": 5e-05, "loss": 1.5867, "num_input_tokens_seen": 48290772, "step": 729 }, { "epoch": 0.06823606495998502, "loss": 1.4791977405548096, "loss_ce": 0.006541408132761717, "loss_iou": 0.65625, "loss_num": 0.031494140625, "loss_xval": 1.46875, "num_input_tokens_seen": 48290772, "step": 729 }, { "epoch": 0.06832966724388075, "grad_norm": 33.228878021240234, "learning_rate": 5e-05, "loss": 1.7367, "num_input_tokens_seen": 48356540, "step": 730 }, { "epoch": 0.06832966724388075, "loss": 1.8499488830566406, "loss_ce": 0.0032692099921405315, "loss_iou": 0.7734375, "loss_num": 0.0595703125, "loss_xval": 1.84375, "num_input_tokens_seen": 48356540, "step": 730 }, { "epoch": 0.06842326952777648, "grad_norm": 11.724018096923828, "learning_rate": 5e-05, "loss": 1.6791, "num_input_tokens_seen": 48423580, "step": 731 }, { "epoch": 0.06842326952777648, "loss": 1.742972731590271, "loss_ce": 0.0046915700659155846, "loss_iou": 0.75, "loss_num": 0.0478515625, "loss_xval": 1.734375, "num_input_tokens_seen": 48423580, "step": 731 }, { "epoch": 0.06851687181167221, "grad_norm": 16.34622573852539, "learning_rate": 5e-05, "loss": 1.734, "num_input_tokens_seen": 48489440, "step": 732 }, { "epoch": 0.06851687181167221, "loss": 1.684220790863037, "loss_ce": 0.004533355124294758, "loss_iou": 0.69921875, "loss_num": 0.055419921875, "loss_xval": 1.6796875, "num_input_tokens_seen": 48489440, "step": 732 }, { "epoch": 0.06861047409556793, "grad_norm": 16.87637710571289, "learning_rate": 5e-05, "loss": 1.8085, "num_input_tokens_seen": 48555192, "step": 733 }, { "epoch": 0.06861047409556793, "loss": 1.616333246231079, "loss_ce": 0.005005074664950371, "loss_iou": 0.6796875, "loss_num": 0.05078125, "loss_xval": 1.609375, "num_input_tokens_seen": 48555192, "step": 733 }, { "epoch": 0.06870407637946366, "grad_norm": 28.047243118286133, "learning_rate": 5e-05, "loss": 1.6381, "num_input_tokens_seen": 48621144, "step": 734 }, { "epoch": 0.06870407637946366, "loss": 1.5632829666137695, "loss_ce": 0.007618976291269064, "loss_iou": 0.65234375, "loss_num": 0.050537109375, "loss_xval": 1.5546875, "num_input_tokens_seen": 48621144, "step": 734 }, { "epoch": 0.06879767866335938, "grad_norm": 11.642561912536621, "learning_rate": 5e-05, "loss": 2.2517, "num_input_tokens_seen": 48687896, "step": 735 }, { "epoch": 0.06879767866335938, "loss": 2.342609167098999, "loss_ce": 0.001789049245417118, "loss_iou": 0.984375, "loss_num": 0.0751953125, "loss_xval": 2.34375, "num_input_tokens_seen": 48687896, "step": 735 }, { "epoch": 0.0688912809472551, "grad_norm": 16.74951171875, "learning_rate": 5e-05, "loss": 1.7637, "num_input_tokens_seen": 48754184, "step": 736 }, { "epoch": 0.0688912809472551, "loss": 1.548715353012085, "loss_ce": 0.004159669391810894, "loss_iou": 0.62890625, "loss_num": 0.0576171875, "loss_xval": 1.546875, "num_input_tokens_seen": 48754184, "step": 736 }, { "epoch": 0.06898488323115085, "grad_norm": 18.810588836669922, "learning_rate": 5e-05, "loss": 1.7837, "num_input_tokens_seen": 48819948, "step": 737 }, { "epoch": 0.06898488323115085, "loss": 1.82712984085083, "loss_ce": 0.004864188842475414, "loss_iou": 0.7578125, "loss_num": 0.060546875, "loss_xval": 1.8203125, "num_input_tokens_seen": 48819948, "step": 737 }, { "epoch": 0.06907848551504657, "grad_norm": 16.910188674926758, "learning_rate": 5e-05, "loss": 1.7753, "num_input_tokens_seen": 48885528, "step": 738 }, { "epoch": 0.06907848551504657, "loss": 1.668896198272705, "loss_ce": 0.0022703027352690697, "loss_iou": 0.75, "loss_num": 0.0341796875, "loss_xval": 1.6640625, "num_input_tokens_seen": 48885528, "step": 738 }, { "epoch": 0.0691720877989423, "grad_norm": 14.250575065612793, "learning_rate": 5e-05, "loss": 1.9187, "num_input_tokens_seen": 48952572, "step": 739 }, { "epoch": 0.0691720877989423, "loss": 1.8930304050445557, "loss_ce": 0.005823412910103798, "loss_iou": 0.79296875, "loss_num": 0.0595703125, "loss_xval": 1.890625, "num_input_tokens_seen": 48952572, "step": 739 }, { "epoch": 0.06926569008283802, "grad_norm": 35.549198150634766, "learning_rate": 5e-05, "loss": 1.7614, "num_input_tokens_seen": 49019840, "step": 740 }, { "epoch": 0.06926569008283802, "loss": 1.556145191192627, "loss_ce": 0.005241925362497568, "loss_iou": 0.6875, "loss_num": 0.03466796875, "loss_xval": 1.5546875, "num_input_tokens_seen": 49019840, "step": 740 }, { "epoch": 0.06935929236673374, "grad_norm": 14.610987663269043, "learning_rate": 5e-05, "loss": 2.45, "num_input_tokens_seen": 49085132, "step": 741 }, { "epoch": 0.06935929236673374, "loss": 2.3336081504821777, "loss_ce": 0.003530060639604926, "loss_iou": 0.9609375, "loss_num": 0.08251953125, "loss_xval": 2.328125, "num_input_tokens_seen": 49085132, "step": 741 }, { "epoch": 0.06945289465062948, "grad_norm": 11.338419914245605, "learning_rate": 5e-05, "loss": 1.7574, "num_input_tokens_seen": 49150472, "step": 742 }, { "epoch": 0.06945289465062948, "loss": 2.0119495391845703, "loss_ce": 0.005113525316119194, "loss_iou": 0.88671875, "loss_num": 0.047119140625, "loss_xval": 2.0, "num_input_tokens_seen": 49150472, "step": 742 }, { "epoch": 0.0695464969345252, "grad_norm": 16.81510353088379, "learning_rate": 5e-05, "loss": 1.7434, "num_input_tokens_seen": 49217468, "step": 743 }, { "epoch": 0.0695464969345252, "loss": 1.855400562286377, "loss_ce": 0.005791161209344864, "loss_iou": 0.84375, "loss_num": 0.032470703125, "loss_xval": 1.8515625, "num_input_tokens_seen": 49217468, "step": 743 }, { "epoch": 0.06964009921842093, "grad_norm": 19.771026611328125, "learning_rate": 5e-05, "loss": 1.3439, "num_input_tokens_seen": 49283552, "step": 744 }, { "epoch": 0.06964009921842093, "loss": 1.2848083972930908, "loss_ce": 0.007952988147735596, "loss_iou": 0.5625, "loss_num": 0.03076171875, "loss_xval": 1.2734375, "num_input_tokens_seen": 49283552, "step": 744 }, { "epoch": 0.06973370150231666, "grad_norm": 12.469643592834473, "learning_rate": 5e-05, "loss": 1.8382, "num_input_tokens_seen": 49350048, "step": 745 }, { "epoch": 0.06973370150231666, "loss": 2.0062437057495117, "loss_ce": 0.003313942113891244, "loss_iou": 0.89453125, "loss_num": 0.04296875, "loss_xval": 2.0, "num_input_tokens_seen": 49350048, "step": 745 }, { "epoch": 0.06982730378621238, "grad_norm": 17.502429962158203, "learning_rate": 5e-05, "loss": 1.7413, "num_input_tokens_seen": 49416564, "step": 746 }, { "epoch": 0.06982730378621238, "loss": 1.8351719379425049, "loss_ce": 0.009976711124181747, "loss_iou": 0.7578125, "loss_num": 0.061279296875, "loss_xval": 1.828125, "num_input_tokens_seen": 49416564, "step": 746 }, { "epoch": 0.0699209060701081, "grad_norm": 27.377117156982422, "learning_rate": 5e-05, "loss": 1.6046, "num_input_tokens_seen": 49482492, "step": 747 }, { "epoch": 0.0699209060701081, "loss": 1.574857234954834, "loss_ce": 0.007596466690301895, "loss_iou": 0.66015625, "loss_num": 0.048583984375, "loss_xval": 1.5703125, "num_input_tokens_seen": 49482492, "step": 747 }, { "epoch": 0.07001450835400384, "grad_norm": 15.532224655151367, "learning_rate": 5e-05, "loss": 1.882, "num_input_tokens_seen": 49549728, "step": 748 }, { "epoch": 0.07001450835400384, "loss": 1.9582514762878418, "loss_ce": 0.01000935211777687, "loss_iou": 0.875, "loss_num": 0.03955078125, "loss_xval": 1.9453125, "num_input_tokens_seen": 49549728, "step": 748 }, { "epoch": 0.07010811063789957, "grad_norm": 12.926255226135254, "learning_rate": 5e-05, "loss": 1.7915, "num_input_tokens_seen": 49615236, "step": 749 }, { "epoch": 0.07010811063789957, "loss": 1.8438667058944702, "loss_ce": 0.009394025430083275, "loss_iou": 0.7578125, "loss_num": 0.0634765625, "loss_xval": 1.8359375, "num_input_tokens_seen": 49615236, "step": 749 }, { "epoch": 0.07020171292179529, "grad_norm": 23.295557022094727, "learning_rate": 5e-05, "loss": 1.5518, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07020171292179529, "eval_seeclick_CIoU": 0.0821551838889718, "eval_seeclick_GIoU": 0.08433011546730995, "eval_seeclick_IoU": 0.1993928849697113, "eval_seeclick_MAE_all": 0.15256305783987045, "eval_seeclick_MAE_h": 0.07523206993937492, "eval_seeclick_MAE_w": 0.11818498000502586, "eval_seeclick_MAE_x_boxes": 0.21669039130210876, "eval_seeclick_MAE_y_boxes": 0.13354936987161636, "eval_seeclick_NUM_probability": 0.997737854719162, "eval_seeclick_inside_bbox": 0.2958333343267441, "eval_seeclick_loss": 2.6831507682800293, "eval_seeclick_loss_ce": 0.013645281083881855, "eval_seeclick_loss_iou": 0.971923828125, "eval_seeclick_loss_num": 0.1595611572265625, "eval_seeclick_loss_xval": 2.7431640625, "eval_seeclick_runtime": 64.8097, "eval_seeclick_samples_per_second": 0.725, "eval_seeclick_steps_per_second": 0.031, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07020171292179529, "eval_icons_CIoU": -0.12176968157291412, "eval_icons_GIoU": -0.05442662164568901, "eval_icons_IoU": 0.038698263466358185, "eval_icons_MAE_all": 0.16640260815620422, "eval_icons_MAE_h": 0.13604643195867538, "eval_icons_MAE_w": 0.22236677259206772, "eval_icons_MAE_x_boxes": 0.07750638574361801, "eval_icons_MAE_y_boxes": 0.09784968942403793, "eval_icons_NUM_probability": 0.9974802136421204, "eval_icons_inside_bbox": 0.04340277798473835, "eval_icons_loss": 2.9876580238342285, "eval_icons_loss_ce": 0.0013713959779124707, "eval_icons_loss_iou": 1.064453125, "eval_icons_loss_num": 0.1716156005859375, "eval_icons_loss_xval": 2.990234375, "eval_icons_runtime": 73.0627, "eval_icons_samples_per_second": 0.684, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07020171292179529, "eval_screenspot_CIoU": -0.01705087659259637, "eval_screenspot_GIoU": -0.02666320465505123, "eval_screenspot_IoU": 0.15526391565799713, "eval_screenspot_MAE_all": 0.14108752955993017, "eval_screenspot_MAE_h": 0.07705104599396388, "eval_screenspot_MAE_w": 0.15436455110708872, "eval_screenspot_MAE_x_boxes": 0.2198748638232549, "eval_screenspot_MAE_y_boxes": 0.08775023867686589, "eval_screenspot_NUM_probability": 0.9970801870028178, "eval_screenspot_inside_bbox": 0.34916667143503827, "eval_screenspot_loss": 2.797074794769287, "eval_screenspot_loss_ce": 0.01522288036843141, "eval_screenspot_loss_iou": 1.0514322916666667, "eval_screenspot_loss_num": 0.147674560546875, "eval_screenspot_loss_xval": 2.8414713541666665, "eval_screenspot_runtime": 118.8564, "eval_screenspot_samples_per_second": 0.749, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07020171292179529, "eval_compot_CIoU": -0.040124996565282345, "eval_compot_GIoU": -0.005084183067083359, "eval_compot_IoU": 0.1061786413192749, "eval_compot_MAE_all": 0.11273051053285599, "eval_compot_MAE_h": 0.0688395258039236, "eval_compot_MAE_w": 0.14000537246465683, "eval_compot_MAE_x_boxes": 0.08593297004699707, "eval_compot_MAE_y_boxes": 0.09216885641217232, "eval_compot_NUM_probability": 0.996105432510376, "eval_compot_inside_bbox": 0.1579861119389534, "eval_compot_loss": 2.632723331451416, "eval_compot_loss_ce": 0.0066622712183743715, "eval_compot_loss_iou": 1.03125, "eval_compot_loss_num": 0.11548614501953125, "eval_compot_loss_xval": 2.63916015625, "eval_compot_runtime": 70.3618, "eval_compot_samples_per_second": 0.711, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07020171292179529, "eval_custom_ui_MAE_all": 0.17065107077360153, "eval_custom_ui_MAE_x": 0.13065431267023087, "eval_custom_ui_MAE_y": 0.2106478363275528, "eval_custom_ui_NUM_probability": 0.9943723678588867, "eval_custom_ui_loss": 0.8370248675346375, "eval_custom_ui_loss_ce": 0.03082600887864828, "eval_custom_ui_loss_num": 0.163543701171875, "eval_custom_ui_loss_xval": 0.8179931640625, "eval_custom_ui_runtime": 55.5237, "eval_custom_ui_samples_per_second": 0.901, "eval_custom_ui_steps_per_second": 0.036, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07020171292179529, "loss": 0.8694114685058594, "loss_ce": 0.03200910612940788, "loss_iou": 0.0, "loss_num": 0.1669921875, "loss_xval": 0.8359375, "num_input_tokens_seen": 49680536, "step": 750 }, { "epoch": 0.07029531520569102, "grad_norm": 19.479524612426758, "learning_rate": 5e-05, "loss": 1.8365, "num_input_tokens_seen": 49746644, "step": 751 }, { "epoch": 0.07029531520569102, "loss": 1.8767595291137695, "loss_ce": 0.002736043417826295, "loss_iou": 0.828125, "loss_num": 0.04345703125, "loss_xval": 1.875, "num_input_tokens_seen": 49746644, "step": 751 }, { "epoch": 0.07038891748958674, "grad_norm": 14.468050956726074, "learning_rate": 5e-05, "loss": 1.5694, "num_input_tokens_seen": 49814392, "step": 752 }, { "epoch": 0.07038891748958674, "loss": 1.7327210903167725, "loss_ce": 0.00322896521538496, "loss_iou": 0.72265625, "loss_num": 0.056884765625, "loss_xval": 1.7265625, "num_input_tokens_seen": 49814392, "step": 752 }, { "epoch": 0.07048251977348248, "grad_norm": 26.36949920654297, "learning_rate": 5e-05, "loss": 1.8411, "num_input_tokens_seen": 49880920, "step": 753 }, { "epoch": 0.07048251977348248, "loss": 1.820433259010315, "loss_ce": 0.005980121437460184, "loss_iou": 0.8046875, "loss_num": 0.041259765625, "loss_xval": 1.8125, "num_input_tokens_seen": 49880920, "step": 753 }, { "epoch": 0.0705761220573782, "grad_norm": 14.882464408874512, "learning_rate": 5e-05, "loss": 1.7455, "num_input_tokens_seen": 49946800, "step": 754 }, { "epoch": 0.0705761220573782, "loss": 1.517045259475708, "loss_ce": 0.004350018221884966, "loss_iou": 0.6328125, "loss_num": 0.050048828125, "loss_xval": 1.515625, "num_input_tokens_seen": 49946800, "step": 754 }, { "epoch": 0.07066972434127393, "grad_norm": 26.518571853637695, "learning_rate": 5e-05, "loss": 1.7098, "num_input_tokens_seen": 50013044, "step": 755 }, { "epoch": 0.07066972434127393, "loss": 1.873121738433838, "loss_ce": 0.008863825350999832, "loss_iou": 0.76171875, "loss_num": 0.06787109375, "loss_xval": 1.8671875, "num_input_tokens_seen": 50013044, "step": 755 }, { "epoch": 0.07076332662516965, "grad_norm": 30.003734588623047, "learning_rate": 5e-05, "loss": 1.8209, "num_input_tokens_seen": 50079532, "step": 756 }, { "epoch": 0.07076332662516965, "loss": 1.817097544670105, "loss_ce": 0.0036210219841450453, "loss_iou": 0.796875, "loss_num": 0.043701171875, "loss_xval": 1.8125, "num_input_tokens_seen": 50079532, "step": 756 }, { "epoch": 0.07085692890906538, "grad_norm": 20.43360137939453, "learning_rate": 5e-05, "loss": 1.6892, "num_input_tokens_seen": 50145584, "step": 757 }, { "epoch": 0.07085692890906538, "loss": 1.3948793411254883, "loss_ce": 0.0019349617650732398, "loss_iou": 0.5625, "loss_num": 0.05419921875, "loss_xval": 1.390625, "num_input_tokens_seen": 50145584, "step": 757 }, { "epoch": 0.0709505311929611, "grad_norm": 15.142757415771484, "learning_rate": 5e-05, "loss": 1.8305, "num_input_tokens_seen": 50212980, "step": 758 }, { "epoch": 0.0709505311929611, "loss": 1.6338109970092773, "loss_ce": 0.005148960277438164, "loss_iou": 0.6875, "loss_num": 0.05078125, "loss_xval": 1.625, "num_input_tokens_seen": 50212980, "step": 758 }, { "epoch": 0.07104413347685684, "grad_norm": 18.16373062133789, "learning_rate": 5e-05, "loss": 1.4583, "num_input_tokens_seen": 50279488, "step": 759 }, { "epoch": 0.07104413347685684, "loss": 1.4060198068618774, "loss_ce": 0.008558880537748337, "loss_iou": 0.6015625, "loss_num": 0.038330078125, "loss_xval": 1.3984375, "num_input_tokens_seen": 50279488, "step": 759 }, { "epoch": 0.07113773576075257, "grad_norm": 15.06195068359375, "learning_rate": 5e-05, "loss": 1.8918, "num_input_tokens_seen": 50344944, "step": 760 }, { "epoch": 0.07113773576075257, "loss": 1.9246017932891846, "loss_ce": 0.006632945500314236, "loss_iou": 0.84765625, "loss_num": 0.044677734375, "loss_xval": 1.921875, "num_input_tokens_seen": 50344944, "step": 760 }, { "epoch": 0.07123133804464829, "grad_norm": 26.571983337402344, "learning_rate": 5e-05, "loss": 1.8377, "num_input_tokens_seen": 50411756, "step": 761 }, { "epoch": 0.07123133804464829, "loss": 1.8060612678527832, "loss_ce": 0.010162830352783203, "loss_iou": 0.765625, "loss_num": 0.05322265625, "loss_xval": 1.796875, "num_input_tokens_seen": 50411756, "step": 761 }, { "epoch": 0.07132494032854401, "grad_norm": 16.121057510375977, "learning_rate": 5e-05, "loss": 1.6626, "num_input_tokens_seen": 50478228, "step": 762 }, { "epoch": 0.07132494032854401, "loss": 1.7193970680236816, "loss_ce": 0.006506370846182108, "loss_iou": 0.703125, "loss_num": 0.061279296875, "loss_xval": 1.7109375, "num_input_tokens_seen": 50478228, "step": 762 }, { "epoch": 0.07141854261243974, "grad_norm": 30.311779022216797, "learning_rate": 5e-05, "loss": 2.0037, "num_input_tokens_seen": 50543592, "step": 763 }, { "epoch": 0.07141854261243974, "loss": 1.9180309772491455, "loss_ce": 0.00494496151804924, "loss_iou": 0.8359375, "loss_num": 0.0478515625, "loss_xval": 1.9140625, "num_input_tokens_seen": 50543592, "step": 763 }, { "epoch": 0.07151214489633546, "grad_norm": 33.463836669921875, "learning_rate": 5e-05, "loss": 2.0935, "num_input_tokens_seen": 50610348, "step": 764 }, { "epoch": 0.07151214489633546, "loss": 2.2562096118927, "loss_ce": 0.0032799867913126945, "loss_iou": 0.9296875, "loss_num": 0.078125, "loss_xval": 2.25, "num_input_tokens_seen": 50610348, "step": 764 }, { "epoch": 0.0716057471802312, "grad_norm": 16.288908004760742, "learning_rate": 5e-05, "loss": 1.7871, "num_input_tokens_seen": 50677068, "step": 765 }, { "epoch": 0.0716057471802312, "loss": 1.954931378364563, "loss_ce": 0.007665722630918026, "loss_iou": 0.796875, "loss_num": 0.07177734375, "loss_xval": 1.9453125, "num_input_tokens_seen": 50677068, "step": 765 }, { "epoch": 0.07169934946412693, "grad_norm": 26.749717712402344, "learning_rate": 5e-05, "loss": 1.628, "num_input_tokens_seen": 50743656, "step": 766 }, { "epoch": 0.07169934946412693, "loss": 1.78788161277771, "loss_ce": 0.00760816503316164, "loss_iou": 0.74609375, "loss_num": 0.05810546875, "loss_xval": 1.78125, "num_input_tokens_seen": 50743656, "step": 766 }, { "epoch": 0.07179295174802265, "grad_norm": 21.085527420043945, "learning_rate": 5e-05, "loss": 1.6886, "num_input_tokens_seen": 50808832, "step": 767 }, { "epoch": 0.07179295174802265, "loss": 1.9450939893722534, "loss_ce": 0.005640930961817503, "loss_iou": 0.79296875, "loss_num": 0.07080078125, "loss_xval": 1.9375, "num_input_tokens_seen": 50808832, "step": 767 }, { "epoch": 0.07188655403191838, "grad_norm": 27.9982852935791, "learning_rate": 5e-05, "loss": 1.7369, "num_input_tokens_seen": 50875812, "step": 768 }, { "epoch": 0.07188655403191838, "loss": 1.67847740650177, "loss_ce": 0.0026961141265928745, "loss_iou": 0.7421875, "loss_num": 0.03857421875, "loss_xval": 1.671875, "num_input_tokens_seen": 50875812, "step": 768 }, { "epoch": 0.0719801563158141, "grad_norm": 15.898396492004395, "learning_rate": 5e-05, "loss": 2.0964, "num_input_tokens_seen": 50941828, "step": 769 }, { "epoch": 0.0719801563158141, "loss": 2.1420369148254395, "loss_ce": 0.004341588821262121, "loss_iou": 0.8984375, "loss_num": 0.06884765625, "loss_xval": 2.140625, "num_input_tokens_seen": 50941828, "step": 769 }, { "epoch": 0.07207375859970984, "grad_norm": 18.860252380371094, "learning_rate": 5e-05, "loss": 1.7662, "num_input_tokens_seen": 51008116, "step": 770 }, { "epoch": 0.07207375859970984, "loss": 1.9062767028808594, "loss_ce": 0.005153778474777937, "loss_iou": 0.796875, "loss_num": 0.061279296875, "loss_xval": 1.8984375, "num_input_tokens_seen": 51008116, "step": 770 }, { "epoch": 0.07216736088360556, "grad_norm": 23.90975570678711, "learning_rate": 5e-05, "loss": 1.7491, "num_input_tokens_seen": 51073896, "step": 771 }, { "epoch": 0.07216736088360556, "loss": 1.8947341442108154, "loss_ce": 0.008992090821266174, "loss_iou": 0.7890625, "loss_num": 0.062255859375, "loss_xval": 1.8828125, "num_input_tokens_seen": 51073896, "step": 771 }, { "epoch": 0.07226096316750129, "grad_norm": 30.103145599365234, "learning_rate": 5e-05, "loss": 1.8469, "num_input_tokens_seen": 51139272, "step": 772 }, { "epoch": 0.07226096316750129, "loss": 1.950621247291565, "loss_ce": 0.006285249255597591, "loss_iou": 0.85546875, "loss_num": 0.046142578125, "loss_xval": 1.9453125, "num_input_tokens_seen": 51139272, "step": 772 }, { "epoch": 0.07235456545139701, "grad_norm": 17.932331085205078, "learning_rate": 5e-05, "loss": 2.3164, "num_input_tokens_seen": 51206216, "step": 773 }, { "epoch": 0.07235456545139701, "loss": 2.350923538208008, "loss_ce": 0.008150112815201283, "loss_iou": 1.046875, "loss_num": 0.049560546875, "loss_xval": 2.34375, "num_input_tokens_seen": 51206216, "step": 773 }, { "epoch": 0.07244816773529274, "grad_norm": 12.034122467041016, "learning_rate": 5e-05, "loss": 1.6248, "num_input_tokens_seen": 51271780, "step": 774 }, { "epoch": 0.07244816773529274, "loss": 1.6848853826522827, "loss_ce": 0.0032448233105242252, "loss_iou": 0.7421875, "loss_num": 0.039794921875, "loss_xval": 1.6796875, "num_input_tokens_seen": 51271780, "step": 774 }, { "epoch": 0.07254177001918846, "grad_norm": 14.1908540725708, "learning_rate": 5e-05, "loss": 1.6863, "num_input_tokens_seen": 51338512, "step": 775 }, { "epoch": 0.07254177001918846, "loss": 1.6020501852035522, "loss_ce": 0.002440792042762041, "loss_iou": 0.71875, "loss_num": 0.032958984375, "loss_xval": 1.6015625, "num_input_tokens_seen": 51338512, "step": 775 }, { "epoch": 0.0726353723030842, "grad_norm": 15.499090194702148, "learning_rate": 5e-05, "loss": 1.7165, "num_input_tokens_seen": 51403940, "step": 776 }, { "epoch": 0.0726353723030842, "loss": 1.53767728805542, "loss_ce": 0.00398589763790369, "loss_iou": 0.6328125, "loss_num": 0.05419921875, "loss_xval": 1.53125, "num_input_tokens_seen": 51403940, "step": 776 }, { "epoch": 0.07272897458697992, "grad_norm": 22.869251251220703, "learning_rate": 5e-05, "loss": 1.8355, "num_input_tokens_seen": 51469816, "step": 777 }, { "epoch": 0.07272897458697992, "loss": 2.0655317306518555, "loss_ce": 0.006937808357179165, "loss_iou": 0.859375, "loss_num": 0.06884765625, "loss_xval": 2.0625, "num_input_tokens_seen": 51469816, "step": 777 }, { "epoch": 0.07282257687087565, "grad_norm": 19.780359268188477, "learning_rate": 5e-05, "loss": 1.9059, "num_input_tokens_seen": 51536020, "step": 778 }, { "epoch": 0.07282257687087565, "loss": 1.9265518188476562, "loss_ce": 0.003700188361108303, "loss_iou": 0.8125, "loss_num": 0.05908203125, "loss_xval": 1.921875, "num_input_tokens_seen": 51536020, "step": 778 }, { "epoch": 0.07291617915477137, "grad_norm": 20.66790771484375, "learning_rate": 5e-05, "loss": 1.5468, "num_input_tokens_seen": 51603044, "step": 779 }, { "epoch": 0.07291617915477137, "loss": 1.4181262254714966, "loss_ce": 0.007634267210960388, "loss_iou": 0.61328125, "loss_num": 0.03662109375, "loss_xval": 1.4140625, "num_input_tokens_seen": 51603044, "step": 779 }, { "epoch": 0.0730097814386671, "grad_norm": 14.053909301757812, "learning_rate": 5e-05, "loss": 2.1016, "num_input_tokens_seen": 51669320, "step": 780 }, { "epoch": 0.0730097814386671, "loss": 2.0850119590759277, "loss_ce": 0.008840063586831093, "loss_iou": 0.86328125, "loss_num": 0.0693359375, "loss_xval": 2.078125, "num_input_tokens_seen": 51669320, "step": 780 }, { "epoch": 0.07310338372256284, "grad_norm": 13.39023494720459, "learning_rate": 5e-05, "loss": 1.5063, "num_input_tokens_seen": 51736344, "step": 781 }, { "epoch": 0.07310338372256284, "loss": 1.3768768310546875, "loss_ce": 0.0038299686275422573, "loss_iou": 0.62109375, "loss_num": 0.0255126953125, "loss_xval": 1.375, "num_input_tokens_seen": 51736344, "step": 781 }, { "epoch": 0.07319698600645856, "grad_norm": 12.973723411560059, "learning_rate": 5e-05, "loss": 1.7162, "num_input_tokens_seen": 51801984, "step": 782 }, { "epoch": 0.07319698600645856, "loss": 1.7734899520874023, "loss_ce": 0.004935131408274174, "loss_iou": 0.77734375, "loss_num": 0.043212890625, "loss_xval": 1.765625, "num_input_tokens_seen": 51801984, "step": 782 }, { "epoch": 0.07329058829035429, "grad_norm": 18.570348739624023, "learning_rate": 5e-05, "loss": 1.6957, "num_input_tokens_seen": 51867540, "step": 783 }, { "epoch": 0.07329058829035429, "loss": 1.6712414026260376, "loss_ce": 0.002296093385666609, "loss_iou": 0.703125, "loss_num": 0.052734375, "loss_xval": 1.671875, "num_input_tokens_seen": 51867540, "step": 783 }, { "epoch": 0.07338419057425001, "grad_norm": 11.181182861328125, "learning_rate": 5e-05, "loss": 1.674, "num_input_tokens_seen": 51934840, "step": 784 }, { "epoch": 0.07338419057425001, "loss": 1.7133533954620361, "loss_ce": 0.005345569923520088, "loss_iou": 0.7734375, "loss_num": 0.03173828125, "loss_xval": 1.7109375, "num_input_tokens_seen": 51934840, "step": 784 }, { "epoch": 0.07347779285814574, "grad_norm": 10.359596252441406, "learning_rate": 5e-05, "loss": 1.7389, "num_input_tokens_seen": 52002108, "step": 785 }, { "epoch": 0.07347779285814574, "loss": 1.7151685953140259, "loss_ce": 0.00520765595138073, "loss_iou": 0.75, "loss_num": 0.0419921875, "loss_xval": 1.7109375, "num_input_tokens_seen": 52002108, "step": 785 }, { "epoch": 0.07357139514204146, "grad_norm": 16.918203353881836, "learning_rate": 5e-05, "loss": 1.7026, "num_input_tokens_seen": 52068252, "step": 786 }, { "epoch": 0.07357139514204146, "loss": 1.8131422996520996, "loss_ce": 0.005525010172277689, "loss_iou": 0.76171875, "loss_num": 0.056640625, "loss_xval": 1.8046875, "num_input_tokens_seen": 52068252, "step": 786 }, { "epoch": 0.0736649974259372, "grad_norm": 18.828004837036133, "learning_rate": 5e-05, "loss": 1.7878, "num_input_tokens_seen": 52135224, "step": 787 }, { "epoch": 0.0736649974259372, "loss": 2.062363862991333, "loss_ce": 0.003770082723349333, "loss_iou": 0.890625, "loss_num": 0.055908203125, "loss_xval": 2.0625, "num_input_tokens_seen": 52135224, "step": 787 }, { "epoch": 0.07375859970983292, "grad_norm": 21.628116607666016, "learning_rate": 5e-05, "loss": 1.8181, "num_input_tokens_seen": 52201892, "step": 788 }, { "epoch": 0.07375859970983292, "loss": 1.8004341125488281, "loss_ce": 0.0035592170897871256, "loss_iou": 0.76953125, "loss_num": 0.051025390625, "loss_xval": 1.796875, "num_input_tokens_seen": 52201892, "step": 788 }, { "epoch": 0.07385220199372865, "grad_norm": 51.08279037475586, "learning_rate": 5e-05, "loss": 1.6725, "num_input_tokens_seen": 52267464, "step": 789 }, { "epoch": 0.07385220199372865, "loss": 1.689554214477539, "loss_ce": 0.002542572095990181, "loss_iou": 0.73828125, "loss_num": 0.042236328125, "loss_xval": 1.6875, "num_input_tokens_seen": 52267464, "step": 789 }, { "epoch": 0.07394580427762437, "grad_norm": 11.357071876525879, "learning_rate": 5e-05, "loss": 2.0696, "num_input_tokens_seen": 52333484, "step": 790 }, { "epoch": 0.07394580427762437, "loss": 1.8198058605194092, "loss_ce": 0.014508059248328209, "loss_iou": 0.6953125, "loss_num": 0.083984375, "loss_xval": 1.8046875, "num_input_tokens_seen": 52333484, "step": 790 }, { "epoch": 0.0740394065615201, "grad_norm": 11.868918418884277, "learning_rate": 5e-05, "loss": 2.0323, "num_input_tokens_seen": 52400340, "step": 791 }, { "epoch": 0.0740394065615201, "loss": 2.188082695007324, "loss_ce": 0.005465551745146513, "loss_iou": 0.88671875, "loss_num": 0.08251953125, "loss_xval": 2.1875, "num_input_tokens_seen": 52400340, "step": 791 }, { "epoch": 0.07413300884541583, "grad_norm": 12.103880882263184, "learning_rate": 5e-05, "loss": 1.5496, "num_input_tokens_seen": 52467120, "step": 792 }, { "epoch": 0.07413300884541583, "loss": 1.575488567352295, "loss_ce": 0.007129143923521042, "loss_iou": 0.671875, "loss_num": 0.04541015625, "loss_xval": 1.5703125, "num_input_tokens_seen": 52467120, "step": 792 }, { "epoch": 0.07422661112931156, "grad_norm": 18.401817321777344, "learning_rate": 5e-05, "loss": 1.3835, "num_input_tokens_seen": 52533096, "step": 793 }, { "epoch": 0.07422661112931156, "loss": 1.6278660297393799, "loss_ce": 0.0077488478273153305, "loss_iou": 0.6875, "loss_num": 0.049072265625, "loss_xval": 1.6171875, "num_input_tokens_seen": 52533096, "step": 793 }, { "epoch": 0.07432021341320728, "grad_norm": 15.680350303649902, "learning_rate": 5e-05, "loss": 1.712, "num_input_tokens_seen": 52598404, "step": 794 }, { "epoch": 0.07432021341320728, "loss": 1.5534054040908813, "loss_ce": 0.003600743133574724, "loss_iou": 0.6875, "loss_num": 0.0341796875, "loss_xval": 1.546875, "num_input_tokens_seen": 52598404, "step": 794 }, { "epoch": 0.07441381569710301, "grad_norm": 14.900457382202148, "learning_rate": 5e-05, "loss": 1.7548, "num_input_tokens_seen": 52664512, "step": 795 }, { "epoch": 0.07441381569710301, "loss": 1.6482797861099243, "loss_ce": 0.004725020378828049, "loss_iou": 0.72265625, "loss_num": 0.03955078125, "loss_xval": 1.640625, "num_input_tokens_seen": 52664512, "step": 795 }, { "epoch": 0.07450741798099873, "grad_norm": 22.29669189453125, "learning_rate": 5e-05, "loss": 1.6021, "num_input_tokens_seen": 52730696, "step": 796 }, { "epoch": 0.07450741798099873, "loss": 1.7194867134094238, "loss_ce": 0.0026898817159235477, "loss_iou": 0.7578125, "loss_num": 0.041015625, "loss_xval": 1.71875, "num_input_tokens_seen": 52730696, "step": 796 }, { "epoch": 0.07460102026489446, "grad_norm": 20.31715202331543, "learning_rate": 5e-05, "loss": 1.8804, "num_input_tokens_seen": 52796632, "step": 797 }, { "epoch": 0.07460102026489446, "loss": 1.814350962638855, "loss_ce": 0.0018509826622903347, "loss_iou": 0.796875, "loss_num": 0.0439453125, "loss_xval": 1.8125, "num_input_tokens_seen": 52796632, "step": 797 }, { "epoch": 0.0746946225487902, "grad_norm": 12.104740142822266, "learning_rate": 5e-05, "loss": 1.433, "num_input_tokens_seen": 52862996, "step": 798 }, { "epoch": 0.0746946225487902, "loss": 1.4822030067443848, "loss_ce": 0.0046640620566904545, "loss_iou": 0.65234375, "loss_num": 0.034423828125, "loss_xval": 1.4765625, "num_input_tokens_seen": 52862996, "step": 798 }, { "epoch": 0.07478822483268592, "grad_norm": 19.013381958007812, "learning_rate": 5e-05, "loss": 1.761, "num_input_tokens_seen": 52930324, "step": 799 }, { "epoch": 0.07478822483268592, "loss": 1.7661657333374023, "loss_ce": 0.003470430849120021, "loss_iou": 0.78515625, "loss_num": 0.03857421875, "loss_xval": 1.765625, "num_input_tokens_seen": 52930324, "step": 799 }, { "epoch": 0.07488182711658165, "grad_norm": 12.877026557922363, "learning_rate": 5e-05, "loss": 1.849, "num_input_tokens_seen": 52996436, "step": 800 }, { "epoch": 0.07488182711658165, "loss": 1.742581844329834, "loss_ce": 0.0033240043558180332, "loss_iou": 0.77734375, "loss_num": 0.037353515625, "loss_xval": 1.7421875, "num_input_tokens_seen": 52996436, "step": 800 }, { "epoch": 0.07497542940047737, "grad_norm": 13.212465286254883, "learning_rate": 5e-05, "loss": 1.387, "num_input_tokens_seen": 53063164, "step": 801 }, { "epoch": 0.07497542940047737, "loss": 1.4713304042816162, "loss_ce": 0.0025803588796406984, "loss_iou": 0.66015625, "loss_num": 0.0299072265625, "loss_xval": 1.46875, "num_input_tokens_seen": 53063164, "step": 801 }, { "epoch": 0.0750690316843731, "grad_norm": 32.19801330566406, "learning_rate": 5e-05, "loss": 1.8543, "num_input_tokens_seen": 53127832, "step": 802 }, { "epoch": 0.0750690316843731, "loss": 1.6698976755142212, "loss_ce": 0.004370349459350109, "loss_iou": 0.71484375, "loss_num": 0.047119140625, "loss_xval": 1.6640625, "num_input_tokens_seen": 53127832, "step": 802 }, { "epoch": 0.07516263396826883, "grad_norm": 12.36397647857666, "learning_rate": 5e-05, "loss": 2.0378, "num_input_tokens_seen": 53193568, "step": 803 }, { "epoch": 0.07516263396826883, "loss": 2.122973918914795, "loss_ce": 0.002856625011190772, "loss_iou": 0.89453125, "loss_num": 0.06689453125, "loss_xval": 2.125, "num_input_tokens_seen": 53193568, "step": 803 }, { "epoch": 0.07525623625216456, "grad_norm": 15.287789344787598, "learning_rate": 5e-05, "loss": 1.7173, "num_input_tokens_seen": 53258844, "step": 804 }, { "epoch": 0.07525623625216456, "loss": 1.6398072242736816, "loss_ce": 0.0016236234223470092, "loss_iou": 0.6796875, "loss_num": 0.0556640625, "loss_xval": 1.640625, "num_input_tokens_seen": 53258844, "step": 804 }, { "epoch": 0.07534983853606028, "grad_norm": 27.020416259765625, "learning_rate": 5e-05, "loss": 1.4972, "num_input_tokens_seen": 53324624, "step": 805 }, { "epoch": 0.07534983853606028, "loss": 1.4142091274261475, "loss_ce": 0.0016114177415147424, "loss_iou": 0.6015625, "loss_num": 0.04248046875, "loss_xval": 1.4140625, "num_input_tokens_seen": 53324624, "step": 805 }, { "epoch": 0.075443440819956, "grad_norm": 59.48409652709961, "learning_rate": 5e-05, "loss": 1.789, "num_input_tokens_seen": 53390328, "step": 806 }, { "epoch": 0.075443440819956, "loss": 1.6413440704345703, "loss_ce": 0.002916322322562337, "loss_iou": 0.6875, "loss_num": 0.05224609375, "loss_xval": 1.640625, "num_input_tokens_seen": 53390328, "step": 806 }, { "epoch": 0.07553704310385173, "grad_norm": 10.196464538574219, "learning_rate": 5e-05, "loss": 1.3888, "num_input_tokens_seen": 53457428, "step": 807 }, { "epoch": 0.07553704310385173, "loss": 1.3960691690444946, "loss_ce": 0.004467627964913845, "loss_iou": 0.625, "loss_num": 0.0281982421875, "loss_xval": 1.390625, "num_input_tokens_seen": 53457428, "step": 807 }, { "epoch": 0.07563064538774746, "grad_norm": 27.761362075805664, "learning_rate": 5e-05, "loss": 1.6037, "num_input_tokens_seen": 53522556, "step": 808 }, { "epoch": 0.07563064538774746, "loss": 1.5643582344055176, "loss_ce": 0.00588653702288866, "loss_iou": 0.65234375, "loss_num": 0.050048828125, "loss_xval": 1.5546875, "num_input_tokens_seen": 53522556, "step": 808 }, { "epoch": 0.0757242476716432, "grad_norm": 13.68408203125, "learning_rate": 5e-05, "loss": 1.8785, "num_input_tokens_seen": 53589788, "step": 809 }, { "epoch": 0.0757242476716432, "loss": 1.970529317855835, "loss_ce": 0.0037324335426092148, "loss_iou": 0.828125, "loss_num": 0.06103515625, "loss_xval": 1.96875, "num_input_tokens_seen": 53589788, "step": 809 }, { "epoch": 0.07581784995553892, "grad_norm": 33.96290969848633, "learning_rate": 5e-05, "loss": 1.7861, "num_input_tokens_seen": 53656668, "step": 810 }, { "epoch": 0.07581784995553892, "loss": 1.8013924360275269, "loss_ce": 0.010376828722655773, "loss_iou": 0.76953125, "loss_num": 0.050048828125, "loss_xval": 1.7890625, "num_input_tokens_seen": 53656668, "step": 810 }, { "epoch": 0.07591145223943464, "grad_norm": 19.82717514038086, "learning_rate": 5e-05, "loss": 1.7128, "num_input_tokens_seen": 53722308, "step": 811 }, { "epoch": 0.07591145223943464, "loss": 1.6208107471466064, "loss_ce": 0.0026467025745660067, "loss_iou": 0.72265625, "loss_num": 0.03515625, "loss_xval": 1.6171875, "num_input_tokens_seen": 53722308, "step": 811 }, { "epoch": 0.07600505452333037, "grad_norm": 11.903279304504395, "learning_rate": 5e-05, "loss": 1.5115, "num_input_tokens_seen": 53788076, "step": 812 }, { "epoch": 0.07600505452333037, "loss": 1.4619097709655762, "loss_ce": 0.00561102107167244, "loss_iou": 0.59375, "loss_num": 0.0546875, "loss_xval": 1.453125, "num_input_tokens_seen": 53788076, "step": 812 }, { "epoch": 0.07609865680722609, "grad_norm": 17.889984130859375, "learning_rate": 5e-05, "loss": 1.5059, "num_input_tokens_seen": 53855092, "step": 813 }, { "epoch": 0.07609865680722609, "loss": 1.3990848064422607, "loss_ce": 0.0035768812522292137, "loss_iou": 0.62890625, "loss_num": 0.028076171875, "loss_xval": 1.3984375, "num_input_tokens_seen": 53855092, "step": 813 }, { "epoch": 0.07619225909112182, "grad_norm": 23.897436141967773, "learning_rate": 5e-05, "loss": 1.91, "num_input_tokens_seen": 53920380, "step": 814 }, { "epoch": 0.07619225909112182, "loss": 1.6896394491195679, "loss_ce": 0.0065340036526322365, "loss_iou": 0.71484375, "loss_num": 0.051025390625, "loss_xval": 1.6796875, "num_input_tokens_seen": 53920380, "step": 814 }, { "epoch": 0.07628586137501756, "grad_norm": 15.09005355834961, "learning_rate": 5e-05, "loss": 1.6063, "num_input_tokens_seen": 53986744, "step": 815 }, { "epoch": 0.07628586137501756, "loss": 1.7806751728057861, "loss_ce": 0.005284649785608053, "loss_iou": 0.7578125, "loss_num": 0.052001953125, "loss_xval": 1.7734375, "num_input_tokens_seen": 53986744, "step": 815 }, { "epoch": 0.07637946365891328, "grad_norm": 35.94722366333008, "learning_rate": 5e-05, "loss": 1.7237, "num_input_tokens_seen": 54052980, "step": 816 }, { "epoch": 0.07637946365891328, "loss": 1.668999195098877, "loss_ce": 0.006889849901199341, "loss_iou": 0.74609375, "loss_num": 0.0341796875, "loss_xval": 1.6640625, "num_input_tokens_seen": 54052980, "step": 816 }, { "epoch": 0.076473065942809, "grad_norm": 12.649584770202637, "learning_rate": 5e-05, "loss": 2.0055, "num_input_tokens_seen": 54119408, "step": 817 }, { "epoch": 0.076473065942809, "loss": 1.992591142654419, "loss_ce": 0.0033332628663629293, "loss_iou": 0.8671875, "loss_num": 0.052001953125, "loss_xval": 1.9921875, "num_input_tokens_seen": 54119408, "step": 817 }, { "epoch": 0.07656666822670473, "grad_norm": 24.400339126586914, "learning_rate": 5e-05, "loss": 1.6179, "num_input_tokens_seen": 54185236, "step": 818 }, { "epoch": 0.07656666822670473, "loss": 1.61531400680542, "loss_ce": 0.0047183167189359665, "loss_iou": 0.59375, "loss_num": 0.08544921875, "loss_xval": 1.609375, "num_input_tokens_seen": 54185236, "step": 818 }, { "epoch": 0.07666027051060045, "grad_norm": 10.225992202758789, "learning_rate": 5e-05, "loss": 1.4889, "num_input_tokens_seen": 54251224, "step": 819 }, { "epoch": 0.07666027051060045, "loss": 1.5908461809158325, "loss_ce": 0.0068618240766227245, "loss_iou": 0.6796875, "loss_num": 0.04443359375, "loss_xval": 1.5859375, "num_input_tokens_seen": 54251224, "step": 819 }, { "epoch": 0.07675387279449619, "grad_norm": 10.83730697631836, "learning_rate": 5e-05, "loss": 2.0075, "num_input_tokens_seen": 54317208, "step": 820 }, { "epoch": 0.07675387279449619, "loss": 2.03198504447937, "loss_ce": 0.00464137177914381, "loss_iou": 0.86328125, "loss_num": 0.06005859375, "loss_xval": 2.03125, "num_input_tokens_seen": 54317208, "step": 820 }, { "epoch": 0.07684747507839192, "grad_norm": 40.60268783569336, "learning_rate": 5e-05, "loss": 1.7607, "num_input_tokens_seen": 54382544, "step": 821 }, { "epoch": 0.07684747507839192, "loss": 1.7447590827941895, "loss_ce": 0.005501284264028072, "loss_iou": 0.78125, "loss_num": 0.03466796875, "loss_xval": 1.7421875, "num_input_tokens_seen": 54382544, "step": 821 }, { "epoch": 0.07694107736228764, "grad_norm": 8.829503059387207, "learning_rate": 5e-05, "loss": 2.058, "num_input_tokens_seen": 54448428, "step": 822 }, { "epoch": 0.07694107736228764, "loss": 2.0724472999572754, "loss_ce": 0.002134760608896613, "loss_iou": 0.890625, "loss_num": 0.057373046875, "loss_xval": 2.0625, "num_input_tokens_seen": 54448428, "step": 822 }, { "epoch": 0.07703467964618337, "grad_norm": 11.689741134643555, "learning_rate": 5e-05, "loss": 1.9954, "num_input_tokens_seen": 54515384, "step": 823 }, { "epoch": 0.07703467964618337, "loss": 2.0363917350769043, "loss_ce": 0.003188550937920809, "loss_iou": 0.9140625, "loss_num": 0.0400390625, "loss_xval": 2.03125, "num_input_tokens_seen": 54515384, "step": 823 }, { "epoch": 0.07712828193007909, "grad_norm": 17.452810287475586, "learning_rate": 5e-05, "loss": 1.8118, "num_input_tokens_seen": 54582156, "step": 824 }, { "epoch": 0.07712828193007909, "loss": 1.853980541229248, "loss_ce": 0.005347815342247486, "loss_iou": 0.796875, "loss_num": 0.05078125, "loss_xval": 1.8515625, "num_input_tokens_seen": 54582156, "step": 824 }, { "epoch": 0.07722188421397481, "grad_norm": 48.813594818115234, "learning_rate": 5e-05, "loss": 1.6049, "num_input_tokens_seen": 54647936, "step": 825 }, { "epoch": 0.07722188421397481, "loss": 1.3968123197555542, "loss_ce": 0.004966678563505411, "loss_iou": 0.5859375, "loss_num": 0.043701171875, "loss_xval": 1.390625, "num_input_tokens_seen": 54647936, "step": 825 }, { "epoch": 0.07731548649787055, "grad_norm": 23.729625701904297, "learning_rate": 5e-05, "loss": 1.6444, "num_input_tokens_seen": 54711476, "step": 826 }, { "epoch": 0.07731548649787055, "loss": 1.8388928174972534, "loss_ce": 0.003931805491447449, "loss_iou": 0.7734375, "loss_num": 0.0576171875, "loss_xval": 1.8359375, "num_input_tokens_seen": 54711476, "step": 826 }, { "epoch": 0.07740908878176628, "grad_norm": 15.217606544494629, "learning_rate": 5e-05, "loss": 1.6576, "num_input_tokens_seen": 54778988, "step": 827 }, { "epoch": 0.07740908878176628, "loss": 1.6610138416290283, "loss_ce": 0.0028106600511819124, "loss_iou": 0.734375, "loss_num": 0.037109375, "loss_xval": 1.65625, "num_input_tokens_seen": 54778988, "step": 827 }, { "epoch": 0.077502691065662, "grad_norm": 17.45100212097168, "learning_rate": 5e-05, "loss": 1.7092, "num_input_tokens_seen": 54845004, "step": 828 }, { "epoch": 0.077502691065662, "loss": 1.8006932735443115, "loss_ce": 0.00418446259573102, "loss_iou": 0.7578125, "loss_num": 0.055908203125, "loss_xval": 1.796875, "num_input_tokens_seen": 54845004, "step": 828 }, { "epoch": 0.07759629334955773, "grad_norm": 19.25678825378418, "learning_rate": 5e-05, "loss": 1.6017, "num_input_tokens_seen": 54910868, "step": 829 }, { "epoch": 0.07759629334955773, "loss": 1.8255311250686646, "loss_ce": 0.004242067225277424, "loss_iou": 0.78125, "loss_num": 0.05126953125, "loss_xval": 1.8203125, "num_input_tokens_seen": 54910868, "step": 829 }, { "epoch": 0.07768989563345345, "grad_norm": 18.64685821533203, "learning_rate": 5e-05, "loss": 1.8397, "num_input_tokens_seen": 54976972, "step": 830 }, { "epoch": 0.07768989563345345, "loss": 1.816882848739624, "loss_ce": 0.0034062829799950123, "loss_iou": 0.78125, "loss_num": 0.05126953125, "loss_xval": 1.8125, "num_input_tokens_seen": 54976972, "step": 830 }, { "epoch": 0.07778349791734919, "grad_norm": 18.513874053955078, "learning_rate": 5e-05, "loss": 1.6702, "num_input_tokens_seen": 55042652, "step": 831 }, { "epoch": 0.07778349791734919, "loss": 1.558748722076416, "loss_ce": 0.005037764087319374, "loss_iou": 0.703125, "loss_num": 0.0296630859375, "loss_xval": 1.5546875, "num_input_tokens_seen": 55042652, "step": 831 }, { "epoch": 0.07787710020124491, "grad_norm": 34.15970230102539, "learning_rate": 5e-05, "loss": 1.7013, "num_input_tokens_seen": 55109936, "step": 832 }, { "epoch": 0.07787710020124491, "loss": 1.7062803506851196, "loss_ce": 0.002178819617256522, "loss_iou": 0.74609375, "loss_num": 0.0419921875, "loss_xval": 1.703125, "num_input_tokens_seen": 55109936, "step": 832 }, { "epoch": 0.07797070248514064, "grad_norm": 25.373519897460938, "learning_rate": 5e-05, "loss": 1.5804, "num_input_tokens_seen": 55175960, "step": 833 }, { "epoch": 0.07797070248514064, "loss": 1.596665859222412, "loss_ce": 0.0009626125684008002, "loss_iou": 0.6875, "loss_num": 0.043212890625, "loss_xval": 1.59375, "num_input_tokens_seen": 55175960, "step": 833 }, { "epoch": 0.07806430476903636, "grad_norm": 12.413987159729004, "learning_rate": 5e-05, "loss": 1.8258, "num_input_tokens_seen": 55242060, "step": 834 }, { "epoch": 0.07806430476903636, "loss": 1.925345778465271, "loss_ce": 0.00542389415204525, "loss_iou": 0.828125, "loss_num": 0.052734375, "loss_xval": 1.921875, "num_input_tokens_seen": 55242060, "step": 834 }, { "epoch": 0.07815790705293209, "grad_norm": 14.300704956054688, "learning_rate": 5e-05, "loss": 1.6578, "num_input_tokens_seen": 55308092, "step": 835 }, { "epoch": 0.07815790705293209, "loss": 1.5969938039779663, "loss_ce": 0.002267252653837204, "loss_iou": 0.6953125, "loss_num": 0.040283203125, "loss_xval": 1.59375, "num_input_tokens_seen": 55308092, "step": 835 }, { "epoch": 0.07825150933682781, "grad_norm": 22.78268051147461, "learning_rate": 5e-05, "loss": 1.9089, "num_input_tokens_seen": 55373980, "step": 836 }, { "epoch": 0.07825150933682781, "loss": 2.0152018070220947, "loss_ce": 0.004459593910723925, "loss_iou": 0.84765625, "loss_num": 0.0634765625, "loss_xval": 2.015625, "num_input_tokens_seen": 55373980, "step": 836 }, { "epoch": 0.07834511162072355, "grad_norm": 16.693683624267578, "learning_rate": 5e-05, "loss": 2.1208, "num_input_tokens_seen": 55439788, "step": 837 }, { "epoch": 0.07834511162072355, "loss": 2.102088451385498, "loss_ce": 0.0073621273040771484, "loss_iou": 0.90625, "loss_num": 0.0556640625, "loss_xval": 2.09375, "num_input_tokens_seen": 55439788, "step": 837 }, { "epoch": 0.07843871390461928, "grad_norm": 8.320754051208496, "learning_rate": 5e-05, "loss": 1.426, "num_input_tokens_seen": 55506704, "step": 838 }, { "epoch": 0.07843871390461928, "loss": 1.4811341762542725, "loss_ce": 0.0055482941679656506, "loss_iou": 0.62109375, "loss_num": 0.047119140625, "loss_xval": 1.4765625, "num_input_tokens_seen": 55506704, "step": 838 }, { "epoch": 0.078532316188515, "grad_norm": 10.999988555908203, "learning_rate": 5e-05, "loss": 1.4888, "num_input_tokens_seen": 55572076, "step": 839 }, { "epoch": 0.078532316188515, "loss": 1.1216778755187988, "loss_ce": 0.0038800942711532116, "loss_iou": 0.421875, "loss_num": 0.054443359375, "loss_xval": 1.1171875, "num_input_tokens_seen": 55572076, "step": 839 }, { "epoch": 0.07862591847241072, "grad_norm": 15.208309173583984, "learning_rate": 5e-05, "loss": 1.6326, "num_input_tokens_seen": 55637976, "step": 840 }, { "epoch": 0.07862591847241072, "loss": 1.6669137477874756, "loss_ce": 0.003827746957540512, "loss_iou": 0.734375, "loss_num": 0.03955078125, "loss_xval": 1.6640625, "num_input_tokens_seen": 55637976, "step": 840 }, { "epoch": 0.07871952075630645, "grad_norm": 22.545604705810547, "learning_rate": 5e-05, "loss": 1.641, "num_input_tokens_seen": 55704360, "step": 841 }, { "epoch": 0.07871952075630645, "loss": 1.773759126663208, "loss_ce": 0.002274680184200406, "loss_iou": 0.765625, "loss_num": 0.048583984375, "loss_xval": 1.7734375, "num_input_tokens_seen": 55704360, "step": 841 }, { "epoch": 0.07881312304020219, "grad_norm": 19.781217575073242, "learning_rate": 5e-05, "loss": 1.6991, "num_input_tokens_seen": 55770648, "step": 842 }, { "epoch": 0.07881312304020219, "loss": 1.7591058015823364, "loss_ce": 0.006664406508207321, "loss_iou": 0.76953125, "loss_num": 0.043212890625, "loss_xval": 1.75, "num_input_tokens_seen": 55770648, "step": 842 }, { "epoch": 0.07890672532409791, "grad_norm": 15.822068214416504, "learning_rate": 5e-05, "loss": 1.7774, "num_input_tokens_seen": 55837896, "step": 843 }, { "epoch": 0.07890672532409791, "loss": 1.8255829811096191, "loss_ce": 0.004293967038393021, "loss_iou": 0.796875, "loss_num": 0.046630859375, "loss_xval": 1.8203125, "num_input_tokens_seen": 55837896, "step": 843 }, { "epoch": 0.07900032760799364, "grad_norm": 14.252395629882812, "learning_rate": 5e-05, "loss": 1.6272, "num_input_tokens_seen": 55904532, "step": 844 }, { "epoch": 0.07900032760799364, "loss": 1.6053745746612549, "loss_ce": 0.0028354646638035774, "loss_iou": 0.6796875, "loss_num": 0.049560546875, "loss_xval": 1.6015625, "num_input_tokens_seen": 55904532, "step": 844 }, { "epoch": 0.07909392989188936, "grad_norm": 27.333084106445312, "learning_rate": 5e-05, "loss": 1.5755, "num_input_tokens_seen": 55971000, "step": 845 }, { "epoch": 0.07909392989188936, "loss": 1.3764841556549072, "loss_ce": 0.002460794523358345, "loss_iou": 0.62890625, "loss_num": 0.022216796875, "loss_xval": 1.375, "num_input_tokens_seen": 55971000, "step": 845 }, { "epoch": 0.07918753217578509, "grad_norm": 16.1622314453125, "learning_rate": 5e-05, "loss": 2.0193, "num_input_tokens_seen": 56037060, "step": 846 }, { "epoch": 0.07918753217578509, "loss": 2.0520763397216797, "loss_ce": 0.0052011888474226, "loss_iou": 0.8671875, "loss_num": 0.06298828125, "loss_xval": 2.046875, "num_input_tokens_seen": 56037060, "step": 846 }, { "epoch": 0.07928113445968081, "grad_norm": 13.176560401916504, "learning_rate": 5e-05, "loss": 1.6272, "num_input_tokens_seen": 56102972, "step": 847 }, { "epoch": 0.07928113445968081, "loss": 1.5083166360855103, "loss_ce": 0.0029455760959535837, "loss_iou": 0.6328125, "loss_num": 0.047607421875, "loss_xval": 1.5078125, "num_input_tokens_seen": 56102972, "step": 847 }, { "epoch": 0.07937473674357655, "grad_norm": 25.884740829467773, "learning_rate": 5e-05, "loss": 1.7202, "num_input_tokens_seen": 56168500, "step": 848 }, { "epoch": 0.07937473674357655, "loss": 1.7484092712402344, "loss_ce": 0.0023155626840889454, "loss_iou": 0.765625, "loss_num": 0.04248046875, "loss_xval": 1.75, "num_input_tokens_seen": 56168500, "step": 848 }, { "epoch": 0.07946833902747227, "grad_norm": 15.529861450195312, "learning_rate": 5e-05, "loss": 1.9193, "num_input_tokens_seen": 56234944, "step": 849 }, { "epoch": 0.07946833902747227, "loss": 1.848803997039795, "loss_ce": 0.005054059904068708, "loss_iou": 0.828125, "loss_num": 0.038330078125, "loss_xval": 1.84375, "num_input_tokens_seen": 56234944, "step": 849 }, { "epoch": 0.079561941311368, "grad_norm": 20.716421127319336, "learning_rate": 5e-05, "loss": 1.7752, "num_input_tokens_seen": 56300952, "step": 850 }, { "epoch": 0.079561941311368, "loss": 1.8582499027252197, "loss_ce": 0.006687378976494074, "loss_iou": 0.79296875, "loss_num": 0.052734375, "loss_xval": 1.8515625, "num_input_tokens_seen": 56300952, "step": 850 }, { "epoch": 0.07965554359526372, "grad_norm": 17.88934898376465, "learning_rate": 5e-05, "loss": 1.7198, "num_input_tokens_seen": 56367440, "step": 851 }, { "epoch": 0.07965554359526372, "loss": 1.7001900672912598, "loss_ce": 0.0029244059696793556, "loss_iou": 0.7109375, "loss_num": 0.0556640625, "loss_xval": 1.6953125, "num_input_tokens_seen": 56367440, "step": 851 }, { "epoch": 0.07974914587915945, "grad_norm": 28.757474899291992, "learning_rate": 5e-05, "loss": 1.9239, "num_input_tokens_seen": 56434612, "step": 852 }, { "epoch": 0.07974914587915945, "loss": 1.8636595010757446, "loss_ce": 0.003307967446744442, "loss_iou": 0.8359375, "loss_num": 0.03857421875, "loss_xval": 1.859375, "num_input_tokens_seen": 56434612, "step": 852 }, { "epoch": 0.07984274816305517, "grad_norm": 12.52474308013916, "learning_rate": 5e-05, "loss": 1.7685, "num_input_tokens_seen": 56500432, "step": 853 }, { "epoch": 0.07984274816305517, "loss": 1.8079160451889038, "loss_ce": 0.003228507237508893, "loss_iou": 0.8046875, "loss_num": 0.038818359375, "loss_xval": 1.8046875, "num_input_tokens_seen": 56500432, "step": 853 }, { "epoch": 0.07993635044695091, "grad_norm": 13.405757904052734, "learning_rate": 5e-05, "loss": 1.6683, "num_input_tokens_seen": 56566408, "step": 854 }, { "epoch": 0.07993635044695091, "loss": 1.7243707180023193, "loss_ce": 0.0026910006999969482, "loss_iou": 0.7578125, "loss_num": 0.040283203125, "loss_xval": 1.71875, "num_input_tokens_seen": 56566408, "step": 854 }, { "epoch": 0.08002995273084663, "grad_norm": 9.93342399597168, "learning_rate": 5e-05, "loss": 1.4518, "num_input_tokens_seen": 56633640, "step": 855 }, { "epoch": 0.08002995273084663, "loss": 1.5757908821105957, "loss_ce": 0.006455020979046822, "loss_iou": 0.67578125, "loss_num": 0.0439453125, "loss_xval": 1.5703125, "num_input_tokens_seen": 56633640, "step": 855 }, { "epoch": 0.08012355501474236, "grad_norm": 11.159290313720703, "learning_rate": 5e-05, "loss": 1.3769, "num_input_tokens_seen": 56698748, "step": 856 }, { "epoch": 0.08012355501474236, "loss": 1.2927894592285156, "loss_ce": 0.004337380640208721, "loss_iou": 0.578125, "loss_num": 0.0264892578125, "loss_xval": 1.2890625, "num_input_tokens_seen": 56698748, "step": 856 }, { "epoch": 0.08021715729863808, "grad_norm": 28.94257926940918, "learning_rate": 5e-05, "loss": 1.8194, "num_input_tokens_seen": 56765084, "step": 857 }, { "epoch": 0.08021715729863808, "loss": 1.9139653444290161, "loss_ce": 0.00576228741556406, "loss_iou": 0.84375, "loss_num": 0.04345703125, "loss_xval": 1.90625, "num_input_tokens_seen": 56765084, "step": 857 }, { "epoch": 0.08031075958253381, "grad_norm": 14.588022232055664, "learning_rate": 5e-05, "loss": 1.9083, "num_input_tokens_seen": 56831688, "step": 858 }, { "epoch": 0.08031075958253381, "loss": 2.0022599697113037, "loss_ce": 0.0061661312356591225, "loss_iou": 0.8515625, "loss_num": 0.059326171875, "loss_xval": 2.0, "num_input_tokens_seen": 56831688, "step": 858 }, { "epoch": 0.08040436186642955, "grad_norm": 17.378747940063477, "learning_rate": 5e-05, "loss": 1.8547, "num_input_tokens_seen": 56898100, "step": 859 }, { "epoch": 0.08040436186642955, "loss": 2.0381417274475098, "loss_ce": 0.0039619808085262775, "loss_iou": 0.86328125, "loss_num": 0.0615234375, "loss_xval": 2.03125, "num_input_tokens_seen": 56898100, "step": 859 }, { "epoch": 0.08049796415032527, "grad_norm": 23.05256462097168, "learning_rate": 5e-05, "loss": 1.5782, "num_input_tokens_seen": 56964556, "step": 860 }, { "epoch": 0.08049796415032527, "loss": 1.7002537250518799, "loss_ce": 0.0020115277729928493, "loss_iou": 0.76171875, "loss_num": 0.03466796875, "loss_xval": 1.6953125, "num_input_tokens_seen": 56964556, "step": 860 }, { "epoch": 0.080591566434221, "grad_norm": 13.92392635345459, "learning_rate": 5e-05, "loss": 1.8908, "num_input_tokens_seen": 57029556, "step": 861 }, { "epoch": 0.080591566434221, "loss": 1.8966186046600342, "loss_ce": 0.0020873812027275562, "loss_iou": 0.8515625, "loss_num": 0.03759765625, "loss_xval": 1.890625, "num_input_tokens_seen": 57029556, "step": 861 }, { "epoch": 0.08068516871811672, "grad_norm": 43.778316497802734, "learning_rate": 5e-05, "loss": 1.6896, "num_input_tokens_seen": 57096008, "step": 862 }, { "epoch": 0.08068516871811672, "loss": 1.8149826526641846, "loss_ce": 0.0063889408484101295, "loss_iou": 0.77734375, "loss_num": 0.05078125, "loss_xval": 1.8125, "num_input_tokens_seen": 57096008, "step": 862 }, { "epoch": 0.08077877100201244, "grad_norm": 38.19126892089844, "learning_rate": 5e-05, "loss": 1.7982, "num_input_tokens_seen": 57162588, "step": 863 }, { "epoch": 0.08077877100201244, "loss": 1.8195277452468872, "loss_ce": 0.004586325958371162, "loss_iou": 0.78515625, "loss_num": 0.048828125, "loss_xval": 1.8125, "num_input_tokens_seen": 57162588, "step": 863 }, { "epoch": 0.08087237328590817, "grad_norm": 19.279468536376953, "learning_rate": 5e-05, "loss": 1.6604, "num_input_tokens_seen": 57229352, "step": 864 }, { "epoch": 0.08087237328590817, "loss": 1.5278937816619873, "loss_ce": 0.0059212022460997105, "loss_iou": 0.66796875, "loss_num": 0.0380859375, "loss_xval": 1.5234375, "num_input_tokens_seen": 57229352, "step": 864 }, { "epoch": 0.08096597556980391, "grad_norm": 31.98632049560547, "learning_rate": 5e-05, "loss": 1.7721, "num_input_tokens_seen": 57295448, "step": 865 }, { "epoch": 0.08096597556980391, "loss": 1.9108335971832275, "loss_ce": 0.005560180637985468, "loss_iou": 0.796875, "loss_num": 0.0625, "loss_xval": 1.90625, "num_input_tokens_seen": 57295448, "step": 865 }, { "epoch": 0.08105957785369963, "grad_norm": 12.68753433227539, "learning_rate": 5e-05, "loss": 1.7506, "num_input_tokens_seen": 57361884, "step": 866 }, { "epoch": 0.08105957785369963, "loss": 1.7101788520812988, "loss_ce": 0.003147585317492485, "loss_iou": 0.77734375, "loss_num": 0.031494140625, "loss_xval": 1.703125, "num_input_tokens_seen": 57361884, "step": 866 }, { "epoch": 0.08115318013759536, "grad_norm": 15.463430404663086, "learning_rate": 5e-05, "loss": 1.4298, "num_input_tokens_seen": 57427388, "step": 867 }, { "epoch": 0.08115318013759536, "loss": 1.436558485031128, "loss_ce": 0.0063826944679021835, "loss_iou": 0.63671875, "loss_num": 0.0306396484375, "loss_xval": 1.4296875, "num_input_tokens_seen": 57427388, "step": 867 }, { "epoch": 0.08124678242149108, "grad_norm": 30.34566307067871, "learning_rate": 5e-05, "loss": 1.7791, "num_input_tokens_seen": 57493328, "step": 868 }, { "epoch": 0.08124678242149108, "loss": 1.914674162864685, "loss_ce": 0.004517912864685059, "loss_iou": 0.84765625, "loss_num": 0.043212890625, "loss_xval": 1.90625, "num_input_tokens_seen": 57493328, "step": 868 }, { "epoch": 0.0813403847053868, "grad_norm": 14.137175559997559, "learning_rate": 5e-05, "loss": 1.8035, "num_input_tokens_seen": 57560060, "step": 869 }, { "epoch": 0.0813403847053868, "loss": 1.8605313301086426, "loss_ce": 0.005062506068497896, "loss_iou": 0.82421875, "loss_num": 0.0419921875, "loss_xval": 1.859375, "num_input_tokens_seen": 57560060, "step": 869 }, { "epoch": 0.08143398698928254, "grad_norm": 16.45473289489746, "learning_rate": 5e-05, "loss": 1.668, "num_input_tokens_seen": 57626524, "step": 870 }, { "epoch": 0.08143398698928254, "loss": 1.745182991027832, "loss_ce": 0.002995454939082265, "loss_iou": 0.76953125, "loss_num": 0.0400390625, "loss_xval": 1.7421875, "num_input_tokens_seen": 57626524, "step": 870 }, { "epoch": 0.08152758927317827, "grad_norm": 11.80198860168457, "learning_rate": 5e-05, "loss": 1.3168, "num_input_tokens_seen": 57693256, "step": 871 }, { "epoch": 0.08152758927317827, "loss": 1.2056353092193604, "loss_ce": 0.00934632495045662, "loss_iou": 0.53125, "loss_num": 0.0264892578125, "loss_xval": 1.1953125, "num_input_tokens_seen": 57693256, "step": 871 }, { "epoch": 0.081621191557074, "grad_norm": 15.624382019042969, "learning_rate": 5e-05, "loss": 1.5162, "num_input_tokens_seen": 57759600, "step": 872 }, { "epoch": 0.081621191557074, "loss": 1.4846746921539307, "loss_ce": 0.0071356394328176975, "loss_iou": 0.59375, "loss_num": 0.05810546875, "loss_xval": 1.4765625, "num_input_tokens_seen": 57759600, "step": 872 }, { "epoch": 0.08171479384096972, "grad_norm": 34.51239013671875, "learning_rate": 5e-05, "loss": 1.6347, "num_input_tokens_seen": 57825212, "step": 873 }, { "epoch": 0.08171479384096972, "loss": 1.7366607189178467, "loss_ce": 0.00814506970345974, "loss_iou": 0.7109375, "loss_num": 0.0625, "loss_xval": 1.7265625, "num_input_tokens_seen": 57825212, "step": 873 }, { "epoch": 0.08180839612486544, "grad_norm": 10.777180671691895, "learning_rate": 5e-05, "loss": 1.984, "num_input_tokens_seen": 57891160, "step": 874 }, { "epoch": 0.08180839612486544, "loss": 1.9887115955352783, "loss_ce": 0.0043366048485040665, "loss_iou": 0.8984375, "loss_num": 0.03759765625, "loss_xval": 1.984375, "num_input_tokens_seen": 57891160, "step": 874 }, { "epoch": 0.08190199840876117, "grad_norm": 20.751922607421875, "learning_rate": 5e-05, "loss": 1.5662, "num_input_tokens_seen": 57955572, "step": 875 }, { "epoch": 0.08190199840876117, "loss": 1.4291478395462036, "loss_ce": 0.004831395577639341, "loss_iou": 0.61328125, "loss_num": 0.0400390625, "loss_xval": 1.421875, "num_input_tokens_seen": 57955572, "step": 875 }, { "epoch": 0.0819956006926569, "grad_norm": 19.744140625, "learning_rate": 5e-05, "loss": 1.898, "num_input_tokens_seen": 58022716, "step": 876 }, { "epoch": 0.0819956006926569, "loss": 2.0219719409942627, "loss_ce": 0.007323550060391426, "loss_iou": 0.859375, "loss_num": 0.05859375, "loss_xval": 2.015625, "num_input_tokens_seen": 58022716, "step": 876 }, { "epoch": 0.08208920297655263, "grad_norm": 14.733153343200684, "learning_rate": 5e-05, "loss": 1.7235, "num_input_tokens_seen": 58089180, "step": 877 }, { "epoch": 0.08208920297655263, "loss": 1.681530237197876, "loss_ce": 0.004772431682795286, "loss_iou": 0.71484375, "loss_num": 0.050048828125, "loss_xval": 1.6796875, "num_input_tokens_seen": 58089180, "step": 877 }, { "epoch": 0.08218280526044835, "grad_norm": 12.678238868713379, "learning_rate": 5e-05, "loss": 1.4754, "num_input_tokens_seen": 58155328, "step": 878 }, { "epoch": 0.08218280526044835, "loss": 1.3437457084655762, "loss_ce": 0.005855005234479904, "loss_iou": 0.609375, "loss_num": 0.0238037109375, "loss_xval": 1.3359375, "num_input_tokens_seen": 58155328, "step": 878 }, { "epoch": 0.08227640754434408, "grad_norm": 17.92420196533203, "learning_rate": 5e-05, "loss": 1.6522, "num_input_tokens_seen": 58221740, "step": 879 }, { "epoch": 0.08227640754434408, "loss": 1.6979612112045288, "loss_ce": 0.003625293727964163, "loss_iou": 0.71875, "loss_num": 0.05126953125, "loss_xval": 1.6953125, "num_input_tokens_seen": 58221740, "step": 879 }, { "epoch": 0.0823700098282398, "grad_norm": 35.58506774902344, "learning_rate": 5e-05, "loss": 1.7944, "num_input_tokens_seen": 58288656, "step": 880 }, { "epoch": 0.0823700098282398, "loss": 1.6057441234588623, "loss_ce": 0.00418162252753973, "loss_iou": 0.7265625, "loss_num": 0.0302734375, "loss_xval": 1.6015625, "num_input_tokens_seen": 58288656, "step": 880 }, { "epoch": 0.08246361211213554, "grad_norm": 18.107791900634766, "learning_rate": 5e-05, "loss": 1.855, "num_input_tokens_seen": 58355404, "step": 881 }, { "epoch": 0.08246361211213554, "loss": 1.9610812664031982, "loss_ce": 0.0030734376050531864, "loss_iou": 0.89453125, "loss_num": 0.03466796875, "loss_xval": 1.9609375, "num_input_tokens_seen": 58355404, "step": 881 }, { "epoch": 0.08255721439603127, "grad_norm": 15.651595115661621, "learning_rate": 5e-05, "loss": 1.5946, "num_input_tokens_seen": 58421224, "step": 882 }, { "epoch": 0.08255721439603127, "loss": 1.7618749141693115, "loss_ce": 0.003085812320932746, "loss_iou": 0.7578125, "loss_num": 0.0478515625, "loss_xval": 1.7578125, "num_input_tokens_seen": 58421224, "step": 882 }, { "epoch": 0.08265081667992699, "grad_norm": 32.5485725402832, "learning_rate": 5e-05, "loss": 1.7482, "num_input_tokens_seen": 58487880, "step": 883 }, { "epoch": 0.08265081667992699, "loss": 1.725155234336853, "loss_ce": 0.003475519362837076, "loss_iou": 0.7734375, "loss_num": 0.034912109375, "loss_xval": 1.71875, "num_input_tokens_seen": 58487880, "step": 883 }, { "epoch": 0.08274441896382272, "grad_norm": 14.286800384521484, "learning_rate": 5e-05, "loss": 1.7444, "num_input_tokens_seen": 58555228, "step": 884 }, { "epoch": 0.08274441896382272, "loss": 1.6388722658157349, "loss_ce": 0.002397665288299322, "loss_iou": 0.6953125, "loss_num": 0.04931640625, "loss_xval": 1.6328125, "num_input_tokens_seen": 58555228, "step": 884 }, { "epoch": 0.08283802124771844, "grad_norm": 15.21658706665039, "learning_rate": 5e-05, "loss": 1.5943, "num_input_tokens_seen": 58620780, "step": 885 }, { "epoch": 0.08283802124771844, "loss": 1.4916024208068848, "loss_ce": 0.005762574728578329, "loss_iou": 0.6640625, "loss_num": 0.031982421875, "loss_xval": 1.484375, "num_input_tokens_seen": 58620780, "step": 885 }, { "epoch": 0.08293162353161417, "grad_norm": 31.50017738342285, "learning_rate": 5e-05, "loss": 1.5109, "num_input_tokens_seen": 58687504, "step": 886 }, { "epoch": 0.08293162353161417, "loss": 1.4624536037445068, "loss_ce": 0.0029809139668941498, "loss_iou": 0.6640625, "loss_num": 0.0255126953125, "loss_xval": 1.4609375, "num_input_tokens_seen": 58687504, "step": 886 }, { "epoch": 0.0830252258155099, "grad_norm": 17.916427612304688, "learning_rate": 5e-05, "loss": 1.8547, "num_input_tokens_seen": 58753604, "step": 887 }, { "epoch": 0.0830252258155099, "loss": 1.8118348121643066, "loss_ce": 0.0022645373828709126, "loss_iou": 0.7578125, "loss_num": 0.05908203125, "loss_xval": 1.8125, "num_input_tokens_seen": 58753604, "step": 887 }, { "epoch": 0.08311882809940563, "grad_norm": 27.366714477539062, "learning_rate": 5e-05, "loss": 2.0535, "num_input_tokens_seen": 58819832, "step": 888 }, { "epoch": 0.08311882809940563, "loss": 2.1956310272216797, "loss_ce": 0.004224792122840881, "loss_iou": 0.96484375, "loss_num": 0.05322265625, "loss_xval": 2.1875, "num_input_tokens_seen": 58819832, "step": 888 }, { "epoch": 0.08321243038330135, "grad_norm": 15.780597686767578, "learning_rate": 5e-05, "loss": 1.7807, "num_input_tokens_seen": 58885640, "step": 889 }, { "epoch": 0.08321243038330135, "loss": 1.7835116386413574, "loss_ce": 0.0022615769412368536, "loss_iou": 0.765625, "loss_num": 0.04931640625, "loss_xval": 1.78125, "num_input_tokens_seen": 58885640, "step": 889 }, { "epoch": 0.08330603266719708, "grad_norm": 10.075730323791504, "learning_rate": 5e-05, "loss": 1.4639, "num_input_tokens_seen": 58952504, "step": 890 }, { "epoch": 0.08330603266719708, "loss": 1.5905461311340332, "loss_ce": 0.005585219245404005, "loss_iou": 0.62890625, "loss_num": 0.06494140625, "loss_xval": 1.5859375, "num_input_tokens_seen": 58952504, "step": 890 }, { "epoch": 0.0833996349510928, "grad_norm": 45.0782585144043, "learning_rate": 5e-05, "loss": 1.3896, "num_input_tokens_seen": 59019296, "step": 891 }, { "epoch": 0.0833996349510928, "loss": 1.3953213691711426, "loss_ce": 0.0012783545535057783, "loss_iou": 0.60546875, "loss_num": 0.036376953125, "loss_xval": 1.390625, "num_input_tokens_seen": 59019296, "step": 891 }, { "epoch": 0.08349323723498854, "grad_norm": 33.37999725341797, "learning_rate": 5e-05, "loss": 1.8417, "num_input_tokens_seen": 59085368, "step": 892 }, { "epoch": 0.08349323723498854, "loss": 1.6494685411453247, "loss_ce": 0.003960754722356796, "loss_iou": 0.76953125, "loss_num": 0.021484375, "loss_xval": 1.6484375, "num_input_tokens_seen": 59085368, "step": 892 }, { "epoch": 0.08358683951888426, "grad_norm": 24.009674072265625, "learning_rate": 5e-05, "loss": 1.9718, "num_input_tokens_seen": 59151100, "step": 893 }, { "epoch": 0.08358683951888426, "loss": 2.168516159057617, "loss_ce": 0.004453645087778568, "loss_iou": 0.9375, "loss_num": 0.05810546875, "loss_xval": 2.15625, "num_input_tokens_seen": 59151100, "step": 893 }, { "epoch": 0.08368044180277999, "grad_norm": 12.443723678588867, "learning_rate": 5e-05, "loss": 1.4969, "num_input_tokens_seen": 59217480, "step": 894 }, { "epoch": 0.08368044180277999, "loss": 1.652637243270874, "loss_ce": 0.0022466834634542465, "loss_iou": 0.72265625, "loss_num": 0.041015625, "loss_xval": 1.6484375, "num_input_tokens_seen": 59217480, "step": 894 }, { "epoch": 0.08377404408667571, "grad_norm": 11.240812301635742, "learning_rate": 5e-05, "loss": 1.5245, "num_input_tokens_seen": 59282928, "step": 895 }, { "epoch": 0.08377404408667571, "loss": 1.4794633388519287, "loss_ce": 0.00778358755633235, "loss_iou": 0.578125, "loss_num": 0.06298828125, "loss_xval": 1.46875, "num_input_tokens_seen": 59282928, "step": 895 }, { "epoch": 0.08386764637057144, "grad_norm": 14.911636352539062, "learning_rate": 5e-05, "loss": 1.6399, "num_input_tokens_seen": 59348696, "step": 896 }, { "epoch": 0.08386764637057144, "loss": 1.6345075368881226, "loss_ce": 0.0026716054417192936, "loss_iou": 0.671875, "loss_num": 0.058349609375, "loss_xval": 1.6328125, "num_input_tokens_seen": 59348696, "step": 896 }, { "epoch": 0.08396124865446716, "grad_norm": 20.957368850708008, "learning_rate": 5e-05, "loss": 1.5178, "num_input_tokens_seen": 59415700, "step": 897 }, { "epoch": 0.08396124865446716, "loss": 1.464629054069519, "loss_ce": 0.0017384872771799564, "loss_iou": 0.6484375, "loss_num": 0.033203125, "loss_xval": 1.4609375, "num_input_tokens_seen": 59415700, "step": 897 }, { "epoch": 0.0840548509383629, "grad_norm": 8.092466354370117, "learning_rate": 5e-05, "loss": 1.4732, "num_input_tokens_seen": 59481892, "step": 898 }, { "epoch": 0.0840548509383629, "loss": 1.374036431312561, "loss_ce": 0.009778676554560661, "loss_iou": 0.5703125, "loss_num": 0.044189453125, "loss_xval": 1.3671875, "num_input_tokens_seen": 59481892, "step": 898 }, { "epoch": 0.08414845322225863, "grad_norm": 12.05978775024414, "learning_rate": 5e-05, "loss": 1.5063, "num_input_tokens_seen": 59548916, "step": 899 }, { "epoch": 0.08414845322225863, "loss": 1.3426138162612915, "loss_ce": 0.005211510695517063, "loss_iou": 0.56640625, "loss_num": 0.041015625, "loss_xval": 1.3359375, "num_input_tokens_seen": 59548916, "step": 899 }, { "epoch": 0.08424205550615435, "grad_norm": 17.22496223449707, "learning_rate": 5e-05, "loss": 1.7005, "num_input_tokens_seen": 59615852, "step": 900 }, { "epoch": 0.08424205550615435, "loss": 1.6572275161743164, "loss_ce": 0.006836886517703533, "loss_iou": 0.734375, "loss_num": 0.03662109375, "loss_xval": 1.6484375, "num_input_tokens_seen": 59615852, "step": 900 }, { "epoch": 0.08433565779005008, "grad_norm": 18.689083099365234, "learning_rate": 5e-05, "loss": 1.6872, "num_input_tokens_seen": 59683120, "step": 901 }, { "epoch": 0.08433565779005008, "loss": 1.8009494543075562, "loss_ce": 0.007980726659297943, "loss_iou": 0.76171875, "loss_num": 0.05419921875, "loss_xval": 1.796875, "num_input_tokens_seen": 59683120, "step": 901 }, { "epoch": 0.0844292600739458, "grad_norm": 14.570134162902832, "learning_rate": 5e-05, "loss": 1.4228, "num_input_tokens_seen": 59749288, "step": 902 }, { "epoch": 0.0844292600739458, "loss": 1.4552466869354248, "loss_ce": 0.0044409530237317085, "loss_iou": 0.64453125, "loss_num": 0.032470703125, "loss_xval": 1.453125, "num_input_tokens_seen": 59749288, "step": 902 }, { "epoch": 0.08452286235784152, "grad_norm": 27.418418884277344, "learning_rate": 5e-05, "loss": 1.5693, "num_input_tokens_seen": 59815208, "step": 903 }, { "epoch": 0.08452286235784152, "loss": 1.429563045501709, "loss_ce": 0.005246618762612343, "loss_iou": 0.62109375, "loss_num": 0.0361328125, "loss_xval": 1.421875, "num_input_tokens_seen": 59815208, "step": 903 }, { "epoch": 0.08461646464173726, "grad_norm": 14.06216812133789, "learning_rate": 5e-05, "loss": 2.0079, "num_input_tokens_seen": 59882632, "step": 904 }, { "epoch": 0.08461646464173726, "loss": 1.9677444696426392, "loss_ce": 0.006806934252381325, "loss_iou": 0.84765625, "loss_num": 0.052734375, "loss_xval": 1.9609375, "num_input_tokens_seen": 59882632, "step": 904 }, { "epoch": 0.08471006692563299, "grad_norm": 13.0679292678833, "learning_rate": 5e-05, "loss": 1.6538, "num_input_tokens_seen": 59948612, "step": 905 }, { "epoch": 0.08471006692563299, "loss": 1.3045681715011597, "loss_ce": 0.001986362971365452, "loss_iou": 0.5546875, "loss_num": 0.038818359375, "loss_xval": 1.3046875, "num_input_tokens_seen": 59948612, "step": 905 }, { "epoch": 0.08480366920952871, "grad_norm": 25.379179000854492, "learning_rate": 5e-05, "loss": 1.6384, "num_input_tokens_seen": 60014432, "step": 906 }, { "epoch": 0.08480366920952871, "loss": 1.6766581535339355, "loss_ce": 0.007712891325354576, "loss_iou": 0.71875, "loss_num": 0.0458984375, "loss_xval": 1.671875, "num_input_tokens_seen": 60014432, "step": 906 }, { "epoch": 0.08489727149342444, "grad_norm": 18.72802734375, "learning_rate": 5e-05, "loss": 1.8672, "num_input_tokens_seen": 60081776, "step": 907 }, { "epoch": 0.08489727149342444, "loss": 1.750260591506958, "loss_ce": 0.0022137737832963467, "loss_iou": 0.734375, "loss_num": 0.055908203125, "loss_xval": 1.75, "num_input_tokens_seen": 60081776, "step": 907 }, { "epoch": 0.08499087377732016, "grad_norm": 12.83232593536377, "learning_rate": 5e-05, "loss": 1.5873, "num_input_tokens_seen": 60147888, "step": 908 }, { "epoch": 0.08499087377732016, "loss": 1.647781252861023, "loss_ce": 0.0056913672015070915, "loss_iou": 0.6640625, "loss_num": 0.0625, "loss_xval": 1.640625, "num_input_tokens_seen": 60147888, "step": 908 }, { "epoch": 0.0850844760612159, "grad_norm": 23.512590408325195, "learning_rate": 5e-05, "loss": 1.5675, "num_input_tokens_seen": 60214828, "step": 909 }, { "epoch": 0.0850844760612159, "loss": 1.4222242832183838, "loss_ce": 0.0023023297544568777, "loss_iou": 0.5546875, "loss_num": 0.0625, "loss_xval": 1.421875, "num_input_tokens_seen": 60214828, "step": 909 }, { "epoch": 0.08517807834511162, "grad_norm": 23.48229217529297, "learning_rate": 5e-05, "loss": 1.5639, "num_input_tokens_seen": 60281228, "step": 910 }, { "epoch": 0.08517807834511162, "loss": 1.6129997968673706, "loss_ce": 0.0036248769611120224, "loss_iou": 0.71484375, "loss_num": 0.036376953125, "loss_xval": 1.609375, "num_input_tokens_seen": 60281228, "step": 910 }, { "epoch": 0.08527168062900735, "grad_norm": 15.640399932861328, "learning_rate": 5e-05, "loss": 1.8778, "num_input_tokens_seen": 60347096, "step": 911 }, { "epoch": 0.08527168062900735, "loss": 1.9476490020751953, "loss_ce": 0.0042894878424704075, "loss_iou": 0.8515625, "loss_num": 0.0478515625, "loss_xval": 1.9453125, "num_input_tokens_seen": 60347096, "step": 911 }, { "epoch": 0.08536528291290307, "grad_norm": 8.834310531616211, "learning_rate": 5e-05, "loss": 1.5887, "num_input_tokens_seen": 60414356, "step": 912 }, { "epoch": 0.08536528291290307, "loss": 1.7419757843017578, "loss_ce": 0.005647665821015835, "loss_iou": 0.7578125, "loss_num": 0.044189453125, "loss_xval": 1.734375, "num_input_tokens_seen": 60414356, "step": 912 }, { "epoch": 0.0854588851967988, "grad_norm": 23.853418350219727, "learning_rate": 5e-05, "loss": 1.622, "num_input_tokens_seen": 60480536, "step": 913 }, { "epoch": 0.0854588851967988, "loss": 1.509779930114746, "loss_ce": 0.002943947445601225, "loss_iou": 0.67578125, "loss_num": 0.030517578125, "loss_xval": 1.5078125, "num_input_tokens_seen": 60480536, "step": 913 }, { "epoch": 0.08555248748069452, "grad_norm": 44.07422637939453, "learning_rate": 5e-05, "loss": 2.0665, "num_input_tokens_seen": 60546960, "step": 914 }, { "epoch": 0.08555248748069452, "loss": 2.2943153381347656, "loss_ce": 0.00720581691712141, "loss_iou": 0.96484375, "loss_num": 0.072265625, "loss_xval": 2.28125, "num_input_tokens_seen": 60546960, "step": 914 }, { "epoch": 0.08564608976459026, "grad_norm": 16.99718475341797, "learning_rate": 5e-05, "loss": 1.6176, "num_input_tokens_seen": 60614616, "step": 915 }, { "epoch": 0.08564608976459026, "loss": 1.5280823707580566, "loss_ce": 0.005133206490427256, "loss_iou": 0.64453125, "loss_num": 0.04638671875, "loss_xval": 1.5234375, "num_input_tokens_seen": 60614616, "step": 915 }, { "epoch": 0.08573969204848599, "grad_norm": 25.052688598632812, "learning_rate": 5e-05, "loss": 1.9113, "num_input_tokens_seen": 60681420, "step": 916 }, { "epoch": 0.08573969204848599, "loss": 2.1192517280578613, "loss_ce": 0.004994058981537819, "loss_iou": 0.89453125, "loss_num": 0.0654296875, "loss_xval": 2.109375, "num_input_tokens_seen": 60681420, "step": 916 }, { "epoch": 0.08583329433238171, "grad_norm": 15.865578651428223, "learning_rate": 5e-05, "loss": 1.9126, "num_input_tokens_seen": 60747748, "step": 917 }, { "epoch": 0.08583329433238171, "loss": 2.0023865699768066, "loss_ce": 0.006292684003710747, "loss_iou": 0.84375, "loss_num": 0.0625, "loss_xval": 2.0, "num_input_tokens_seen": 60747748, "step": 917 }, { "epoch": 0.08592689661627743, "grad_norm": 15.136137008666992, "learning_rate": 5e-05, "loss": 1.3747, "num_input_tokens_seen": 60812176, "step": 918 }, { "epoch": 0.08592689661627743, "loss": 1.3903110027313232, "loss_ce": 0.003012371016666293, "loss_iou": 0.5703125, "loss_num": 0.050048828125, "loss_xval": 1.390625, "num_input_tokens_seen": 60812176, "step": 918 }, { "epoch": 0.08602049890017316, "grad_norm": 30.088048934936523, "learning_rate": 5e-05, "loss": 1.4225, "num_input_tokens_seen": 60878408, "step": 919 }, { "epoch": 0.08602049890017316, "loss": 1.393389105796814, "loss_ce": 0.003496528370305896, "loss_iou": 0.6171875, "loss_num": 0.031982421875, "loss_xval": 1.390625, "num_input_tokens_seen": 60878408, "step": 919 }, { "epoch": 0.0861141011840689, "grad_norm": 16.893909454345703, "learning_rate": 5e-05, "loss": 1.6646, "num_input_tokens_seen": 60944844, "step": 920 }, { "epoch": 0.0861141011840689, "loss": 1.6034700870513916, "loss_ce": 0.004349029157310724, "loss_iou": 0.703125, "loss_num": 0.039306640625, "loss_xval": 1.6015625, "num_input_tokens_seen": 60944844, "step": 920 }, { "epoch": 0.08620770346796462, "grad_norm": 15.906914710998535, "learning_rate": 5e-05, "loss": 1.6203, "num_input_tokens_seen": 61011520, "step": 921 }, { "epoch": 0.08620770346796462, "loss": 1.7279441356658936, "loss_ce": 0.003334770444780588, "loss_iou": 0.76171875, "loss_num": 0.04052734375, "loss_xval": 1.7265625, "num_input_tokens_seen": 61011520, "step": 921 }, { "epoch": 0.08630130575186035, "grad_norm": 22.44422721862793, "learning_rate": 5e-05, "loss": 1.487, "num_input_tokens_seen": 61077408, "step": 922 }, { "epoch": 0.08630130575186035, "loss": 1.4773905277252197, "loss_ce": 0.0027811103500425816, "loss_iou": 0.671875, "loss_num": 0.0269775390625, "loss_xval": 1.4765625, "num_input_tokens_seen": 61077408, "step": 922 }, { "epoch": 0.08639490803575607, "grad_norm": 15.368884086608887, "learning_rate": 5e-05, "loss": 1.1949, "num_input_tokens_seen": 61141920, "step": 923 }, { "epoch": 0.08639490803575607, "loss": 1.0608628988265991, "loss_ce": 0.005839703604578972, "loss_iou": 0.4453125, "loss_num": 0.033203125, "loss_xval": 1.0546875, "num_input_tokens_seen": 61141920, "step": 923 }, { "epoch": 0.0864885103196518, "grad_norm": 22.95160484313965, "learning_rate": 5e-05, "loss": 1.5344, "num_input_tokens_seen": 61208808, "step": 924 }, { "epoch": 0.0864885103196518, "loss": 1.5811108350753784, "loss_ce": 0.003962422721087933, "loss_iou": 0.67578125, "loss_num": 0.045654296875, "loss_xval": 1.578125, "num_input_tokens_seen": 61208808, "step": 924 }, { "epoch": 0.08658211260354752, "grad_norm": 16.989505767822266, "learning_rate": 5e-05, "loss": 1.8756, "num_input_tokens_seen": 61274632, "step": 925 }, { "epoch": 0.08658211260354752, "loss": 1.8655545711517334, "loss_ce": 0.008132727816700935, "loss_iou": 0.77734375, "loss_num": 0.060546875, "loss_xval": 1.859375, "num_input_tokens_seen": 61274632, "step": 925 }, { "epoch": 0.08667571488744326, "grad_norm": 12.435086250305176, "learning_rate": 5e-05, "loss": 1.3212, "num_input_tokens_seen": 61340672, "step": 926 }, { "epoch": 0.08667571488744326, "loss": 1.4617154598236084, "loss_ce": 0.003707642201334238, "loss_iou": 0.640625, "loss_num": 0.035888671875, "loss_xval": 1.4609375, "num_input_tokens_seen": 61340672, "step": 926 }, { "epoch": 0.08676931717133898, "grad_norm": 13.720226287841797, "learning_rate": 5e-05, "loss": 1.6492, "num_input_tokens_seen": 61407436, "step": 927 }, { "epoch": 0.08676931717133898, "loss": 1.6545886993408203, "loss_ce": 0.005174613557755947, "loss_iou": 0.703125, "loss_num": 0.048583984375, "loss_xval": 1.6484375, "num_input_tokens_seen": 61407436, "step": 927 }, { "epoch": 0.08686291945523471, "grad_norm": 19.40408706665039, "learning_rate": 5e-05, "loss": 1.5196, "num_input_tokens_seen": 61474000, "step": 928 }, { "epoch": 0.08686291945523471, "loss": 1.5394935607910156, "loss_ce": 0.0014076820807531476, "loss_iou": 0.6796875, "loss_num": 0.034912109375, "loss_xval": 1.5390625, "num_input_tokens_seen": 61474000, "step": 928 }, { "epoch": 0.08695652173913043, "grad_norm": 40.76962661743164, "learning_rate": 5e-05, "loss": 1.67, "num_input_tokens_seen": 61540732, "step": 929 }, { "epoch": 0.08695652173913043, "loss": 1.6722123622894287, "loss_ce": 0.006196821108460426, "loss_iou": 0.71875, "loss_num": 0.0458984375, "loss_xval": 1.6640625, "num_input_tokens_seen": 61540732, "step": 929 }, { "epoch": 0.08705012402302616, "grad_norm": 14.666232109069824, "learning_rate": 5e-05, "loss": 2.0639, "num_input_tokens_seen": 61607380, "step": 930 }, { "epoch": 0.08705012402302616, "loss": 2.0115692615509033, "loss_ce": 0.005709872581064701, "loss_iou": 0.86328125, "loss_num": 0.055908203125, "loss_xval": 2.0, "num_input_tokens_seen": 61607380, "step": 930 }, { "epoch": 0.0871437263069219, "grad_norm": 13.070978164672852, "learning_rate": 5e-05, "loss": 1.6374, "num_input_tokens_seen": 61673452, "step": 931 }, { "epoch": 0.0871437263069219, "loss": 1.5387829542160034, "loss_ce": 0.003626648336648941, "loss_iou": 0.625, "loss_num": 0.05712890625, "loss_xval": 1.53125, "num_input_tokens_seen": 61673452, "step": 931 }, { "epoch": 0.08723732859081762, "grad_norm": 16.796812057495117, "learning_rate": 5e-05, "loss": 1.7552, "num_input_tokens_seen": 61740540, "step": 932 }, { "epoch": 0.08723732859081762, "loss": 1.6623833179473877, "loss_ce": 0.007109887897968292, "loss_iou": 0.703125, "loss_num": 0.049560546875, "loss_xval": 1.65625, "num_input_tokens_seen": 61740540, "step": 932 }, { "epoch": 0.08733093087471334, "grad_norm": 23.889570236206055, "learning_rate": 5e-05, "loss": 1.5407, "num_input_tokens_seen": 61807348, "step": 933 }, { "epoch": 0.08733093087471334, "loss": 1.667421817779541, "loss_ce": 0.0038475480396300554, "loss_iou": 0.71875, "loss_num": 0.0458984375, "loss_xval": 1.6640625, "num_input_tokens_seen": 61807348, "step": 933 }, { "epoch": 0.08742453315860907, "grad_norm": 14.975662231445312, "learning_rate": 5e-05, "loss": 1.6892, "num_input_tokens_seen": 61875108, "step": 934 }, { "epoch": 0.08742453315860907, "loss": 1.7929887771606445, "loss_ce": 0.006855999119579792, "loss_iou": 0.8046875, "loss_num": 0.034423828125, "loss_xval": 1.7890625, "num_input_tokens_seen": 61875108, "step": 934 }, { "epoch": 0.0875181354425048, "grad_norm": 11.098994255065918, "learning_rate": 5e-05, "loss": 1.6709, "num_input_tokens_seen": 61942064, "step": 935 }, { "epoch": 0.0875181354425048, "loss": 1.6905457973480225, "loss_ce": 0.002557536819949746, "loss_iou": 0.74609375, "loss_num": 0.038330078125, "loss_xval": 1.6875, "num_input_tokens_seen": 61942064, "step": 935 }, { "epoch": 0.08761173772640052, "grad_norm": 14.541089057922363, "learning_rate": 5e-05, "loss": 1.3169, "num_input_tokens_seen": 62007472, "step": 936 }, { "epoch": 0.08761173772640052, "loss": 1.4198613166809082, "loss_ce": 0.007324597332626581, "loss_iou": 0.5859375, "loss_num": 0.047607421875, "loss_xval": 1.4140625, "num_input_tokens_seen": 62007472, "step": 936 }, { "epoch": 0.08770534001029626, "grad_norm": 16.678556442260742, "learning_rate": 5e-05, "loss": 1.4454, "num_input_tokens_seen": 62074592, "step": 937 }, { "epoch": 0.08770534001029626, "loss": 1.5979841947555542, "loss_ce": 0.004234226420521736, "loss_iou": 0.7109375, "loss_num": 0.033447265625, "loss_xval": 1.59375, "num_input_tokens_seen": 62074592, "step": 937 }, { "epoch": 0.08779894229419198, "grad_norm": 28.974525451660156, "learning_rate": 5e-05, "loss": 1.7548, "num_input_tokens_seen": 62141012, "step": 938 }, { "epoch": 0.08779894229419198, "loss": 1.825490951538086, "loss_ce": 0.0022488520480692387, "loss_iou": 0.796875, "loss_num": 0.046875, "loss_xval": 1.8203125, "num_input_tokens_seen": 62141012, "step": 938 }, { "epoch": 0.0878925445780877, "grad_norm": 39.65151596069336, "learning_rate": 5e-05, "loss": 1.8679, "num_input_tokens_seen": 62205744, "step": 939 }, { "epoch": 0.0878925445780877, "loss": 1.9288134574890137, "loss_ce": 0.0059618037194013596, "loss_iou": 0.8359375, "loss_num": 0.05078125, "loss_xval": 1.921875, "num_input_tokens_seen": 62205744, "step": 939 }, { "epoch": 0.08798614686198343, "grad_norm": 11.894693374633789, "learning_rate": 5e-05, "loss": 1.4746, "num_input_tokens_seen": 62271900, "step": 940 }, { "epoch": 0.08798614686198343, "loss": 1.4833482503890991, "loss_ce": 0.0043443311005830765, "loss_iou": 0.66015625, "loss_num": 0.031982421875, "loss_xval": 1.4765625, "num_input_tokens_seen": 62271900, "step": 940 }, { "epoch": 0.08807974914587915, "grad_norm": 18.46121597290039, "learning_rate": 5e-05, "loss": 1.368, "num_input_tokens_seen": 62338956, "step": 941 }, { "epoch": 0.08807974914587915, "loss": 1.403991937637329, "loss_ce": 0.0021365319844335318, "loss_iou": 0.60546875, "loss_num": 0.038818359375, "loss_xval": 1.3984375, "num_input_tokens_seen": 62338956, "step": 941 }, { "epoch": 0.0881733514297749, "grad_norm": 15.685604095458984, "learning_rate": 5e-05, "loss": 1.5076, "num_input_tokens_seen": 62405004, "step": 942 }, { "epoch": 0.0881733514297749, "loss": 1.4380909204483032, "loss_ce": 0.0035205574240535498, "loss_iou": 0.62890625, "loss_num": 0.03515625, "loss_xval": 1.4375, "num_input_tokens_seen": 62405004, "step": 942 }, { "epoch": 0.08826695371367062, "grad_norm": 70.2530288696289, "learning_rate": 5e-05, "loss": 1.5796, "num_input_tokens_seen": 62470268, "step": 943 }, { "epoch": 0.08826695371367062, "loss": 1.508975863456726, "loss_ce": 0.005557904951274395, "loss_iou": 0.625, "loss_num": 0.05029296875, "loss_xval": 1.5, "num_input_tokens_seen": 62470268, "step": 943 }, { "epoch": 0.08836055599756634, "grad_norm": 18.887065887451172, "learning_rate": 5e-05, "loss": 1.8409, "num_input_tokens_seen": 62535964, "step": 944 }, { "epoch": 0.08836055599756634, "loss": 1.9452190399169922, "loss_ce": 0.0038127328734844923, "loss_iou": 0.86328125, "loss_num": 0.043212890625, "loss_xval": 1.9375, "num_input_tokens_seen": 62535964, "step": 944 }, { "epoch": 0.08845415828146207, "grad_norm": 15.448012351989746, "learning_rate": 5e-05, "loss": 1.7827, "num_input_tokens_seen": 62601912, "step": 945 }, { "epoch": 0.08845415828146207, "loss": 1.796706199645996, "loss_ce": 0.0030049309134483337, "loss_iou": 0.765625, "loss_num": 0.052734375, "loss_xval": 1.796875, "num_input_tokens_seen": 62601912, "step": 945 }, { "epoch": 0.08854776056535779, "grad_norm": 16.356338500976562, "learning_rate": 5e-05, "loss": 1.559, "num_input_tokens_seen": 62668316, "step": 946 }, { "epoch": 0.08854776056535779, "loss": 1.63395357131958, "loss_ce": 0.005047284997999668, "loss_iou": 0.734375, "loss_num": 0.032470703125, "loss_xval": 1.625, "num_input_tokens_seen": 62668316, "step": 946 }, { "epoch": 0.08864136284925352, "grad_norm": 29.35332489013672, "learning_rate": 5e-05, "loss": 1.6491, "num_input_tokens_seen": 62734160, "step": 947 }, { "epoch": 0.08864136284925352, "loss": 1.5638450384140015, "loss_ce": 0.003603259800001979, "loss_iou": 0.67578125, "loss_num": 0.041259765625, "loss_xval": 1.5625, "num_input_tokens_seen": 62734160, "step": 947 }, { "epoch": 0.08873496513314925, "grad_norm": 14.649731636047363, "learning_rate": 5e-05, "loss": 1.9831, "num_input_tokens_seen": 62800212, "step": 948 }, { "epoch": 0.08873496513314925, "loss": 2.0474257469177246, "loss_ce": 0.0034804297611117363, "loss_iou": 0.87890625, "loss_num": 0.05712890625, "loss_xval": 2.046875, "num_input_tokens_seen": 62800212, "step": 948 }, { "epoch": 0.08882856741704498, "grad_norm": 17.539445877075195, "learning_rate": 5e-05, "loss": 1.3548, "num_input_tokens_seen": 62865900, "step": 949 }, { "epoch": 0.08882856741704498, "loss": 0.9725180864334106, "loss_ce": 0.004164813086390495, "loss_iou": 0.41015625, "loss_num": 0.0299072265625, "loss_xval": 0.96875, "num_input_tokens_seen": 62865900, "step": 949 }, { "epoch": 0.0889221697009407, "grad_norm": 13.716097831726074, "learning_rate": 5e-05, "loss": 1.6504, "num_input_tokens_seen": 62931220, "step": 950 }, { "epoch": 0.0889221697009407, "loss": 1.743227481842041, "loss_ce": 0.0068993838503956795, "loss_iou": 0.703125, "loss_num": 0.0654296875, "loss_xval": 1.734375, "num_input_tokens_seen": 62931220, "step": 950 }, { "epoch": 0.08901577198483643, "grad_norm": 13.76308536529541, "learning_rate": 5e-05, "loss": 1.5302, "num_input_tokens_seen": 62997712, "step": 951 }, { "epoch": 0.08901577198483643, "loss": 1.5223302841186523, "loss_ce": 0.005423550494015217, "loss_iou": 0.63671875, "loss_num": 0.048583984375, "loss_xval": 1.515625, "num_input_tokens_seen": 62997712, "step": 951 }, { "epoch": 0.08910937426873215, "grad_norm": 30.656652450561523, "learning_rate": 5e-05, "loss": 1.6095, "num_input_tokens_seen": 63063788, "step": 952 }, { "epoch": 0.08910937426873215, "loss": 1.8086628913879395, "loss_ce": 0.006905019748955965, "loss_iou": 0.796875, "loss_num": 0.041015625, "loss_xval": 1.8046875, "num_input_tokens_seen": 63063788, "step": 952 }, { "epoch": 0.08920297655262788, "grad_norm": 12.013118743896484, "learning_rate": 5e-05, "loss": 1.8896, "num_input_tokens_seen": 63129584, "step": 953 }, { "epoch": 0.08920297655262788, "loss": 1.8647828102111816, "loss_ce": 0.005529878661036491, "loss_iou": 0.765625, "loss_num": 0.06591796875, "loss_xval": 1.859375, "num_input_tokens_seen": 63129584, "step": 953 }, { "epoch": 0.08929657883652362, "grad_norm": 17.934505462646484, "learning_rate": 5e-05, "loss": 1.8149, "num_input_tokens_seen": 63196748, "step": 954 }, { "epoch": 0.08929657883652362, "loss": 1.9400184154510498, "loss_ce": 0.0064246766269207, "loss_iou": 0.8203125, "loss_num": 0.05859375, "loss_xval": 1.9375, "num_input_tokens_seen": 63196748, "step": 954 }, { "epoch": 0.08939018112041934, "grad_norm": 17.4960880279541, "learning_rate": 5e-05, "loss": 1.7313, "num_input_tokens_seen": 63262920, "step": 955 }, { "epoch": 0.08939018112041934, "loss": 1.6163601875305176, "loss_ce": 0.002346484223380685, "loss_iou": 0.66796875, "loss_num": 0.055419921875, "loss_xval": 1.6171875, "num_input_tokens_seen": 63262920, "step": 955 }, { "epoch": 0.08948378340431506, "grad_norm": 23.754989624023438, "learning_rate": 5e-05, "loss": 1.7231, "num_input_tokens_seen": 63330032, "step": 956 }, { "epoch": 0.08948378340431506, "loss": 1.6572893857955933, "loss_ce": 0.004945650231093168, "loss_iou": 0.734375, "loss_num": 0.03759765625, "loss_xval": 1.65625, "num_input_tokens_seen": 63330032, "step": 956 }, { "epoch": 0.08957738568821079, "grad_norm": 21.422574996948242, "learning_rate": 5e-05, "loss": 1.6317, "num_input_tokens_seen": 63396576, "step": 957 }, { "epoch": 0.08957738568821079, "loss": 1.9376003742218018, "loss_ce": 0.0020536172669380903, "loss_iou": 0.83203125, "loss_num": 0.054931640625, "loss_xval": 1.9375, "num_input_tokens_seen": 63396576, "step": 957 }, { "epoch": 0.08967098797210651, "grad_norm": 22.355945587158203, "learning_rate": 5e-05, "loss": 1.912, "num_input_tokens_seen": 63463616, "step": 958 }, { "epoch": 0.08967098797210651, "loss": 2.114009380340576, "loss_ce": 0.00463454844430089, "loss_iou": 0.90625, "loss_num": 0.059814453125, "loss_xval": 2.109375, "num_input_tokens_seen": 63463616, "step": 958 }, { "epoch": 0.08976459025600225, "grad_norm": 18.09412956237793, "learning_rate": 5e-05, "loss": 1.5119, "num_input_tokens_seen": 63530248, "step": 959 }, { "epoch": 0.08976459025600225, "loss": 1.5951027870178223, "loss_ce": 0.008677100762724876, "loss_iou": 0.640625, "loss_num": 0.060791015625, "loss_xval": 1.5859375, "num_input_tokens_seen": 63530248, "step": 959 }, { "epoch": 0.08985819253989798, "grad_norm": 20.48488426208496, "learning_rate": 5e-05, "loss": 1.6661, "num_input_tokens_seen": 63596024, "step": 960 }, { "epoch": 0.08985819253989798, "loss": 1.8077481985092163, "loss_ce": 0.005990276113152504, "loss_iou": 0.76953125, "loss_num": 0.052734375, "loss_xval": 1.8046875, "num_input_tokens_seen": 63596024, "step": 960 }, { "epoch": 0.0899517948237937, "grad_norm": 17.473188400268555, "learning_rate": 5e-05, "loss": 1.5852, "num_input_tokens_seen": 63662480, "step": 961 }, { "epoch": 0.0899517948237937, "loss": 1.464072823524475, "loss_ce": 0.0070415991358459, "loss_iou": 0.60546875, "loss_num": 0.048828125, "loss_xval": 1.453125, "num_input_tokens_seen": 63662480, "step": 961 }, { "epoch": 0.09004539710768943, "grad_norm": 14.590004920959473, "learning_rate": 5e-05, "loss": 1.618, "num_input_tokens_seen": 63727416, "step": 962 }, { "epoch": 0.09004539710768943, "loss": 1.4077198505401611, "loss_ce": 0.0036671289708465338, "loss_iou": 0.58984375, "loss_num": 0.045166015625, "loss_xval": 1.40625, "num_input_tokens_seen": 63727416, "step": 962 }, { "epoch": 0.09013899939158515, "grad_norm": 21.898130416870117, "learning_rate": 5e-05, "loss": 1.5763, "num_input_tokens_seen": 63794508, "step": 963 }, { "epoch": 0.09013899939158515, "loss": 1.5762133598327637, "loss_ce": 0.003947695717215538, "loss_iou": 0.70703125, "loss_num": 0.031494140625, "loss_xval": 1.5703125, "num_input_tokens_seen": 63794508, "step": 963 }, { "epoch": 0.09023260167548088, "grad_norm": 69.45911407470703, "learning_rate": 5e-05, "loss": 1.9389, "num_input_tokens_seen": 63861312, "step": 964 }, { "epoch": 0.09023260167548088, "loss": 2.0766565799713135, "loss_ce": 0.0063440739177167416, "loss_iou": 0.87109375, "loss_num": 0.0654296875, "loss_xval": 2.0625, "num_input_tokens_seen": 63861312, "step": 964 }, { "epoch": 0.09032620395937661, "grad_norm": 16.2425537109375, "learning_rate": 5e-05, "loss": 1.6971, "num_input_tokens_seen": 63927640, "step": 965 }, { "epoch": 0.09032620395937661, "loss": 1.727881669998169, "loss_ce": 0.004248844925314188, "loss_iou": 0.75, "loss_num": 0.045166015625, "loss_xval": 1.7265625, "num_input_tokens_seen": 63927640, "step": 965 }, { "epoch": 0.09041980624327234, "grad_norm": 28.23118782043457, "learning_rate": 5e-05, "loss": 1.535, "num_input_tokens_seen": 63993164, "step": 966 }, { "epoch": 0.09041980624327234, "loss": 1.6622929573059082, "loss_ce": 0.006042903289198875, "loss_iou": 0.75, "loss_num": 0.031005859375, "loss_xval": 1.65625, "num_input_tokens_seen": 63993164, "step": 966 }, { "epoch": 0.09051340852716806, "grad_norm": 14.857102394104004, "learning_rate": 5e-05, "loss": 1.86, "num_input_tokens_seen": 64058972, "step": 967 }, { "epoch": 0.09051340852716806, "loss": 1.811227560043335, "loss_ce": 0.0014131104107946157, "loss_iou": 0.76953125, "loss_num": 0.053466796875, "loss_xval": 1.8125, "num_input_tokens_seen": 64058972, "step": 967 }, { "epoch": 0.09060701081106379, "grad_norm": 19.822324752807617, "learning_rate": 5e-05, "loss": 1.6783, "num_input_tokens_seen": 64125460, "step": 968 }, { "epoch": 0.09060701081106379, "loss": 1.712416410446167, "loss_ce": 0.001479036989621818, "loss_iou": 0.703125, "loss_num": 0.061279296875, "loss_xval": 1.7109375, "num_input_tokens_seen": 64125460, "step": 968 }, { "epoch": 0.09070061309495951, "grad_norm": 31.154621124267578, "learning_rate": 5e-05, "loss": 1.6444, "num_input_tokens_seen": 64192432, "step": 969 }, { "epoch": 0.09070061309495951, "loss": 1.865007996559143, "loss_ce": 0.002703359816223383, "loss_iou": 0.8125, "loss_num": 0.046630859375, "loss_xval": 1.859375, "num_input_tokens_seen": 64192432, "step": 969 }, { "epoch": 0.09079421537885525, "grad_norm": 14.616561889648438, "learning_rate": 5e-05, "loss": 1.8386, "num_input_tokens_seen": 64258844, "step": 970 }, { "epoch": 0.09079421537885525, "loss": 1.719399094581604, "loss_ce": 0.004555476363748312, "loss_iou": 0.75390625, "loss_num": 0.041259765625, "loss_xval": 1.71875, "num_input_tokens_seen": 64258844, "step": 970 }, { "epoch": 0.09088781766275097, "grad_norm": 14.370298385620117, "learning_rate": 5e-05, "loss": 1.4226, "num_input_tokens_seen": 64324916, "step": 971 }, { "epoch": 0.09088781766275097, "loss": 1.3002216815948486, "loss_ce": 0.002370052505284548, "loss_iou": 0.51171875, "loss_num": 0.055419921875, "loss_xval": 1.296875, "num_input_tokens_seen": 64324916, "step": 971 }, { "epoch": 0.0909814199466467, "grad_norm": 16.304475784301758, "learning_rate": 5e-05, "loss": 1.5028, "num_input_tokens_seen": 64391260, "step": 972 }, { "epoch": 0.0909814199466467, "loss": 1.6733556985855103, "loss_ce": 0.0034338238183408976, "loss_iou": 0.69921875, "loss_num": 0.054931640625, "loss_xval": 1.671875, "num_input_tokens_seen": 64391260, "step": 972 }, { "epoch": 0.09107502223054242, "grad_norm": 16.182010650634766, "learning_rate": 5e-05, "loss": 1.8612, "num_input_tokens_seen": 64458276, "step": 973 }, { "epoch": 0.09107502223054242, "loss": 1.7737977504730225, "loss_ce": 0.004266506526619196, "loss_iou": 0.75, "loss_num": 0.053955078125, "loss_xval": 1.765625, "num_input_tokens_seen": 64458276, "step": 973 }, { "epoch": 0.09116862451443815, "grad_norm": 78.58245086669922, "learning_rate": 5e-05, "loss": 1.5493, "num_input_tokens_seen": 64524384, "step": 974 }, { "epoch": 0.09116862451443815, "loss": 1.5430245399475098, "loss_ce": 0.007868239656090736, "loss_iou": 0.6796875, "loss_num": 0.03564453125, "loss_xval": 1.53125, "num_input_tokens_seen": 64524384, "step": 974 }, { "epoch": 0.09126222679833387, "grad_norm": 28.305130004882812, "learning_rate": 5e-05, "loss": 1.5059, "num_input_tokens_seen": 64590308, "step": 975 }, { "epoch": 0.09126222679833387, "loss": 1.351921796798706, "loss_ce": 0.004998022690415382, "loss_iou": 0.5703125, "loss_num": 0.04150390625, "loss_xval": 1.34375, "num_input_tokens_seen": 64590308, "step": 975 }, { "epoch": 0.09135582908222961, "grad_norm": 27.344154357910156, "learning_rate": 5e-05, "loss": 1.6304, "num_input_tokens_seen": 64657676, "step": 976 }, { "epoch": 0.09135582908222961, "loss": 1.6344707012176514, "loss_ce": 0.0045879255048930645, "loss_iou": 0.7109375, "loss_num": 0.041015625, "loss_xval": 1.6328125, "num_input_tokens_seen": 64657676, "step": 976 }, { "epoch": 0.09144943136612534, "grad_norm": 17.74469566345215, "learning_rate": 5e-05, "loss": 1.8817, "num_input_tokens_seen": 64724036, "step": 977 }, { "epoch": 0.09144943136612534, "loss": 1.943572998046875, "loss_ce": 0.00412002531811595, "loss_iou": 0.828125, "loss_num": 0.05615234375, "loss_xval": 1.9375, "num_input_tokens_seen": 64724036, "step": 977 }, { "epoch": 0.09154303365002106, "grad_norm": 11.425139427185059, "learning_rate": 5e-05, "loss": 1.5377, "num_input_tokens_seen": 64789396, "step": 978 }, { "epoch": 0.09154303365002106, "loss": 1.3548119068145752, "loss_ce": 0.003005302045494318, "loss_iou": 0.5859375, "loss_num": 0.036376953125, "loss_xval": 1.3515625, "num_input_tokens_seen": 64789396, "step": 978 }, { "epoch": 0.09163663593391679, "grad_norm": 21.837615966796875, "learning_rate": 5e-05, "loss": 1.6378, "num_input_tokens_seen": 64855472, "step": 979 }, { "epoch": 0.09163663593391679, "loss": 1.6065376996994019, "loss_ce": 0.006684138905256987, "loss_iou": 0.671875, "loss_num": 0.0517578125, "loss_xval": 1.6015625, "num_input_tokens_seen": 64855472, "step": 979 }, { "epoch": 0.09173023821781251, "grad_norm": 25.40251922607422, "learning_rate": 5e-05, "loss": 1.4749, "num_input_tokens_seen": 64922676, "step": 980 }, { "epoch": 0.09173023821781251, "loss": 1.4515924453735352, "loss_ce": 0.005303313955664635, "loss_iou": 0.59375, "loss_num": 0.051025390625, "loss_xval": 1.4453125, "num_input_tokens_seen": 64922676, "step": 980 }, { "epoch": 0.09182384050170825, "grad_norm": 22.099281311035156, "learning_rate": 5e-05, "loss": 1.8676, "num_input_tokens_seen": 64988632, "step": 981 }, { "epoch": 0.09182384050170825, "loss": 2.053835391998291, "loss_ce": 0.004030559211969376, "loss_iou": 0.87890625, "loss_num": 0.057861328125, "loss_xval": 2.046875, "num_input_tokens_seen": 64988632, "step": 981 }, { "epoch": 0.09191744278560397, "grad_norm": 15.45548152923584, "learning_rate": 5e-05, "loss": 1.4767, "num_input_tokens_seen": 65054192, "step": 982 }, { "epoch": 0.09191744278560397, "loss": 1.357534646987915, "loss_ce": 0.0018217508913949132, "loss_iou": 0.5390625, "loss_num": 0.056396484375, "loss_xval": 1.359375, "num_input_tokens_seen": 65054192, "step": 982 }, { "epoch": 0.0920110450694997, "grad_norm": 14.510873794555664, "learning_rate": 5e-05, "loss": 1.7271, "num_input_tokens_seen": 65121220, "step": 983 }, { "epoch": 0.0920110450694997, "loss": 1.6353819370269775, "loss_ce": 0.00256940396502614, "loss_iou": 0.70703125, "loss_num": 0.0439453125, "loss_xval": 1.6328125, "num_input_tokens_seen": 65121220, "step": 983 }, { "epoch": 0.09210464735339542, "grad_norm": 22.228652954101562, "learning_rate": 5e-05, "loss": 1.6905, "num_input_tokens_seen": 65188016, "step": 984 }, { "epoch": 0.09210464735339542, "loss": 1.6479146480560303, "loss_ce": 0.0038717109709978104, "loss_iou": 0.7421875, "loss_num": 0.0311279296875, "loss_xval": 1.640625, "num_input_tokens_seen": 65188016, "step": 984 }, { "epoch": 0.09219824963729115, "grad_norm": 20.86760139465332, "learning_rate": 5e-05, "loss": 1.7681, "num_input_tokens_seen": 65253952, "step": 985 }, { "epoch": 0.09219824963729115, "loss": 1.8290784358978271, "loss_ce": 0.002906452864408493, "loss_iou": 0.75390625, "loss_num": 0.06298828125, "loss_xval": 1.828125, "num_input_tokens_seen": 65253952, "step": 985 }, { "epoch": 0.09229185192118687, "grad_norm": 21.462255477905273, "learning_rate": 5e-05, "loss": 1.8411, "num_input_tokens_seen": 65320044, "step": 986 }, { "epoch": 0.09229185192118687, "loss": 1.9524469375610352, "loss_ce": 0.00518146064132452, "loss_iou": 0.86328125, "loss_num": 0.043212890625, "loss_xval": 1.9453125, "num_input_tokens_seen": 65320044, "step": 986 }, { "epoch": 0.09238545420508261, "grad_norm": 19.662405014038086, "learning_rate": 5e-05, "loss": 1.8031, "num_input_tokens_seen": 65386520, "step": 987 }, { "epoch": 0.09238545420508261, "loss": 1.8032032251358032, "loss_ce": 0.004375106655061245, "loss_iou": 0.78515625, "loss_num": 0.045654296875, "loss_xval": 1.796875, "num_input_tokens_seen": 65386520, "step": 987 }, { "epoch": 0.09247905648897833, "grad_norm": 30.860292434692383, "learning_rate": 5e-05, "loss": 1.5476, "num_input_tokens_seen": 65453280, "step": 988 }, { "epoch": 0.09247905648897833, "loss": 1.5351617336273193, "loss_ce": 0.006841444410383701, "loss_iou": 0.62890625, "loss_num": 0.054443359375, "loss_xval": 1.53125, "num_input_tokens_seen": 65453280, "step": 988 }, { "epoch": 0.09257265877287406, "grad_norm": 17.08321189880371, "learning_rate": 5e-05, "loss": 1.5348, "num_input_tokens_seen": 65519240, "step": 989 }, { "epoch": 0.09257265877287406, "loss": 1.6281179189682007, "loss_ce": 0.005559375509619713, "loss_iou": 0.71875, "loss_num": 0.037841796875, "loss_xval": 1.625, "num_input_tokens_seen": 65519240, "step": 989 }, { "epoch": 0.09266626105676978, "grad_norm": 27.292034149169922, "learning_rate": 5e-05, "loss": 1.5467, "num_input_tokens_seen": 65584904, "step": 990 }, { "epoch": 0.09266626105676978, "loss": 1.4985840320587158, "loss_ce": 0.003466897876933217, "loss_iou": 0.609375, "loss_num": 0.056396484375, "loss_xval": 1.4921875, "num_input_tokens_seen": 65584904, "step": 990 }, { "epoch": 0.09275986334066551, "grad_norm": 16.059263229370117, "learning_rate": 5e-05, "loss": 2.0579, "num_input_tokens_seen": 65650572, "step": 991 }, { "epoch": 0.09275986334066551, "loss": 2.122138500213623, "loss_ce": 0.01007784716784954, "loss_iou": 0.859375, "loss_num": 0.07861328125, "loss_xval": 2.109375, "num_input_tokens_seen": 65650572, "step": 991 }, { "epoch": 0.09285346562456125, "grad_norm": 23.014415740966797, "learning_rate": 5e-05, "loss": 1.5944, "num_input_tokens_seen": 65716568, "step": 992 }, { "epoch": 0.09285346562456125, "loss": 1.6441433429718018, "loss_ce": 0.004494882188737392, "loss_iou": 0.6953125, "loss_num": 0.0498046875, "loss_xval": 1.640625, "num_input_tokens_seen": 65716568, "step": 992 }, { "epoch": 0.09294706790845697, "grad_norm": 15.229471206665039, "learning_rate": 5e-05, "loss": 1.4162, "num_input_tokens_seen": 65782944, "step": 993 }, { "epoch": 0.09294706790845697, "loss": 1.480794906616211, "loss_ce": 0.013021371327340603, "loss_iou": 0.6015625, "loss_num": 0.05322265625, "loss_xval": 1.46875, "num_input_tokens_seen": 65782944, "step": 993 }, { "epoch": 0.0930406701923527, "grad_norm": 14.0590238571167, "learning_rate": 5e-05, "loss": 1.8864, "num_input_tokens_seen": 65848924, "step": 994 }, { "epoch": 0.0930406701923527, "loss": 1.8647971153259277, "loss_ce": 0.008351797237992287, "loss_iou": 0.81640625, "loss_num": 0.045654296875, "loss_xval": 1.859375, "num_input_tokens_seen": 65848924, "step": 994 }, { "epoch": 0.09313427247624842, "grad_norm": 18.439361572265625, "learning_rate": 5e-05, "loss": 1.4629, "num_input_tokens_seen": 65914444, "step": 995 }, { "epoch": 0.09313427247624842, "loss": 1.2895300388336182, "loss_ce": 0.002695329487323761, "loss_iou": 0.5234375, "loss_num": 0.0478515625, "loss_xval": 1.2890625, "num_input_tokens_seen": 65914444, "step": 995 }, { "epoch": 0.09322787476014414, "grad_norm": 21.675338745117188, "learning_rate": 5e-05, "loss": 1.5215, "num_input_tokens_seen": 65980580, "step": 996 }, { "epoch": 0.09322787476014414, "loss": 1.4937338829040527, "loss_ce": 0.005452618934214115, "loss_iou": 0.65625, "loss_num": 0.0361328125, "loss_xval": 1.484375, "num_input_tokens_seen": 65980580, "step": 996 }, { "epoch": 0.09332147704403987, "grad_norm": 23.317962646484375, "learning_rate": 5e-05, "loss": 1.5736, "num_input_tokens_seen": 66046996, "step": 997 }, { "epoch": 0.09332147704403987, "loss": 1.5401999950408936, "loss_ce": 0.0030905466992408037, "loss_iou": 0.67578125, "loss_num": 0.036865234375, "loss_xval": 1.5390625, "num_input_tokens_seen": 66046996, "step": 997 }, { "epoch": 0.09341507932793561, "grad_norm": 17.197994232177734, "learning_rate": 5e-05, "loss": 1.8094, "num_input_tokens_seen": 66113444, "step": 998 }, { "epoch": 0.09341507932793561, "loss": 1.8027560710906982, "loss_ce": 0.003439765190705657, "loss_iou": 0.75, "loss_num": 0.06005859375, "loss_xval": 1.796875, "num_input_tokens_seen": 66113444, "step": 998 }, { "epoch": 0.09350868161183133, "grad_norm": 16.98614501953125, "learning_rate": 5e-05, "loss": 1.6693, "num_input_tokens_seen": 66178728, "step": 999 }, { "epoch": 0.09350868161183133, "loss": 1.5198097229003906, "loss_ce": 0.007114410400390625, "loss_iou": 0.58984375, "loss_num": 0.06591796875, "loss_xval": 1.515625, "num_input_tokens_seen": 66178728, "step": 999 }, { "epoch": 0.09360228389572706, "grad_norm": 16.32314682006836, "learning_rate": 5e-05, "loss": 1.5448, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09360228389572706, "eval_seeclick_CIoU": 0.03466115053743124, "eval_seeclick_GIoU": 0.012590939877554774, "eval_seeclick_IoU": 0.18651100993156433, "eval_seeclick_MAE_all": 0.1619100198149681, "eval_seeclick_MAE_h": 0.1370251551270485, "eval_seeclick_MAE_w": 0.13397662341594696, "eval_seeclick_MAE_x_boxes": 0.2869945168495178, "eval_seeclick_MAE_y_boxes": 0.0982743352651596, "eval_seeclick_NUM_probability": 0.9988741278648376, "eval_seeclick_inside_bbox": 0.3739583343267441, "eval_seeclick_loss": 2.791210174560547, "eval_seeclick_loss_ce": 0.01485899556428194, "eval_seeclick_loss_iou": 0.987060546875, "eval_seeclick_loss_num": 0.163177490234375, "eval_seeclick_loss_xval": 2.7919921875, "eval_seeclick_runtime": 61.7115, "eval_seeclick_samples_per_second": 0.762, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09360228389572706, "eval_icons_CIoU": -0.1584959700703621, "eval_icons_GIoU": -0.08978072926402092, "eval_icons_IoU": 0.06438343971967697, "eval_icons_MAE_all": 0.20530376583337784, "eval_icons_MAE_h": 0.26106757670640945, "eval_icons_MAE_w": 0.18720195442438126, "eval_icons_MAE_x_boxes": 0.1660088151693344, "eval_icons_MAE_y_boxes": 0.09818163141608238, "eval_icons_NUM_probability": 0.9957078993320465, "eval_icons_inside_bbox": 0.1180555559694767, "eval_icons_loss": 3.2430856227874756, "eval_icons_loss_ce": 0.0015207797405309975, "eval_icons_loss_iou": 1.114501953125, "eval_icons_loss_num": 0.2159423828125, "eval_icons_loss_xval": 3.310546875, "eval_icons_runtime": 63.9957, "eval_icons_samples_per_second": 0.781, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09360228389572706, "eval_screenspot_CIoU": -0.03200580676396688, "eval_screenspot_GIoU": -0.029922740999609232, "eval_screenspot_IoU": 0.16735319793224335, "eval_screenspot_MAE_all": 0.19810542464256287, "eval_screenspot_MAE_h": 0.19106672704219818, "eval_screenspot_MAE_w": 0.18381081521511078, "eval_screenspot_MAE_x_boxes": 0.24538678924242655, "eval_screenspot_MAE_y_boxes": 0.11639802157878876, "eval_screenspot_NUM_probability": 0.9960946639378866, "eval_screenspot_inside_bbox": 0.3937500019868215, "eval_screenspot_loss": 3.08681058883667, "eval_screenspot_loss_ce": 0.012565320047239462, "eval_screenspot_loss_iou": 1.0498046875, "eval_screenspot_loss_num": 0.20220947265625, "eval_screenspot_loss_xval": 3.1106770833333335, "eval_screenspot_runtime": 119.1968, "eval_screenspot_samples_per_second": 0.747, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09360228389572706, "eval_compot_CIoU": -0.0126183507964015, "eval_compot_GIoU": -0.0007607042789459229, "eval_compot_IoU": 0.1446545273065567, "eval_compot_MAE_all": 0.14552883058786392, "eval_compot_MAE_h": 0.07847815565764904, "eval_compot_MAE_w": 0.15001967549324036, "eval_compot_MAE_x_boxes": 0.15651856362819672, "eval_compot_MAE_y_boxes": 0.12546472623944283, "eval_compot_NUM_probability": 0.9959275722503662, "eval_compot_inside_bbox": 0.3194444477558136, "eval_compot_loss": 2.76811146736145, "eval_compot_loss_ce": 0.00711844814941287, "eval_compot_loss_iou": 1.0244140625, "eval_compot_loss_num": 0.15118408203125, "eval_compot_loss_xval": 2.8056640625, "eval_compot_runtime": 70.0337, "eval_compot_samples_per_second": 0.714, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09360228389572706, "eval_custom_ui_MAE_all": 0.14354324340820312, "eval_custom_ui_MAE_x": 0.10222059860825539, "eval_custom_ui_MAE_y": 0.18486589938402176, "eval_custom_ui_NUM_probability": 0.9998778402805328, "eval_custom_ui_loss": 0.7289488315582275, "eval_custom_ui_loss_ce": 0.02813083864748478, "eval_custom_ui_loss_num": 0.15020751953125, "eval_custom_ui_loss_xval": 0.75146484375, "eval_custom_ui_runtime": 51.587, "eval_custom_ui_samples_per_second": 0.969, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09360228389572706, "loss": 0.8095696568489075, "loss_ce": 0.03222590684890747, "loss_iou": 0.0, "loss_num": 0.1552734375, "loss_xval": 0.77734375, "num_input_tokens_seen": 66245348, "step": 1000 }, { "epoch": 0.09369588617962278, "grad_norm": 22.253944396972656, "learning_rate": 5e-05, "loss": 1.4737, "num_input_tokens_seen": 66310660, "step": 1001 }, { "epoch": 0.09369588617962278, "loss": 1.4513487815856934, "loss_ce": 0.004083187319338322, "loss_iou": 0.6171875, "loss_num": 0.04296875, "loss_xval": 1.4453125, "num_input_tokens_seen": 66310660, "step": 1001 }, { "epoch": 0.0937894884635185, "grad_norm": 13.835038185119629, "learning_rate": 5e-05, "loss": 1.7459, "num_input_tokens_seen": 66376868, "step": 1002 }, { "epoch": 0.0937894884635185, "loss": 1.898911476135254, "loss_ce": 0.004380341153591871, "loss_iou": 0.83203125, "loss_num": 0.04541015625, "loss_xval": 1.890625, "num_input_tokens_seen": 66376868, "step": 1002 }, { "epoch": 0.09388309074741423, "grad_norm": 15.812870979309082, "learning_rate": 5e-05, "loss": 1.3805, "num_input_tokens_seen": 66443344, "step": 1003 }, { "epoch": 0.09388309074741423, "loss": 1.3290150165557861, "loss_ce": 0.0028431350365281105, "loss_iou": 0.6015625, "loss_num": 0.0238037109375, "loss_xval": 1.328125, "num_input_tokens_seen": 66443344, "step": 1003 }, { "epoch": 0.09397669303130997, "grad_norm": 38.81221389770508, "learning_rate": 5e-05, "loss": 1.72, "num_input_tokens_seen": 66509552, "step": 1004 }, { "epoch": 0.09397669303130997, "loss": 1.5138416290283203, "loss_ce": 0.0021228091791272163, "loss_iou": 0.6953125, "loss_num": 0.0235595703125, "loss_xval": 1.515625, "num_input_tokens_seen": 66509552, "step": 1004 }, { "epoch": 0.0940702953152057, "grad_norm": 12.50516414642334, "learning_rate": 5e-05, "loss": 1.7761, "num_input_tokens_seen": 66575992, "step": 1005 }, { "epoch": 0.0940702953152057, "loss": 1.655672550201416, "loss_ce": 0.0037561198696494102, "loss_iou": 0.69921875, "loss_num": 0.051025390625, "loss_xval": 1.6484375, "num_input_tokens_seen": 66575992, "step": 1005 }, { "epoch": 0.09416389759910142, "grad_norm": 16.19294548034668, "learning_rate": 5e-05, "loss": 1.8414, "num_input_tokens_seen": 66643048, "step": 1006 }, { "epoch": 0.09416389759910142, "loss": 1.7385921478271484, "loss_ce": 0.003240494290366769, "loss_iou": 0.765625, "loss_num": 0.04052734375, "loss_xval": 1.734375, "num_input_tokens_seen": 66643048, "step": 1006 }, { "epoch": 0.09425749988299714, "grad_norm": 98.92877197265625, "learning_rate": 5e-05, "loss": 1.7013, "num_input_tokens_seen": 66708988, "step": 1007 }, { "epoch": 0.09425749988299714, "loss": 1.6064229011535645, "loss_ce": 0.00779008911922574, "loss_iou": 0.6640625, "loss_num": 0.053955078125, "loss_xval": 1.6015625, "num_input_tokens_seen": 66708988, "step": 1007 }, { "epoch": 0.09435110216689287, "grad_norm": 25.077068328857422, "learning_rate": 5e-05, "loss": 1.651, "num_input_tokens_seen": 66774484, "step": 1008 }, { "epoch": 0.09435110216689287, "loss": 1.7158145904541016, "loss_ce": 0.002924002707004547, "loss_iou": 0.7578125, "loss_num": 0.039306640625, "loss_xval": 1.7109375, "num_input_tokens_seen": 66774484, "step": 1008 }, { "epoch": 0.0944447044507886, "grad_norm": 12.136792182922363, "learning_rate": 5e-05, "loss": 1.8415, "num_input_tokens_seen": 66840160, "step": 1009 }, { "epoch": 0.0944447044507886, "loss": 1.9895738363265991, "loss_ce": 0.004222306422889233, "loss_iou": 0.83203125, "loss_num": 0.06396484375, "loss_xval": 1.984375, "num_input_tokens_seen": 66840160, "step": 1009 }, { "epoch": 0.09453830673468433, "grad_norm": 18.081323623657227, "learning_rate": 5e-05, "loss": 1.4525, "num_input_tokens_seen": 66906144, "step": 1010 }, { "epoch": 0.09453830673468433, "loss": 1.523803949356079, "loss_ce": 0.004272680729627609, "loss_iou": 0.62890625, "loss_num": 0.052978515625, "loss_xval": 1.515625, "num_input_tokens_seen": 66906144, "step": 1010 }, { "epoch": 0.09463190901858005, "grad_norm": 22.84792137145996, "learning_rate": 5e-05, "loss": 1.4413, "num_input_tokens_seen": 66972860, "step": 1011 }, { "epoch": 0.09463190901858005, "loss": 1.464858055114746, "loss_ce": 0.007826870307326317, "loss_iou": 0.62109375, "loss_num": 0.043212890625, "loss_xval": 1.453125, "num_input_tokens_seen": 66972860, "step": 1011 }, { "epoch": 0.09472551130247578, "grad_norm": 18.576574325561523, "learning_rate": 5e-05, "loss": 2.0495, "num_input_tokens_seen": 67039584, "step": 1012 }, { "epoch": 0.09472551130247578, "loss": 1.9049363136291504, "loss_ce": 0.002592559903860092, "loss_iou": 0.81640625, "loss_num": 0.0537109375, "loss_xval": 1.90625, "num_input_tokens_seen": 67039584, "step": 1012 }, { "epoch": 0.0948191135863715, "grad_norm": 16.99166488647461, "learning_rate": 5e-05, "loss": 1.5516, "num_input_tokens_seen": 67106348, "step": 1013 }, { "epoch": 0.0948191135863715, "loss": 1.682436466217041, "loss_ce": 0.007631717249751091, "loss_iou": 0.70703125, "loss_num": 0.0517578125, "loss_xval": 1.671875, "num_input_tokens_seen": 67106348, "step": 1013 }, { "epoch": 0.09491271587026723, "grad_norm": 29.552988052368164, "learning_rate": 5e-05, "loss": 1.372, "num_input_tokens_seen": 67172900, "step": 1014 }, { "epoch": 0.09491271587026723, "loss": 1.5988967418670654, "loss_ce": 0.005146836396306753, "loss_iou": 0.6953125, "loss_num": 0.040283203125, "loss_xval": 1.59375, "num_input_tokens_seen": 67172900, "step": 1014 }, { "epoch": 0.09500631815416297, "grad_norm": 16.46036720275879, "learning_rate": 5e-05, "loss": 1.5688, "num_input_tokens_seen": 67239208, "step": 1015 }, { "epoch": 0.09500631815416297, "loss": 1.612764835357666, "loss_ce": 0.0024134009145200253, "loss_iou": 0.734375, "loss_num": 0.0279541015625, "loss_xval": 1.609375, "num_input_tokens_seen": 67239208, "step": 1015 }, { "epoch": 0.09509992043805869, "grad_norm": 25.499847412109375, "learning_rate": 5e-05, "loss": 1.4352, "num_input_tokens_seen": 67305596, "step": 1016 }, { "epoch": 0.09509992043805869, "loss": 1.2837343215942383, "loss_ce": 0.0063906023278832436, "loss_iou": 0.55859375, "loss_num": 0.0322265625, "loss_xval": 1.28125, "num_input_tokens_seen": 67305596, "step": 1016 }, { "epoch": 0.09519352272195442, "grad_norm": 29.628381729125977, "learning_rate": 5e-05, "loss": 1.8951, "num_input_tokens_seen": 67371004, "step": 1017 }, { "epoch": 0.09519352272195442, "loss": 1.9087352752685547, "loss_ce": 0.0034617616329342127, "loss_iou": 0.83984375, "loss_num": 0.04541015625, "loss_xval": 1.90625, "num_input_tokens_seen": 67371004, "step": 1017 }, { "epoch": 0.09528712500585014, "grad_norm": 23.89756202697754, "learning_rate": 5e-05, "loss": 1.4198, "num_input_tokens_seen": 67437148, "step": 1018 }, { "epoch": 0.09528712500585014, "loss": 1.547162413597107, "loss_ce": 0.0061468021012842655, "loss_iou": 0.671875, "loss_num": 0.0390625, "loss_xval": 1.5390625, "num_input_tokens_seen": 67437148, "step": 1018 }, { "epoch": 0.09538072728974586, "grad_norm": 17.92390251159668, "learning_rate": 5e-05, "loss": 1.69, "num_input_tokens_seen": 67502872, "step": 1019 }, { "epoch": 0.09538072728974586, "loss": 1.6546707153320312, "loss_ce": 0.004280084278434515, "loss_iou": 0.74609375, "loss_num": 0.0322265625, "loss_xval": 1.6484375, "num_input_tokens_seen": 67502872, "step": 1019 }, { "epoch": 0.0954743295736416, "grad_norm": 16.330259323120117, "learning_rate": 5e-05, "loss": 1.6055, "num_input_tokens_seen": 67569084, "step": 1020 }, { "epoch": 0.0954743295736416, "loss": 1.5637905597686768, "loss_ce": 0.007149962708353996, "loss_iou": 0.6875, "loss_num": 0.036376953125, "loss_xval": 1.5546875, "num_input_tokens_seen": 67569084, "step": 1020 }, { "epoch": 0.09556793185753733, "grad_norm": 26.579259872436523, "learning_rate": 5e-05, "loss": 1.8511, "num_input_tokens_seen": 67635584, "step": 1021 }, { "epoch": 0.09556793185753733, "loss": 1.7766225337982178, "loss_ce": 0.007091335952281952, "loss_iou": 0.73828125, "loss_num": 0.05908203125, "loss_xval": 1.765625, "num_input_tokens_seen": 67635584, "step": 1021 }, { "epoch": 0.09566153414143305, "grad_norm": 16.56151008605957, "learning_rate": 5e-05, "loss": 1.9727, "num_input_tokens_seen": 67703132, "step": 1022 }, { "epoch": 0.09566153414143305, "loss": 1.791287899017334, "loss_ce": 0.003201971994712949, "loss_iou": 0.78515625, "loss_num": 0.04296875, "loss_xval": 1.7890625, "num_input_tokens_seen": 67703132, "step": 1022 }, { "epoch": 0.09575513642532878, "grad_norm": 14.52706527709961, "learning_rate": 5e-05, "loss": 1.4742, "num_input_tokens_seen": 67768912, "step": 1023 }, { "epoch": 0.09575513642532878, "loss": 1.4676893949508667, "loss_ce": 0.0028456742875277996, "loss_iou": 0.6484375, "loss_num": 0.033203125, "loss_xval": 1.46875, "num_input_tokens_seen": 67768912, "step": 1023 }, { "epoch": 0.0958487387092245, "grad_norm": 61.90464401245117, "learning_rate": 5e-05, "loss": 1.5133, "num_input_tokens_seen": 67834020, "step": 1024 }, { "epoch": 0.0958487387092245, "loss": 1.5269737243652344, "loss_ce": 0.0019493482541292906, "loss_iou": 0.68359375, "loss_num": 0.03076171875, "loss_xval": 1.5234375, "num_input_tokens_seen": 67834020, "step": 1024 }, { "epoch": 0.09594234099312023, "grad_norm": 12.636026382446289, "learning_rate": 5e-05, "loss": 1.988, "num_input_tokens_seen": 67900200, "step": 1025 }, { "epoch": 0.09594234099312023, "loss": 2.130302906036377, "loss_ce": 0.002373168943449855, "loss_iou": 0.8984375, "loss_num": 0.06640625, "loss_xval": 2.125, "num_input_tokens_seen": 67900200, "step": 1025 }, { "epoch": 0.09603594327701596, "grad_norm": 17.263626098632812, "learning_rate": 5e-05, "loss": 1.6959, "num_input_tokens_seen": 67965520, "step": 1026 }, { "epoch": 0.09603594327701596, "loss": 1.4326584339141846, "loss_ce": 0.0063889846205711365, "loss_iou": 0.6015625, "loss_num": 0.04541015625, "loss_xval": 1.4296875, "num_input_tokens_seen": 67965520, "step": 1026 }, { "epoch": 0.09612954556091169, "grad_norm": 29.37456512451172, "learning_rate": 5e-05, "loss": 1.5141, "num_input_tokens_seen": 68031500, "step": 1027 }, { "epoch": 0.09612954556091169, "loss": 1.3183093070983887, "loss_ce": 0.0039782398380339146, "loss_iou": 0.55859375, "loss_num": 0.0390625, "loss_xval": 1.3125, "num_input_tokens_seen": 68031500, "step": 1027 }, { "epoch": 0.09622314784480741, "grad_norm": 16.778858184814453, "learning_rate": 5e-05, "loss": 1.6878, "num_input_tokens_seen": 68098236, "step": 1028 }, { "epoch": 0.09622314784480741, "loss": 1.533916711807251, "loss_ce": 0.0036432542838156223, "loss_iou": 0.63671875, "loss_num": 0.051513671875, "loss_xval": 1.53125, "num_input_tokens_seen": 68098236, "step": 1028 }, { "epoch": 0.09631675012870314, "grad_norm": 69.1749267578125, "learning_rate": 5e-05, "loss": 1.5744, "num_input_tokens_seen": 68164000, "step": 1029 }, { "epoch": 0.09631675012870314, "loss": 1.693444013595581, "loss_ce": 0.00447924854233861, "loss_iou": 0.734375, "loss_num": 0.044921875, "loss_xval": 1.6875, "num_input_tokens_seen": 68164000, "step": 1029 }, { "epoch": 0.09641035241259886, "grad_norm": 15.717203140258789, "learning_rate": 5e-05, "loss": 1.7683, "num_input_tokens_seen": 68230360, "step": 1030 }, { "epoch": 0.09641035241259886, "loss": 1.7388529777526855, "loss_ce": 0.006431044079363346, "loss_iou": 0.80078125, "loss_num": 0.0272216796875, "loss_xval": 1.734375, "num_input_tokens_seen": 68230360, "step": 1030 }, { "epoch": 0.0965039546964946, "grad_norm": 15.72042179107666, "learning_rate": 5e-05, "loss": 1.6475, "num_input_tokens_seen": 68296604, "step": 1031 }, { "epoch": 0.0965039546964946, "loss": 1.5739208459854126, "loss_ce": 0.0036083075683563948, "loss_iou": 0.671875, "loss_num": 0.04443359375, "loss_xval": 1.5703125, "num_input_tokens_seen": 68296604, "step": 1031 }, { "epoch": 0.09659755698039033, "grad_norm": 14.753561973571777, "learning_rate": 5e-05, "loss": 1.5485, "num_input_tokens_seen": 68362820, "step": 1032 }, { "epoch": 0.09659755698039033, "loss": 1.3991491794586182, "loss_ce": 0.011453813873231411, "loss_iou": 0.5859375, "loss_num": 0.043212890625, "loss_xval": 1.390625, "num_input_tokens_seen": 68362820, "step": 1032 }, { "epoch": 0.09669115926428605, "grad_norm": 21.489585876464844, "learning_rate": 5e-05, "loss": 1.5655, "num_input_tokens_seen": 68428880, "step": 1033 }, { "epoch": 0.09669115926428605, "loss": 1.5785999298095703, "loss_ce": 0.00438120448961854, "loss_iou": 0.6640625, "loss_num": 0.049560546875, "loss_xval": 1.578125, "num_input_tokens_seen": 68428880, "step": 1033 }, { "epoch": 0.09678476154818177, "grad_norm": 24.65547752380371, "learning_rate": 5e-05, "loss": 1.5685, "num_input_tokens_seen": 68495700, "step": 1034 }, { "epoch": 0.09678476154818177, "loss": 1.5529437065124512, "loss_ce": 0.006068655289709568, "loss_iou": 0.671875, "loss_num": 0.041259765625, "loss_xval": 1.546875, "num_input_tokens_seen": 68495700, "step": 1034 }, { "epoch": 0.0968783638320775, "grad_norm": 22.68483543395996, "learning_rate": 5e-05, "loss": 1.5734, "num_input_tokens_seen": 68561484, "step": 1035 }, { "epoch": 0.0968783638320775, "loss": 1.5440155267715454, "loss_ce": 0.0028777297120541334, "loss_iou": 0.65625, "loss_num": 0.04541015625, "loss_xval": 1.5390625, "num_input_tokens_seen": 68561484, "step": 1035 }, { "epoch": 0.09697196611597322, "grad_norm": 18.365602493286133, "learning_rate": 5e-05, "loss": 1.71, "num_input_tokens_seen": 68628544, "step": 1036 }, { "epoch": 0.09697196611597322, "loss": 1.703133225440979, "loss_ce": 0.000984803307801485, "loss_iou": 0.76953125, "loss_num": 0.0322265625, "loss_xval": 1.703125, "num_input_tokens_seen": 68628544, "step": 1036 }, { "epoch": 0.09706556839986896, "grad_norm": 29.792518615722656, "learning_rate": 5e-05, "loss": 1.5854, "num_input_tokens_seen": 68695448, "step": 1037 }, { "epoch": 0.09706556839986896, "loss": 1.714836835861206, "loss_ce": 0.006829025689512491, "loss_iou": 0.734375, "loss_num": 0.048583984375, "loss_xval": 1.7109375, "num_input_tokens_seen": 68695448, "step": 1037 }, { "epoch": 0.09715917068376469, "grad_norm": 16.52050018310547, "learning_rate": 5e-05, "loss": 1.6163, "num_input_tokens_seen": 68761552, "step": 1038 }, { "epoch": 0.09715917068376469, "loss": 1.5071520805358887, "loss_ce": 0.005687330383807421, "loss_iou": 0.6796875, "loss_num": 0.0291748046875, "loss_xval": 1.5, "num_input_tokens_seen": 68761552, "step": 1038 }, { "epoch": 0.09725277296766041, "grad_norm": 15.1870756149292, "learning_rate": 5e-05, "loss": 1.6116, "num_input_tokens_seen": 68827932, "step": 1039 }, { "epoch": 0.09725277296766041, "loss": 1.667130947113037, "loss_ce": 0.005998114589601755, "loss_iou": 0.68359375, "loss_num": 0.058349609375, "loss_xval": 1.6640625, "num_input_tokens_seen": 68827932, "step": 1039 }, { "epoch": 0.09734637525155614, "grad_norm": 18.27654266357422, "learning_rate": 5e-05, "loss": 1.7088, "num_input_tokens_seen": 68894556, "step": 1040 }, { "epoch": 0.09734637525155614, "loss": 1.6159359216690063, "loss_ce": 0.0036311917938292027, "loss_iou": 0.6640625, "loss_num": 0.056884765625, "loss_xval": 1.609375, "num_input_tokens_seen": 68894556, "step": 1040 }, { "epoch": 0.09743997753545186, "grad_norm": 32.86941909790039, "learning_rate": 5e-05, "loss": 1.5318, "num_input_tokens_seen": 68959732, "step": 1041 }, { "epoch": 0.09743997753545186, "loss": 1.702520489692688, "loss_ce": 0.005254846997559071, "loss_iou": 0.734375, "loss_num": 0.04541015625, "loss_xval": 1.6953125, "num_input_tokens_seen": 68959732, "step": 1041 }, { "epoch": 0.0975335798193476, "grad_norm": 13.069723129272461, "learning_rate": 5e-05, "loss": 1.8367, "num_input_tokens_seen": 69026096, "step": 1042 }, { "epoch": 0.0975335798193476, "loss": 2.051509380340576, "loss_ce": 0.001704599242657423, "loss_iou": 0.890625, "loss_num": 0.052734375, "loss_xval": 2.046875, "num_input_tokens_seen": 69026096, "step": 1042 }, { "epoch": 0.09762718210324332, "grad_norm": 20.314245223999023, "learning_rate": 5e-05, "loss": 1.4561, "num_input_tokens_seen": 69093532, "step": 1043 }, { "epoch": 0.09762718210324332, "loss": 1.501004695892334, "loss_ce": 0.006375749129801989, "loss_iou": 0.67578125, "loss_num": 0.0294189453125, "loss_xval": 1.4921875, "num_input_tokens_seen": 69093532, "step": 1043 }, { "epoch": 0.09772078438713905, "grad_norm": 24.271562576293945, "learning_rate": 5e-05, "loss": 1.6173, "num_input_tokens_seen": 69159004, "step": 1044 }, { "epoch": 0.09772078438713905, "loss": 1.531834602355957, "loss_ce": 0.005467416252940893, "loss_iou": 0.6484375, "loss_num": 0.0458984375, "loss_xval": 1.5234375, "num_input_tokens_seen": 69159004, "step": 1044 }, { "epoch": 0.09781438667103477, "grad_norm": 18.976776123046875, "learning_rate": 5e-05, "loss": 1.799, "num_input_tokens_seen": 69225708, "step": 1045 }, { "epoch": 0.09781438667103477, "loss": 1.7624338865280151, "loss_ce": 0.00559794157743454, "loss_iou": 0.7734375, "loss_num": 0.0419921875, "loss_xval": 1.7578125, "num_input_tokens_seen": 69225708, "step": 1045 }, { "epoch": 0.0979079889549305, "grad_norm": 12.693644523620605, "learning_rate": 5e-05, "loss": 1.6855, "num_input_tokens_seen": 69291768, "step": 1046 }, { "epoch": 0.0979079889549305, "loss": 1.4835333824157715, "loss_ce": 0.0020880727097392082, "loss_iou": 0.66015625, "loss_num": 0.032470703125, "loss_xval": 1.484375, "num_input_tokens_seen": 69291768, "step": 1046 }, { "epoch": 0.09800159123882622, "grad_norm": 13.826536178588867, "learning_rate": 5e-05, "loss": 1.3811, "num_input_tokens_seen": 69357984, "step": 1047 }, { "epoch": 0.09800159123882622, "loss": 0.954777717590332, "loss_ce": 0.00214099558070302, "loss_iou": 0.416015625, "loss_num": 0.0242919921875, "loss_xval": 0.953125, "num_input_tokens_seen": 69357984, "step": 1047 }, { "epoch": 0.09809519352272196, "grad_norm": 29.4437313079834, "learning_rate": 5e-05, "loss": 1.5258, "num_input_tokens_seen": 69423936, "step": 1048 }, { "epoch": 0.09809519352272196, "loss": 1.5787353515625, "loss_ce": 0.004516693763434887, "loss_iou": 0.65625, "loss_num": 0.05224609375, "loss_xval": 1.578125, "num_input_tokens_seen": 69423936, "step": 1048 }, { "epoch": 0.09818879580661768, "grad_norm": 37.3914909362793, "learning_rate": 5e-05, "loss": 1.5761, "num_input_tokens_seen": 69489296, "step": 1049 }, { "epoch": 0.09818879580661768, "loss": 1.8066282272338867, "loss_ce": 0.0031614694744348526, "loss_iou": 0.8125, "loss_num": 0.03564453125, "loss_xval": 1.8046875, "num_input_tokens_seen": 69489296, "step": 1049 }, { "epoch": 0.09828239809051341, "grad_norm": 218.51792907714844, "learning_rate": 5e-05, "loss": 1.8707, "num_input_tokens_seen": 69555684, "step": 1050 }, { "epoch": 0.09828239809051341, "loss": 1.7996981143951416, "loss_ce": 0.006729439832270145, "loss_iou": 0.8125, "loss_num": 0.03369140625, "loss_xval": 1.796875, "num_input_tokens_seen": 69555684, "step": 1050 }, { "epoch": 0.09837600037440913, "grad_norm": 19.0709228515625, "learning_rate": 5e-05, "loss": 1.8382, "num_input_tokens_seen": 69622460, "step": 1051 }, { "epoch": 0.09837600037440913, "loss": 1.8147270679473877, "loss_ce": 0.006133353337645531, "loss_iou": 0.78125, "loss_num": 0.0498046875, "loss_xval": 1.8125, "num_input_tokens_seen": 69622460, "step": 1051 }, { "epoch": 0.09846960265830486, "grad_norm": 11.224459648132324, "learning_rate": 5e-05, "loss": 1.5486, "num_input_tokens_seen": 69688848, "step": 1052 }, { "epoch": 0.09846960265830486, "loss": 1.5167632102966309, "loss_ce": 0.005044479388743639, "loss_iou": 0.6484375, "loss_num": 0.04296875, "loss_xval": 1.515625, "num_input_tokens_seen": 69688848, "step": 1052 }, { "epoch": 0.09856320494220058, "grad_norm": 28.384679794311523, "learning_rate": 5e-05, "loss": 1.631, "num_input_tokens_seen": 69754772, "step": 1053 }, { "epoch": 0.09856320494220058, "loss": 1.6727714538574219, "loss_ce": 0.006755872629582882, "loss_iou": 0.74609375, "loss_num": 0.03564453125, "loss_xval": 1.6640625, "num_input_tokens_seen": 69754772, "step": 1053 }, { "epoch": 0.09865680722609632, "grad_norm": 17.086957931518555, "learning_rate": 5e-05, "loss": 1.7335, "num_input_tokens_seen": 69820760, "step": 1054 }, { "epoch": 0.09865680722609632, "loss": 1.7270066738128662, "loss_ce": 0.002397257601842284, "loss_iou": 0.76953125, "loss_num": 0.037841796875, "loss_xval": 1.7265625, "num_input_tokens_seen": 69820760, "step": 1054 }, { "epoch": 0.09875040950999205, "grad_norm": 31.31211280822754, "learning_rate": 5e-05, "loss": 1.4944, "num_input_tokens_seen": 69886556, "step": 1055 }, { "epoch": 0.09875040950999205, "loss": 1.4780758619308472, "loss_ce": 0.0074948593974113464, "loss_iou": 0.56640625, "loss_num": 0.06787109375, "loss_xval": 1.46875, "num_input_tokens_seen": 69886556, "step": 1055 }, { "epoch": 0.09884401179388777, "grad_norm": 18.1871337890625, "learning_rate": 5e-05, "loss": 1.8154, "num_input_tokens_seen": 69953852, "step": 1056 }, { "epoch": 0.09884401179388777, "loss": 1.6344540119171143, "loss_ce": 0.004571170080453157, "loss_iou": 0.703125, "loss_num": 0.04541015625, "loss_xval": 1.6328125, "num_input_tokens_seen": 69953852, "step": 1056 }, { "epoch": 0.0989376140777835, "grad_norm": 11.456923484802246, "learning_rate": 5e-05, "loss": 1.512, "num_input_tokens_seen": 70020220, "step": 1057 }, { "epoch": 0.0989376140777835, "loss": 1.4629371166229248, "loss_ce": 0.004929323680698872, "loss_iou": 0.65625, "loss_num": 0.028564453125, "loss_xval": 1.4609375, "num_input_tokens_seen": 70020220, "step": 1057 }, { "epoch": 0.09903121636167922, "grad_norm": 17.7427921295166, "learning_rate": 5e-05, "loss": 1.6399, "num_input_tokens_seen": 70086860, "step": 1058 }, { "epoch": 0.09903121636167922, "loss": 1.7626097202301025, "loss_ce": 0.0038206689059734344, "loss_iou": 0.73828125, "loss_num": 0.055908203125, "loss_xval": 1.7578125, "num_input_tokens_seen": 70086860, "step": 1058 }, { "epoch": 0.09912481864557496, "grad_norm": 34.55023193359375, "learning_rate": 5e-05, "loss": 1.7016, "num_input_tokens_seen": 70153744, "step": 1059 }, { "epoch": 0.09912481864557496, "loss": 1.7080399990081787, "loss_ce": 0.0029619333799928427, "loss_iou": 0.765625, "loss_num": 0.035400390625, "loss_xval": 1.703125, "num_input_tokens_seen": 70153744, "step": 1059 }, { "epoch": 0.09921842092947068, "grad_norm": 13.886670112609863, "learning_rate": 5e-05, "loss": 1.7789, "num_input_tokens_seen": 70221132, "step": 1060 }, { "epoch": 0.09921842092947068, "loss": 1.9111485481262207, "loss_ce": 0.006851662881672382, "loss_iou": 0.8203125, "loss_num": 0.053466796875, "loss_xval": 1.90625, "num_input_tokens_seen": 70221132, "step": 1060 }, { "epoch": 0.09931202321336641, "grad_norm": 19.265901565551758, "learning_rate": 5e-05, "loss": 1.4447, "num_input_tokens_seen": 70286868, "step": 1061 }, { "epoch": 0.09931202321336641, "loss": 1.4278239011764526, "loss_ce": 0.003507505403831601, "loss_iou": 0.59765625, "loss_num": 0.0458984375, "loss_xval": 1.421875, "num_input_tokens_seen": 70286868, "step": 1061 }, { "epoch": 0.09940562549726213, "grad_norm": 23.00331687927246, "learning_rate": 5e-05, "loss": 1.6059, "num_input_tokens_seen": 70354820, "step": 1062 }, { "epoch": 0.09940562549726213, "loss": 1.4556865692138672, "loss_ce": 0.0035380988847464323, "loss_iou": 0.60546875, "loss_num": 0.048583984375, "loss_xval": 1.453125, "num_input_tokens_seen": 70354820, "step": 1062 }, { "epoch": 0.09949922778115786, "grad_norm": 19.529693603515625, "learning_rate": 5e-05, "loss": 1.8307, "num_input_tokens_seen": 70419364, "step": 1063 }, { "epoch": 0.09949922778115786, "loss": 2.0091640949249268, "loss_ce": 0.0033047685865312815, "loss_iou": 0.86328125, "loss_num": 0.0556640625, "loss_xval": 2.0, "num_input_tokens_seen": 70419364, "step": 1063 }, { "epoch": 0.09959283006505358, "grad_norm": 19.776260375976562, "learning_rate": 5e-05, "loss": 1.628, "num_input_tokens_seen": 70485440, "step": 1064 }, { "epoch": 0.09959283006505358, "loss": 1.560286045074463, "loss_ce": 0.004622036591172218, "loss_iou": 0.6875, "loss_num": 0.036376953125, "loss_xval": 1.5546875, "num_input_tokens_seen": 70485440, "step": 1064 }, { "epoch": 0.09968643234894932, "grad_norm": 39.927310943603516, "learning_rate": 5e-05, "loss": 1.5625, "num_input_tokens_seen": 70552364, "step": 1065 }, { "epoch": 0.09968643234894932, "loss": 1.6188111305236816, "loss_ce": 0.009436175227165222, "loss_iou": 0.6875, "loss_num": 0.047119140625, "loss_xval": 1.609375, "num_input_tokens_seen": 70552364, "step": 1065 }, { "epoch": 0.09978003463284504, "grad_norm": 15.731878280639648, "learning_rate": 5e-05, "loss": 2.0476, "num_input_tokens_seen": 70618948, "step": 1066 }, { "epoch": 0.09978003463284504, "loss": 1.9860522747039795, "loss_ce": 0.008513141423463821, "loss_iou": 0.81640625, "loss_num": 0.068359375, "loss_xval": 1.9765625, "num_input_tokens_seen": 70618948, "step": 1066 }, { "epoch": 0.09987363691674077, "grad_norm": 16.6303653717041, "learning_rate": 5e-05, "loss": 1.5697, "num_input_tokens_seen": 70685280, "step": 1067 }, { "epoch": 0.09987363691674077, "loss": 1.6117045879364014, "loss_ce": 0.0033061308786273003, "loss_iou": 0.71875, "loss_num": 0.03369140625, "loss_xval": 1.609375, "num_input_tokens_seen": 70685280, "step": 1067 }, { "epoch": 0.09996723920063649, "grad_norm": 32.4035758972168, "learning_rate": 5e-05, "loss": 1.4768, "num_input_tokens_seen": 70751844, "step": 1068 }, { "epoch": 0.09996723920063649, "loss": 1.5377182960510254, "loss_ce": 0.003050293307751417, "loss_iou": 0.62109375, "loss_num": 0.058349609375, "loss_xval": 1.53125, "num_input_tokens_seen": 70751844, "step": 1068 }, { "epoch": 0.10006084148453222, "grad_norm": 16.004093170166016, "learning_rate": 5e-05, "loss": 1.7652, "num_input_tokens_seen": 70817780, "step": 1069 }, { "epoch": 0.10006084148453222, "loss": 1.643068790435791, "loss_ce": 0.0023826994001865387, "loss_iou": 0.7109375, "loss_num": 0.04296875, "loss_xval": 1.640625, "num_input_tokens_seen": 70817780, "step": 1069 }, { "epoch": 0.10015444376842796, "grad_norm": 19.020105361938477, "learning_rate": 5e-05, "loss": 1.3034, "num_input_tokens_seen": 70883888, "step": 1070 }, { "epoch": 0.10015444376842796, "loss": 1.3595044612884521, "loss_ce": 0.002082610735669732, "loss_iou": 0.5859375, "loss_num": 0.03662109375, "loss_xval": 1.359375, "num_input_tokens_seen": 70883888, "step": 1070 }, { "epoch": 0.10024804605232368, "grad_norm": 46.259517669677734, "learning_rate": 5e-05, "loss": 1.8026, "num_input_tokens_seen": 70950500, "step": 1071 }, { "epoch": 0.10024804605232368, "loss": 1.6754218339920044, "loss_ce": 0.0035468616988509893, "loss_iou": 0.71484375, "loss_num": 0.049072265625, "loss_xval": 1.671875, "num_input_tokens_seen": 70950500, "step": 1071 }, { "epoch": 0.1003416483362194, "grad_norm": 20.967409133911133, "learning_rate": 5e-05, "loss": 1.8289, "num_input_tokens_seen": 71015636, "step": 1072 }, { "epoch": 0.1003416483362194, "loss": 1.8284236192703247, "loss_ce": 0.004204882774502039, "loss_iou": 0.7890625, "loss_num": 0.04833984375, "loss_xval": 1.828125, "num_input_tokens_seen": 71015636, "step": 1072 }, { "epoch": 0.10043525062011513, "grad_norm": 15.622443199157715, "learning_rate": 5e-05, "loss": 1.485, "num_input_tokens_seen": 71082532, "step": 1073 }, { "epoch": 0.10043525062011513, "loss": 1.4984934329986572, "loss_ce": 0.004841048736125231, "loss_iou": 0.65234375, "loss_num": 0.037841796875, "loss_xval": 1.4921875, "num_input_tokens_seen": 71082532, "step": 1073 }, { "epoch": 0.10052885290401085, "grad_norm": 20.51572036743164, "learning_rate": 5e-05, "loss": 1.592, "num_input_tokens_seen": 71148244, "step": 1074 }, { "epoch": 0.10052885290401085, "loss": 1.701202392578125, "loss_ce": 0.004424963146448135, "loss_iou": 0.7421875, "loss_num": 0.042236328125, "loss_xval": 1.6953125, "num_input_tokens_seen": 71148244, "step": 1074 }, { "epoch": 0.10062245518790658, "grad_norm": 21.5648250579834, "learning_rate": 5e-05, "loss": 1.5863, "num_input_tokens_seen": 71214616, "step": 1075 }, { "epoch": 0.10062245518790658, "loss": 1.763230800628662, "loss_ce": 0.009324410930275917, "loss_iou": 0.79296875, "loss_num": 0.032958984375, "loss_xval": 1.75, "num_input_tokens_seen": 71214616, "step": 1075 }, { "epoch": 0.10071605747180232, "grad_norm": 28.579771041870117, "learning_rate": 5e-05, "loss": 1.4247, "num_input_tokens_seen": 71280288, "step": 1076 }, { "epoch": 0.10071605747180232, "loss": 1.4551047086715698, "loss_ce": 0.009792180731892586, "loss_iou": 0.64453125, "loss_num": 0.0306396484375, "loss_xval": 1.4453125, "num_input_tokens_seen": 71280288, "step": 1076 }, { "epoch": 0.10080965975569804, "grad_norm": 18.032018661499023, "learning_rate": 5e-05, "loss": 1.824, "num_input_tokens_seen": 71346528, "step": 1077 }, { "epoch": 0.10080965975569804, "loss": 1.9830670356750488, "loss_ce": 0.007481178268790245, "loss_iou": 0.80859375, "loss_num": 0.07177734375, "loss_xval": 1.9765625, "num_input_tokens_seen": 71346528, "step": 1077 }, { "epoch": 0.10090326203959377, "grad_norm": 21.930185317993164, "learning_rate": 5e-05, "loss": 1.3254, "num_input_tokens_seen": 71412284, "step": 1078 }, { "epoch": 0.10090326203959377, "loss": 1.3273941278457642, "loss_ce": 0.001222223974764347, "loss_iou": 0.59375, "loss_num": 0.02783203125, "loss_xval": 1.328125, "num_input_tokens_seen": 71412284, "step": 1078 }, { "epoch": 0.10099686432348949, "grad_norm": 20.907901763916016, "learning_rate": 5e-05, "loss": 1.4868, "num_input_tokens_seen": 71478152, "step": 1079 }, { "epoch": 0.10099686432348949, "loss": 1.4336016178131104, "loss_ce": 0.012947340495884418, "loss_iou": 0.578125, "loss_num": 0.0537109375, "loss_xval": 1.421875, "num_input_tokens_seen": 71478152, "step": 1079 }, { "epoch": 0.10109046660738522, "grad_norm": 13.136309623718262, "learning_rate": 5e-05, "loss": 1.4114, "num_input_tokens_seen": 71544900, "step": 1080 }, { "epoch": 0.10109046660738522, "loss": 1.6138828992843628, "loss_ce": 0.006949296221137047, "loss_iou": 0.6640625, "loss_num": 0.056884765625, "loss_xval": 1.609375, "num_input_tokens_seen": 71544900, "step": 1080 }, { "epoch": 0.10118406889128095, "grad_norm": 78.87405395507812, "learning_rate": 5e-05, "loss": 1.6984, "num_input_tokens_seen": 71610936, "step": 1081 }, { "epoch": 0.10118406889128095, "loss": 1.7695459127426147, "loss_ce": 0.007827112451195717, "loss_iou": 0.75, "loss_num": 0.052001953125, "loss_xval": 1.765625, "num_input_tokens_seen": 71610936, "step": 1081 }, { "epoch": 0.10127767117517668, "grad_norm": 14.12183952331543, "learning_rate": 5e-05, "loss": 1.5883, "num_input_tokens_seen": 71677840, "step": 1082 }, { "epoch": 0.10127767117517668, "loss": 1.5694940090179443, "loss_ce": 0.005285100545734167, "loss_iou": 0.69921875, "loss_num": 0.032470703125, "loss_xval": 1.5625, "num_input_tokens_seen": 71677840, "step": 1082 }, { "epoch": 0.1013712734590724, "grad_norm": 17.401836395263672, "learning_rate": 5e-05, "loss": 1.6485, "num_input_tokens_seen": 71743168, "step": 1083 }, { "epoch": 0.1013712734590724, "loss": 1.7952377796173096, "loss_ce": 0.0071518197655677795, "loss_iou": 0.74609375, "loss_num": 0.059326171875, "loss_xval": 1.7890625, "num_input_tokens_seen": 71743168, "step": 1083 }, { "epoch": 0.10146487574296813, "grad_norm": 29.285566329956055, "learning_rate": 5e-05, "loss": 1.7169, "num_input_tokens_seen": 71809468, "step": 1084 }, { "epoch": 0.10146487574296813, "loss": 1.5882303714752197, "loss_ce": 0.006199192255735397, "loss_iou": 0.69140625, "loss_num": 0.040283203125, "loss_xval": 1.578125, "num_input_tokens_seen": 71809468, "step": 1084 }, { "epoch": 0.10155847802686385, "grad_norm": 21.07638168334961, "learning_rate": 5e-05, "loss": 1.5334, "num_input_tokens_seen": 71874744, "step": 1085 }, { "epoch": 0.10155847802686385, "loss": 1.4922783374786377, "loss_ce": 0.004973745439201593, "loss_iou": 0.66015625, "loss_num": 0.033935546875, "loss_xval": 1.484375, "num_input_tokens_seen": 71874744, "step": 1085 }, { "epoch": 0.10165208031075958, "grad_norm": 35.0526237487793, "learning_rate": 5e-05, "loss": 1.4507, "num_input_tokens_seen": 71940584, "step": 1086 }, { "epoch": 0.10165208031075958, "loss": 1.3636212348937988, "loss_ce": 0.009617269039154053, "loss_iou": 0.5625, "loss_num": 0.045654296875, "loss_xval": 1.3515625, "num_input_tokens_seen": 71940584, "step": 1086 }, { "epoch": 0.10174568259465532, "grad_norm": 21.897987365722656, "learning_rate": 5e-05, "loss": 1.6272, "num_input_tokens_seen": 72005496, "step": 1087 }, { "epoch": 0.10174568259465532, "loss": 1.599699854850769, "loss_ce": 0.0035084174014627934, "loss_iou": 0.65234375, "loss_num": 0.057861328125, "loss_xval": 1.59375, "num_input_tokens_seen": 72005496, "step": 1087 }, { "epoch": 0.10183928487855104, "grad_norm": 26.753427505493164, "learning_rate": 5e-05, "loss": 1.6557, "num_input_tokens_seen": 72072488, "step": 1088 }, { "epoch": 0.10183928487855104, "loss": 1.7499940395355225, "loss_ce": 0.007318269927054644, "loss_iou": 0.73828125, "loss_num": 0.052001953125, "loss_xval": 1.7421875, "num_input_tokens_seen": 72072488, "step": 1088 }, { "epoch": 0.10193288716244676, "grad_norm": 19.3857479095459, "learning_rate": 5e-05, "loss": 1.5798, "num_input_tokens_seen": 72137396, "step": 1089 }, { "epoch": 0.10193288716244676, "loss": 1.4376049041748047, "loss_ce": 0.001081545022316277, "loss_iou": 0.625, "loss_num": 0.037353515625, "loss_xval": 1.4375, "num_input_tokens_seen": 72137396, "step": 1089 }, { "epoch": 0.10202648944634249, "grad_norm": 10.583497047424316, "learning_rate": 5e-05, "loss": 1.3534, "num_input_tokens_seen": 72203864, "step": 1090 }, { "epoch": 0.10202648944634249, "loss": 1.4205148220062256, "loss_ce": 0.005963992327451706, "loss_iou": 0.60546875, "loss_num": 0.04052734375, "loss_xval": 1.4140625, "num_input_tokens_seen": 72203864, "step": 1090 }, { "epoch": 0.10212009173023821, "grad_norm": 30.084596633911133, "learning_rate": 5e-05, "loss": 1.5118, "num_input_tokens_seen": 72270124, "step": 1091 }, { "epoch": 0.10212009173023821, "loss": 1.6620097160339355, "loss_ce": 0.011619108729064465, "loss_iou": 0.7265625, "loss_num": 0.0400390625, "loss_xval": 1.6484375, "num_input_tokens_seen": 72270124, "step": 1091 }, { "epoch": 0.10221369401413395, "grad_norm": 36.06584930419922, "learning_rate": 5e-05, "loss": 1.4752, "num_input_tokens_seen": 72336408, "step": 1092 }, { "epoch": 0.10221369401413395, "loss": 1.4394707679748535, "loss_ce": 0.006121148355305195, "loss_iou": 0.6328125, "loss_num": 0.03271484375, "loss_xval": 1.4296875, "num_input_tokens_seen": 72336408, "step": 1092 }, { "epoch": 0.10230729629802968, "grad_norm": 13.94032096862793, "learning_rate": 5e-05, "loss": 1.7626, "num_input_tokens_seen": 72399836, "step": 1093 }, { "epoch": 0.10230729629802968, "loss": 1.9253475666046143, "loss_ce": 0.0034724862780421972, "loss_iou": 0.86328125, "loss_num": 0.039794921875, "loss_xval": 1.921875, "num_input_tokens_seen": 72399836, "step": 1093 }, { "epoch": 0.1024008985819254, "grad_norm": 19.62771224975586, "learning_rate": 5e-05, "loss": 1.5887, "num_input_tokens_seen": 72466636, "step": 1094 }, { "epoch": 0.1024008985819254, "loss": 1.4443734884262085, "loss_ce": 0.005042454227805138, "loss_iou": 0.609375, "loss_num": 0.04345703125, "loss_xval": 1.4375, "num_input_tokens_seen": 72466636, "step": 1094 }, { "epoch": 0.10249450086582113, "grad_norm": 25.884469985961914, "learning_rate": 5e-05, "loss": 1.3426, "num_input_tokens_seen": 72533320, "step": 1095 }, { "epoch": 0.10249450086582113, "loss": 1.1323490142822266, "loss_ce": 0.0034426813945174217, "loss_iou": 0.5078125, "loss_num": 0.023193359375, "loss_xval": 1.125, "num_input_tokens_seen": 72533320, "step": 1095 }, { "epoch": 0.10258810314971685, "grad_norm": 26.165925979614258, "learning_rate": 5e-05, "loss": 1.6928, "num_input_tokens_seen": 72599888, "step": 1096 }, { "epoch": 0.10258810314971685, "loss": 1.6118435859680176, "loss_ce": 0.005398129113018513, "loss_iou": 0.703125, "loss_num": 0.03955078125, "loss_xval": 1.609375, "num_input_tokens_seen": 72599888, "step": 1096 }, { "epoch": 0.10268170543361257, "grad_norm": 28.40653419494629, "learning_rate": 5e-05, "loss": 1.6217, "num_input_tokens_seen": 72666576, "step": 1097 }, { "epoch": 0.10268170543361257, "loss": 1.5436112880706787, "loss_ce": 0.0033280765637755394, "loss_iou": 0.65234375, "loss_num": 0.04736328125, "loss_xval": 1.5390625, "num_input_tokens_seen": 72666576, "step": 1097 }, { "epoch": 0.10277530771750831, "grad_norm": 10.888528823852539, "learning_rate": 5e-05, "loss": 1.2432, "num_input_tokens_seen": 72732152, "step": 1098 }, { "epoch": 0.10277530771750831, "loss": 1.4281492233276367, "loss_ce": 0.0052976300939917564, "loss_iou": 0.625, "loss_num": 0.03466796875, "loss_xval": 1.421875, "num_input_tokens_seen": 72732152, "step": 1098 }, { "epoch": 0.10286891000140404, "grad_norm": 24.421236038208008, "learning_rate": 5e-05, "loss": 1.4365, "num_input_tokens_seen": 72797844, "step": 1099 }, { "epoch": 0.10286891000140404, "loss": 1.8412877321243286, "loss_ce": 0.00437368405982852, "loss_iou": 0.7578125, "loss_num": 0.0634765625, "loss_xval": 1.8359375, "num_input_tokens_seen": 72797844, "step": 1099 }, { "epoch": 0.10296251228529976, "grad_norm": 46.921199798583984, "learning_rate": 5e-05, "loss": 1.4816, "num_input_tokens_seen": 72863276, "step": 1100 }, { "epoch": 0.10296251228529976, "loss": 1.644058108329773, "loss_ce": 0.008804120123386383, "loss_iou": 0.67578125, "loss_num": 0.056884765625, "loss_xval": 1.6328125, "num_input_tokens_seen": 72863276, "step": 1100 }, { "epoch": 0.10305611456919549, "grad_norm": 16.586036682128906, "learning_rate": 5e-05, "loss": 1.4811, "num_input_tokens_seen": 72928792, "step": 1101 }, { "epoch": 0.10305611456919549, "loss": 1.5740876197814941, "loss_ce": 0.0037751158233731985, "loss_iou": 0.6640625, "loss_num": 0.04833984375, "loss_xval": 1.5703125, "num_input_tokens_seen": 72928792, "step": 1101 }, { "epoch": 0.10314971685309121, "grad_norm": 37.889278411865234, "learning_rate": 5e-05, "loss": 1.5914, "num_input_tokens_seen": 72995832, "step": 1102 }, { "epoch": 0.10314971685309121, "loss": 1.3846938610076904, "loss_ce": 0.00578762823715806, "loss_iou": 0.6328125, "loss_num": 0.022705078125, "loss_xval": 1.375, "num_input_tokens_seen": 72995832, "step": 1102 }, { "epoch": 0.10324331913698694, "grad_norm": 15.632954597473145, "learning_rate": 5e-05, "loss": 1.9994, "num_input_tokens_seen": 73062164, "step": 1103 }, { "epoch": 0.10324331913698694, "loss": 1.82685124874115, "loss_ce": 0.00165596604347229, "loss_iou": 0.7890625, "loss_num": 0.05029296875, "loss_xval": 1.828125, "num_input_tokens_seen": 73062164, "step": 1103 }, { "epoch": 0.10333692142088267, "grad_norm": 29.71272850036621, "learning_rate": 5e-05, "loss": 1.5723, "num_input_tokens_seen": 73128172, "step": 1104 }, { "epoch": 0.10333692142088267, "loss": 1.4010519981384277, "loss_ce": 0.006642777472734451, "loss_iou": 0.60546875, "loss_num": 0.037109375, "loss_xval": 1.390625, "num_input_tokens_seen": 73128172, "step": 1104 }, { "epoch": 0.1034305237047784, "grad_norm": 18.238737106323242, "learning_rate": 5e-05, "loss": 1.5136, "num_input_tokens_seen": 73195452, "step": 1105 }, { "epoch": 0.1034305237047784, "loss": 1.4810616970062256, "loss_ce": 0.00156954035628587, "loss_iou": 0.66015625, "loss_num": 0.03271484375, "loss_xval": 1.4765625, "num_input_tokens_seen": 73195452, "step": 1105 }, { "epoch": 0.10352412598867412, "grad_norm": 18.990901947021484, "learning_rate": 5e-05, "loss": 1.613, "num_input_tokens_seen": 73262228, "step": 1106 }, { "epoch": 0.10352412598867412, "loss": 1.7205805778503418, "loss_ce": 0.0037837019190192223, "loss_iou": 0.7578125, "loss_num": 0.04052734375, "loss_xval": 1.71875, "num_input_tokens_seen": 73262228, "step": 1106 }, { "epoch": 0.10361772827256985, "grad_norm": 39.51871109008789, "learning_rate": 5e-05, "loss": 1.6039, "num_input_tokens_seen": 73329256, "step": 1107 }, { "epoch": 0.10361772827256985, "loss": 1.646957516670227, "loss_ce": 0.004379414487630129, "loss_iou": 0.71875, "loss_num": 0.041259765625, "loss_xval": 1.640625, "num_input_tokens_seen": 73329256, "step": 1107 }, { "epoch": 0.10371133055646557, "grad_norm": 16.212440490722656, "learning_rate": 5e-05, "loss": 1.8582, "num_input_tokens_seen": 73395804, "step": 1108 }, { "epoch": 0.10371133055646557, "loss": 1.9633493423461914, "loss_ce": 0.0033883987925946712, "loss_iou": 0.86328125, "loss_num": 0.046142578125, "loss_xval": 1.9609375, "num_input_tokens_seen": 73395804, "step": 1108 }, { "epoch": 0.10380493284036131, "grad_norm": 28.32777976989746, "learning_rate": 5e-05, "loss": 1.8139, "num_input_tokens_seen": 73463132, "step": 1109 }, { "epoch": 0.10380493284036131, "loss": 1.838038444519043, "loss_ce": 0.006007233634591103, "loss_iou": 0.79296875, "loss_num": 0.049560546875, "loss_xval": 1.828125, "num_input_tokens_seen": 73463132, "step": 1109 }, { "epoch": 0.10389853512425704, "grad_norm": 23.26406478881836, "learning_rate": 5e-05, "loss": 1.4926, "num_input_tokens_seen": 73527964, "step": 1110 }, { "epoch": 0.10389853512425704, "loss": 1.2852752208709717, "loss_ce": 0.005856221076101065, "loss_iou": 0.51953125, "loss_num": 0.048828125, "loss_xval": 1.28125, "num_input_tokens_seen": 73527964, "step": 1110 }, { "epoch": 0.10399213740815276, "grad_norm": 16.523807525634766, "learning_rate": 5e-05, "loss": 1.5728, "num_input_tokens_seen": 73594392, "step": 1111 }, { "epoch": 0.10399213740815276, "loss": 1.7789335250854492, "loss_ce": 0.010378850623965263, "loss_iou": 0.75390625, "loss_num": 0.052490234375, "loss_xval": 1.765625, "num_input_tokens_seen": 73594392, "step": 1111 }, { "epoch": 0.10408573969204848, "grad_norm": 9.896635055541992, "learning_rate": 5e-05, "loss": 1.4108, "num_input_tokens_seen": 73661308, "step": 1112 }, { "epoch": 0.10408573969204848, "loss": 1.3604661226272583, "loss_ce": 0.003044302109628916, "loss_iou": 0.5859375, "loss_num": 0.037109375, "loss_xval": 1.359375, "num_input_tokens_seen": 73661308, "step": 1112 }, { "epoch": 0.10417934197594421, "grad_norm": 22.082138061523438, "learning_rate": 5e-05, "loss": 1.5699, "num_input_tokens_seen": 73727672, "step": 1113 }, { "epoch": 0.10417934197594421, "loss": 1.567997932434082, "loss_ce": 0.007451091427356005, "loss_iou": 0.66015625, "loss_num": 0.048828125, "loss_xval": 1.5625, "num_input_tokens_seen": 73727672, "step": 1113 }, { "epoch": 0.10427294425983993, "grad_norm": 20.720991134643555, "learning_rate": 5e-05, "loss": 1.7929, "num_input_tokens_seen": 73792744, "step": 1114 }, { "epoch": 0.10427294425983993, "loss": 2.0892069339752197, "loss_ce": 0.007175697945058346, "loss_iou": 0.88671875, "loss_num": 0.062255859375, "loss_xval": 2.078125, "num_input_tokens_seen": 73792744, "step": 1114 }, { "epoch": 0.10436654654373567, "grad_norm": 51.49057388305664, "learning_rate": 5e-05, "loss": 1.4627, "num_input_tokens_seen": 73858620, "step": 1115 }, { "epoch": 0.10436654654373567, "loss": 1.385817289352417, "loss_ce": 0.006422717124223709, "loss_iou": 0.59375, "loss_num": 0.039306640625, "loss_xval": 1.3828125, "num_input_tokens_seen": 73858620, "step": 1115 }, { "epoch": 0.1044601488276314, "grad_norm": 16.018857955932617, "learning_rate": 5e-05, "loss": 1.6839, "num_input_tokens_seen": 73924180, "step": 1116 }, { "epoch": 0.1044601488276314, "loss": 1.8350038528442383, "loss_ce": 0.0049257902428507805, "loss_iou": 0.7734375, "loss_num": 0.0576171875, "loss_xval": 1.828125, "num_input_tokens_seen": 73924180, "step": 1116 }, { "epoch": 0.10455375111152712, "grad_norm": 12.003381729125977, "learning_rate": 5e-05, "loss": 1.6046, "num_input_tokens_seen": 73990164, "step": 1117 }, { "epoch": 0.10455375111152712, "loss": 1.821317434310913, "loss_ce": 0.006864245980978012, "loss_iou": 0.76953125, "loss_num": 0.055419921875, "loss_xval": 1.8125, "num_input_tokens_seen": 73990164, "step": 1117 }, { "epoch": 0.10464735339542285, "grad_norm": 17.458560943603516, "learning_rate": 5e-05, "loss": 1.5408, "num_input_tokens_seen": 74056232, "step": 1118 }, { "epoch": 0.10464735339542285, "loss": 1.5779576301574707, "loss_ce": 0.005691987462341785, "loss_iou": 0.63671875, "loss_num": 0.06005859375, "loss_xval": 1.5703125, "num_input_tokens_seen": 74056232, "step": 1118 }, { "epoch": 0.10474095567931857, "grad_norm": 24.969619750976562, "learning_rate": 5e-05, "loss": 1.8344, "num_input_tokens_seen": 74123288, "step": 1119 }, { "epoch": 0.10474095567931857, "loss": 1.7968626022338867, "loss_ce": 0.002917362842708826, "loss_iou": 0.76953125, "loss_num": 0.050537109375, "loss_xval": 1.796875, "num_input_tokens_seen": 74123288, "step": 1119 }, { "epoch": 0.10483455796321431, "grad_norm": 20.472679138183594, "learning_rate": 5e-05, "loss": 1.5267, "num_input_tokens_seen": 74189428, "step": 1120 }, { "epoch": 0.10483455796321431, "loss": 1.5565311908721924, "loss_ce": 0.006238183006644249, "loss_iou": 0.6640625, "loss_num": 0.044189453125, "loss_xval": 1.546875, "num_input_tokens_seen": 74189428, "step": 1120 }, { "epoch": 0.10492816024711003, "grad_norm": 54.381935119628906, "learning_rate": 5e-05, "loss": 1.5578, "num_input_tokens_seen": 74256172, "step": 1121 }, { "epoch": 0.10492816024711003, "loss": 1.524882435798645, "loss_ce": 0.004374627023935318, "loss_iou": 0.66796875, "loss_num": 0.036865234375, "loss_xval": 1.5234375, "num_input_tokens_seen": 74256172, "step": 1121 }, { "epoch": 0.10502176253100576, "grad_norm": 15.588903427124023, "learning_rate": 5e-05, "loss": 1.4205, "num_input_tokens_seen": 74322556, "step": 1122 }, { "epoch": 0.10502176253100576, "loss": 1.4770457744598389, "loss_ce": 0.004145374521613121, "loss_iou": 0.66015625, "loss_num": 0.0299072265625, "loss_xval": 1.4765625, "num_input_tokens_seen": 74322556, "step": 1122 }, { "epoch": 0.10511536481490148, "grad_norm": 18.953041076660156, "learning_rate": 5e-05, "loss": 1.427, "num_input_tokens_seen": 74389180, "step": 1123 }, { "epoch": 0.10511536481490148, "loss": 1.4715644121170044, "loss_ce": 0.0028144335374236107, "loss_iou": 0.609375, "loss_num": 0.051025390625, "loss_xval": 1.46875, "num_input_tokens_seen": 74389180, "step": 1123 }, { "epoch": 0.10520896709879721, "grad_norm": 21.091703414916992, "learning_rate": 5e-05, "loss": 1.4477, "num_input_tokens_seen": 74455756, "step": 1124 }, { "epoch": 0.10520896709879721, "loss": 1.4070497751235962, "loss_ce": 0.0037294612266123295, "loss_iou": 0.65625, "loss_num": 0.0184326171875, "loss_xval": 1.40625, "num_input_tokens_seen": 74455756, "step": 1124 }, { "epoch": 0.10530256938269293, "grad_norm": 36.85136795043945, "learning_rate": 5e-05, "loss": 1.7221, "num_input_tokens_seen": 74521528, "step": 1125 }, { "epoch": 0.10530256938269293, "loss": 1.437005877494812, "loss_ce": 0.003045933321118355, "loss_iou": 0.61328125, "loss_num": 0.0419921875, "loss_xval": 1.4375, "num_input_tokens_seen": 74521528, "step": 1125 }, { "epoch": 0.10539617166658867, "grad_norm": 14.059605598449707, "learning_rate": 5e-05, "loss": 1.9773, "num_input_tokens_seen": 74587804, "step": 1126 }, { "epoch": 0.10539617166658867, "loss": 2.071012020111084, "loss_ce": 0.008512133732438087, "loss_iou": 0.84765625, "loss_num": 0.0732421875, "loss_xval": 2.0625, "num_input_tokens_seen": 74587804, "step": 1126 }, { "epoch": 0.1054897739504844, "grad_norm": 19.6878604888916, "learning_rate": 5e-05, "loss": 1.4327, "num_input_tokens_seen": 74654252, "step": 1127 }, { "epoch": 0.1054897739504844, "loss": 1.3145666122436523, "loss_ce": 0.0015783183043822646, "loss_iou": 0.6015625, "loss_num": 0.021484375, "loss_xval": 1.3125, "num_input_tokens_seen": 74654252, "step": 1127 }, { "epoch": 0.10558337623438012, "grad_norm": 18.852392196655273, "learning_rate": 5e-05, "loss": 1.5887, "num_input_tokens_seen": 74720920, "step": 1128 }, { "epoch": 0.10558337623438012, "loss": 1.7087366580963135, "loss_ce": 0.003658563131466508, "loss_iou": 0.6953125, "loss_num": 0.06396484375, "loss_xval": 1.703125, "num_input_tokens_seen": 74720920, "step": 1128 }, { "epoch": 0.10567697851827584, "grad_norm": 15.907039642333984, "learning_rate": 5e-05, "loss": 1.6453, "num_input_tokens_seen": 74787364, "step": 1129 }, { "epoch": 0.10567697851827584, "loss": 1.430016279220581, "loss_ce": 0.005211664829403162, "loss_iou": 0.6328125, "loss_num": 0.03173828125, "loss_xval": 1.421875, "num_input_tokens_seen": 74787364, "step": 1129 }, { "epoch": 0.10577058080217157, "grad_norm": 13.575602531433105, "learning_rate": 5e-05, "loss": 1.3277, "num_input_tokens_seen": 74852120, "step": 1130 }, { "epoch": 0.10577058080217157, "loss": 1.2877238988876343, "loss_ce": 0.0025675965007394552, "loss_iou": 0.51171875, "loss_num": 0.052490234375, "loss_xval": 1.28125, "num_input_tokens_seen": 74852120, "step": 1130 }, { "epoch": 0.1058641830860673, "grad_norm": 18.148401260375977, "learning_rate": 5e-05, "loss": 1.4902, "num_input_tokens_seen": 74917696, "step": 1131 }, { "epoch": 0.1058641830860673, "loss": 1.5735406875610352, "loss_ce": 0.0032282189931720495, "loss_iou": 0.671875, "loss_num": 0.044921875, "loss_xval": 1.5703125, "num_input_tokens_seen": 74917696, "step": 1131 }, { "epoch": 0.10595778536996303, "grad_norm": 17.016647338867188, "learning_rate": 5e-05, "loss": 1.5977, "num_input_tokens_seen": 74983904, "step": 1132 }, { "epoch": 0.10595778536996303, "loss": 1.4564955234527588, "loss_ce": 0.00922992080450058, "loss_iou": 0.62109375, "loss_num": 0.041259765625, "loss_xval": 1.4453125, "num_input_tokens_seen": 74983904, "step": 1132 }, { "epoch": 0.10605138765385876, "grad_norm": 30.827829360961914, "learning_rate": 5e-05, "loss": 1.5576, "num_input_tokens_seen": 75050432, "step": 1133 }, { "epoch": 0.10605138765385876, "loss": 1.4756646156311035, "loss_ce": 0.0027642915956676006, "loss_iou": 0.640625, "loss_num": 0.038818359375, "loss_xval": 1.4765625, "num_input_tokens_seen": 75050432, "step": 1133 }, { "epoch": 0.10614498993775448, "grad_norm": 14.184310913085938, "learning_rate": 5e-05, "loss": 1.6376, "num_input_tokens_seen": 75116180, "step": 1134 }, { "epoch": 0.10614498993775448, "loss": 1.1849550008773804, "loss_ce": 0.004596044309437275, "loss_iou": 0.498046875, "loss_num": 0.03662109375, "loss_xval": 1.1796875, "num_input_tokens_seen": 75116180, "step": 1134 }, { "epoch": 0.1062385922216502, "grad_norm": 12.556612968444824, "learning_rate": 5e-05, "loss": 1.472, "num_input_tokens_seen": 75182036, "step": 1135 }, { "epoch": 0.1062385922216502, "loss": 1.4659476280212402, "loss_ce": 0.0050101205706596375, "loss_iou": 0.62890625, "loss_num": 0.0400390625, "loss_xval": 1.4609375, "num_input_tokens_seen": 75182036, "step": 1135 }, { "epoch": 0.10633219450554593, "grad_norm": 14.626906394958496, "learning_rate": 5e-05, "loss": 1.3054, "num_input_tokens_seen": 75249140, "step": 1136 }, { "epoch": 0.10633219450554593, "loss": 1.2172424793243408, "loss_ce": 0.003375312313437462, "loss_iou": 0.5390625, "loss_num": 0.02734375, "loss_xval": 1.2109375, "num_input_tokens_seen": 75249140, "step": 1136 }, { "epoch": 0.10642579678944167, "grad_norm": 19.031736373901367, "learning_rate": 5e-05, "loss": 1.4979, "num_input_tokens_seen": 75316344, "step": 1137 }, { "epoch": 0.10642579678944167, "loss": 1.5610942840576172, "loss_ce": 0.008359957486391068, "loss_iou": 0.69921875, "loss_num": 0.03076171875, "loss_xval": 1.5546875, "num_input_tokens_seen": 75316344, "step": 1137 }, { "epoch": 0.10651939907333739, "grad_norm": 15.841978073120117, "learning_rate": 5e-05, "loss": 1.6722, "num_input_tokens_seen": 75382524, "step": 1138 }, { "epoch": 0.10651939907333739, "loss": 1.7715243101119995, "loss_ce": 0.00492275133728981, "loss_iou": 0.7734375, "loss_num": 0.043701171875, "loss_xval": 1.765625, "num_input_tokens_seen": 75382524, "step": 1138 }, { "epoch": 0.10661300135723312, "grad_norm": 24.407976150512695, "learning_rate": 5e-05, "loss": 1.5927, "num_input_tokens_seen": 75449316, "step": 1139 }, { "epoch": 0.10661300135723312, "loss": 1.6843119859695435, "loss_ce": 0.003647799603641033, "loss_iou": 0.7109375, "loss_num": 0.0517578125, "loss_xval": 1.6796875, "num_input_tokens_seen": 75449316, "step": 1139 }, { "epoch": 0.10670660364112884, "grad_norm": 17.990219116210938, "learning_rate": 5e-05, "loss": 1.6142, "num_input_tokens_seen": 75515588, "step": 1140 }, { "epoch": 0.10670660364112884, "loss": 1.5659946203231812, "loss_ce": 0.007400865666568279, "loss_iou": 0.6796875, "loss_num": 0.039794921875, "loss_xval": 1.5625, "num_input_tokens_seen": 75515588, "step": 1140 }, { "epoch": 0.10680020592502457, "grad_norm": 28.435983657836914, "learning_rate": 5e-05, "loss": 1.6111, "num_input_tokens_seen": 75581540, "step": 1141 }, { "epoch": 0.10680020592502457, "loss": 1.423324465751648, "loss_ce": 0.008651572279632092, "loss_iou": 0.640625, "loss_num": 0.0269775390625, "loss_xval": 1.4140625, "num_input_tokens_seen": 75581540, "step": 1141 }, { "epoch": 0.1068938082089203, "grad_norm": 27.305858612060547, "learning_rate": 5e-05, "loss": 1.7486, "num_input_tokens_seen": 75647720, "step": 1142 }, { "epoch": 0.1068938082089203, "loss": 1.6713736057281494, "loss_ce": 0.007311083376407623, "loss_iou": 0.6796875, "loss_num": 0.061767578125, "loss_xval": 1.6640625, "num_input_tokens_seen": 75647720, "step": 1142 }, { "epoch": 0.10698741049281603, "grad_norm": 15.698596000671387, "learning_rate": 5e-05, "loss": 1.5414, "num_input_tokens_seen": 75714040, "step": 1143 }, { "epoch": 0.10698741049281603, "loss": 1.4805867671966553, "loss_ce": 0.005000769160687923, "loss_iou": 0.640625, "loss_num": 0.039306640625, "loss_xval": 1.4765625, "num_input_tokens_seen": 75714040, "step": 1143 }, { "epoch": 0.10708101277671175, "grad_norm": 31.52184295654297, "learning_rate": 5e-05, "loss": 1.3789, "num_input_tokens_seen": 75780376, "step": 1144 }, { "epoch": 0.10708101277671175, "loss": 1.4856374263763428, "loss_ce": 0.004680377896875143, "loss_iou": 0.625, "loss_num": 0.046142578125, "loss_xval": 1.484375, "num_input_tokens_seen": 75780376, "step": 1144 }, { "epoch": 0.10717461506060748, "grad_norm": 17.525299072265625, "learning_rate": 5e-05, "loss": 1.7459, "num_input_tokens_seen": 75846568, "step": 1145 }, { "epoch": 0.10717461506060748, "loss": 1.8164008855819702, "loss_ce": 0.00780714675784111, "loss_iou": 0.796875, "loss_num": 0.042724609375, "loss_xval": 1.8125, "num_input_tokens_seen": 75846568, "step": 1145 }, { "epoch": 0.1072682173445032, "grad_norm": 19.073184967041016, "learning_rate": 5e-05, "loss": 1.4566, "num_input_tokens_seen": 75911820, "step": 1146 }, { "epoch": 0.1072682173445032, "loss": 1.1922235488891602, "loss_ce": 0.006188367493450642, "loss_iou": 0.50390625, "loss_num": 0.035888671875, "loss_xval": 1.1875, "num_input_tokens_seen": 75911820, "step": 1146 }, { "epoch": 0.10736181962839893, "grad_norm": 13.780308723449707, "learning_rate": 5e-05, "loss": 1.2774, "num_input_tokens_seen": 75977592, "step": 1147 }, { "epoch": 0.10736181962839893, "loss": 1.286110281944275, "loss_ce": 0.006325116381049156, "loss_iou": 0.5390625, "loss_num": 0.04052734375, "loss_xval": 1.28125, "num_input_tokens_seen": 75977592, "step": 1147 }, { "epoch": 0.10745542191229467, "grad_norm": 25.382017135620117, "learning_rate": 5e-05, "loss": 1.6546, "num_input_tokens_seen": 76043848, "step": 1148 }, { "epoch": 0.10745542191229467, "loss": 1.7315950393676758, "loss_ce": 0.006497358903288841, "loss_iou": 0.75, "loss_num": 0.04443359375, "loss_xval": 1.7265625, "num_input_tokens_seen": 76043848, "step": 1148 }, { "epoch": 0.10754902419619039, "grad_norm": 15.511128425598145, "learning_rate": 5e-05, "loss": 1.7734, "num_input_tokens_seen": 76110168, "step": 1149 }, { "epoch": 0.10754902419619039, "loss": 1.8539836406707764, "loss_ce": 0.003885905258357525, "loss_iou": 0.73828125, "loss_num": 0.07470703125, "loss_xval": 1.8515625, "num_input_tokens_seen": 76110168, "step": 1149 }, { "epoch": 0.10764262648008611, "grad_norm": 18.047719955444336, "learning_rate": 5e-05, "loss": 1.3925, "num_input_tokens_seen": 76176880, "step": 1150 }, { "epoch": 0.10764262648008611, "loss": 1.360733985900879, "loss_ce": 0.0033121337182819843, "loss_iou": 0.609375, "loss_num": 0.02783203125, "loss_xval": 1.359375, "num_input_tokens_seen": 76176880, "step": 1150 }, { "epoch": 0.10773622876398184, "grad_norm": 17.841157913208008, "learning_rate": 5e-05, "loss": 1.4778, "num_input_tokens_seen": 76243416, "step": 1151 }, { "epoch": 0.10773622876398184, "loss": 1.3315365314483643, "loss_ce": 0.005181404761970043, "loss_iou": 0.5546875, "loss_num": 0.043212890625, "loss_xval": 1.328125, "num_input_tokens_seen": 76243416, "step": 1151 }, { "epoch": 0.10782983104787756, "grad_norm": 17.448850631713867, "learning_rate": 5e-05, "loss": 1.452, "num_input_tokens_seen": 76308860, "step": 1152 }, { "epoch": 0.10782983104787756, "loss": 1.2795655727386475, "loss_ce": 0.003442568937316537, "loss_iou": 0.54296875, "loss_num": 0.03857421875, "loss_xval": 1.2734375, "num_input_tokens_seen": 76308860, "step": 1152 }, { "epoch": 0.10792343333177329, "grad_norm": 32.93611145019531, "learning_rate": 5e-05, "loss": 1.5278, "num_input_tokens_seen": 76375076, "step": 1153 }, { "epoch": 0.10792343333177329, "loss": 1.5748491287231445, "loss_ce": 0.0025834650732576847, "loss_iou": 0.6953125, "loss_num": 0.0361328125, "loss_xval": 1.5703125, "num_input_tokens_seen": 76375076, "step": 1153 }, { "epoch": 0.10801703561566903, "grad_norm": 18.724693298339844, "learning_rate": 5e-05, "loss": 1.6567, "num_input_tokens_seen": 76441148, "step": 1154 }, { "epoch": 0.10801703561566903, "loss": 1.6741814613342285, "loss_ce": 0.0023064701817929745, "loss_iou": 0.75, "loss_num": 0.034912109375, "loss_xval": 1.671875, "num_input_tokens_seen": 76441148, "step": 1154 }, { "epoch": 0.10811063789956475, "grad_norm": 16.60007667541504, "learning_rate": 5e-05, "loss": 1.4983, "num_input_tokens_seen": 76508524, "step": 1155 }, { "epoch": 0.10811063789956475, "loss": 1.4101048707962036, "loss_ce": 0.004343102686107159, "loss_iou": 0.6171875, "loss_num": 0.034912109375, "loss_xval": 1.40625, "num_input_tokens_seen": 76508524, "step": 1155 }, { "epoch": 0.10820424018346048, "grad_norm": 35.742671966552734, "learning_rate": 5e-05, "loss": 1.4645, "num_input_tokens_seen": 76574908, "step": 1156 }, { "epoch": 0.10820424018346048, "loss": 1.3999061584472656, "loss_ce": 0.0014685725327581167, "loss_iou": 0.609375, "loss_num": 0.03515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 76574908, "step": 1156 }, { "epoch": 0.1082978424673562, "grad_norm": 14.900714874267578, "learning_rate": 5e-05, "loss": 1.9227, "num_input_tokens_seen": 76641568, "step": 1157 }, { "epoch": 0.1082978424673562, "loss": 2.0242905616760254, "loss_ce": 0.004759383387863636, "loss_iou": 0.8984375, "loss_num": 0.04541015625, "loss_xval": 2.015625, "num_input_tokens_seen": 76641568, "step": 1157 }, { "epoch": 0.10839144475125193, "grad_norm": 19.15547752380371, "learning_rate": 5e-05, "loss": 1.6838, "num_input_tokens_seen": 76708668, "step": 1158 }, { "epoch": 0.10839144475125193, "loss": 1.7164533138275146, "loss_ce": 0.0035627796314656734, "loss_iou": 0.75390625, "loss_num": 0.04150390625, "loss_xval": 1.7109375, "num_input_tokens_seen": 76708668, "step": 1158 }, { "epoch": 0.10848504703514766, "grad_norm": 23.434572219848633, "learning_rate": 5e-05, "loss": 1.4897, "num_input_tokens_seen": 76774852, "step": 1159 }, { "epoch": 0.10848504703514766, "loss": 1.4935109615325928, "loss_ce": 0.0071828728541731834, "loss_iou": 0.609375, "loss_num": 0.05419921875, "loss_xval": 1.484375, "num_input_tokens_seen": 76774852, "step": 1159 }, { "epoch": 0.10857864931904339, "grad_norm": 17.213069915771484, "learning_rate": 5e-05, "loss": 1.5325, "num_input_tokens_seen": 76840692, "step": 1160 }, { "epoch": 0.10857864931904339, "loss": 1.3890526294708252, "loss_ce": 0.004775314591825008, "loss_iou": 0.609375, "loss_num": 0.033447265625, "loss_xval": 1.3828125, "num_input_tokens_seen": 76840692, "step": 1160 }, { "epoch": 0.10867225160293911, "grad_norm": 10.26416301727295, "learning_rate": 5e-05, "loss": 1.4267, "num_input_tokens_seen": 76907736, "step": 1161 }, { "epoch": 0.10867225160293911, "loss": 1.375671625137329, "loss_ce": 0.005554396193474531, "loss_iou": 0.6015625, "loss_num": 0.0341796875, "loss_xval": 1.3671875, "num_input_tokens_seen": 76907736, "step": 1161 }, { "epoch": 0.10876585388683484, "grad_norm": 86.20824432373047, "learning_rate": 5e-05, "loss": 1.7641, "num_input_tokens_seen": 76973836, "step": 1162 }, { "epoch": 0.10876585388683484, "loss": 1.735271692276001, "loss_ce": 0.004802928771823645, "loss_iou": 0.671875, "loss_num": 0.078125, "loss_xval": 1.734375, "num_input_tokens_seen": 76973836, "step": 1162 }, { "epoch": 0.10885945617073056, "grad_norm": 19.393543243408203, "learning_rate": 5e-05, "loss": 1.9077, "num_input_tokens_seen": 77040316, "step": 1163 }, { "epoch": 0.10885945617073056, "loss": 1.9816315174102783, "loss_ce": 0.005069003440439701, "loss_iou": 0.84375, "loss_num": 0.056884765625, "loss_xval": 1.9765625, "num_input_tokens_seen": 77040316, "step": 1163 }, { "epoch": 0.10895305845462629, "grad_norm": 25.78728675842285, "learning_rate": 5e-05, "loss": 1.6901, "num_input_tokens_seen": 77105984, "step": 1164 }, { "epoch": 0.10895305845462629, "loss": 1.5702900886535645, "loss_ce": 0.0038838728796690702, "loss_iou": 0.70703125, "loss_num": 0.0301513671875, "loss_xval": 1.5625, "num_input_tokens_seen": 77105984, "step": 1164 }, { "epoch": 0.10904666073852202, "grad_norm": 26.9300537109375, "learning_rate": 5e-05, "loss": 1.5518, "num_input_tokens_seen": 77172180, "step": 1165 }, { "epoch": 0.10904666073852202, "loss": 1.6828746795654297, "loss_ce": 0.008069908246397972, "loss_iou": 0.72265625, "loss_num": 0.04638671875, "loss_xval": 1.671875, "num_input_tokens_seen": 77172180, "step": 1165 }, { "epoch": 0.10914026302241775, "grad_norm": 19.31071662902832, "learning_rate": 5e-05, "loss": 1.7768, "num_input_tokens_seen": 77238456, "step": 1166 }, { "epoch": 0.10914026302241775, "loss": 1.920846939086914, "loss_ce": 0.005807799287140369, "loss_iou": 0.82421875, "loss_num": 0.05322265625, "loss_xval": 1.9140625, "num_input_tokens_seen": 77238456, "step": 1166 }, { "epoch": 0.10923386530631347, "grad_norm": 11.709778785705566, "learning_rate": 5e-05, "loss": 1.4159, "num_input_tokens_seen": 77305156, "step": 1167 }, { "epoch": 0.10923386530631347, "loss": 1.3690240383148193, "loss_ce": 0.002813051687553525, "loss_iou": 0.60546875, "loss_num": 0.031494140625, "loss_xval": 1.3671875, "num_input_tokens_seen": 77305156, "step": 1167 }, { "epoch": 0.1093274675902092, "grad_norm": 16.924415588378906, "learning_rate": 5e-05, "loss": 1.5059, "num_input_tokens_seen": 77369532, "step": 1168 }, { "epoch": 0.1093274675902092, "loss": 1.5888309478759766, "loss_ce": 0.006799612659960985, "loss_iou": 0.609375, "loss_num": 0.07275390625, "loss_xval": 1.578125, "num_input_tokens_seen": 77369532, "step": 1168 }, { "epoch": 0.10942106987410492, "grad_norm": 14.870468139648438, "learning_rate": 5e-05, "loss": 1.6683, "num_input_tokens_seen": 77436100, "step": 1169 }, { "epoch": 0.10942106987410492, "loss": 1.7231367826461792, "loss_ce": 0.006339915096759796, "loss_iou": 0.75, "loss_num": 0.043701171875, "loss_xval": 1.71875, "num_input_tokens_seen": 77436100, "step": 1169 }, { "epoch": 0.10951467215800066, "grad_norm": 15.986180305480957, "learning_rate": 5e-05, "loss": 1.454, "num_input_tokens_seen": 77500004, "step": 1170 }, { "epoch": 0.10951467215800066, "loss": 1.194554090499878, "loss_ce": 0.003269959706813097, "loss_iou": 0.51171875, "loss_num": 0.03369140625, "loss_xval": 1.1875, "num_input_tokens_seen": 77500004, "step": 1170 }, { "epoch": 0.10960827444189639, "grad_norm": 19.57866668701172, "learning_rate": 5e-05, "loss": 1.429, "num_input_tokens_seen": 77565940, "step": 1171 }, { "epoch": 0.10960827444189639, "loss": 1.4926564693450928, "loss_ce": 0.005840121768414974, "loss_iou": 0.62890625, "loss_num": 0.04541015625, "loss_xval": 1.484375, "num_input_tokens_seen": 77565940, "step": 1171 }, { "epoch": 0.10970187672579211, "grad_norm": 33.70952224731445, "learning_rate": 5e-05, "loss": 1.8232, "num_input_tokens_seen": 77632920, "step": 1172 }, { "epoch": 0.10970187672579211, "loss": 1.8883156776428223, "loss_ce": 0.008432943373918533, "loss_iou": 0.8125, "loss_num": 0.05029296875, "loss_xval": 1.8828125, "num_input_tokens_seen": 77632920, "step": 1172 }, { "epoch": 0.10979547900968784, "grad_norm": 15.238907814025879, "learning_rate": 5e-05, "loss": 1.6148, "num_input_tokens_seen": 77699720, "step": 1173 }, { "epoch": 0.10979547900968784, "loss": 1.4983510971069336, "loss_ce": 0.006163555197417736, "loss_iou": 0.6328125, "loss_num": 0.045654296875, "loss_xval": 1.4921875, "num_input_tokens_seen": 77699720, "step": 1173 }, { "epoch": 0.10988908129358356, "grad_norm": 15.282456398010254, "learning_rate": 5e-05, "loss": 1.6071, "num_input_tokens_seen": 77765748, "step": 1174 }, { "epoch": 0.10988908129358356, "loss": 1.3932451009750366, "loss_ce": 0.004573243670165539, "loss_iou": 0.6015625, "loss_num": 0.03759765625, "loss_xval": 1.390625, "num_input_tokens_seen": 77765748, "step": 1174 }, { "epoch": 0.10998268357747928, "grad_norm": 10.538078308105469, "learning_rate": 5e-05, "loss": 1.642, "num_input_tokens_seen": 77832460, "step": 1175 }, { "epoch": 0.10998268357747928, "loss": 1.758239984512329, "loss_ce": 0.008239900693297386, "loss_iou": 0.734375, "loss_num": 0.055908203125, "loss_xval": 1.75, "num_input_tokens_seen": 77832460, "step": 1175 }, { "epoch": 0.11007628586137502, "grad_norm": 13.04195785522461, "learning_rate": 5e-05, "loss": 1.4915, "num_input_tokens_seen": 77898296, "step": 1176 }, { "epoch": 0.11007628586137502, "loss": 1.4313489198684692, "loss_ce": 0.005323539488017559, "loss_iou": 0.58984375, "loss_num": 0.04931640625, "loss_xval": 1.4296875, "num_input_tokens_seen": 77898296, "step": 1176 }, { "epoch": 0.11016988814527075, "grad_norm": 15.571574211120605, "learning_rate": 5e-05, "loss": 1.5161, "num_input_tokens_seen": 77965436, "step": 1177 }, { "epoch": 0.11016988814527075, "loss": 1.526175618171692, "loss_ce": 0.009085720404982567, "loss_iou": 0.62109375, "loss_num": 0.055419921875, "loss_xval": 1.515625, "num_input_tokens_seen": 77965436, "step": 1177 }, { "epoch": 0.11026349042916647, "grad_norm": 20.632858276367188, "learning_rate": 5e-05, "loss": 1.5977, "num_input_tokens_seen": 78031220, "step": 1178 }, { "epoch": 0.11026349042916647, "loss": 1.8103036880493164, "loss_ce": 0.0017099155811592937, "loss_iou": 0.7734375, "loss_num": 0.052734375, "loss_xval": 1.8125, "num_input_tokens_seen": 78031220, "step": 1178 }, { "epoch": 0.1103570927130622, "grad_norm": 48.338584899902344, "learning_rate": 5e-05, "loss": 1.7292, "num_input_tokens_seen": 78097232, "step": 1179 }, { "epoch": 0.1103570927130622, "loss": 1.6525057554244995, "loss_ce": 0.009988706558942795, "loss_iou": 0.64453125, "loss_num": 0.0703125, "loss_xval": 1.640625, "num_input_tokens_seen": 78097232, "step": 1179 }, { "epoch": 0.11045069499695792, "grad_norm": 17.45369529724121, "learning_rate": 5e-05, "loss": 1.7281, "num_input_tokens_seen": 78163752, "step": 1180 }, { "epoch": 0.11045069499695792, "loss": 1.7762234210968018, "loss_ce": 0.0018094154074788094, "loss_iou": 0.77734375, "loss_num": 0.04443359375, "loss_xval": 1.7734375, "num_input_tokens_seen": 78163752, "step": 1180 }, { "epoch": 0.11054429728085366, "grad_norm": 16.85646629333496, "learning_rate": 5e-05, "loss": 1.5844, "num_input_tokens_seen": 78229032, "step": 1181 }, { "epoch": 0.11054429728085366, "loss": 1.802498698234558, "loss_ce": 0.003182369517162442, "loss_iou": 0.734375, "loss_num": 0.06591796875, "loss_xval": 1.796875, "num_input_tokens_seen": 78229032, "step": 1181 }, { "epoch": 0.11063789956474938, "grad_norm": 30.342195510864258, "learning_rate": 5e-05, "loss": 1.5089, "num_input_tokens_seen": 78295148, "step": 1182 }, { "epoch": 0.11063789956474938, "loss": 1.5325207710266113, "loss_ce": 0.008106620982289314, "loss_iou": 0.63671875, "loss_num": 0.05029296875, "loss_xval": 1.5234375, "num_input_tokens_seen": 78295148, "step": 1182 }, { "epoch": 0.11073150184864511, "grad_norm": 74.4539794921875, "learning_rate": 5e-05, "loss": 1.6102, "num_input_tokens_seen": 78358716, "step": 1183 }, { "epoch": 0.11073150184864511, "loss": 1.818831205368042, "loss_ce": 0.002913268283009529, "loss_iou": 0.796875, "loss_num": 0.043212890625, "loss_xval": 1.8125, "num_input_tokens_seen": 78358716, "step": 1183 }, { "epoch": 0.11082510413254083, "grad_norm": 17.200271606445312, "learning_rate": 5e-05, "loss": 1.5589, "num_input_tokens_seen": 78425420, "step": 1184 }, { "epoch": 0.11082510413254083, "loss": 1.528570532798767, "loss_ce": 0.004766830243170261, "loss_iou": 0.671875, "loss_num": 0.036865234375, "loss_xval": 1.5234375, "num_input_tokens_seen": 78425420, "step": 1184 }, { "epoch": 0.11091870641643656, "grad_norm": 22.485254287719727, "learning_rate": 5e-05, "loss": 1.7477, "num_input_tokens_seen": 78491684, "step": 1185 }, { "epoch": 0.11091870641643656, "loss": 1.7003130912780762, "loss_ce": 0.005000641569495201, "loss_iou": 0.69921875, "loss_num": 0.05908203125, "loss_xval": 1.6953125, "num_input_tokens_seen": 78491684, "step": 1185 }, { "epoch": 0.11101230870033228, "grad_norm": 17.89415740966797, "learning_rate": 5e-05, "loss": 1.7692, "num_input_tokens_seen": 78557320, "step": 1186 }, { "epoch": 0.11101230870033228, "loss": 1.7814184427261353, "loss_ce": 0.0021215705201029778, "loss_iou": 0.765625, "loss_num": 0.05029296875, "loss_xval": 1.78125, "num_input_tokens_seen": 78557320, "step": 1186 }, { "epoch": 0.11110591098422802, "grad_norm": 13.24902629852295, "learning_rate": 5e-05, "loss": 1.5665, "num_input_tokens_seen": 78623740, "step": 1187 }, { "epoch": 0.11110591098422802, "loss": 1.5412065982818604, "loss_ce": 0.005073728505522013, "loss_iou": 0.6484375, "loss_num": 0.04833984375, "loss_xval": 1.5390625, "num_input_tokens_seen": 78623740, "step": 1187 }, { "epoch": 0.11119951326812375, "grad_norm": 23.279170989990234, "learning_rate": 5e-05, "loss": 1.2237, "num_input_tokens_seen": 78689792, "step": 1188 }, { "epoch": 0.11119951326812375, "loss": 1.0964205265045166, "loss_ce": 0.0032809292897582054, "loss_iou": 0.4375, "loss_num": 0.04345703125, "loss_xval": 1.09375, "num_input_tokens_seen": 78689792, "step": 1188 }, { "epoch": 0.11129311555201947, "grad_norm": 14.37207317352295, "learning_rate": 5e-05, "loss": 1.5489, "num_input_tokens_seen": 78755728, "step": 1189 }, { "epoch": 0.11129311555201947, "loss": 1.81374990940094, "loss_ce": 0.004179581068456173, "loss_iou": 0.76171875, "loss_num": 0.057373046875, "loss_xval": 1.8125, "num_input_tokens_seen": 78755728, "step": 1189 }, { "epoch": 0.1113867178359152, "grad_norm": 23.36548614501953, "learning_rate": 5e-05, "loss": 1.4067, "num_input_tokens_seen": 78822156, "step": 1190 }, { "epoch": 0.1113867178359152, "loss": 1.4242346286773682, "loss_ce": 0.006265765056014061, "loss_iou": 0.59765625, "loss_num": 0.044677734375, "loss_xval": 1.421875, "num_input_tokens_seen": 78822156, "step": 1190 }, { "epoch": 0.11148032011981092, "grad_norm": 18.049659729003906, "learning_rate": 5e-05, "loss": 1.5596, "num_input_tokens_seen": 78888476, "step": 1191 }, { "epoch": 0.11148032011981092, "loss": 1.4020355939865112, "loss_ce": 0.00359805254265666, "loss_iou": 0.59375, "loss_num": 0.042724609375, "loss_xval": 1.3984375, "num_input_tokens_seen": 78888476, "step": 1191 }, { "epoch": 0.11157392240370666, "grad_norm": 20.797494888305664, "learning_rate": 5e-05, "loss": 1.5431, "num_input_tokens_seen": 78954864, "step": 1192 }, { "epoch": 0.11157392240370666, "loss": 1.367311954498291, "loss_ce": 0.005495484918355942, "loss_iou": 0.5078125, "loss_num": 0.06884765625, "loss_xval": 1.359375, "num_input_tokens_seen": 78954864, "step": 1192 }, { "epoch": 0.11166752468760238, "grad_norm": 24.160797119140625, "learning_rate": 5e-05, "loss": 1.578, "num_input_tokens_seen": 79022160, "step": 1193 }, { "epoch": 0.11166752468760238, "loss": 1.5062320232391357, "loss_ce": 0.0013492072466760874, "loss_iou": 0.66015625, "loss_num": 0.036865234375, "loss_xval": 1.5078125, "num_input_tokens_seen": 79022160, "step": 1193 }, { "epoch": 0.1117611269714981, "grad_norm": 121.45783996582031, "learning_rate": 5e-05, "loss": 1.6582, "num_input_tokens_seen": 79088028, "step": 1194 }, { "epoch": 0.1117611269714981, "loss": 1.662060022354126, "loss_ce": 0.006786718033254147, "loss_iou": 0.69140625, "loss_num": 0.053955078125, "loss_xval": 1.65625, "num_input_tokens_seen": 79088028, "step": 1194 }, { "epoch": 0.11185472925539383, "grad_norm": 26.80828094482422, "learning_rate": 5e-05, "loss": 1.5069, "num_input_tokens_seen": 79154080, "step": 1195 }, { "epoch": 0.11185472925539383, "loss": 1.549135446548462, "loss_ce": 0.005190128460526466, "loss_iou": 0.6875, "loss_num": 0.0341796875, "loss_xval": 1.546875, "num_input_tokens_seen": 79154080, "step": 1195 }, { "epoch": 0.11194833153928956, "grad_norm": 44.29054641723633, "learning_rate": 5e-05, "loss": 1.6384, "num_input_tokens_seen": 79222248, "step": 1196 }, { "epoch": 0.11194833153928956, "loss": 1.426701307296753, "loss_ce": 0.002873264020308852, "loss_iou": 0.64453125, "loss_num": 0.027099609375, "loss_xval": 1.421875, "num_input_tokens_seen": 79222248, "step": 1196 }, { "epoch": 0.11204193382318528, "grad_norm": 17.937210083007812, "learning_rate": 5e-05, "loss": 1.4789, "num_input_tokens_seen": 79287212, "step": 1197 }, { "epoch": 0.11204193382318528, "loss": 1.263474702835083, "loss_ce": 0.004197364207357168, "loss_iou": 0.515625, "loss_num": 0.044921875, "loss_xval": 1.2578125, "num_input_tokens_seen": 79287212, "step": 1197 }, { "epoch": 0.11213553610708102, "grad_norm": 26.6240291595459, "learning_rate": 5e-05, "loss": 1.4653, "num_input_tokens_seen": 79353820, "step": 1198 }, { "epoch": 0.11213553610708102, "loss": 1.3803198337554932, "loss_ce": 0.009226055815815926, "loss_iou": 0.56640625, "loss_num": 0.0478515625, "loss_xval": 1.375, "num_input_tokens_seen": 79353820, "step": 1198 }, { "epoch": 0.11222913839097674, "grad_norm": 17.058929443359375, "learning_rate": 5e-05, "loss": 1.6894, "num_input_tokens_seen": 79420152, "step": 1199 }, { "epoch": 0.11222913839097674, "loss": 1.6565816402435303, "loss_ce": 0.005214488599449396, "loss_iou": 0.73828125, "loss_num": 0.03515625, "loss_xval": 1.6484375, "num_input_tokens_seen": 79420152, "step": 1199 }, { "epoch": 0.11232274067487247, "grad_norm": 17.481552124023438, "learning_rate": 5e-05, "loss": 1.3827, "num_input_tokens_seen": 79487128, "step": 1200 }, { "epoch": 0.11232274067487247, "loss": 1.5578417778015137, "loss_ce": 0.0031542000360786915, "loss_iou": 0.67578125, "loss_num": 0.04052734375, "loss_xval": 1.5546875, "num_input_tokens_seen": 79487128, "step": 1200 }, { "epoch": 0.11241634295876819, "grad_norm": 17.883695602416992, "learning_rate": 5e-05, "loss": 1.824, "num_input_tokens_seen": 79553028, "step": 1201 }, { "epoch": 0.11241634295876819, "loss": 1.742805004119873, "loss_ce": 0.006476853042840958, "loss_iou": 0.71875, "loss_num": 0.0595703125, "loss_xval": 1.734375, "num_input_tokens_seen": 79553028, "step": 1201 }, { "epoch": 0.11250994524266392, "grad_norm": 31.487567901611328, "learning_rate": 5e-05, "loss": 1.6337, "num_input_tokens_seen": 79618784, "step": 1202 }, { "epoch": 0.11250994524266392, "loss": 1.580298662185669, "loss_ce": 0.0070565007627010345, "loss_iou": 0.67578125, "loss_num": 0.044677734375, "loss_xval": 1.5703125, "num_input_tokens_seen": 79618784, "step": 1202 }, { "epoch": 0.11260354752655964, "grad_norm": 13.180530548095703, "learning_rate": 5e-05, "loss": 1.6058, "num_input_tokens_seen": 79684240, "step": 1203 }, { "epoch": 0.11260354752655964, "loss": 1.654146671295166, "loss_ce": 0.005556548945605755, "loss_iou": 0.69921875, "loss_num": 0.05029296875, "loss_xval": 1.6484375, "num_input_tokens_seen": 79684240, "step": 1203 }, { "epoch": 0.11269714981045538, "grad_norm": 18.37868881225586, "learning_rate": 5e-05, "loss": 1.5486, "num_input_tokens_seen": 79749764, "step": 1204 }, { "epoch": 0.11269714981045538, "loss": 1.5879004001617432, "loss_ce": 0.005380784161388874, "loss_iou": 0.69921875, "loss_num": 0.037109375, "loss_xval": 1.5859375, "num_input_tokens_seen": 79749764, "step": 1204 }, { "epoch": 0.1127907520943511, "grad_norm": 21.069881439208984, "learning_rate": 5e-05, "loss": 1.2377, "num_input_tokens_seen": 79815504, "step": 1205 }, { "epoch": 0.1127907520943511, "loss": 1.0924522876739502, "loss_ce": 0.0040734270587563515, "loss_iou": 0.48046875, "loss_num": 0.02587890625, "loss_xval": 1.0859375, "num_input_tokens_seen": 79815504, "step": 1205 }, { "epoch": 0.11288435437824683, "grad_norm": 55.62563705444336, "learning_rate": 5e-05, "loss": 1.534, "num_input_tokens_seen": 79880720, "step": 1206 }, { "epoch": 0.11288435437824683, "loss": 1.7343441247940063, "loss_ce": 0.001189854578115046, "loss_iou": 0.76171875, "loss_num": 0.04150390625, "loss_xval": 1.734375, "num_input_tokens_seen": 79880720, "step": 1206 }, { "epoch": 0.11297795666214255, "grad_norm": 13.673062324523926, "learning_rate": 5e-05, "loss": 1.2566, "num_input_tokens_seen": 79947628, "step": 1207 }, { "epoch": 0.11297795666214255, "loss": 1.2388091087341309, "loss_ce": 0.012490655295550823, "loss_iou": 0.5234375, "loss_num": 0.036376953125, "loss_xval": 1.2265625, "num_input_tokens_seen": 79947628, "step": 1207 }, { "epoch": 0.11307155894603828, "grad_norm": 31.4323787689209, "learning_rate": 5e-05, "loss": 1.5795, "num_input_tokens_seen": 80013796, "step": 1208 }, { "epoch": 0.11307155894603828, "loss": 1.6102848052978516, "loss_ce": 0.006769146770238876, "loss_iou": 0.6953125, "loss_num": 0.04296875, "loss_xval": 1.6015625, "num_input_tokens_seen": 80013796, "step": 1208 }, { "epoch": 0.11316516122993402, "grad_norm": 19.149446487426758, "learning_rate": 5e-05, "loss": 1.7813, "num_input_tokens_seen": 80080252, "step": 1209 }, { "epoch": 0.11316516122993402, "loss": 2.0932846069335938, "loss_ce": 0.004417495336383581, "loss_iou": 0.84765625, "loss_num": 0.0791015625, "loss_xval": 2.09375, "num_input_tokens_seen": 80080252, "step": 1209 }, { "epoch": 0.11325876351382974, "grad_norm": 23.946191787719727, "learning_rate": 5e-05, "loss": 1.4635, "num_input_tokens_seen": 80147240, "step": 1210 }, { "epoch": 0.11325876351382974, "loss": 1.636222243309021, "loss_ce": 0.010733958333730698, "loss_iou": 0.6875, "loss_num": 0.05029296875, "loss_xval": 1.625, "num_input_tokens_seen": 80147240, "step": 1210 }, { "epoch": 0.11335236579772547, "grad_norm": 15.929207801818848, "learning_rate": 5e-05, "loss": 1.1694, "num_input_tokens_seen": 80212560, "step": 1211 }, { "epoch": 0.11335236579772547, "loss": 1.4668045043945312, "loss_ce": 0.005378691479563713, "loss_iou": 0.6015625, "loss_num": 0.051025390625, "loss_xval": 1.4609375, "num_input_tokens_seen": 80212560, "step": 1211 }, { "epoch": 0.11344596808162119, "grad_norm": 16.284374237060547, "learning_rate": 5e-05, "loss": 1.6608, "num_input_tokens_seen": 80277664, "step": 1212 }, { "epoch": 0.11344596808162119, "loss": 1.7077348232269287, "loss_ce": 0.0076615395955741405, "loss_iou": 0.6953125, "loss_num": 0.061279296875, "loss_xval": 1.703125, "num_input_tokens_seen": 80277664, "step": 1212 }, { "epoch": 0.11353957036551691, "grad_norm": 20.27515983581543, "learning_rate": 5e-05, "loss": 1.6985, "num_input_tokens_seen": 80344340, "step": 1213 }, { "epoch": 0.11353957036551691, "loss": 1.7379794120788574, "loss_ce": 0.005557590164244175, "loss_iou": 0.734375, "loss_num": 0.0517578125, "loss_xval": 1.734375, "num_input_tokens_seen": 80344340, "step": 1213 }, { "epoch": 0.11363317264941264, "grad_norm": 28.41344451904297, "learning_rate": 5e-05, "loss": 1.1733, "num_input_tokens_seen": 80410172, "step": 1214 }, { "epoch": 0.11363317264941264, "loss": 1.2601943016052246, "loss_ce": 0.005311502143740654, "loss_iou": 0.55078125, "loss_num": 0.03125, "loss_xval": 1.2578125, "num_input_tokens_seen": 80410172, "step": 1214 }, { "epoch": 0.11372677493330838, "grad_norm": 24.274377822875977, "learning_rate": 5e-05, "loss": 1.6907, "num_input_tokens_seen": 80475008, "step": 1215 }, { "epoch": 0.11372677493330838, "loss": 1.854823112487793, "loss_ce": 0.005213777534663677, "loss_iou": 0.7578125, "loss_num": 0.0673828125, "loss_xval": 1.8515625, "num_input_tokens_seen": 80475008, "step": 1215 }, { "epoch": 0.1138203772172041, "grad_norm": 11.962911605834961, "learning_rate": 5e-05, "loss": 1.7314, "num_input_tokens_seen": 80540392, "step": 1216 }, { "epoch": 0.1138203772172041, "loss": 1.848841667175293, "loss_ce": 0.004115085117518902, "loss_iou": 0.796875, "loss_num": 0.049560546875, "loss_xval": 1.84375, "num_input_tokens_seen": 80540392, "step": 1216 }, { "epoch": 0.11391397950109983, "grad_norm": 12.080779075622559, "learning_rate": 5e-05, "loss": 1.3515, "num_input_tokens_seen": 80607548, "step": 1217 }, { "epoch": 0.11391397950109983, "loss": 1.11570143699646, "loss_ce": 0.0020539246033877134, "loss_iou": 0.46484375, "loss_num": 0.036376953125, "loss_xval": 1.1171875, "num_input_tokens_seen": 80607548, "step": 1217 }, { "epoch": 0.11400758178499555, "grad_norm": 39.70487976074219, "learning_rate": 5e-05, "loss": 1.6139, "num_input_tokens_seen": 80673836, "step": 1218 }, { "epoch": 0.11400758178499555, "loss": 1.6444294452667236, "loss_ce": 0.003804431762546301, "loss_iou": 0.69140625, "loss_num": 0.051025390625, "loss_xval": 1.640625, "num_input_tokens_seen": 80673836, "step": 1218 }, { "epoch": 0.11410118406889128, "grad_norm": 11.806645393371582, "learning_rate": 5e-05, "loss": 1.796, "num_input_tokens_seen": 80739184, "step": 1219 }, { "epoch": 0.11410118406889128, "loss": 1.8441953659057617, "loss_ce": 0.007281385827809572, "loss_iou": 0.8359375, "loss_num": 0.033447265625, "loss_xval": 1.8359375, "num_input_tokens_seen": 80739184, "step": 1219 }, { "epoch": 0.11419478635278701, "grad_norm": 42.97815704345703, "learning_rate": 5e-05, "loss": 1.6916, "num_input_tokens_seen": 80804612, "step": 1220 }, { "epoch": 0.11419478635278701, "loss": 1.8572298288345337, "loss_ce": 0.001761129591614008, "loss_iou": 0.80859375, "loss_num": 0.04736328125, "loss_xval": 1.859375, "num_input_tokens_seen": 80804612, "step": 1220 }, { "epoch": 0.11428838863668274, "grad_norm": 32.125545501708984, "learning_rate": 5e-05, "loss": 1.4851, "num_input_tokens_seen": 80871492, "step": 1221 }, { "epoch": 0.11428838863668274, "loss": 1.7160561084747314, "loss_ce": 0.005118582397699356, "loss_iou": 0.75, "loss_num": 0.04150390625, "loss_xval": 1.7109375, "num_input_tokens_seen": 80871492, "step": 1221 }, { "epoch": 0.11438199092057846, "grad_norm": 23.3707218170166, "learning_rate": 5e-05, "loss": 1.2845, "num_input_tokens_seen": 80936608, "step": 1222 }, { "epoch": 0.11438199092057846, "loss": 1.3835852146148682, "loss_ce": 0.006143780425190926, "loss_iou": 0.5703125, "loss_num": 0.047607421875, "loss_xval": 1.375, "num_input_tokens_seen": 80936608, "step": 1222 }, { "epoch": 0.11447559320447419, "grad_norm": 17.40350914001465, "learning_rate": 5e-05, "loss": 1.4408, "num_input_tokens_seen": 81003080, "step": 1223 }, { "epoch": 0.11447559320447419, "loss": 1.3919970989227295, "loss_ce": 0.003813605522736907, "loss_iou": 0.6484375, "loss_num": 0.01806640625, "loss_xval": 1.390625, "num_input_tokens_seen": 81003080, "step": 1223 }, { "epoch": 0.11456919548836991, "grad_norm": 29.1763973236084, "learning_rate": 5e-05, "loss": 1.7472, "num_input_tokens_seen": 81069080, "step": 1224 }, { "epoch": 0.11456919548836991, "loss": 1.8503440618515015, "loss_ce": 0.001711297663860023, "loss_iou": 0.84375, "loss_num": 0.0322265625, "loss_xval": 1.8515625, "num_input_tokens_seen": 81069080, "step": 1224 }, { "epoch": 0.11466279777226564, "grad_norm": 15.609042167663574, "learning_rate": 5e-05, "loss": 1.5495, "num_input_tokens_seen": 81136428, "step": 1225 }, { "epoch": 0.11466279777226564, "loss": 1.3167450428009033, "loss_ce": 0.0060760462656617165, "loss_iou": 0.58984375, "loss_num": 0.0257568359375, "loss_xval": 1.3125, "num_input_tokens_seen": 81136428, "step": 1225 }, { "epoch": 0.11475640005616138, "grad_norm": 24.668973922729492, "learning_rate": 5e-05, "loss": 1.5587, "num_input_tokens_seen": 81202296, "step": 1226 }, { "epoch": 0.11475640005616138, "loss": 1.5846580266952515, "loss_ce": 0.0016501974314451218, "loss_iou": 0.68359375, "loss_num": 0.04345703125, "loss_xval": 1.5859375, "num_input_tokens_seen": 81202296, "step": 1226 }, { "epoch": 0.1148500023400571, "grad_norm": 78.94477081298828, "learning_rate": 5e-05, "loss": 1.8377, "num_input_tokens_seen": 81269728, "step": 1227 }, { "epoch": 0.1148500023400571, "loss": 1.6405736207962036, "loss_ce": 0.003854891285300255, "loss_iou": 0.75, "loss_num": 0.02685546875, "loss_xval": 1.640625, "num_input_tokens_seen": 81269728, "step": 1227 }, { "epoch": 0.11494360462395282, "grad_norm": 13.085610389709473, "learning_rate": 5e-05, "loss": 1.6935, "num_input_tokens_seen": 81336104, "step": 1228 }, { "epoch": 0.11494360462395282, "loss": 1.722845196723938, "loss_ce": 0.002264118054881692, "loss_iou": 0.74609375, "loss_num": 0.04638671875, "loss_xval": 1.71875, "num_input_tokens_seen": 81336104, "step": 1228 }, { "epoch": 0.11503720690784855, "grad_norm": 15.163686752319336, "learning_rate": 5e-05, "loss": 1.6346, "num_input_tokens_seen": 81403708, "step": 1229 }, { "epoch": 0.11503720690784855, "loss": 1.617916464805603, "loss_ce": 0.006588327698409557, "loss_iou": 0.7109375, "loss_num": 0.038330078125, "loss_xval": 1.609375, "num_input_tokens_seen": 81403708, "step": 1229 }, { "epoch": 0.11513080919174427, "grad_norm": 9.518630981445312, "learning_rate": 5e-05, "loss": 1.2484, "num_input_tokens_seen": 81469744, "step": 1230 }, { "epoch": 0.11513080919174427, "loss": 1.3734846115112305, "loss_ce": 0.004344025161117315, "loss_iou": 0.61328125, "loss_num": 0.0281982421875, "loss_xval": 1.3671875, "num_input_tokens_seen": 81469744, "step": 1230 }, { "epoch": 0.11522441147564001, "grad_norm": 17.46893882751465, "learning_rate": 5e-05, "loss": 1.7261, "num_input_tokens_seen": 81535924, "step": 1231 }, { "epoch": 0.11522441147564001, "loss": 1.5726174116134644, "loss_ce": 0.007187769748270512, "loss_iou": 0.671875, "loss_num": 0.044677734375, "loss_xval": 1.5625, "num_input_tokens_seen": 81535924, "step": 1231 }, { "epoch": 0.11531801375953574, "grad_norm": 22.973176956176758, "learning_rate": 5e-05, "loss": 1.5868, "num_input_tokens_seen": 81602740, "step": 1232 }, { "epoch": 0.11531801375953574, "loss": 1.644413948059082, "loss_ce": 0.0028124612290412188, "loss_iou": 0.7265625, "loss_num": 0.036865234375, "loss_xval": 1.640625, "num_input_tokens_seen": 81602740, "step": 1232 }, { "epoch": 0.11541161604343146, "grad_norm": 22.150278091430664, "learning_rate": 5e-05, "loss": 1.4655, "num_input_tokens_seen": 81668772, "step": 1233 }, { "epoch": 0.11541161604343146, "loss": 1.3677325248718262, "loss_ce": 0.0022540693171322346, "loss_iou": 0.59765625, "loss_num": 0.033203125, "loss_xval": 1.3671875, "num_input_tokens_seen": 81668772, "step": 1233 }, { "epoch": 0.11550521832732719, "grad_norm": 16.834016799926758, "learning_rate": 5e-05, "loss": 1.7075, "num_input_tokens_seen": 81736168, "step": 1234 }, { "epoch": 0.11550521832732719, "loss": 1.7671141624450684, "loss_ce": 0.0014891426544636488, "loss_iou": 0.7734375, "loss_num": 0.044189453125, "loss_xval": 1.765625, "num_input_tokens_seen": 81736168, "step": 1234 }, { "epoch": 0.11559882061122291, "grad_norm": 11.076343536376953, "learning_rate": 5e-05, "loss": 1.2712, "num_input_tokens_seen": 81802404, "step": 1235 }, { "epoch": 0.11559882061122291, "loss": 1.2763545513153076, "loss_ce": 0.003893628716468811, "loss_iou": 0.5234375, "loss_num": 0.045654296875, "loss_xval": 1.2734375, "num_input_tokens_seen": 81802404, "step": 1235 }, { "epoch": 0.11569242289511864, "grad_norm": 20.424575805664062, "learning_rate": 5e-05, "loss": 1.4339, "num_input_tokens_seen": 81868092, "step": 1236 }, { "epoch": 0.11569242289511864, "loss": 1.5448195934295654, "loss_ce": 0.0028273831121623516, "loss_iou": 0.67578125, "loss_num": 0.038818359375, "loss_xval": 1.5390625, "num_input_tokens_seen": 81868092, "step": 1236 }, { "epoch": 0.11578602517901437, "grad_norm": 51.29452896118164, "learning_rate": 5e-05, "loss": 1.5949, "num_input_tokens_seen": 81934956, "step": 1237 }, { "epoch": 0.11578602517901437, "loss": 1.6440658569335938, "loss_ce": 0.0044175381772220135, "loss_iou": 0.73828125, "loss_num": 0.032470703125, "loss_xval": 1.640625, "num_input_tokens_seen": 81934956, "step": 1237 }, { "epoch": 0.1158796274629101, "grad_norm": 15.815311431884766, "learning_rate": 5e-05, "loss": 1.7589, "num_input_tokens_seen": 82001952, "step": 1238 }, { "epoch": 0.1158796274629101, "loss": 1.9351770877838135, "loss_ce": 0.007442661561071873, "loss_iou": 0.8046875, "loss_num": 0.0634765625, "loss_xval": 1.9296875, "num_input_tokens_seen": 82001952, "step": 1238 }, { "epoch": 0.11597322974680582, "grad_norm": 18.532611846923828, "learning_rate": 5e-05, "loss": 1.6198, "num_input_tokens_seen": 82068772, "step": 1239 }, { "epoch": 0.11597322974680582, "loss": 1.597364902496338, "loss_ce": 0.0041031865403056145, "loss_iou": 0.69140625, "loss_num": 0.042236328125, "loss_xval": 1.59375, "num_input_tokens_seen": 82068772, "step": 1239 }, { "epoch": 0.11606683203070155, "grad_norm": 18.645465850830078, "learning_rate": 5e-05, "loss": 1.7358, "num_input_tokens_seen": 82135532, "step": 1240 }, { "epoch": 0.11606683203070155, "loss": 1.7387772798538208, "loss_ce": 0.004402315244078636, "loss_iou": 0.7109375, "loss_num": 0.0634765625, "loss_xval": 1.734375, "num_input_tokens_seen": 82135532, "step": 1240 }, { "epoch": 0.11616043431459727, "grad_norm": 16.821317672729492, "learning_rate": 5e-05, "loss": 1.4882, "num_input_tokens_seen": 82201672, "step": 1241 }, { "epoch": 0.11616043431459727, "loss": 1.4463303089141846, "loss_ce": 0.0039473664946854115, "loss_iou": 0.65625, "loss_num": 0.0252685546875, "loss_xval": 1.4453125, "num_input_tokens_seen": 82201672, "step": 1241 }, { "epoch": 0.116254036598493, "grad_norm": 28.336610794067383, "learning_rate": 5e-05, "loss": 1.6999, "num_input_tokens_seen": 82267820, "step": 1242 }, { "epoch": 0.116254036598493, "loss": 1.676544189453125, "loss_ce": 0.009552114643156528, "loss_iou": 0.71875, "loss_num": 0.046142578125, "loss_xval": 1.6640625, "num_input_tokens_seen": 82267820, "step": 1242 }, { "epoch": 0.11634763888238873, "grad_norm": 32.36832046508789, "learning_rate": 5e-05, "loss": 1.5587, "num_input_tokens_seen": 82334836, "step": 1243 }, { "epoch": 0.11634763888238873, "loss": 1.7433605194091797, "loss_ce": 0.0031261455733329058, "loss_iou": 0.7421875, "loss_num": 0.05126953125, "loss_xval": 1.7421875, "num_input_tokens_seen": 82334836, "step": 1243 }, { "epoch": 0.11644124116628446, "grad_norm": 31.063182830810547, "learning_rate": 5e-05, "loss": 1.6658, "num_input_tokens_seen": 82402016, "step": 1244 }, { "epoch": 0.11644124116628446, "loss": 1.5226678848266602, "loss_ce": 0.009972486644983292, "loss_iou": 0.6640625, "loss_num": 0.036865234375, "loss_xval": 1.515625, "num_input_tokens_seen": 82402016, "step": 1244 }, { "epoch": 0.11653484345018018, "grad_norm": 21.888381958007812, "learning_rate": 5e-05, "loss": 1.5815, "num_input_tokens_seen": 82467508, "step": 1245 }, { "epoch": 0.11653484345018018, "loss": 1.4058010578155518, "loss_ce": 0.0029691134113818407, "loss_iou": 0.55859375, "loss_num": 0.056884765625, "loss_xval": 1.40625, "num_input_tokens_seen": 82467508, "step": 1245 }, { "epoch": 0.11662844573407591, "grad_norm": 12.787288665771484, "learning_rate": 5e-05, "loss": 1.1663, "num_input_tokens_seen": 82534112, "step": 1246 }, { "epoch": 0.11662844573407591, "loss": 1.4278309345245361, "loss_ce": 0.0010730300564318895, "loss_iou": 0.6328125, "loss_num": 0.032958984375, "loss_xval": 1.4296875, "num_input_tokens_seen": 82534112, "step": 1246 }, { "epoch": 0.11672204801797163, "grad_norm": 16.461729049682617, "learning_rate": 5e-05, "loss": 1.4065, "num_input_tokens_seen": 82599828, "step": 1247 }, { "epoch": 0.11672204801797163, "loss": 1.4895188808441162, "loss_ce": 0.00367908226326108, "loss_iou": 0.64453125, "loss_num": 0.038818359375, "loss_xval": 1.484375, "num_input_tokens_seen": 82599828, "step": 1247 }, { "epoch": 0.11681565030186737, "grad_norm": 22.454593658447266, "learning_rate": 5e-05, "loss": 1.6579, "num_input_tokens_seen": 82666620, "step": 1248 }, { "epoch": 0.11681565030186737, "loss": 1.6648619174957275, "loss_ce": 0.0047057876363396645, "loss_iou": 0.7265625, "loss_num": 0.041259765625, "loss_xval": 1.65625, "num_input_tokens_seen": 82666620, "step": 1248 }, { "epoch": 0.1169092525857631, "grad_norm": 40.78807830810547, "learning_rate": 5e-05, "loss": 1.5815, "num_input_tokens_seen": 82732352, "step": 1249 }, { "epoch": 0.1169092525857631, "loss": 1.781855821609497, "loss_ce": 0.002558845328167081, "loss_iou": 0.78125, "loss_num": 0.04443359375, "loss_xval": 1.78125, "num_input_tokens_seen": 82732352, "step": 1249 }, { "epoch": 0.11700285486965882, "grad_norm": 20.71820640563965, "learning_rate": 5e-05, "loss": 1.7601, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11700285486965882, "eval_seeclick_CIoU": 0.1287650465965271, "eval_seeclick_GIoU": 0.13856594264507294, "eval_seeclick_IoU": 0.248815655708313, "eval_seeclick_MAE_all": 0.16578662395477295, "eval_seeclick_MAE_h": 0.08586683869361877, "eval_seeclick_MAE_w": 0.12790357321500778, "eval_seeclick_MAE_x_boxes": 0.27299533784389496, "eval_seeclick_MAE_y_boxes": 0.11505845375359058, "eval_seeclick_NUM_probability": 0.998512476682663, "eval_seeclick_inside_bbox": 0.41875000298023224, "eval_seeclick_loss": 2.5617454051971436, "eval_seeclick_loss_ce": 0.013203508220613003, "eval_seeclick_loss_iou": 0.876708984375, "eval_seeclick_loss_num": 0.157806396484375, "eval_seeclick_loss_xval": 2.541015625, "eval_seeclick_runtime": 62.0666, "eval_seeclick_samples_per_second": 0.757, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11700285486965882, "eval_icons_CIoU": -0.07235723733901978, "eval_icons_GIoU": -0.003447722876444459, "eval_icons_IoU": 0.08474742993712425, "eval_icons_MAE_all": 0.14920702576637268, "eval_icons_MAE_h": 0.07863498479127884, "eval_icons_MAE_w": 0.1913459524512291, "eval_icons_MAE_x_boxes": 0.09553009271621704, "eval_icons_MAE_y_boxes": 0.12014838308095932, "eval_icons_NUM_probability": 0.997991144657135, "eval_icons_inside_bbox": 0.2638888955116272, "eval_icons_loss": 2.7587175369262695, "eval_icons_loss_ce": 0.00024980072339531034, "eval_icons_loss_iou": 1.017578125, "eval_icons_loss_num": 0.1510772705078125, "eval_icons_loss_xval": 2.79052734375, "eval_icons_runtime": 64.9029, "eval_icons_samples_per_second": 0.77, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11700285486965882, "eval_screenspot_CIoU": 0.010961043337980906, "eval_screenspot_GIoU": 0.01767559101184209, "eval_screenspot_IoU": 0.1899357189734777, "eval_screenspot_MAE_all": 0.1625519891579946, "eval_screenspot_MAE_h": 0.10704489052295685, "eval_screenspot_MAE_w": 0.15161699056625366, "eval_screenspot_MAE_x_boxes": 0.19744082788626352, "eval_screenspot_MAE_y_boxes": 0.1427758956948916, "eval_screenspot_NUM_probability": 0.9987198114395142, "eval_screenspot_inside_bbox": 0.40708333253860474, "eval_screenspot_loss": 2.805513858795166, "eval_screenspot_loss_ce": 0.014925556567807993, "eval_screenspot_loss_iou": 0.99853515625, "eval_screenspot_loss_num": 0.1709136962890625, "eval_screenspot_loss_xval": 2.8528645833333335, "eval_screenspot_runtime": 110.9727, "eval_screenspot_samples_per_second": 0.802, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11700285486965882, "eval_compot_CIoU": 0.034002652391791344, "eval_compot_GIoU": 0.029855364933609962, "eval_compot_IoU": 0.16077818721532822, "eval_compot_MAE_all": 0.12482038512825966, "eval_compot_MAE_h": 0.07313639391213655, "eval_compot_MAE_w": 0.0934268981218338, "eval_compot_MAE_x_boxes": 0.17276836931705475, "eval_compot_MAE_y_boxes": 0.09329631552100182, "eval_compot_NUM_probability": 0.9992510080337524, "eval_compot_inside_bbox": 0.40625, "eval_compot_loss": 2.634563684463501, "eval_compot_loss_ce": 0.004644116037525237, "eval_compot_loss_iou": 1.0078125, "eval_compot_loss_num": 0.1324005126953125, "eval_compot_loss_xval": 2.677734375, "eval_compot_runtime": 77.0866, "eval_compot_samples_per_second": 0.649, "eval_compot_steps_per_second": 0.026, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11700285486965882, "eval_custom_ui_MAE_all": 0.1432972252368927, "eval_custom_ui_MAE_x": 0.1295909397304058, "eval_custom_ui_MAE_y": 0.15700352936983109, "eval_custom_ui_NUM_probability": 0.9995890855789185, "eval_custom_ui_loss": 0.7637790441513062, "eval_custom_ui_loss_ce": 0.07840772718191147, "eval_custom_ui_loss_num": 0.138702392578125, "eval_custom_ui_loss_xval": 0.6934814453125, "eval_custom_ui_runtime": 59.9836, "eval_custom_ui_samples_per_second": 0.834, "eval_custom_ui_steps_per_second": 0.033, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11700285486965882, "loss": 0.7642842531204224, "loss_ce": 0.08703814446926117, "loss_iou": 0.0, "loss_num": 0.1357421875, "loss_xval": 0.67578125, "num_input_tokens_seen": 82798912, "step": 1250 }, { "epoch": 0.11709645715355455, "grad_norm": 18.837018966674805, "learning_rate": 5e-05, "loss": 1.4492, "num_input_tokens_seen": 82864888, "step": 1251 }, { "epoch": 0.11709645715355455, "loss": 1.3731420040130615, "loss_ce": 0.00400142464786768, "loss_iou": 0.5390625, "loss_num": 0.05810546875, "loss_xval": 1.3671875, "num_input_tokens_seen": 82864888, "step": 1251 }, { "epoch": 0.11719005943745027, "grad_norm": 31.525707244873047, "learning_rate": 5e-05, "loss": 1.4484, "num_input_tokens_seen": 82931916, "step": 1252 }, { "epoch": 0.11719005943745027, "loss": 1.5041272640228271, "loss_ce": 0.009010091423988342, "loss_iou": 0.66015625, "loss_num": 0.034912109375, "loss_xval": 1.4921875, "num_input_tokens_seen": 82931916, "step": 1252 }, { "epoch": 0.117283661721346, "grad_norm": 18.124881744384766, "learning_rate": 5e-05, "loss": 1.7309, "num_input_tokens_seen": 82999304, "step": 1253 }, { "epoch": 0.117283661721346, "loss": 1.5895214080810547, "loss_ce": 0.0035839076153934, "loss_iou": 0.69921875, "loss_num": 0.037109375, "loss_xval": 1.5859375, "num_input_tokens_seen": 82999304, "step": 1253 }, { "epoch": 0.11737726400524173, "grad_norm": 16.497982025146484, "learning_rate": 5e-05, "loss": 1.6244, "num_input_tokens_seen": 83065436, "step": 1254 }, { "epoch": 0.11737726400524173, "loss": 1.752490758895874, "loss_ce": 0.004688110668212175, "loss_iou": 0.7109375, "loss_num": 0.06494140625, "loss_xval": 1.75, "num_input_tokens_seen": 83065436, "step": 1254 }, { "epoch": 0.11747086628913746, "grad_norm": 17.74229621887207, "learning_rate": 5e-05, "loss": 1.4879, "num_input_tokens_seen": 83130840, "step": 1255 }, { "epoch": 0.11747086628913746, "loss": 1.7128605842590332, "loss_ce": 0.007294106297194958, "loss_iou": 0.70703125, "loss_num": 0.05859375, "loss_xval": 1.703125, "num_input_tokens_seen": 83130840, "step": 1255 }, { "epoch": 0.11756446857303318, "grad_norm": 16.521076202392578, "learning_rate": 5e-05, "loss": 1.4597, "num_input_tokens_seen": 83197100, "step": 1256 }, { "epoch": 0.11756446857303318, "loss": 1.4006917476654053, "loss_ce": 0.005672247149050236, "loss_iou": 0.5703125, "loss_num": 0.05126953125, "loss_xval": 1.3984375, "num_input_tokens_seen": 83197100, "step": 1256 }, { "epoch": 0.1176580708569289, "grad_norm": 33.442562103271484, "learning_rate": 5e-05, "loss": 1.5389, "num_input_tokens_seen": 83263084, "step": 1257 }, { "epoch": 0.1176580708569289, "loss": 1.6844170093536377, "loss_ce": 0.0057061705738306046, "loss_iou": 0.71875, "loss_num": 0.04833984375, "loss_xval": 1.6796875, "num_input_tokens_seen": 83263084, "step": 1257 }, { "epoch": 0.11775167314082463, "grad_norm": 24.860952377319336, "learning_rate": 5e-05, "loss": 1.4648, "num_input_tokens_seen": 83329536, "step": 1258 }, { "epoch": 0.11775167314082463, "loss": 1.4786264896392822, "loss_ce": 0.006946785841137171, "loss_iou": 0.65625, "loss_num": 0.03271484375, "loss_xval": 1.46875, "num_input_tokens_seen": 83329536, "step": 1258 }, { "epoch": 0.11784527542472037, "grad_norm": 17.826082229614258, "learning_rate": 5e-05, "loss": 1.5541, "num_input_tokens_seen": 83395532, "step": 1259 }, { "epoch": 0.11784527542472037, "loss": 1.3716524839401245, "loss_ce": 0.0021456663962453604, "loss_iou": 0.57421875, "loss_num": 0.0439453125, "loss_xval": 1.3671875, "num_input_tokens_seen": 83395532, "step": 1259 }, { "epoch": 0.1179388777086161, "grad_norm": 21.652984619140625, "learning_rate": 5e-05, "loss": 1.5288, "num_input_tokens_seen": 83461984, "step": 1260 }, { "epoch": 0.1179388777086161, "loss": 1.3684253692626953, "loss_ce": 0.004655827768146992, "loss_iou": 0.5546875, "loss_num": 0.05126953125, "loss_xval": 1.3671875, "num_input_tokens_seen": 83461984, "step": 1260 }, { "epoch": 0.11803247999251182, "grad_norm": 98.35587310791016, "learning_rate": 5e-05, "loss": 1.6681, "num_input_tokens_seen": 83529256, "step": 1261 }, { "epoch": 0.11803247999251182, "loss": 1.677626132965088, "loss_ce": 0.00672775087878108, "loss_iou": 0.69921875, "loss_num": 0.05517578125, "loss_xval": 1.671875, "num_input_tokens_seen": 83529256, "step": 1261 }, { "epoch": 0.11812608227640754, "grad_norm": 15.623915672302246, "learning_rate": 5e-05, "loss": 1.8378, "num_input_tokens_seen": 83594752, "step": 1262 }, { "epoch": 0.11812608227640754, "loss": 1.5487499237060547, "loss_ce": 0.0035839397460222244, "loss_iou": 0.6875, "loss_num": 0.033935546875, "loss_xval": 1.546875, "num_input_tokens_seen": 83594752, "step": 1262 }, { "epoch": 0.11821968456030327, "grad_norm": 15.402535438537598, "learning_rate": 5e-05, "loss": 1.6479, "num_input_tokens_seen": 83660632, "step": 1263 }, { "epoch": 0.11821968456030327, "loss": 1.6366450786590576, "loss_ce": 0.004900718107819557, "loss_iou": 0.72265625, "loss_num": 0.037109375, "loss_xval": 1.6328125, "num_input_tokens_seen": 83660632, "step": 1263 }, { "epoch": 0.11831328684419899, "grad_norm": 11.214442253112793, "learning_rate": 5e-05, "loss": 1.2146, "num_input_tokens_seen": 83725400, "step": 1264 }, { "epoch": 0.11831328684419899, "loss": 1.0320955514907837, "loss_ce": 0.00426351698115468, "loss_iou": 0.404296875, "loss_num": 0.0439453125, "loss_xval": 1.03125, "num_input_tokens_seen": 83725400, "step": 1264 }, { "epoch": 0.11840688912809473, "grad_norm": 17.171525955200195, "learning_rate": 5e-05, "loss": 1.2422, "num_input_tokens_seen": 83791140, "step": 1265 }, { "epoch": 0.11840688912809473, "loss": 1.1908378601074219, "loss_ce": 0.0043144794180989265, "loss_iou": 0.4609375, "loss_num": 0.052734375, "loss_xval": 1.1875, "num_input_tokens_seen": 83791140, "step": 1265 }, { "epoch": 0.11850049141199046, "grad_norm": 10.413750648498535, "learning_rate": 5e-05, "loss": 1.3303, "num_input_tokens_seen": 83856912, "step": 1266 }, { "epoch": 0.11850049141199046, "loss": 1.2113043069839478, "loss_ce": 0.004883345682173967, "loss_iou": 0.51953125, "loss_num": 0.033447265625, "loss_xval": 1.203125, "num_input_tokens_seen": 83856912, "step": 1266 }, { "epoch": 0.11859409369588618, "grad_norm": 20.207469940185547, "learning_rate": 5e-05, "loss": 1.4605, "num_input_tokens_seen": 83923696, "step": 1267 }, { "epoch": 0.11859409369588618, "loss": 1.6463834047317505, "loss_ce": 0.0038052310701459646, "loss_iou": 0.6953125, "loss_num": 0.050048828125, "loss_xval": 1.640625, "num_input_tokens_seen": 83923696, "step": 1267 }, { "epoch": 0.1186876959797819, "grad_norm": 19.983022689819336, "learning_rate": 5e-05, "loss": 1.3416, "num_input_tokens_seen": 83989660, "step": 1268 }, { "epoch": 0.1186876959797819, "loss": 1.4917185306549072, "loss_ce": 0.0073434836231172085, "loss_iou": 0.62890625, "loss_num": 0.044677734375, "loss_xval": 1.484375, "num_input_tokens_seen": 83989660, "step": 1268 }, { "epoch": 0.11878129826367763, "grad_norm": 24.613861083984375, "learning_rate": 5e-05, "loss": 1.6368, "num_input_tokens_seen": 84056984, "step": 1269 }, { "epoch": 0.11878129826367763, "loss": 1.7018146514892578, "loss_ce": 0.0016192832263186574, "loss_iou": 0.7421875, "loss_num": 0.04345703125, "loss_xval": 1.703125, "num_input_tokens_seen": 84056984, "step": 1269 }, { "epoch": 0.11887490054757337, "grad_norm": 18.163482666015625, "learning_rate": 5e-05, "loss": 1.6242, "num_input_tokens_seen": 84122832, "step": 1270 }, { "epoch": 0.11887490054757337, "loss": 1.6886608600616455, "loss_ce": 0.0016491420101374388, "loss_iou": 0.72265625, "loss_num": 0.0478515625, "loss_xval": 1.6875, "num_input_tokens_seen": 84122832, "step": 1270 }, { "epoch": 0.11896850283146909, "grad_norm": 14.846522331237793, "learning_rate": 5e-05, "loss": 1.4988, "num_input_tokens_seen": 84189052, "step": 1271 }, { "epoch": 0.11896850283146909, "loss": 1.6928069591522217, "loss_ce": 0.009213217534124851, "loss_iou": 0.703125, "loss_num": 0.0556640625, "loss_xval": 1.6875, "num_input_tokens_seen": 84189052, "step": 1271 }, { "epoch": 0.11906210511536482, "grad_norm": 15.97767448425293, "learning_rate": 5e-05, "loss": 1.2843, "num_input_tokens_seen": 84254416, "step": 1272 }, { "epoch": 0.11906210511536482, "loss": 1.2944265604019165, "loss_ce": 0.004387491848319769, "loss_iou": 0.58203125, "loss_num": 0.0255126953125, "loss_xval": 1.2890625, "num_input_tokens_seen": 84254416, "step": 1272 }, { "epoch": 0.11915570739926054, "grad_norm": 18.81927490234375, "learning_rate": 5e-05, "loss": 1.3602, "num_input_tokens_seen": 84321512, "step": 1273 }, { "epoch": 0.11915570739926054, "loss": 1.3958415985107422, "loss_ce": 0.0032635130919516087, "loss_iou": 0.6171875, "loss_num": 0.0322265625, "loss_xval": 1.390625, "num_input_tokens_seen": 84321512, "step": 1273 }, { "epoch": 0.11924930968315627, "grad_norm": 19.093976974487305, "learning_rate": 5e-05, "loss": 1.714, "num_input_tokens_seen": 84386724, "step": 1274 }, { "epoch": 0.11924930968315627, "loss": 1.7422585487365723, "loss_ce": 0.00348901329562068, "loss_iou": 0.70703125, "loss_num": 0.06396484375, "loss_xval": 1.7421875, "num_input_tokens_seen": 84386724, "step": 1274 }, { "epoch": 0.11934291196705199, "grad_norm": 14.970403671264648, "learning_rate": 5e-05, "loss": 1.5339, "num_input_tokens_seen": 84454224, "step": 1275 }, { "epoch": 0.11934291196705199, "loss": 1.63826584815979, "loss_ce": 0.0069181350991129875, "loss_iou": 0.65234375, "loss_num": 0.06591796875, "loss_xval": 1.6328125, "num_input_tokens_seen": 84454224, "step": 1275 }, { "epoch": 0.11943651425094773, "grad_norm": 19.01073455810547, "learning_rate": 5e-05, "loss": 1.421, "num_input_tokens_seen": 84519860, "step": 1276 }, { "epoch": 0.11943651425094773, "loss": 1.3654017448425293, "loss_ce": 0.005050238221883774, "loss_iou": 0.5625, "loss_num": 0.047607421875, "loss_xval": 1.359375, "num_input_tokens_seen": 84519860, "step": 1276 }, { "epoch": 0.11953011653484345, "grad_norm": 16.294599533081055, "learning_rate": 5e-05, "loss": 1.5049, "num_input_tokens_seen": 84585884, "step": 1277 }, { "epoch": 0.11953011653484345, "loss": 1.465646505355835, "loss_ce": 0.005197314545512199, "loss_iou": 0.60546875, "loss_num": 0.050048828125, "loss_xval": 1.4609375, "num_input_tokens_seen": 84585884, "step": 1277 }, { "epoch": 0.11962371881873918, "grad_norm": 21.392253875732422, "learning_rate": 5e-05, "loss": 1.5868, "num_input_tokens_seen": 84651872, "step": 1278 }, { "epoch": 0.11962371881873918, "loss": 1.5563135147094727, "loss_ce": 0.00260249525308609, "loss_iou": 0.6640625, "loss_num": 0.045166015625, "loss_xval": 1.5546875, "num_input_tokens_seen": 84651872, "step": 1278 }, { "epoch": 0.1197173211026349, "grad_norm": 19.679719924926758, "learning_rate": 5e-05, "loss": 1.6347, "num_input_tokens_seen": 84717280, "step": 1279 }, { "epoch": 0.1197173211026349, "loss": 1.716318130493164, "loss_ce": 0.0019625977147370577, "loss_iou": 0.73828125, "loss_num": 0.046875, "loss_xval": 1.7109375, "num_input_tokens_seen": 84717280, "step": 1279 }, { "epoch": 0.11981092338653063, "grad_norm": 33.421905517578125, "learning_rate": 5e-05, "loss": 1.4694, "num_input_tokens_seen": 84782716, "step": 1280 }, { "epoch": 0.11981092338653063, "loss": 1.25675368309021, "loss_ce": 0.003579774871468544, "loss_iou": 0.54296875, "loss_num": 0.03369140625, "loss_xval": 1.25, "num_input_tokens_seen": 84782716, "step": 1280 }, { "epoch": 0.11990452567042637, "grad_norm": 27.822599411010742, "learning_rate": 5e-05, "loss": 1.8041, "num_input_tokens_seen": 84848692, "step": 1281 }, { "epoch": 0.11990452567042637, "loss": 1.7329094409942627, "loss_ce": 0.0024407554883509874, "loss_iou": 0.7734375, "loss_num": 0.036376953125, "loss_xval": 1.734375, "num_input_tokens_seen": 84848692, "step": 1281 }, { "epoch": 0.11999812795432209, "grad_norm": 62.687049865722656, "learning_rate": 5e-05, "loss": 1.4717, "num_input_tokens_seen": 84915340, "step": 1282 }, { "epoch": 0.11999812795432209, "loss": 1.7012088298797607, "loss_ce": 0.008826037868857384, "loss_iou": 0.68359375, "loss_num": 0.06494140625, "loss_xval": 1.6953125, "num_input_tokens_seen": 84915340, "step": 1282 }, { "epoch": 0.12009173023821781, "grad_norm": 21.4544734954834, "learning_rate": 5e-05, "loss": 1.5386, "num_input_tokens_seen": 84982344, "step": 1283 }, { "epoch": 0.12009173023821781, "loss": 1.5494441986083984, "loss_ce": 0.005498811602592468, "loss_iou": 0.6953125, "loss_num": 0.0311279296875, "loss_xval": 1.546875, "num_input_tokens_seen": 84982344, "step": 1283 }, { "epoch": 0.12018533252211354, "grad_norm": 22.630477905273438, "learning_rate": 5e-05, "loss": 1.4824, "num_input_tokens_seen": 85049760, "step": 1284 }, { "epoch": 0.12018533252211354, "loss": 1.25767183303833, "loss_ce": 0.0013242331333458424, "loss_iou": 0.5625, "loss_num": 0.0257568359375, "loss_xval": 1.2578125, "num_input_tokens_seen": 85049760, "step": 1284 }, { "epoch": 0.12027893480600926, "grad_norm": 46.782806396484375, "learning_rate": 5e-05, "loss": 1.568, "num_input_tokens_seen": 85116292, "step": 1285 }, { "epoch": 0.12027893480600926, "loss": 1.672314167022705, "loss_ce": 0.0033688938710838556, "loss_iou": 0.6953125, "loss_num": 0.0556640625, "loss_xval": 1.671875, "num_input_tokens_seen": 85116292, "step": 1285 }, { "epoch": 0.12037253708990499, "grad_norm": 22.50234031677246, "learning_rate": 5e-05, "loss": 1.7053, "num_input_tokens_seen": 85181932, "step": 1286 }, { "epoch": 0.12037253708990499, "loss": 1.8718600273132324, "loss_ce": 0.006625790614634752, "loss_iou": 0.8359375, "loss_num": 0.0390625, "loss_xval": 1.8671875, "num_input_tokens_seen": 85181932, "step": 1286 }, { "epoch": 0.12046613937380073, "grad_norm": 58.333805084228516, "learning_rate": 5e-05, "loss": 1.512, "num_input_tokens_seen": 85248036, "step": 1287 }, { "epoch": 0.12046613937380073, "loss": 1.6283907890319824, "loss_ce": 0.013156358152627945, "loss_iou": 0.72265625, "loss_num": 0.034423828125, "loss_xval": 1.6171875, "num_input_tokens_seen": 85248036, "step": 1287 }, { "epoch": 0.12055974165769645, "grad_norm": 15.645672798156738, "learning_rate": 5e-05, "loss": 1.3016, "num_input_tokens_seen": 85314132, "step": 1288 }, { "epoch": 0.12055974165769645, "loss": 1.2546794414520264, "loss_ce": 0.0066325003281235695, "loss_iou": 0.5625, "loss_num": 0.025390625, "loss_xval": 1.25, "num_input_tokens_seen": 85314132, "step": 1288 }, { "epoch": 0.12065334394159218, "grad_norm": 21.752676010131836, "learning_rate": 5e-05, "loss": 1.406, "num_input_tokens_seen": 85379904, "step": 1289 }, { "epoch": 0.12065334394159218, "loss": 1.273632526397705, "loss_ce": 0.0037351157516241074, "loss_iou": 0.5546875, "loss_num": 0.031494140625, "loss_xval": 1.2734375, "num_input_tokens_seen": 85379904, "step": 1289 }, { "epoch": 0.1207469462254879, "grad_norm": 80.36859130859375, "learning_rate": 5e-05, "loss": 1.5521, "num_input_tokens_seen": 85446272, "step": 1290 }, { "epoch": 0.1207469462254879, "loss": 1.5876295566558838, "loss_ce": 0.006574748083949089, "loss_iou": 0.6796875, "loss_num": 0.0439453125, "loss_xval": 1.578125, "num_input_tokens_seen": 85446272, "step": 1290 }, { "epoch": 0.12084054850938362, "grad_norm": 17.155616760253906, "learning_rate": 5e-05, "loss": 1.8221, "num_input_tokens_seen": 85513108, "step": 1291 }, { "epoch": 0.12084054850938362, "loss": 1.7568233013153076, "loss_ce": 0.002917001722380519, "loss_iou": 0.74609375, "loss_num": 0.0517578125, "loss_xval": 1.75, "num_input_tokens_seen": 85513108, "step": 1291 }, { "epoch": 0.12093415079327935, "grad_norm": 13.691561698913574, "learning_rate": 5e-05, "loss": 1.3615, "num_input_tokens_seen": 85580328, "step": 1292 }, { "epoch": 0.12093415079327935, "loss": 1.453429937362671, "loss_ce": 0.0027464372105896473, "loss_iou": 0.61328125, "loss_num": 0.044921875, "loss_xval": 1.453125, "num_input_tokens_seen": 85580328, "step": 1292 }, { "epoch": 0.12102775307717509, "grad_norm": 32.56606674194336, "learning_rate": 5e-05, "loss": 1.2465, "num_input_tokens_seen": 85646888, "step": 1293 }, { "epoch": 0.12102775307717509, "loss": 1.2406527996063232, "loss_ce": 0.0023715831339359283, "loss_iou": 0.5625, "loss_num": 0.0234375, "loss_xval": 1.234375, "num_input_tokens_seen": 85646888, "step": 1293 }, { "epoch": 0.12112135536107081, "grad_norm": 22.93648338317871, "learning_rate": 5e-05, "loss": 1.7601, "num_input_tokens_seen": 85712936, "step": 1294 }, { "epoch": 0.12112135536107081, "loss": 1.686873197555542, "loss_ce": 0.00425603287294507, "loss_iou": 0.734375, "loss_num": 0.04248046875, "loss_xval": 1.6796875, "num_input_tokens_seen": 85712936, "step": 1294 }, { "epoch": 0.12121495764496654, "grad_norm": 14.054872512817383, "learning_rate": 5e-05, "loss": 1.5023, "num_input_tokens_seen": 85779100, "step": 1295 }, { "epoch": 0.12121495764496654, "loss": 1.3477599620819092, "loss_ce": 0.004986626096069813, "loss_iou": 0.58203125, "loss_num": 0.035888671875, "loss_xval": 1.34375, "num_input_tokens_seen": 85779100, "step": 1295 }, { "epoch": 0.12130855992886226, "grad_norm": 13.445719718933105, "learning_rate": 5e-05, "loss": 1.3279, "num_input_tokens_seen": 85844692, "step": 1296 }, { "epoch": 0.12130855992886226, "loss": 1.2723405361175537, "loss_ce": 0.004518401343375444, "loss_iou": 0.5390625, "loss_num": 0.038818359375, "loss_xval": 1.265625, "num_input_tokens_seen": 85844692, "step": 1296 }, { "epoch": 0.12140216221275799, "grad_norm": 12.350967407226562, "learning_rate": 5e-05, "loss": 1.0803, "num_input_tokens_seen": 85910448, "step": 1297 }, { "epoch": 0.12140216221275799, "loss": 1.0261882543563843, "loss_ce": 0.003116979030892253, "loss_iou": 0.458984375, "loss_num": 0.0211181640625, "loss_xval": 1.0234375, "num_input_tokens_seen": 85910448, "step": 1297 }, { "epoch": 0.12149576449665372, "grad_norm": 20.45875358581543, "learning_rate": 5e-05, "loss": 1.3162, "num_input_tokens_seen": 85976348, "step": 1298 }, { "epoch": 0.12149576449665372, "loss": 1.2711149454116821, "loss_ce": 0.005978184752166271, "loss_iou": 0.51953125, "loss_num": 0.0458984375, "loss_xval": 1.265625, "num_input_tokens_seen": 85976348, "step": 1298 }, { "epoch": 0.12158936678054945, "grad_norm": 19.14122200012207, "learning_rate": 5e-05, "loss": 1.4476, "num_input_tokens_seen": 86042828, "step": 1299 }, { "epoch": 0.12158936678054945, "loss": 1.340684413909912, "loss_ce": 0.006455985363572836, "loss_iou": 0.59375, "loss_num": 0.029052734375, "loss_xval": 1.3359375, "num_input_tokens_seen": 86042828, "step": 1299 }, { "epoch": 0.12168296906444517, "grad_norm": 25.907560348510742, "learning_rate": 5e-05, "loss": 1.5056, "num_input_tokens_seen": 86109824, "step": 1300 }, { "epoch": 0.12168296906444517, "loss": 1.396787166595459, "loss_ce": 0.008054189383983612, "loss_iou": 0.63671875, "loss_num": 0.0234375, "loss_xval": 1.390625, "num_input_tokens_seen": 86109824, "step": 1300 }, { "epoch": 0.1217765713483409, "grad_norm": 20.510488510131836, "learning_rate": 5e-05, "loss": 1.6723, "num_input_tokens_seen": 86175660, "step": 1301 }, { "epoch": 0.1217765713483409, "loss": 1.8065403699874878, "loss_ce": 0.004782572388648987, "loss_iou": 0.75390625, "loss_num": 0.0595703125, "loss_xval": 1.8046875, "num_input_tokens_seen": 86175660, "step": 1301 }, { "epoch": 0.12187017363223662, "grad_norm": 13.6410551071167, "learning_rate": 5e-05, "loss": 1.4309, "num_input_tokens_seen": 86241376, "step": 1302 }, { "epoch": 0.12187017363223662, "loss": 1.1717824935913086, "loss_ce": 0.004057946149259806, "loss_iou": 0.4375, "loss_num": 0.058837890625, "loss_xval": 1.1640625, "num_input_tokens_seen": 86241376, "step": 1302 }, { "epoch": 0.12196377591613235, "grad_norm": 22.48175048828125, "learning_rate": 5e-05, "loss": 1.3254, "num_input_tokens_seen": 86308068, "step": 1303 }, { "epoch": 0.12196377591613235, "loss": 1.351536750793457, "loss_ce": 0.008274968713521957, "loss_iou": 0.515625, "loss_num": 0.06201171875, "loss_xval": 1.34375, "num_input_tokens_seen": 86308068, "step": 1303 }, { "epoch": 0.12205737820002809, "grad_norm": 21.522815704345703, "learning_rate": 5e-05, "loss": 1.4996, "num_input_tokens_seen": 86374600, "step": 1304 }, { "epoch": 0.12205737820002809, "loss": 1.4055016040802002, "loss_ce": 0.004134302027523518, "loss_iou": 0.609375, "loss_num": 0.035888671875, "loss_xval": 1.3984375, "num_input_tokens_seen": 86374600, "step": 1304 }, { "epoch": 0.12215098048392381, "grad_norm": 18.598857879638672, "learning_rate": 5e-05, "loss": 1.5089, "num_input_tokens_seen": 86440616, "step": 1305 }, { "epoch": 0.12215098048392381, "loss": 1.826608657836914, "loss_ce": 0.007272652816027403, "loss_iou": 0.76953125, "loss_num": 0.0556640625, "loss_xval": 1.8203125, "num_input_tokens_seen": 86440616, "step": 1305 }, { "epoch": 0.12224458276781953, "grad_norm": 26.113828659057617, "learning_rate": 5e-05, "loss": 1.4685, "num_input_tokens_seen": 86506688, "step": 1306 }, { "epoch": 0.12224458276781953, "loss": 1.4255645275115967, "loss_ce": 0.0017364441882818937, "loss_iou": 0.6328125, "loss_num": 0.0322265625, "loss_xval": 1.421875, "num_input_tokens_seen": 86506688, "step": 1306 }, { "epoch": 0.12233818505171526, "grad_norm": 36.879798889160156, "learning_rate": 5e-05, "loss": 1.5751, "num_input_tokens_seen": 86571828, "step": 1307 }, { "epoch": 0.12233818505171526, "loss": 1.6650397777557373, "loss_ce": 0.006836687680333853, "loss_iou": 0.703125, "loss_num": 0.05078125, "loss_xval": 1.65625, "num_input_tokens_seen": 86571828, "step": 1307 }, { "epoch": 0.12243178733561098, "grad_norm": 23.046838760375977, "learning_rate": 5e-05, "loss": 1.9209, "num_input_tokens_seen": 86638472, "step": 1308 }, { "epoch": 0.12243178733561098, "loss": 2.05283260345459, "loss_ce": 0.0010746953776106238, "loss_iou": 0.875, "loss_num": 0.060791015625, "loss_xval": 2.046875, "num_input_tokens_seen": 86638472, "step": 1308 }, { "epoch": 0.12252538961950672, "grad_norm": 17.48950958251953, "learning_rate": 5e-05, "loss": 1.5904, "num_input_tokens_seen": 86703256, "step": 1309 }, { "epoch": 0.12252538961950672, "loss": 1.7254270315170288, "loss_ce": 0.004723929800093174, "loss_iou": 0.7421875, "loss_num": 0.0478515625, "loss_xval": 1.71875, "num_input_tokens_seen": 86703256, "step": 1309 }, { "epoch": 0.12261899190340245, "grad_norm": 27.78485679626465, "learning_rate": 5e-05, "loss": 1.6184, "num_input_tokens_seen": 86768240, "step": 1310 }, { "epoch": 0.12261899190340245, "loss": 1.8453835248947144, "loss_ce": 0.006516415625810623, "loss_iou": 0.796875, "loss_num": 0.048828125, "loss_xval": 1.8359375, "num_input_tokens_seen": 86768240, "step": 1310 }, { "epoch": 0.12271259418729817, "grad_norm": 29.50354766845703, "learning_rate": 5e-05, "loss": 1.5398, "num_input_tokens_seen": 86834552, "step": 1311 }, { "epoch": 0.12271259418729817, "loss": 1.3262726068496704, "loss_ce": 0.0039764754474163055, "loss_iou": 0.58984375, "loss_num": 0.0286865234375, "loss_xval": 1.3203125, "num_input_tokens_seen": 86834552, "step": 1311 }, { "epoch": 0.1228061964711939, "grad_norm": 11.42076587677002, "learning_rate": 5e-05, "loss": 1.5043, "num_input_tokens_seen": 86900968, "step": 1312 }, { "epoch": 0.1228061964711939, "loss": 1.376326322555542, "loss_ce": 0.0013263248838484287, "loss_iou": 0.5859375, "loss_num": 0.041015625, "loss_xval": 1.375, "num_input_tokens_seen": 86900968, "step": 1312 }, { "epoch": 0.12289979875508962, "grad_norm": 26.578754425048828, "learning_rate": 5e-05, "loss": 1.4276, "num_input_tokens_seen": 86966816, "step": 1313 }, { "epoch": 0.12289979875508962, "loss": 1.2679071426391602, "loss_ce": 0.0027704723179340363, "loss_iou": 0.55859375, "loss_num": 0.0303955078125, "loss_xval": 1.265625, "num_input_tokens_seen": 86966816, "step": 1313 }, { "epoch": 0.12299340103898534, "grad_norm": 19.72821044921875, "learning_rate": 5e-05, "loss": 1.5978, "num_input_tokens_seen": 87033808, "step": 1314 }, { "epoch": 0.12299340103898534, "loss": 1.5294857025146484, "loss_ce": 0.0026302344631403685, "loss_iou": 0.625, "loss_num": 0.054931640625, "loss_xval": 1.5234375, "num_input_tokens_seen": 87033808, "step": 1314 }, { "epoch": 0.12308700332288108, "grad_norm": 15.73732852935791, "learning_rate": 5e-05, "loss": 1.4708, "num_input_tokens_seen": 87100612, "step": 1315 }, { "epoch": 0.12308700332288108, "loss": 1.5097014904022217, "loss_ce": 0.004330409690737724, "loss_iou": 0.6171875, "loss_num": 0.05517578125, "loss_xval": 1.5078125, "num_input_tokens_seen": 87100612, "step": 1315 }, { "epoch": 0.12318060560677681, "grad_norm": 20.726572036743164, "learning_rate": 5e-05, "loss": 1.6119, "num_input_tokens_seen": 87166924, "step": 1316 }, { "epoch": 0.12318060560677681, "loss": 1.7098352909088135, "loss_ce": 0.0008508390747010708, "loss_iou": 0.73828125, "loss_num": 0.046630859375, "loss_xval": 1.7109375, "num_input_tokens_seen": 87166924, "step": 1316 }, { "epoch": 0.12327420789067253, "grad_norm": 31.550519943237305, "learning_rate": 5e-05, "loss": 1.6269, "num_input_tokens_seen": 87233908, "step": 1317 }, { "epoch": 0.12327420789067253, "loss": 1.921507716178894, "loss_ce": 0.0015858153346925974, "loss_iou": 0.79296875, "loss_num": 0.06591796875, "loss_xval": 1.921875, "num_input_tokens_seen": 87233908, "step": 1317 }, { "epoch": 0.12336781017456826, "grad_norm": 19.12386703491211, "learning_rate": 5e-05, "loss": 1.8412, "num_input_tokens_seen": 87300564, "step": 1318 }, { "epoch": 0.12336781017456826, "loss": 1.8357542753219604, "loss_ce": 0.011535624042153358, "loss_iou": 0.71484375, "loss_num": 0.07861328125, "loss_xval": 1.828125, "num_input_tokens_seen": 87300564, "step": 1318 }, { "epoch": 0.12346141245846398, "grad_norm": 21.08283805847168, "learning_rate": 5e-05, "loss": 1.6531, "num_input_tokens_seen": 87368060, "step": 1319 }, { "epoch": 0.12346141245846398, "loss": 1.6483509540557861, "loss_ce": 0.0047962842509150505, "loss_iou": 0.71875, "loss_num": 0.04150390625, "loss_xval": 1.640625, "num_input_tokens_seen": 87368060, "step": 1319 }, { "epoch": 0.12355501474235972, "grad_norm": 75.24449920654297, "learning_rate": 5e-05, "loss": 1.5335, "num_input_tokens_seen": 87433764, "step": 1320 }, { "epoch": 0.12355501474235972, "loss": 1.4258882999420166, "loss_ce": 0.004013323690742254, "loss_iou": 0.609375, "loss_num": 0.041015625, "loss_xval": 1.421875, "num_input_tokens_seen": 87433764, "step": 1320 }, { "epoch": 0.12364861702625544, "grad_norm": 24.328025817871094, "learning_rate": 5e-05, "loss": 1.5936, "num_input_tokens_seen": 87499040, "step": 1321 }, { "epoch": 0.12364861702625544, "loss": 1.732417345046997, "loss_ce": 0.0058548226952552795, "loss_iou": 0.70703125, "loss_num": 0.061767578125, "loss_xval": 1.7265625, "num_input_tokens_seen": 87499040, "step": 1321 }, { "epoch": 0.12374221931015117, "grad_norm": 22.555723190307617, "learning_rate": 5e-05, "loss": 1.438, "num_input_tokens_seen": 87565064, "step": 1322 }, { "epoch": 0.12374221931015117, "loss": 1.4071223735809326, "loss_ce": 0.0023371789138764143, "loss_iou": 0.61328125, "loss_num": 0.0361328125, "loss_xval": 1.40625, "num_input_tokens_seen": 87565064, "step": 1322 }, { "epoch": 0.1238358215940469, "grad_norm": 16.10475730895996, "learning_rate": 5e-05, "loss": 1.7528, "num_input_tokens_seen": 87631036, "step": 1323 }, { "epoch": 0.1238358215940469, "loss": 1.6155085563659668, "loss_ce": 0.006133580580353737, "loss_iou": 0.70703125, "loss_num": 0.03955078125, "loss_xval": 1.609375, "num_input_tokens_seen": 87631036, "step": 1323 }, { "epoch": 0.12392942387794262, "grad_norm": 14.831561088562012, "learning_rate": 5e-05, "loss": 1.5513, "num_input_tokens_seen": 87697400, "step": 1324 }, { "epoch": 0.12392942387794262, "loss": 1.2704427242279053, "loss_ce": 0.004817690700292587, "loss_iou": 0.5546875, "loss_num": 0.031494140625, "loss_xval": 1.265625, "num_input_tokens_seen": 87697400, "step": 1324 }, { "epoch": 0.12402302616183834, "grad_norm": 32.889522552490234, "learning_rate": 5e-05, "loss": 1.2469, "num_input_tokens_seen": 87763576, "step": 1325 }, { "epoch": 0.12402302616183834, "loss": 1.2827637195587158, "loss_ce": 0.0044434089213609695, "loss_iou": 0.5703125, "loss_num": 0.0274658203125, "loss_xval": 1.28125, "num_input_tokens_seen": 87763576, "step": 1325 }, { "epoch": 0.12411662844573408, "grad_norm": 15.29900074005127, "learning_rate": 5e-05, "loss": 1.7251, "num_input_tokens_seen": 87830268, "step": 1326 }, { "epoch": 0.12411662844573408, "loss": 1.6326643228530884, "loss_ce": 0.002781424205750227, "loss_iou": 0.69921875, "loss_num": 0.045654296875, "loss_xval": 1.6328125, "num_input_tokens_seen": 87830268, "step": 1326 }, { "epoch": 0.1242102307296298, "grad_norm": 44.64360427856445, "learning_rate": 5e-05, "loss": 1.5381, "num_input_tokens_seen": 87896616, "step": 1327 }, { "epoch": 0.1242102307296298, "loss": 1.5737522840499878, "loss_ce": 0.008322535082697868, "loss_iou": 0.65625, "loss_num": 0.0517578125, "loss_xval": 1.5625, "num_input_tokens_seen": 87896616, "step": 1327 }, { "epoch": 0.12430383301352553, "grad_norm": 113.04426574707031, "learning_rate": 5e-05, "loss": 1.5927, "num_input_tokens_seen": 87962076, "step": 1328 }, { "epoch": 0.12430383301352553, "loss": 1.9099640846252441, "loss_ce": 0.00859688688069582, "loss_iou": 0.80078125, "loss_num": 0.060546875, "loss_xval": 1.8984375, "num_input_tokens_seen": 87962076, "step": 1328 }, { "epoch": 0.12439743529742125, "grad_norm": 16.400493621826172, "learning_rate": 5e-05, "loss": 1.5002, "num_input_tokens_seen": 88029696, "step": 1329 }, { "epoch": 0.12439743529742125, "loss": 1.7915959358215332, "loss_ce": 0.005463153589516878, "loss_iou": 0.765625, "loss_num": 0.05126953125, "loss_xval": 1.7890625, "num_input_tokens_seen": 88029696, "step": 1329 }, { "epoch": 0.12449103758131698, "grad_norm": 18.444204330444336, "learning_rate": 5e-05, "loss": 1.3574, "num_input_tokens_seen": 88096584, "step": 1330 }, { "epoch": 0.12449103758131698, "loss": 1.3137296438217163, "loss_ce": 0.006112488452345133, "loss_iou": 0.5703125, "loss_num": 0.0341796875, "loss_xval": 1.3046875, "num_input_tokens_seen": 88096584, "step": 1330 }, { "epoch": 0.12458463986521272, "grad_norm": 50.89994430541992, "learning_rate": 5e-05, "loss": 1.9221, "num_input_tokens_seen": 88163308, "step": 1331 }, { "epoch": 0.12458463986521272, "loss": 1.9844614267349243, "loss_ce": 0.004969221539795399, "loss_iou": 0.83984375, "loss_num": 0.060546875, "loss_xval": 1.9765625, "num_input_tokens_seen": 88163308, "step": 1331 }, { "epoch": 0.12467824214910844, "grad_norm": 16.569965362548828, "learning_rate": 5e-05, "loss": 1.6601, "num_input_tokens_seen": 88229560, "step": 1332 }, { "epoch": 0.12467824214910844, "loss": 1.6772743463516235, "loss_ce": 0.003446259070187807, "loss_iou": 0.73828125, "loss_num": 0.038818359375, "loss_xval": 1.671875, "num_input_tokens_seen": 88229560, "step": 1332 }, { "epoch": 0.12477184443300417, "grad_norm": 21.92986297607422, "learning_rate": 5e-05, "loss": 1.5063, "num_input_tokens_seen": 88296360, "step": 1333 }, { "epoch": 0.12477184443300417, "loss": 1.5332324504852295, "loss_ce": 0.004667984321713448, "loss_iou": 0.6328125, "loss_num": 0.05224609375, "loss_xval": 1.53125, "num_input_tokens_seen": 88296360, "step": 1333 }, { "epoch": 0.12486544671689989, "grad_norm": 19.285966873168945, "learning_rate": 5e-05, "loss": 1.2684, "num_input_tokens_seen": 88362000, "step": 1334 }, { "epoch": 0.12486544671689989, "loss": 1.2381023168563843, "loss_ce": 0.005680470261722803, "loss_iou": 0.515625, "loss_num": 0.040771484375, "loss_xval": 1.234375, "num_input_tokens_seen": 88362000, "step": 1334 }, { "epoch": 0.12495904900079562, "grad_norm": 27.227636337280273, "learning_rate": 5e-05, "loss": 1.6714, "num_input_tokens_seen": 88428384, "step": 1335 }, { "epoch": 0.12495904900079562, "loss": 1.6423685550689697, "loss_ce": 0.00564984604716301, "loss_iou": 0.70703125, "loss_num": 0.044677734375, "loss_xval": 1.640625, "num_input_tokens_seen": 88428384, "step": 1335 }, { "epoch": 0.12505265128469134, "grad_norm": 23.74907684326172, "learning_rate": 5e-05, "loss": 1.6194, "num_input_tokens_seen": 88495332, "step": 1336 }, { "epoch": 0.12505265128469134, "loss": 1.825437307357788, "loss_ce": 0.00707800779491663, "loss_iou": 0.7265625, "loss_num": 0.0732421875, "loss_xval": 1.8203125, "num_input_tokens_seen": 88495332, "step": 1336 }, { "epoch": 0.12514625356858708, "grad_norm": 16.62088394165039, "learning_rate": 5e-05, "loss": 1.485, "num_input_tokens_seen": 88562248, "step": 1337 }, { "epoch": 0.12514625356858708, "loss": 1.620102047920227, "loss_ce": 0.0038911611773073673, "loss_iou": 0.65625, "loss_num": 0.061279296875, "loss_xval": 1.6171875, "num_input_tokens_seen": 88562248, "step": 1337 }, { "epoch": 0.1252398558524828, "grad_norm": 40.16783142089844, "learning_rate": 5e-05, "loss": 1.3248, "num_input_tokens_seen": 88629452, "step": 1338 }, { "epoch": 0.1252398558524828, "loss": 1.24601411819458, "loss_ce": 0.002849954180419445, "loss_iou": 0.54296875, "loss_num": 0.0311279296875, "loss_xval": 1.2421875, "num_input_tokens_seen": 88629452, "step": 1338 }, { "epoch": 0.12533345813637853, "grad_norm": 19.44478988647461, "learning_rate": 5e-05, "loss": 1.4632, "num_input_tokens_seen": 88697664, "step": 1339 }, { "epoch": 0.12533345813637853, "loss": 1.550034999847412, "loss_ce": 0.0012068809010088444, "loss_iou": 0.640625, "loss_num": 0.05419921875, "loss_xval": 1.546875, "num_input_tokens_seen": 88697664, "step": 1339 }, { "epoch": 0.12542706042027427, "grad_norm": 15.983454704284668, "learning_rate": 5e-05, "loss": 1.4638, "num_input_tokens_seen": 88763948, "step": 1340 }, { "epoch": 0.12542706042027427, "loss": 1.661836862564087, "loss_ce": 0.007540082558989525, "loss_iou": 0.703125, "loss_num": 0.050048828125, "loss_xval": 1.65625, "num_input_tokens_seen": 88763948, "step": 1340 }, { "epoch": 0.12552066270416998, "grad_norm": 18.1462345123291, "learning_rate": 5e-05, "loss": 1.6568, "num_input_tokens_seen": 88829860, "step": 1341 }, { "epoch": 0.12552066270416998, "loss": 1.7739043235778809, "loss_ce": 0.003396473592147231, "loss_iou": 0.765625, "loss_num": 0.048095703125, "loss_xval": 1.7734375, "num_input_tokens_seen": 88829860, "step": 1341 }, { "epoch": 0.12561426498806572, "grad_norm": 23.744892120361328, "learning_rate": 5e-05, "loss": 1.5271, "num_input_tokens_seen": 88895908, "step": 1342 }, { "epoch": 0.12561426498806572, "loss": 1.634044885635376, "loss_ce": 0.009044930338859558, "loss_iou": 0.6640625, "loss_num": 0.0595703125, "loss_xval": 1.625, "num_input_tokens_seen": 88895908, "step": 1342 }, { "epoch": 0.12570786727196143, "grad_norm": 47.277591705322266, "learning_rate": 5e-05, "loss": 1.4038, "num_input_tokens_seen": 88962172, "step": 1343 }, { "epoch": 0.12570786727196143, "loss": 1.409563660621643, "loss_ce": 0.008196476846933365, "loss_iou": 0.63671875, "loss_num": 0.02587890625, "loss_xval": 1.3984375, "num_input_tokens_seen": 88962172, "step": 1343 }, { "epoch": 0.12580146955585716, "grad_norm": 39.08584976196289, "learning_rate": 5e-05, "loss": 1.5426, "num_input_tokens_seen": 89027524, "step": 1344 }, { "epoch": 0.12580146955585716, "loss": 1.649590253829956, "loss_ce": 0.007988580502569675, "loss_iou": 0.74609375, "loss_num": 0.03076171875, "loss_xval": 1.640625, "num_input_tokens_seen": 89027524, "step": 1344 }, { "epoch": 0.1258950718397529, "grad_norm": 16.79268455505371, "learning_rate": 5e-05, "loss": 1.8017, "num_input_tokens_seen": 89092648, "step": 1345 }, { "epoch": 0.1258950718397529, "loss": 1.7879549264907837, "loss_ce": 0.003775215707719326, "loss_iou": 0.7734375, "loss_num": 0.0478515625, "loss_xval": 1.78125, "num_input_tokens_seen": 89092648, "step": 1345 }, { "epoch": 0.12598867412364861, "grad_norm": 21.5550537109375, "learning_rate": 5e-05, "loss": 1.4655, "num_input_tokens_seen": 89159212, "step": 1346 }, { "epoch": 0.12598867412364861, "loss": 1.4311448335647583, "loss_ce": 0.005363560281693935, "loss_iou": 0.62109375, "loss_num": 0.036376953125, "loss_xval": 1.421875, "num_input_tokens_seen": 89159212, "step": 1346 }, { "epoch": 0.12608227640754435, "grad_norm": 19.53887939453125, "learning_rate": 5e-05, "loss": 1.5629, "num_input_tokens_seen": 89225420, "step": 1347 }, { "epoch": 0.12608227640754435, "loss": 1.3539124727249146, "loss_ce": 0.002349911257624626, "loss_iou": 0.57421875, "loss_num": 0.040283203125, "loss_xval": 1.3515625, "num_input_tokens_seen": 89225420, "step": 1347 }, { "epoch": 0.12617587869144006, "grad_norm": 18.233774185180664, "learning_rate": 5e-05, "loss": 1.6367, "num_input_tokens_seen": 89291752, "step": 1348 }, { "epoch": 0.12617587869144006, "loss": 1.6218806505203247, "loss_ce": 0.0027399638202041388, "loss_iou": 0.68359375, "loss_num": 0.05029296875, "loss_xval": 1.6171875, "num_input_tokens_seen": 89291752, "step": 1348 }, { "epoch": 0.1262694809753358, "grad_norm": 56.429054260253906, "learning_rate": 5e-05, "loss": 1.6564, "num_input_tokens_seen": 89359116, "step": 1349 }, { "epoch": 0.1262694809753358, "loss": 1.5903823375701904, "loss_ce": 0.0034682718105614185, "loss_iou": 0.71875, "loss_num": 0.0296630859375, "loss_xval": 1.5859375, "num_input_tokens_seen": 89359116, "step": 1349 }, { "epoch": 0.1263630832592315, "grad_norm": 15.590723037719727, "learning_rate": 5e-05, "loss": 1.5935, "num_input_tokens_seen": 89424872, "step": 1350 }, { "epoch": 0.1263630832592315, "loss": 1.5478242635726929, "loss_ce": 0.00851763878017664, "loss_iou": 0.6484375, "loss_num": 0.048828125, "loss_xval": 1.5390625, "num_input_tokens_seen": 89424872, "step": 1350 }, { "epoch": 0.12645668554312725, "grad_norm": 13.77322769165039, "learning_rate": 5e-05, "loss": 1.4238, "num_input_tokens_seen": 89492072, "step": 1351 }, { "epoch": 0.12645668554312725, "loss": 1.4697320461273193, "loss_ce": 0.0034233955666422844, "loss_iou": 0.625, "loss_num": 0.0439453125, "loss_xval": 1.46875, "num_input_tokens_seen": 89492072, "step": 1351 }, { "epoch": 0.126550287827023, "grad_norm": 13.505005836486816, "learning_rate": 5e-05, "loss": 1.5099, "num_input_tokens_seen": 89558472, "step": 1352 }, { "epoch": 0.126550287827023, "loss": 1.5141277313232422, "loss_ce": 0.0014323859941214323, "loss_iou": 0.66796875, "loss_num": 0.03466796875, "loss_xval": 1.515625, "num_input_tokens_seen": 89558472, "step": 1352 }, { "epoch": 0.1266438901109187, "grad_norm": 16.373979568481445, "learning_rate": 5e-05, "loss": 1.3335, "num_input_tokens_seen": 89623992, "step": 1353 }, { "epoch": 0.1266438901109187, "loss": 1.3113393783569336, "loss_ce": 0.004363037645816803, "loss_iou": 0.5546875, "loss_num": 0.03955078125, "loss_xval": 1.3046875, "num_input_tokens_seen": 89623992, "step": 1353 }, { "epoch": 0.12673749239481444, "grad_norm": 20.523517608642578, "learning_rate": 5e-05, "loss": 1.7889, "num_input_tokens_seen": 89690240, "step": 1354 }, { "epoch": 0.12673749239481444, "loss": 1.754321813583374, "loss_ce": 0.009204620495438576, "loss_iou": 0.73046875, "loss_num": 0.05712890625, "loss_xval": 1.7421875, "num_input_tokens_seen": 89690240, "step": 1354 }, { "epoch": 0.12683109467871015, "grad_norm": 19.48271942138672, "learning_rate": 5e-05, "loss": 1.7846, "num_input_tokens_seen": 89756784, "step": 1355 }, { "epoch": 0.12683109467871015, "loss": 1.9620779752731323, "loss_ce": 0.005046608857810497, "loss_iou": 0.8359375, "loss_num": 0.056884765625, "loss_xval": 1.953125, "num_input_tokens_seen": 89756784, "step": 1355 }, { "epoch": 0.1269246969626059, "grad_norm": 45.645416259765625, "learning_rate": 5e-05, "loss": 1.631, "num_input_tokens_seen": 89822608, "step": 1356 }, { "epoch": 0.1269246969626059, "loss": 1.6072031259536743, "loss_ce": 0.004175771027803421, "loss_iou": 0.66015625, "loss_num": 0.057373046875, "loss_xval": 1.6015625, "num_input_tokens_seen": 89822608, "step": 1356 }, { "epoch": 0.12701829924650163, "grad_norm": 248.37985229492188, "learning_rate": 5e-05, "loss": 1.8808, "num_input_tokens_seen": 89889216, "step": 1357 }, { "epoch": 0.12701829924650163, "loss": 1.7766605615615845, "loss_ce": 0.0051762256771326065, "loss_iou": 0.76953125, "loss_num": 0.046630859375, "loss_xval": 1.7734375, "num_input_tokens_seen": 89889216, "step": 1357 }, { "epoch": 0.12711190153039734, "grad_norm": 22.761634826660156, "learning_rate": 5e-05, "loss": 1.5701, "num_input_tokens_seen": 89955564, "step": 1358 }, { "epoch": 0.12711190153039734, "loss": 1.3086109161376953, "loss_ce": 0.0034352345392107964, "loss_iou": 0.5546875, "loss_num": 0.039794921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 89955564, "step": 1358 }, { "epoch": 0.12720550381429307, "grad_norm": 28.448043823242188, "learning_rate": 5e-05, "loss": 1.3864, "num_input_tokens_seen": 90020936, "step": 1359 }, { "epoch": 0.12720550381429307, "loss": 1.394110918045044, "loss_ce": 0.0034249001182615757, "loss_iou": 0.6015625, "loss_num": 0.037841796875, "loss_xval": 1.390625, "num_input_tokens_seen": 90020936, "step": 1359 }, { "epoch": 0.12729910609818879, "grad_norm": 17.549392700195312, "learning_rate": 5e-05, "loss": 1.5986, "num_input_tokens_seen": 90086248, "step": 1360 }, { "epoch": 0.12729910609818879, "loss": 1.6289432048797607, "loss_ce": 0.007849406450986862, "loss_iou": 0.68359375, "loss_num": 0.0517578125, "loss_xval": 1.625, "num_input_tokens_seen": 90086248, "step": 1360 }, { "epoch": 0.12739270838208452, "grad_norm": 11.626138687133789, "learning_rate": 5e-05, "loss": 1.471, "num_input_tokens_seen": 90153580, "step": 1361 }, { "epoch": 0.12739270838208452, "loss": 1.4268728494644165, "loss_ce": 0.006950935814529657, "loss_iou": 0.59765625, "loss_num": 0.044189453125, "loss_xval": 1.421875, "num_input_tokens_seen": 90153580, "step": 1361 }, { "epoch": 0.12748631066598026, "grad_norm": 154.85272216796875, "learning_rate": 5e-05, "loss": 1.3774, "num_input_tokens_seen": 90220064, "step": 1362 }, { "epoch": 0.12748631066598026, "loss": 1.483375072479248, "loss_ce": 0.0038829362019896507, "loss_iou": 0.61328125, "loss_num": 0.05126953125, "loss_xval": 1.4765625, "num_input_tokens_seen": 90220064, "step": 1362 }, { "epoch": 0.12757991294987597, "grad_norm": 25.236352920532227, "learning_rate": 5e-05, "loss": 1.4006, "num_input_tokens_seen": 90286288, "step": 1363 }, { "epoch": 0.12757991294987597, "loss": 1.4804292917251587, "loss_ce": 0.004599159583449364, "loss_iou": 0.61328125, "loss_num": 0.049560546875, "loss_xval": 1.4765625, "num_input_tokens_seen": 90286288, "step": 1363 }, { "epoch": 0.1276735152337717, "grad_norm": 35.36727523803711, "learning_rate": 5e-05, "loss": 1.5822, "num_input_tokens_seen": 90352244, "step": 1364 }, { "epoch": 0.1276735152337717, "loss": 1.286836862564087, "loss_ce": 0.0072957719676196575, "loss_iou": 0.5390625, "loss_num": 0.0400390625, "loss_xval": 1.28125, "num_input_tokens_seen": 90352244, "step": 1364 }, { "epoch": 0.12776711751766742, "grad_norm": 17.38477897644043, "learning_rate": 5e-05, "loss": 1.6278, "num_input_tokens_seen": 90418872, "step": 1365 }, { "epoch": 0.12776711751766742, "loss": 1.782911777496338, "loss_ce": 0.003614937188103795, "loss_iou": 0.7890625, "loss_num": 0.04052734375, "loss_xval": 1.78125, "num_input_tokens_seen": 90418872, "step": 1365 }, { "epoch": 0.12786071980156316, "grad_norm": 85.57073974609375, "learning_rate": 5e-05, "loss": 1.4069, "num_input_tokens_seen": 90485600, "step": 1366 }, { "epoch": 0.12786071980156316, "loss": 1.5200433731079102, "loss_ce": 0.001488764537498355, "loss_iou": 0.66015625, "loss_num": 0.0400390625, "loss_xval": 1.515625, "num_input_tokens_seen": 90485600, "step": 1366 }, { "epoch": 0.12795432208545887, "grad_norm": 21.695579528808594, "learning_rate": 5e-05, "loss": 1.5567, "num_input_tokens_seen": 90552584, "step": 1367 }, { "epoch": 0.12795432208545887, "loss": 1.4326601028442383, "loss_ce": 0.0010194204514846206, "loss_iou": 0.63671875, "loss_num": 0.03125, "loss_xval": 1.4296875, "num_input_tokens_seen": 90552584, "step": 1367 }, { "epoch": 0.1280479243693546, "grad_norm": 23.40521812438965, "learning_rate": 5e-05, "loss": 1.5361, "num_input_tokens_seen": 90618684, "step": 1368 }, { "epoch": 0.1280479243693546, "loss": 1.5466312170028687, "loss_ce": 0.0095218475908041, "loss_iou": 0.6953125, "loss_num": 0.0284423828125, "loss_xval": 1.5390625, "num_input_tokens_seen": 90618684, "step": 1368 }, { "epoch": 0.12814152665325035, "grad_norm": 18.414335250854492, "learning_rate": 5e-05, "loss": 1.5375, "num_input_tokens_seen": 90684404, "step": 1369 }, { "epoch": 0.12814152665325035, "loss": 1.5048866271972656, "loss_ce": 0.0058631375432014465, "loss_iou": 0.62890625, "loss_num": 0.04833984375, "loss_xval": 1.5, "num_input_tokens_seen": 90684404, "step": 1369 }, { "epoch": 0.12823512893714606, "grad_norm": 17.33824920654297, "learning_rate": 5e-05, "loss": 1.2944, "num_input_tokens_seen": 90751324, "step": 1370 }, { "epoch": 0.12823512893714606, "loss": 1.223867416381836, "loss_ce": 0.004629106260836124, "loss_iou": 0.5234375, "loss_num": 0.034912109375, "loss_xval": 1.21875, "num_input_tokens_seen": 90751324, "step": 1370 }, { "epoch": 0.1283287312210418, "grad_norm": 25.81096649169922, "learning_rate": 5e-05, "loss": 1.5307, "num_input_tokens_seen": 90818100, "step": 1371 }, { "epoch": 0.1283287312210418, "loss": 1.6869189739227295, "loss_ce": 0.009184468537569046, "loss_iou": 0.6875, "loss_num": 0.060791015625, "loss_xval": 1.6796875, "num_input_tokens_seen": 90818100, "step": 1371 }, { "epoch": 0.1284223335049375, "grad_norm": 18.021596908569336, "learning_rate": 5e-05, "loss": 1.3482, "num_input_tokens_seen": 90884120, "step": 1372 }, { "epoch": 0.1284223335049375, "loss": 1.5917011499404907, "loss_ce": 0.0057636527344584465, "loss_iou": 0.6640625, "loss_num": 0.052734375, "loss_xval": 1.5859375, "num_input_tokens_seen": 90884120, "step": 1372 }, { "epoch": 0.12851593578883325, "grad_norm": 27.08411979675293, "learning_rate": 5e-05, "loss": 1.4109, "num_input_tokens_seen": 90948716, "step": 1373 }, { "epoch": 0.12851593578883325, "loss": 1.512143611907959, "loss_ce": 0.002378041623160243, "loss_iou": 0.62890625, "loss_num": 0.050537109375, "loss_xval": 1.5078125, "num_input_tokens_seen": 90948716, "step": 1373 }, { "epoch": 0.12860953807272898, "grad_norm": 23.115093231201172, "learning_rate": 5e-05, "loss": 1.1726, "num_input_tokens_seen": 91015700, "step": 1374 }, { "epoch": 0.12860953807272898, "loss": 1.2207486629486084, "loss_ce": 0.004928313195705414, "loss_iou": 0.53515625, "loss_num": 0.02880859375, "loss_xval": 1.21875, "num_input_tokens_seen": 91015700, "step": 1374 }, { "epoch": 0.1287031403566247, "grad_norm": 20.75998878479004, "learning_rate": 5e-05, "loss": 1.4698, "num_input_tokens_seen": 91081828, "step": 1375 }, { "epoch": 0.1287031403566247, "loss": 1.5335545539855957, "loss_ce": 0.007675648666918278, "loss_iou": 0.65625, "loss_num": 0.042724609375, "loss_xval": 1.5234375, "num_input_tokens_seen": 91081828, "step": 1375 }, { "epoch": 0.12879674264052043, "grad_norm": 23.59589958190918, "learning_rate": 5e-05, "loss": 1.3764, "num_input_tokens_seen": 91147524, "step": 1376 }, { "epoch": 0.12879674264052043, "loss": 1.2893974781036377, "loss_ce": 0.0052177440375089645, "loss_iou": 0.5703125, "loss_num": 0.028076171875, "loss_xval": 1.28125, "num_input_tokens_seen": 91147524, "step": 1376 }, { "epoch": 0.12889034492441614, "grad_norm": 30.19612693786621, "learning_rate": 5e-05, "loss": 1.5341, "num_input_tokens_seen": 91212540, "step": 1377 }, { "epoch": 0.12889034492441614, "loss": 1.6309689283370972, "loss_ce": 0.004015746992081404, "loss_iou": 0.671875, "loss_num": 0.05712890625, "loss_xval": 1.625, "num_input_tokens_seen": 91212540, "step": 1377 }, { "epoch": 0.12898394720831188, "grad_norm": 16.537076950073242, "learning_rate": 5e-05, "loss": 1.7526, "num_input_tokens_seen": 91278960, "step": 1378 }, { "epoch": 0.12898394720831188, "loss": 1.9375122785568237, "loss_ce": 0.005871674977242947, "loss_iou": 0.80078125, "loss_num": 0.06591796875, "loss_xval": 1.9296875, "num_input_tokens_seen": 91278960, "step": 1378 }, { "epoch": 0.12907754949220762, "grad_norm": 14.124958038330078, "learning_rate": 5e-05, "loss": 1.3359, "num_input_tokens_seen": 91346708, "step": 1379 }, { "epoch": 0.12907754949220762, "loss": 1.371921181678772, "loss_ce": 0.004733636975288391, "loss_iou": 0.57421875, "loss_num": 0.0439453125, "loss_xval": 1.3671875, "num_input_tokens_seen": 91346708, "step": 1379 }, { "epoch": 0.12917115177610333, "grad_norm": 21.393112182617188, "learning_rate": 5e-05, "loss": 1.5838, "num_input_tokens_seen": 91412816, "step": 1380 }, { "epoch": 0.12917115177610333, "loss": 1.4501063823699951, "loss_ce": 0.0008876234060153365, "loss_iou": 0.5859375, "loss_num": 0.0556640625, "loss_xval": 1.453125, "num_input_tokens_seen": 91412816, "step": 1380 }, { "epoch": 0.12926475405999907, "grad_norm": 10.855667114257812, "learning_rate": 5e-05, "loss": 1.3396, "num_input_tokens_seen": 91479152, "step": 1381 }, { "epoch": 0.12926475405999907, "loss": 1.5468095541000366, "loss_ce": 0.0058245365507900715, "loss_iou": 0.671875, "loss_num": 0.039794921875, "loss_xval": 1.5390625, "num_input_tokens_seen": 91479152, "step": 1381 }, { "epoch": 0.12935835634389478, "grad_norm": 10.485991477966309, "learning_rate": 5e-05, "loss": 1.1981, "num_input_tokens_seen": 91544272, "step": 1382 }, { "epoch": 0.12935835634389478, "loss": 1.0928484201431274, "loss_ce": 0.004713610280305147, "loss_iou": 0.4296875, "loss_num": 0.045654296875, "loss_xval": 1.0859375, "num_input_tokens_seen": 91544272, "step": 1382 }, { "epoch": 0.12945195862779052, "grad_norm": 30.87342643737793, "learning_rate": 5e-05, "loss": 1.5138, "num_input_tokens_seen": 91611320, "step": 1383 }, { "epoch": 0.12945195862779052, "loss": 1.6626055240631104, "loss_ce": 0.003425776492804289, "loss_iou": 0.734375, "loss_num": 0.03857421875, "loss_xval": 1.65625, "num_input_tokens_seen": 91611320, "step": 1383 }, { "epoch": 0.12954556091168626, "grad_norm": 40.892181396484375, "learning_rate": 5e-05, "loss": 1.7905, "num_input_tokens_seen": 91677232, "step": 1384 }, { "epoch": 0.12954556091168626, "loss": 1.8152230978012085, "loss_ce": 0.001746569061651826, "loss_iou": 0.80859375, "loss_num": 0.038330078125, "loss_xval": 1.8125, "num_input_tokens_seen": 91677232, "step": 1384 }, { "epoch": 0.12963916319558197, "grad_norm": 17.921037673950195, "learning_rate": 5e-05, "loss": 1.592, "num_input_tokens_seen": 91743024, "step": 1385 }, { "epoch": 0.12963916319558197, "loss": 1.8663153648376465, "loss_ce": 0.0030340198427438736, "loss_iou": 0.796875, "loss_num": 0.053466796875, "loss_xval": 1.859375, "num_input_tokens_seen": 91743024, "step": 1385 }, { "epoch": 0.1297327654794777, "grad_norm": 23.16750717163086, "learning_rate": 5e-05, "loss": 1.4521, "num_input_tokens_seen": 91810484, "step": 1386 }, { "epoch": 0.1297327654794777, "loss": 1.458481788635254, "loss_ce": 0.0043802387081086636, "loss_iou": 0.65625, "loss_num": 0.0289306640625, "loss_xval": 1.453125, "num_input_tokens_seen": 91810484, "step": 1386 }, { "epoch": 0.12982636776337342, "grad_norm": 111.02323150634766, "learning_rate": 5e-05, "loss": 1.4423, "num_input_tokens_seen": 91876592, "step": 1387 }, { "epoch": 0.12982636776337342, "loss": 1.5060733556747437, "loss_ce": 0.007049937732517719, "loss_iou": 0.609375, "loss_num": 0.055908203125, "loss_xval": 1.5, "num_input_tokens_seen": 91876592, "step": 1387 }, { "epoch": 0.12991997004726916, "grad_norm": 19.789958953857422, "learning_rate": 5e-05, "loss": 1.6455, "num_input_tokens_seen": 91943344, "step": 1388 }, { "epoch": 0.12991997004726916, "loss": 1.7664657831192017, "loss_ce": 0.0042588477954268456, "loss_iou": 0.76171875, "loss_num": 0.046630859375, "loss_xval": 1.765625, "num_input_tokens_seen": 91943344, "step": 1388 }, { "epoch": 0.13001357233116487, "grad_norm": 12.905505180358887, "learning_rate": 5e-05, "loss": 1.4612, "num_input_tokens_seen": 92008716, "step": 1389 }, { "epoch": 0.13001357233116487, "loss": 1.1903969049453735, "loss_ce": 0.0033241792116314173, "loss_iou": 0.48046875, "loss_num": 0.044921875, "loss_xval": 1.1875, "num_input_tokens_seen": 92008716, "step": 1389 }, { "epoch": 0.1301071746150606, "grad_norm": 103.31057739257812, "learning_rate": 5e-05, "loss": 1.2876, "num_input_tokens_seen": 92074136, "step": 1390 }, { "epoch": 0.1301071746150606, "loss": 1.2002911567687988, "loss_ce": 0.007023334503173828, "loss_iou": 0.50390625, "loss_num": 0.03662109375, "loss_xval": 1.1953125, "num_input_tokens_seen": 92074136, "step": 1390 }, { "epoch": 0.13020077689895634, "grad_norm": 19.765432357788086, "learning_rate": 5e-05, "loss": 1.4414, "num_input_tokens_seen": 92141644, "step": 1391 }, { "epoch": 0.13020077689895634, "loss": 1.5639369487762451, "loss_ce": 0.0019252786878496408, "loss_iou": 0.68359375, "loss_num": 0.039306640625, "loss_xval": 1.5625, "num_input_tokens_seen": 92141644, "step": 1391 }, { "epoch": 0.13029437918285205, "grad_norm": 27.777828216552734, "learning_rate": 5e-05, "loss": 1.5448, "num_input_tokens_seen": 92207420, "step": 1392 }, { "epoch": 0.13029437918285205, "loss": 1.5984091758728027, "loss_ce": 0.0041709281504154205, "loss_iou": 0.6484375, "loss_num": 0.06005859375, "loss_xval": 1.59375, "num_input_tokens_seen": 92207420, "step": 1392 }, { "epoch": 0.1303879814667478, "grad_norm": 25.60963249206543, "learning_rate": 5e-05, "loss": 1.8416, "num_input_tokens_seen": 92271428, "step": 1393 }, { "epoch": 0.1303879814667478, "loss": 2.0645360946655273, "loss_ce": 0.007895649410784245, "loss_iou": 0.8203125, "loss_num": 0.0830078125, "loss_xval": 2.0625, "num_input_tokens_seen": 92271428, "step": 1393 }, { "epoch": 0.1304815837506435, "grad_norm": 21.06133460998535, "learning_rate": 5e-05, "loss": 1.4674, "num_input_tokens_seen": 92338340, "step": 1394 }, { "epoch": 0.1304815837506435, "loss": 1.5114936828613281, "loss_ce": 0.005634224973618984, "loss_iou": 0.65234375, "loss_num": 0.040771484375, "loss_xval": 1.5078125, "num_input_tokens_seen": 92338340, "step": 1394 }, { "epoch": 0.13057518603453924, "grad_norm": 11.62189769744873, "learning_rate": 5e-05, "loss": 1.3301, "num_input_tokens_seen": 92405116, "step": 1395 }, { "epoch": 0.13057518603453924, "loss": 1.341535210609436, "loss_ce": 0.005109419114887714, "loss_iou": 0.58203125, "loss_num": 0.034423828125, "loss_xval": 1.3359375, "num_input_tokens_seen": 92405116, "step": 1395 }, { "epoch": 0.13066878831843498, "grad_norm": 14.54174518585205, "learning_rate": 5e-05, "loss": 1.3758, "num_input_tokens_seen": 92472160, "step": 1396 }, { "epoch": 0.13066878831843498, "loss": 1.4310237169265747, "loss_ce": 0.001824527862481773, "loss_iou": 0.62890625, "loss_num": 0.034423828125, "loss_xval": 1.4296875, "num_input_tokens_seen": 92472160, "step": 1396 }, { "epoch": 0.1307623906023307, "grad_norm": 22.171239852905273, "learning_rate": 5e-05, "loss": 1.4602, "num_input_tokens_seen": 92538712, "step": 1397 }, { "epoch": 0.1307623906023307, "loss": 1.2587718963623047, "loss_ce": 0.003522851038724184, "loss_iou": 0.53515625, "loss_num": 0.036865234375, "loss_xval": 1.2578125, "num_input_tokens_seen": 92538712, "step": 1397 }, { "epoch": 0.13085599288622643, "grad_norm": 40.621341705322266, "learning_rate": 5e-05, "loss": 1.6449, "num_input_tokens_seen": 92605588, "step": 1398 }, { "epoch": 0.13085599288622643, "loss": 1.5951776504516602, "loss_ce": 0.002892507240176201, "loss_iou": 0.68359375, "loss_num": 0.045654296875, "loss_xval": 1.59375, "num_input_tokens_seen": 92605588, "step": 1398 }, { "epoch": 0.13094959517012214, "grad_norm": 19.85601043701172, "learning_rate": 5e-05, "loss": 1.9056, "num_input_tokens_seen": 92672276, "step": 1399 }, { "epoch": 0.13094959517012214, "loss": 1.9563559293746948, "loss_ce": 0.0022544129751622677, "loss_iou": 0.83984375, "loss_num": 0.055419921875, "loss_xval": 1.953125, "num_input_tokens_seen": 92672276, "step": 1399 }, { "epoch": 0.13104319745401788, "grad_norm": 16.9284725189209, "learning_rate": 5e-05, "loss": 1.4498, "num_input_tokens_seen": 92739320, "step": 1400 }, { "epoch": 0.13104319745401788, "loss": 1.446366548538208, "loss_ce": 0.00886652059853077, "loss_iou": 0.6171875, "loss_num": 0.04052734375, "loss_xval": 1.4375, "num_input_tokens_seen": 92739320, "step": 1400 }, { "epoch": 0.13113679973791362, "grad_norm": 32.16905975341797, "learning_rate": 5e-05, "loss": 1.4314, "num_input_tokens_seen": 92804600, "step": 1401 }, { "epoch": 0.13113679973791362, "loss": 1.3611209392547607, "loss_ce": 0.004675663076341152, "loss_iou": 0.5859375, "loss_num": 0.037841796875, "loss_xval": 1.359375, "num_input_tokens_seen": 92804600, "step": 1401 }, { "epoch": 0.13123040202180933, "grad_norm": 21.563016891479492, "learning_rate": 5e-05, "loss": 1.5172, "num_input_tokens_seen": 92870192, "step": 1402 }, { "epoch": 0.13123040202180933, "loss": 1.6784169673919678, "loss_ce": 0.0065420567989349365, "loss_iou": 0.7109375, "loss_num": 0.0498046875, "loss_xval": 1.671875, "num_input_tokens_seen": 92870192, "step": 1402 }, { "epoch": 0.13132400430570507, "grad_norm": 28.80994987487793, "learning_rate": 5e-05, "loss": 1.6035, "num_input_tokens_seen": 92935924, "step": 1403 }, { "epoch": 0.13132400430570507, "loss": 1.8455475568771362, "loss_ce": 0.007657048758119345, "loss_iou": 0.7734375, "loss_num": 0.05810546875, "loss_xval": 1.8359375, "num_input_tokens_seen": 92935924, "step": 1403 }, { "epoch": 0.13141760658960078, "grad_norm": 16.39699935913086, "learning_rate": 5e-05, "loss": 1.7602, "num_input_tokens_seen": 93002044, "step": 1404 }, { "epoch": 0.13141760658960078, "loss": 1.8066574335098267, "loss_ce": 0.003923080395907164, "loss_iou": 0.796875, "loss_num": 0.0419921875, "loss_xval": 1.8046875, "num_input_tokens_seen": 93002044, "step": 1404 }, { "epoch": 0.13151120887349652, "grad_norm": 18.839136123657227, "learning_rate": 5e-05, "loss": 1.4374, "num_input_tokens_seen": 93068748, "step": 1405 }, { "epoch": 0.13151120887349652, "loss": 1.1774072647094727, "loss_ce": 0.00455570500344038, "loss_iou": 0.48828125, "loss_num": 0.039306640625, "loss_xval": 1.171875, "num_input_tokens_seen": 93068748, "step": 1405 }, { "epoch": 0.13160481115739225, "grad_norm": 36.0368537902832, "learning_rate": 5e-05, "loss": 1.4246, "num_input_tokens_seen": 93135136, "step": 1406 }, { "epoch": 0.13160481115739225, "loss": 1.6487808227539062, "loss_ce": 0.0062027000822126865, "loss_iou": 0.74609375, "loss_num": 0.0301513671875, "loss_xval": 1.640625, "num_input_tokens_seen": 93135136, "step": 1406 }, { "epoch": 0.13169841344128796, "grad_norm": 17.659997940063477, "learning_rate": 5e-05, "loss": 1.7284, "num_input_tokens_seen": 93201716, "step": 1407 }, { "epoch": 0.13169841344128796, "loss": 1.6802024841308594, "loss_ce": 0.0024681566283106804, "loss_iou": 0.73046875, "loss_num": 0.04345703125, "loss_xval": 1.6796875, "num_input_tokens_seen": 93201716, "step": 1407 }, { "epoch": 0.1317920157251837, "grad_norm": 13.591567993164062, "learning_rate": 5e-05, "loss": 1.5516, "num_input_tokens_seen": 93268212, "step": 1408 }, { "epoch": 0.1317920157251837, "loss": 1.7014623880386353, "loss_ce": 0.004196710418909788, "loss_iou": 0.6875, "loss_num": 0.06396484375, "loss_xval": 1.6953125, "num_input_tokens_seen": 93268212, "step": 1408 }, { "epoch": 0.13188561800907941, "grad_norm": 15.220724105834961, "learning_rate": 5e-05, "loss": 1.2826, "num_input_tokens_seen": 93334480, "step": 1409 }, { "epoch": 0.13188561800907941, "loss": 1.4379498958587646, "loss_ce": 0.0024029668420553207, "loss_iou": 0.6015625, "loss_num": 0.04638671875, "loss_xval": 1.4375, "num_input_tokens_seen": 93334480, "step": 1409 }, { "epoch": 0.13197922029297515, "grad_norm": 24.42323875427246, "learning_rate": 5e-05, "loss": 1.5851, "num_input_tokens_seen": 93400640, "step": 1410 }, { "epoch": 0.13197922029297515, "loss": 1.6681182384490967, "loss_ce": 0.006985319312661886, "loss_iou": 0.69140625, "loss_num": 0.05615234375, "loss_xval": 1.6640625, "num_input_tokens_seen": 93400640, "step": 1410 }, { "epoch": 0.13207282257687086, "grad_norm": 35.02754211425781, "learning_rate": 5e-05, "loss": 1.4912, "num_input_tokens_seen": 93466624, "step": 1411 }, { "epoch": 0.13207282257687086, "loss": 1.493984341621399, "loss_ce": 0.004726508166640997, "loss_iou": 0.58984375, "loss_num": 0.06201171875, "loss_xval": 1.4921875, "num_input_tokens_seen": 93466624, "step": 1411 }, { "epoch": 0.1321664248607666, "grad_norm": 21.393375396728516, "learning_rate": 5e-05, "loss": 1.7033, "num_input_tokens_seen": 93533320, "step": 1412 }, { "epoch": 0.1321664248607666, "loss": 1.729456901550293, "loss_ce": 0.0038708860520273447, "loss_iou": 0.72265625, "loss_num": 0.0556640625, "loss_xval": 1.7265625, "num_input_tokens_seen": 93533320, "step": 1412 }, { "epoch": 0.13226002714466234, "grad_norm": 45.04379653930664, "learning_rate": 5e-05, "loss": 1.4752, "num_input_tokens_seen": 93599720, "step": 1413 }, { "epoch": 0.13226002714466234, "loss": 1.413474678993225, "loss_ce": 0.0018535281997174025, "loss_iou": 0.609375, "loss_num": 0.03857421875, "loss_xval": 1.4140625, "num_input_tokens_seen": 93599720, "step": 1413 }, { "epoch": 0.13235362942855805, "grad_norm": 21.20207405090332, "learning_rate": 5e-05, "loss": 1.474, "num_input_tokens_seen": 93665476, "step": 1414 }, { "epoch": 0.13235362942855805, "loss": 1.4328948259353638, "loss_ce": 0.004183912184089422, "loss_iou": 0.56640625, "loss_num": 0.0595703125, "loss_xval": 1.4296875, "num_input_tokens_seen": 93665476, "step": 1414 }, { "epoch": 0.1324472317124538, "grad_norm": 28.82359504699707, "learning_rate": 5e-05, "loss": 1.4343, "num_input_tokens_seen": 93730960, "step": 1415 }, { "epoch": 0.1324472317124538, "loss": 1.4721465110778809, "loss_ce": 0.0024200051557272673, "loss_iou": 0.6484375, "loss_num": 0.033935546875, "loss_xval": 1.46875, "num_input_tokens_seen": 93730960, "step": 1415 }, { "epoch": 0.1325408339963495, "grad_norm": 12.276638984680176, "learning_rate": 5e-05, "loss": 1.4503, "num_input_tokens_seen": 93797336, "step": 1416 }, { "epoch": 0.1325408339963495, "loss": 1.6388862133026123, "loss_ce": 0.004120668862015009, "loss_iou": 0.6484375, "loss_num": 0.06640625, "loss_xval": 1.6328125, "num_input_tokens_seen": 93797336, "step": 1416 }, { "epoch": 0.13263443628024524, "grad_norm": 18.934528350830078, "learning_rate": 5e-05, "loss": 1.4018, "num_input_tokens_seen": 93863448, "step": 1417 }, { "epoch": 0.13263443628024524, "loss": 1.3206698894500732, "loss_ce": 0.0032871188595891, "loss_iou": 0.54296875, "loss_num": 0.0458984375, "loss_xval": 1.3203125, "num_input_tokens_seen": 93863448, "step": 1417 }, { "epoch": 0.13272803856414098, "grad_norm": 32.291786193847656, "learning_rate": 5e-05, "loss": 1.3848, "num_input_tokens_seen": 93928764, "step": 1418 }, { "epoch": 0.13272803856414098, "loss": 1.3497077226638794, "loss_ce": 0.010352229699492455, "loss_iou": 0.5859375, "loss_num": 0.03369140625, "loss_xval": 1.3359375, "num_input_tokens_seen": 93928764, "step": 1418 }, { "epoch": 0.1328216408480367, "grad_norm": 18.41583251953125, "learning_rate": 5e-05, "loss": 1.7208, "num_input_tokens_seen": 93994328, "step": 1419 }, { "epoch": 0.1328216408480367, "loss": 1.655555248260498, "loss_ce": 0.004188154824078083, "loss_iou": 0.71484375, "loss_num": 0.044921875, "loss_xval": 1.6484375, "num_input_tokens_seen": 93994328, "step": 1419 }, { "epoch": 0.13291524313193243, "grad_norm": 14.359472274780273, "learning_rate": 5e-05, "loss": 1.5621, "num_input_tokens_seen": 94059920, "step": 1420 }, { "epoch": 0.13291524313193243, "loss": 1.7808771133422852, "loss_ce": 0.002556830644607544, "loss_iou": 0.71875, "loss_num": 0.06787109375, "loss_xval": 1.78125, "num_input_tokens_seen": 94059920, "step": 1420 }, { "epoch": 0.13300884541582814, "grad_norm": 23.846860885620117, "learning_rate": 5e-05, "loss": 1.3449, "num_input_tokens_seen": 94125840, "step": 1421 }, { "epoch": 0.13300884541582814, "loss": 1.4242160320281982, "loss_ce": 0.005758992396295071, "loss_iou": 0.6015625, "loss_num": 0.043212890625, "loss_xval": 1.421875, "num_input_tokens_seen": 94125840, "step": 1421 }, { "epoch": 0.13310244769972387, "grad_norm": 38.345027923583984, "learning_rate": 5e-05, "loss": 1.6819, "num_input_tokens_seen": 94191860, "step": 1422 }, { "epoch": 0.13310244769972387, "loss": 1.6982989311218262, "loss_ce": 0.00396299222484231, "loss_iou": 0.71484375, "loss_num": 0.052490234375, "loss_xval": 1.6953125, "num_input_tokens_seen": 94191860, "step": 1422 }, { "epoch": 0.1331960499836196, "grad_norm": 30.62224578857422, "learning_rate": 5e-05, "loss": 1.4571, "num_input_tokens_seen": 94258260, "step": 1423 }, { "epoch": 0.1331960499836196, "loss": 1.4068955183029175, "loss_ce": 0.004551771562546492, "loss_iou": 0.59375, "loss_num": 0.04248046875, "loss_xval": 1.40625, "num_input_tokens_seen": 94258260, "step": 1423 }, { "epoch": 0.13328965226751532, "grad_norm": 15.79391098022461, "learning_rate": 5e-05, "loss": 1.7965, "num_input_tokens_seen": 94325912, "step": 1424 }, { "epoch": 0.13328965226751532, "loss": 1.9111042022705078, "loss_ce": 0.002901125233620405, "loss_iou": 0.8203125, "loss_num": 0.052734375, "loss_xval": 1.90625, "num_input_tokens_seen": 94325912, "step": 1424 }, { "epoch": 0.13338325455141106, "grad_norm": 16.25873565673828, "learning_rate": 5e-05, "loss": 1.5263, "num_input_tokens_seen": 94391860, "step": 1425 }, { "epoch": 0.13338325455141106, "loss": 1.6111969947814941, "loss_ce": 0.0042633311823010445, "loss_iou": 0.68359375, "loss_num": 0.0478515625, "loss_xval": 1.609375, "num_input_tokens_seen": 94391860, "step": 1425 }, { "epoch": 0.13347685683530677, "grad_norm": 18.118350982666016, "learning_rate": 5e-05, "loss": 1.3529, "num_input_tokens_seen": 94457948, "step": 1426 }, { "epoch": 0.13347685683530677, "loss": 1.354447364807129, "loss_ce": 0.0057229273952543736, "loss_iou": 0.5625, "loss_num": 0.0439453125, "loss_xval": 1.3515625, "num_input_tokens_seen": 94457948, "step": 1426 }, { "epoch": 0.1335704591192025, "grad_norm": 19.742839813232422, "learning_rate": 5e-05, "loss": 1.4873, "num_input_tokens_seen": 94525160, "step": 1427 }, { "epoch": 0.1335704591192025, "loss": 1.3646737337112427, "loss_ce": 0.004810418467968702, "loss_iou": 0.54296875, "loss_num": 0.05517578125, "loss_xval": 1.359375, "num_input_tokens_seen": 94525160, "step": 1427 }, { "epoch": 0.13366406140309822, "grad_norm": 32.74245834350586, "learning_rate": 5e-05, "loss": 1.6196, "num_input_tokens_seen": 94591060, "step": 1428 }, { "epoch": 0.13366406140309822, "loss": 1.6130080223083496, "loss_ce": 0.005586134735494852, "loss_iou": 0.72265625, "loss_num": 0.0322265625, "loss_xval": 1.609375, "num_input_tokens_seen": 94591060, "step": 1428 }, { "epoch": 0.13375766368699396, "grad_norm": 17.923078536987305, "learning_rate": 5e-05, "loss": 1.7979, "num_input_tokens_seen": 94657624, "step": 1429 }, { "epoch": 0.13375766368699396, "loss": 1.6485836505889893, "loss_ce": 0.005029020830988884, "loss_iou": 0.7265625, "loss_num": 0.038818359375, "loss_xval": 1.640625, "num_input_tokens_seen": 94657624, "step": 1429 }, { "epoch": 0.1338512659708897, "grad_norm": 28.271013259887695, "learning_rate": 5e-05, "loss": 1.333, "num_input_tokens_seen": 94722616, "step": 1430 }, { "epoch": 0.1338512659708897, "loss": 1.2937275171279907, "loss_ce": 0.010036170482635498, "loss_iou": 0.5078125, "loss_num": 0.0537109375, "loss_xval": 1.28125, "num_input_tokens_seen": 94722616, "step": 1430 }, { "epoch": 0.1339448682547854, "grad_norm": 18.96759796142578, "learning_rate": 5e-05, "loss": 1.1467, "num_input_tokens_seen": 94788368, "step": 1431 }, { "epoch": 0.1339448682547854, "loss": 1.2531790733337402, "loss_ce": 0.004399813711643219, "loss_iou": 0.5078125, "loss_num": 0.046142578125, "loss_xval": 1.25, "num_input_tokens_seen": 94788368, "step": 1431 }, { "epoch": 0.13403847053868115, "grad_norm": 30.16978645324707, "learning_rate": 5e-05, "loss": 1.4575, "num_input_tokens_seen": 94854476, "step": 1432 }, { "epoch": 0.13403847053868115, "loss": 1.4885075092315674, "loss_ce": 0.0036137597635388374, "loss_iou": 0.609375, "loss_num": 0.053466796875, "loss_xval": 1.484375, "num_input_tokens_seen": 94854476, "step": 1432 }, { "epoch": 0.13413207282257686, "grad_norm": 21.352434158325195, "learning_rate": 5e-05, "loss": 1.7639, "num_input_tokens_seen": 94922156, "step": 1433 }, { "epoch": 0.13413207282257686, "loss": 1.6939449310302734, "loss_ce": 0.0025387154892086983, "loss_iou": 0.734375, "loss_num": 0.0439453125, "loss_xval": 1.6875, "num_input_tokens_seen": 94922156, "step": 1433 }, { "epoch": 0.1342256751064726, "grad_norm": 14.761849403381348, "learning_rate": 5e-05, "loss": 1.323, "num_input_tokens_seen": 94988076, "step": 1434 }, { "epoch": 0.1342256751064726, "loss": 1.2977325916290283, "loss_ce": 0.00427562091499567, "loss_iou": 0.53515625, "loss_num": 0.044677734375, "loss_xval": 1.296875, "num_input_tokens_seen": 94988076, "step": 1434 }, { "epoch": 0.13431927739036834, "grad_norm": 28.465805053710938, "learning_rate": 5e-05, "loss": 1.5796, "num_input_tokens_seen": 95054844, "step": 1435 }, { "epoch": 0.13431927739036834, "loss": 1.4701448678970337, "loss_ce": 0.005301137454807758, "loss_iou": 0.65234375, "loss_num": 0.0322265625, "loss_xval": 1.46875, "num_input_tokens_seen": 95054844, "step": 1435 }, { "epoch": 0.13441287967426405, "grad_norm": 38.11674118041992, "learning_rate": 5e-05, "loss": 1.6241, "num_input_tokens_seen": 95120052, "step": 1436 }, { "epoch": 0.13441287967426405, "loss": 1.4885752201080322, "loss_ce": 0.0031321346759796143, "loss_iou": 0.640625, "loss_num": 0.0400390625, "loss_xval": 1.484375, "num_input_tokens_seen": 95120052, "step": 1436 }, { "epoch": 0.13450648195815978, "grad_norm": 21.564964294433594, "learning_rate": 5e-05, "loss": 1.6243, "num_input_tokens_seen": 95186444, "step": 1437 }, { "epoch": 0.13450648195815978, "loss": 1.5397932529449463, "loss_ce": 0.0007306834449991584, "loss_iou": 0.69140625, "loss_num": 0.03125, "loss_xval": 1.5390625, "num_input_tokens_seen": 95186444, "step": 1437 }, { "epoch": 0.1346000842420555, "grad_norm": 11.223896980285645, "learning_rate": 5e-05, "loss": 1.3417, "num_input_tokens_seen": 95251976, "step": 1438 }, { "epoch": 0.1346000842420555, "loss": 1.4510210752487183, "loss_ce": 0.002778932685032487, "loss_iou": 0.625, "loss_num": 0.0400390625, "loss_xval": 1.4453125, "num_input_tokens_seen": 95251976, "step": 1438 }, { "epoch": 0.13469368652595123, "grad_norm": 14.81171703338623, "learning_rate": 5e-05, "loss": 1.2577, "num_input_tokens_seen": 95316676, "step": 1439 }, { "epoch": 0.13469368652595123, "loss": 1.3588645458221436, "loss_ce": 0.003395825158804655, "loss_iou": 0.546875, "loss_num": 0.052734375, "loss_xval": 1.359375, "num_input_tokens_seen": 95316676, "step": 1439 }, { "epoch": 0.13478728880984697, "grad_norm": 14.567553520202637, "learning_rate": 5e-05, "loss": 1.3817, "num_input_tokens_seen": 95383056, "step": 1440 }, { "epoch": 0.13478728880984697, "loss": 1.2535104751586914, "loss_ce": 0.0025339778512716293, "loss_iou": 0.55859375, "loss_num": 0.0269775390625, "loss_xval": 1.25, "num_input_tokens_seen": 95383056, "step": 1440 }, { "epoch": 0.13488089109374268, "grad_norm": 38.295921325683594, "learning_rate": 5e-05, "loss": 1.2326, "num_input_tokens_seen": 95450028, "step": 1441 }, { "epoch": 0.13488089109374268, "loss": 1.1347554922103882, "loss_ce": 0.00682583823800087, "loss_iou": 0.515625, "loss_num": 0.019287109375, "loss_xval": 1.125, "num_input_tokens_seen": 95450028, "step": 1441 }, { "epoch": 0.13497449337763842, "grad_norm": 30.807174682617188, "learning_rate": 5e-05, "loss": 1.24, "num_input_tokens_seen": 95515672, "step": 1442 }, { "epoch": 0.13497449337763842, "loss": 1.3047294616699219, "loss_ce": 0.004924705717712641, "loss_iou": 0.55859375, "loss_num": 0.0361328125, "loss_xval": 1.296875, "num_input_tokens_seen": 95515672, "step": 1442 }, { "epoch": 0.13506809566153413, "grad_norm": 18.528146743774414, "learning_rate": 5e-05, "loss": 1.7542, "num_input_tokens_seen": 95581716, "step": 1443 }, { "epoch": 0.13506809566153413, "loss": 1.957296371459961, "loss_ce": 0.003194748191162944, "loss_iou": 0.80859375, "loss_num": 0.06787109375, "loss_xval": 1.953125, "num_input_tokens_seen": 95581716, "step": 1443 }, { "epoch": 0.13516169794542987, "grad_norm": 21.910905838012695, "learning_rate": 5e-05, "loss": 1.473, "num_input_tokens_seen": 95648348, "step": 1444 }, { "epoch": 0.13516169794542987, "loss": 1.4957733154296875, "loss_ce": 0.001632708590477705, "loss_iou": 0.62890625, "loss_num": 0.047607421875, "loss_xval": 1.4921875, "num_input_tokens_seen": 95648348, "step": 1444 }, { "epoch": 0.1352553002293256, "grad_norm": 35.485477447509766, "learning_rate": 5e-05, "loss": 1.5512, "num_input_tokens_seen": 95714692, "step": 1445 }, { "epoch": 0.1352553002293256, "loss": 1.4851305484771729, "loss_ce": 0.005882611498236656, "loss_iou": 0.66015625, "loss_num": 0.0322265625, "loss_xval": 1.4765625, "num_input_tokens_seen": 95714692, "step": 1445 }, { "epoch": 0.13534890251322132, "grad_norm": 13.884212493896484, "learning_rate": 5e-05, "loss": 1.5487, "num_input_tokens_seen": 95781276, "step": 1446 }, { "epoch": 0.13534890251322132, "loss": 1.3953523635864258, "loss_ce": 0.0033235454466193914, "loss_iou": 0.59375, "loss_num": 0.04150390625, "loss_xval": 1.390625, "num_input_tokens_seen": 95781276, "step": 1446 }, { "epoch": 0.13544250479711706, "grad_norm": 12.372857093811035, "learning_rate": 5e-05, "loss": 1.1581, "num_input_tokens_seen": 95845924, "step": 1447 }, { "epoch": 0.13544250479711706, "loss": 1.3289029598236084, "loss_ce": 0.0027310599107295275, "loss_iou": 0.5390625, "loss_num": 0.05029296875, "loss_xval": 1.328125, "num_input_tokens_seen": 95845924, "step": 1447 }, { "epoch": 0.13553610708101277, "grad_norm": 25.897235870361328, "learning_rate": 5e-05, "loss": 1.1798, "num_input_tokens_seen": 95912504, "step": 1448 }, { "epoch": 0.13553610708101277, "loss": 1.1852606534957886, "loss_ce": 0.005084910895675421, "loss_iou": 0.53515625, "loss_num": 0.021728515625, "loss_xval": 1.1796875, "num_input_tokens_seen": 95912504, "step": 1448 }, { "epoch": 0.1356297093649085, "grad_norm": 21.500213623046875, "learning_rate": 5e-05, "loss": 1.2767, "num_input_tokens_seen": 95978508, "step": 1449 }, { "epoch": 0.1356297093649085, "loss": 1.2965521812438965, "loss_ce": 0.00407177209854126, "loss_iou": 0.5625, "loss_num": 0.033935546875, "loss_xval": 1.2890625, "num_input_tokens_seen": 95978508, "step": 1449 }, { "epoch": 0.13572331164880422, "grad_norm": 31.819379806518555, "learning_rate": 5e-05, "loss": 1.4956, "num_input_tokens_seen": 96044504, "step": 1450 }, { "epoch": 0.13572331164880422, "loss": 1.5511112213134766, "loss_ce": 0.00228312611579895, "loss_iou": 0.71875, "loss_num": 0.022216796875, "loss_xval": 1.546875, "num_input_tokens_seen": 96044504, "step": 1450 }, { "epoch": 0.13581691393269996, "grad_norm": 15.583928108215332, "learning_rate": 5e-05, "loss": 1.6707, "num_input_tokens_seen": 96109908, "step": 1451 }, { "epoch": 0.13581691393269996, "loss": 1.4697784185409546, "loss_ce": 0.004446416161954403, "loss_iou": 0.65625, "loss_num": 0.0303955078125, "loss_xval": 1.46875, "num_input_tokens_seen": 96109908, "step": 1451 }, { "epoch": 0.1359105162165957, "grad_norm": 22.426509857177734, "learning_rate": 5e-05, "loss": 1.5282, "num_input_tokens_seen": 96176388, "step": 1452 }, { "epoch": 0.1359105162165957, "loss": 1.519659161567688, "loss_ce": 0.0035458810161799192, "loss_iou": 0.671875, "loss_num": 0.0341796875, "loss_xval": 1.515625, "num_input_tokens_seen": 96176388, "step": 1452 }, { "epoch": 0.1360041185004914, "grad_norm": 17.847627639770508, "learning_rate": 5e-05, "loss": 1.6092, "num_input_tokens_seen": 96241400, "step": 1453 }, { "epoch": 0.1360041185004914, "loss": 1.5556532144546509, "loss_ce": 0.005848539061844349, "loss_iou": 0.66015625, "loss_num": 0.0458984375, "loss_xval": 1.546875, "num_input_tokens_seen": 96241400, "step": 1453 }, { "epoch": 0.13609772078438714, "grad_norm": 22.900426864624023, "learning_rate": 5e-05, "loss": 1.4858, "num_input_tokens_seen": 96307140, "step": 1454 }, { "epoch": 0.13609772078438714, "loss": 1.3733822107315063, "loss_ce": 0.006927090231329203, "loss_iou": 0.55078125, "loss_num": 0.052734375, "loss_xval": 1.3671875, "num_input_tokens_seen": 96307140, "step": 1454 }, { "epoch": 0.13619132306828285, "grad_norm": 136.20973205566406, "learning_rate": 5e-05, "loss": 1.454, "num_input_tokens_seen": 96373008, "step": 1455 }, { "epoch": 0.13619132306828285, "loss": 1.5045087337493896, "loss_ce": 0.004997122101485729, "loss_iou": 0.640625, "loss_num": 0.04296875, "loss_xval": 1.5, "num_input_tokens_seen": 96373008, "step": 1455 }, { "epoch": 0.1362849253521786, "grad_norm": 20.077259063720703, "learning_rate": 5e-05, "loss": 1.7174, "num_input_tokens_seen": 96440248, "step": 1456 }, { "epoch": 0.1362849253521786, "loss": 1.774254322052002, "loss_ce": 0.005699529312551022, "loss_iou": 0.765625, "loss_num": 0.04833984375, "loss_xval": 1.765625, "num_input_tokens_seen": 96440248, "step": 1456 }, { "epoch": 0.13637852763607433, "grad_norm": 18.524415969848633, "learning_rate": 5e-05, "loss": 1.4061, "num_input_tokens_seen": 96506472, "step": 1457 }, { "epoch": 0.13637852763607433, "loss": 1.5060696601867676, "loss_ce": 0.005093043204396963, "loss_iou": 0.63671875, "loss_num": 0.0458984375, "loss_xval": 1.5, "num_input_tokens_seen": 96506472, "step": 1457 }, { "epoch": 0.13647212991997004, "grad_norm": 34.78091812133789, "learning_rate": 5e-05, "loss": 1.3896, "num_input_tokens_seen": 96572796, "step": 1458 }, { "epoch": 0.13647212991997004, "loss": 1.5728785991668701, "loss_ce": 0.001589577179402113, "loss_iou": 0.67578125, "loss_num": 0.043212890625, "loss_xval": 1.5703125, "num_input_tokens_seen": 96572796, "step": 1458 }, { "epoch": 0.13656573220386578, "grad_norm": 22.908023834228516, "learning_rate": 5e-05, "loss": 1.4915, "num_input_tokens_seen": 96638684, "step": 1459 }, { "epoch": 0.13656573220386578, "loss": 1.4252605438232422, "loss_ce": 0.0014324421063065529, "loss_iou": 0.61328125, "loss_num": 0.039794921875, "loss_xval": 1.421875, "num_input_tokens_seen": 96638684, "step": 1459 }, { "epoch": 0.1366593344877615, "grad_norm": 22.792509078979492, "learning_rate": 5e-05, "loss": 1.7169, "num_input_tokens_seen": 96704656, "step": 1460 }, { "epoch": 0.1366593344877615, "loss": 1.835850477218628, "loss_ce": 0.006748875603079796, "loss_iou": 0.8125, "loss_num": 0.040283203125, "loss_xval": 1.828125, "num_input_tokens_seen": 96704656, "step": 1460 }, { "epoch": 0.13675293677165723, "grad_norm": 29.556488037109375, "learning_rate": 5e-05, "loss": 1.1412, "num_input_tokens_seen": 96769928, "step": 1461 }, { "epoch": 0.13675293677165723, "loss": 0.8836391568183899, "loss_ce": 0.004549796227365732, "loss_iou": 0.361328125, "loss_num": 0.03125, "loss_xval": 0.87890625, "num_input_tokens_seen": 96769928, "step": 1461 }, { "epoch": 0.13684653905555297, "grad_norm": 58.098838806152344, "learning_rate": 5e-05, "loss": 1.5686, "num_input_tokens_seen": 96836292, "step": 1462 }, { "epoch": 0.13684653905555297, "loss": 1.7508189678192139, "loss_ce": 0.0027720038779079914, "loss_iou": 0.76953125, "loss_num": 0.042236328125, "loss_xval": 1.75, "num_input_tokens_seen": 96836292, "step": 1462 }, { "epoch": 0.13694014133944868, "grad_norm": 12.583563804626465, "learning_rate": 5e-05, "loss": 1.3308, "num_input_tokens_seen": 96902740, "step": 1463 }, { "epoch": 0.13694014133944868, "loss": 1.4209550619125366, "loss_ce": 0.005427726544439793, "loss_iou": 0.6171875, "loss_num": 0.036376953125, "loss_xval": 1.4140625, "num_input_tokens_seen": 96902740, "step": 1463 }, { "epoch": 0.13703374362334442, "grad_norm": 21.045217514038086, "learning_rate": 5e-05, "loss": 1.3016, "num_input_tokens_seen": 96968620, "step": 1464 }, { "epoch": 0.13703374362334442, "loss": 1.2764999866485596, "loss_ce": 0.0025741523131728172, "loss_iou": 0.494140625, "loss_num": 0.057373046875, "loss_xval": 1.2734375, "num_input_tokens_seen": 96968620, "step": 1464 }, { "epoch": 0.13712734590724013, "grad_norm": 30.389692306518555, "learning_rate": 5e-05, "loss": 1.5985, "num_input_tokens_seen": 97034388, "step": 1465 }, { "epoch": 0.13712734590724013, "loss": 1.6568264961242676, "loss_ce": 0.006924077868461609, "loss_iou": 0.69921875, "loss_num": 0.050048828125, "loss_xval": 1.6484375, "num_input_tokens_seen": 97034388, "step": 1465 }, { "epoch": 0.13722094819113587, "grad_norm": 18.88156509399414, "learning_rate": 5e-05, "loss": 1.5185, "num_input_tokens_seen": 97100156, "step": 1466 }, { "epoch": 0.13722094819113587, "loss": 1.4082818031311035, "loss_ce": 0.003252509981393814, "loss_iou": 0.59375, "loss_num": 0.043701171875, "loss_xval": 1.40625, "num_input_tokens_seen": 97100156, "step": 1466 }, { "epoch": 0.13731455047503158, "grad_norm": 19.068809509277344, "learning_rate": 5e-05, "loss": 1.5123, "num_input_tokens_seen": 97166056, "step": 1467 }, { "epoch": 0.13731455047503158, "loss": 1.5042084455490112, "loss_ce": 0.006161610130220652, "loss_iou": 0.625, "loss_num": 0.050048828125, "loss_xval": 1.5, "num_input_tokens_seen": 97166056, "step": 1467 }, { "epoch": 0.13740815275892732, "grad_norm": 34.79692077636719, "learning_rate": 5e-05, "loss": 1.4256, "num_input_tokens_seen": 97232508, "step": 1468 }, { "epoch": 0.13740815275892732, "loss": 1.284287691116333, "loss_ce": 0.0035258689895272255, "loss_iou": 0.5546875, "loss_num": 0.034423828125, "loss_xval": 1.28125, "num_input_tokens_seen": 97232508, "step": 1468 }, { "epoch": 0.13750175504282305, "grad_norm": 16.231090545654297, "learning_rate": 5e-05, "loss": 1.7221, "num_input_tokens_seen": 97298300, "step": 1469 }, { "epoch": 0.13750175504282305, "loss": 1.5546045303344727, "loss_ce": 0.005288119427859783, "loss_iou": 0.6640625, "loss_num": 0.04345703125, "loss_xval": 1.546875, "num_input_tokens_seen": 97298300, "step": 1469 }, { "epoch": 0.13759535732671876, "grad_norm": 18.629154205322266, "learning_rate": 5e-05, "loss": 1.5172, "num_input_tokens_seen": 97365736, "step": 1470 }, { "epoch": 0.13759535732671876, "loss": 1.3760257959365845, "loss_ce": 0.0010258422698825598, "loss_iou": 0.61328125, "loss_num": 0.030029296875, "loss_xval": 1.375, "num_input_tokens_seen": 97365736, "step": 1470 }, { "epoch": 0.1376889596106145, "grad_norm": 23.517915725708008, "learning_rate": 5e-05, "loss": 1.6245, "num_input_tokens_seen": 97431052, "step": 1471 }, { "epoch": 0.1376889596106145, "loss": 1.8706618547439575, "loss_ce": 0.0034743547439575195, "loss_iou": 0.7578125, "loss_num": 0.07080078125, "loss_xval": 1.8671875, "num_input_tokens_seen": 97431052, "step": 1471 }, { "epoch": 0.1377825618945102, "grad_norm": 20.021461486816406, "learning_rate": 5e-05, "loss": 1.4881, "num_input_tokens_seen": 97497724, "step": 1472 }, { "epoch": 0.1377825618945102, "loss": 1.5418694019317627, "loss_ce": 0.003783364314585924, "loss_iou": 0.671875, "loss_num": 0.03857421875, "loss_xval": 1.5390625, "num_input_tokens_seen": 97497724, "step": 1472 }, { "epoch": 0.13787616417840595, "grad_norm": 21.375080108642578, "learning_rate": 5e-05, "loss": 1.3725, "num_input_tokens_seen": 97564116, "step": 1473 }, { "epoch": 0.13787616417840595, "loss": 1.447035551071167, "loss_ce": 0.004652666859328747, "loss_iou": 0.64453125, "loss_num": 0.02978515625, "loss_xval": 1.4453125, "num_input_tokens_seen": 97564116, "step": 1473 }, { "epoch": 0.1379697664623017, "grad_norm": 43.405216217041016, "learning_rate": 5e-05, "loss": 1.4266, "num_input_tokens_seen": 97629620, "step": 1474 }, { "epoch": 0.1379697664623017, "loss": 1.374740481376648, "loss_ce": 0.0033110652584582567, "loss_iou": 0.5703125, "loss_num": 0.046630859375, "loss_xval": 1.375, "num_input_tokens_seen": 97629620, "step": 1474 }, { "epoch": 0.1380633687461974, "grad_norm": 45.995914459228516, "learning_rate": 5e-05, "loss": 1.4239, "num_input_tokens_seen": 97696068, "step": 1475 }, { "epoch": 0.1380633687461974, "loss": 1.4466938972473145, "loss_ce": 0.0062641566619277, "loss_iou": 0.640625, "loss_num": 0.0311279296875, "loss_xval": 1.4375, "num_input_tokens_seen": 97696068, "step": 1475 }, { "epoch": 0.13815697103009314, "grad_norm": 23.018688201904297, "learning_rate": 5e-05, "loss": 1.6538, "num_input_tokens_seen": 97761804, "step": 1476 }, { "epoch": 0.13815697103009314, "loss": 1.5999515056610107, "loss_ce": 0.0032719094306230545, "loss_iou": 0.69140625, "loss_num": 0.043212890625, "loss_xval": 1.59375, "num_input_tokens_seen": 97761804, "step": 1476 }, { "epoch": 0.13825057331398885, "grad_norm": 16.344158172607422, "learning_rate": 5e-05, "loss": 1.3571, "num_input_tokens_seen": 97827784, "step": 1477 }, { "epoch": 0.13825057331398885, "loss": 1.2144036293029785, "loss_ce": 0.006884063594043255, "loss_iou": 0.4921875, "loss_num": 0.044677734375, "loss_xval": 1.2109375, "num_input_tokens_seen": 97827784, "step": 1477 }, { "epoch": 0.1383441755978846, "grad_norm": 70.98696899414062, "learning_rate": 5e-05, "loss": 1.7217, "num_input_tokens_seen": 97894908, "step": 1478 }, { "epoch": 0.1383441755978846, "loss": 1.8056621551513672, "loss_ce": 0.011716828681528568, "loss_iou": 0.7890625, "loss_num": 0.04345703125, "loss_xval": 1.796875, "num_input_tokens_seen": 97894908, "step": 1478 }, { "epoch": 0.13843777788178033, "grad_norm": 31.899520874023438, "learning_rate": 5e-05, "loss": 1.8972, "num_input_tokens_seen": 97961448, "step": 1479 }, { "epoch": 0.13843777788178033, "loss": 1.9800881147384644, "loss_ce": 0.0025489763356745243, "loss_iou": 0.8125, "loss_num": 0.0712890625, "loss_xval": 1.9765625, "num_input_tokens_seen": 97961448, "step": 1479 }, { "epoch": 0.13853138016567604, "grad_norm": 33.952205657958984, "learning_rate": 5e-05, "loss": 1.8364, "num_input_tokens_seen": 98027512, "step": 1480 }, { "epoch": 0.13853138016567604, "loss": 1.9767546653747559, "loss_ce": 0.005075046792626381, "loss_iou": 0.859375, "loss_num": 0.05078125, "loss_xval": 1.96875, "num_input_tokens_seen": 98027512, "step": 1480 }, { "epoch": 0.13862498244957178, "grad_norm": 10.682676315307617, "learning_rate": 5e-05, "loss": 1.2754, "num_input_tokens_seen": 98094120, "step": 1481 }, { "epoch": 0.13862498244957178, "loss": 1.244215965270996, "loss_ce": 0.0020283497869968414, "loss_iou": 0.55859375, "loss_num": 0.0255126953125, "loss_xval": 1.2421875, "num_input_tokens_seen": 98094120, "step": 1481 }, { "epoch": 0.1387185847334675, "grad_norm": 13.159777641296387, "learning_rate": 5e-05, "loss": 1.4164, "num_input_tokens_seen": 98161292, "step": 1482 }, { "epoch": 0.1387185847334675, "loss": 1.35675847530365, "loss_ce": 0.005196006502956152, "loss_iou": 0.578125, "loss_num": 0.0390625, "loss_xval": 1.3515625, "num_input_tokens_seen": 98161292, "step": 1482 }, { "epoch": 0.13881218701736323, "grad_norm": 31.39657974243164, "learning_rate": 5e-05, "loss": 1.3705, "num_input_tokens_seen": 98227904, "step": 1483 }, { "epoch": 0.13881218701736323, "loss": 1.2967456579208374, "loss_ce": 0.004631428048014641, "loss_iou": 0.56640625, "loss_num": 0.03271484375, "loss_xval": 1.2890625, "num_input_tokens_seen": 98227904, "step": 1483 }, { "epoch": 0.13890578930125896, "grad_norm": 18.579883575439453, "learning_rate": 5e-05, "loss": 1.628, "num_input_tokens_seen": 98294892, "step": 1484 }, { "epoch": 0.13890578930125896, "loss": 1.8487024307250977, "loss_ce": 0.003975847736001015, "loss_iou": 0.78125, "loss_num": 0.056640625, "loss_xval": 1.84375, "num_input_tokens_seen": 98294892, "step": 1484 }, { "epoch": 0.13899939158515467, "grad_norm": 11.909125328063965, "learning_rate": 5e-05, "loss": 1.3687, "num_input_tokens_seen": 98360852, "step": 1485 }, { "epoch": 0.13899939158515467, "loss": 1.5161800384521484, "loss_ce": 0.006902600638568401, "loss_iou": 0.6171875, "loss_num": 0.0556640625, "loss_xval": 1.5078125, "num_input_tokens_seen": 98360852, "step": 1485 }, { "epoch": 0.1390929938690504, "grad_norm": 22.15267562866211, "learning_rate": 5e-05, "loss": 1.3996, "num_input_tokens_seen": 98427528, "step": 1486 }, { "epoch": 0.1390929938690504, "loss": 1.5718417167663574, "loss_ce": 0.0015292345779016614, "loss_iou": 0.703125, "loss_num": 0.0322265625, "loss_xval": 1.5703125, "num_input_tokens_seen": 98427528, "step": 1486 }, { "epoch": 0.13918659615294612, "grad_norm": 17.999814987182617, "learning_rate": 5e-05, "loss": 1.5932, "num_input_tokens_seen": 98493120, "step": 1487 }, { "epoch": 0.13918659615294612, "loss": 1.5637671947479248, "loss_ce": 0.005905886180698872, "loss_iou": 0.6953125, "loss_num": 0.032958984375, "loss_xval": 1.5546875, "num_input_tokens_seen": 98493120, "step": 1487 }, { "epoch": 0.13928019843684186, "grad_norm": 14.201549530029297, "learning_rate": 5e-05, "loss": 1.1767, "num_input_tokens_seen": 98559444, "step": 1488 }, { "epoch": 0.13928019843684186, "loss": 1.1103944778442383, "loss_ce": 0.001019414747133851, "loss_iou": 0.498046875, "loss_num": 0.0223388671875, "loss_xval": 1.109375, "num_input_tokens_seen": 98559444, "step": 1488 }, { "epoch": 0.13937380072073757, "grad_norm": 14.96893310546875, "learning_rate": 5e-05, "loss": 1.4167, "num_input_tokens_seen": 98625640, "step": 1489 }, { "epoch": 0.13937380072073757, "loss": 1.501664161682129, "loss_ce": 0.009720880538225174, "loss_iou": 0.57421875, "loss_num": 0.068359375, "loss_xval": 1.4921875, "num_input_tokens_seen": 98625640, "step": 1489 }, { "epoch": 0.1394674030046333, "grad_norm": 151.1747283935547, "learning_rate": 5e-05, "loss": 1.0989, "num_input_tokens_seen": 98691280, "step": 1490 }, { "epoch": 0.1394674030046333, "loss": 1.0764378309249878, "loss_ce": 0.008139526471495628, "loss_iou": 0.46484375, "loss_num": 0.0281982421875, "loss_xval": 1.0703125, "num_input_tokens_seen": 98691280, "step": 1490 }, { "epoch": 0.13956100528852905, "grad_norm": 36.526615142822266, "learning_rate": 5e-05, "loss": 1.6628, "num_input_tokens_seen": 98756456, "step": 1491 }, { "epoch": 0.13956100528852905, "loss": 1.5029892921447754, "loss_ce": 0.003965775948017836, "loss_iou": 0.6328125, "loss_num": 0.047119140625, "loss_xval": 1.5, "num_input_tokens_seen": 98756456, "step": 1491 }, { "epoch": 0.13965460757242476, "grad_norm": 18.75931739807129, "learning_rate": 5e-05, "loss": 1.5891, "num_input_tokens_seen": 98822744, "step": 1492 }, { "epoch": 0.13965460757242476, "loss": 1.6656162738800049, "loss_ce": 0.006436473689973354, "loss_iou": 0.7265625, "loss_num": 0.04150390625, "loss_xval": 1.65625, "num_input_tokens_seen": 98822744, "step": 1492 }, { "epoch": 0.1397482098563205, "grad_norm": 11.803533554077148, "learning_rate": 5e-05, "loss": 1.1741, "num_input_tokens_seen": 98888000, "step": 1493 }, { "epoch": 0.1397482098563205, "loss": 1.4049453735351562, "loss_ce": 0.005531243979930878, "loss_iou": 0.57421875, "loss_num": 0.049560546875, "loss_xval": 1.3984375, "num_input_tokens_seen": 98888000, "step": 1493 }, { "epoch": 0.1398418121402162, "grad_norm": 24.705406188964844, "learning_rate": 5e-05, "loss": 1.5608, "num_input_tokens_seen": 98953916, "step": 1494 }, { "epoch": 0.1398418121402162, "loss": 1.478304147720337, "loss_ce": 0.0036947601474821568, "loss_iou": 0.59375, "loss_num": 0.05712890625, "loss_xval": 1.4765625, "num_input_tokens_seen": 98953916, "step": 1494 }, { "epoch": 0.13993541442411195, "grad_norm": 24.102781295776367, "learning_rate": 5e-05, "loss": 1.503, "num_input_tokens_seen": 99019580, "step": 1495 }, { "epoch": 0.13993541442411195, "loss": 1.5414433479309082, "loss_ce": 0.006775477435439825, "loss_iou": 0.67578125, "loss_num": 0.035888671875, "loss_xval": 1.53125, "num_input_tokens_seen": 99019580, "step": 1495 }, { "epoch": 0.1400290167080077, "grad_norm": 43.382747650146484, "learning_rate": 5e-05, "loss": 1.8064, "num_input_tokens_seen": 99084964, "step": 1496 }, { "epoch": 0.1400290167080077, "loss": 2.066258192062378, "loss_ce": 0.004734785296022892, "loss_iou": 0.83984375, "loss_num": 0.076171875, "loss_xval": 2.0625, "num_input_tokens_seen": 99084964, "step": 1496 }, { "epoch": 0.1401226189919034, "grad_norm": 26.74125099182129, "learning_rate": 5e-05, "loss": 1.5637, "num_input_tokens_seen": 99150640, "step": 1497 }, { "epoch": 0.1401226189919034, "loss": 1.5906933546066284, "loss_ce": 0.012568360194563866, "loss_iou": 0.62890625, "loss_num": 0.0634765625, "loss_xval": 1.578125, "num_input_tokens_seen": 99150640, "step": 1497 }, { "epoch": 0.14021622127579914, "grad_norm": 22.88028907775879, "learning_rate": 5e-05, "loss": 1.8411, "num_input_tokens_seen": 99217832, "step": 1498 }, { "epoch": 0.14021622127579914, "loss": 1.9064733982086182, "loss_ce": 0.004129617474973202, "loss_iou": 0.7890625, "loss_num": 0.06494140625, "loss_xval": 1.90625, "num_input_tokens_seen": 99217832, "step": 1498 }, { "epoch": 0.14030982355969485, "grad_norm": 28.038320541381836, "learning_rate": 5e-05, "loss": 1.5641, "num_input_tokens_seen": 99283692, "step": 1499 }, { "epoch": 0.14030982355969485, "loss": 1.6595817804336548, "loss_ce": 0.010655960068106651, "loss_iou": 0.65234375, "loss_num": 0.0693359375, "loss_xval": 1.6484375, "num_input_tokens_seen": 99283692, "step": 1499 }, { "epoch": 0.14040342584359058, "grad_norm": 22.882043838500977, "learning_rate": 5e-05, "loss": 1.6246, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14040342584359058, "eval_seeclick_CIoU": 0.12986931204795837, "eval_seeclick_GIoU": 0.1154865175485611, "eval_seeclick_IoU": 0.2536672055721283, "eval_seeclick_MAE_all": 0.14381030946969986, "eval_seeclick_MAE_h": 0.10508225485682487, "eval_seeclick_MAE_w": 0.11415602266788483, "eval_seeclick_MAE_x_boxes": 0.20156145840883255, "eval_seeclick_MAE_y_boxes": 0.12079284340143204, "eval_seeclick_NUM_probability": 0.9990553855895996, "eval_seeclick_inside_bbox": 0.4208333343267441, "eval_seeclick_loss": 2.517971992492676, "eval_seeclick_loss_ce": 0.014489146415144205, "eval_seeclick_loss_iou": 0.8828125, "eval_seeclick_loss_num": 0.14289093017578125, "eval_seeclick_loss_xval": 2.48046875, "eval_seeclick_runtime": 61.9424, "eval_seeclick_samples_per_second": 0.759, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14040342584359058, "eval_icons_CIoU": -0.06381661631166935, "eval_icons_GIoU": -0.012035993859171867, "eval_icons_IoU": 0.12227378040552139, "eval_icons_MAE_all": 0.1508197821676731, "eval_icons_MAE_h": 0.12457311898469925, "eval_icons_MAE_w": 0.15456288307905197, "eval_icons_MAE_x_boxes": 0.16619648039340973, "eval_icons_MAE_y_boxes": 0.06023375503718853, "eval_icons_NUM_probability": 0.9997735619544983, "eval_icons_inside_bbox": 0.2517361119389534, "eval_icons_loss": 2.7961881160736084, "eval_icons_loss_ce": 0.0007625973084941506, "eval_icons_loss_iou": 0.99365234375, "eval_icons_loss_num": 0.17418670654296875, "eval_icons_loss_xval": 2.857421875, "eval_icons_runtime": 73.5619, "eval_icons_samples_per_second": 0.68, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14040342584359058, "eval_screenspot_CIoU": 0.005095538372794787, "eval_screenspot_GIoU": 0.029694482684135437, "eval_screenspot_IoU": 0.17252641916275024, "eval_screenspot_MAE_all": 0.18482921024163565, "eval_screenspot_MAE_h": 0.1193593442440033, "eval_screenspot_MAE_w": 0.1454322561621666, "eval_screenspot_MAE_x_boxes": 0.28102100888888043, "eval_screenspot_MAE_y_boxes": 0.11931897948185603, "eval_screenspot_NUM_probability": 0.9993451635042826, "eval_screenspot_inside_bbox": 0.43833333253860474, "eval_screenspot_loss": 2.9187819957733154, "eval_screenspot_loss_ce": 0.026681889469424885, "eval_screenspot_loss_iou": 0.9869791666666666, "eval_screenspot_loss_num": 0.18549601236979166, "eval_screenspot_loss_xval": 2.9010416666666665, "eval_screenspot_runtime": 113.6674, "eval_screenspot_samples_per_second": 0.783, "eval_screenspot_steps_per_second": 0.026, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14040342584359058, "eval_compot_CIoU": -0.07622082345187664, "eval_compot_GIoU": -0.04671732848510146, "eval_compot_IoU": 0.10855663195252419, "eval_compot_MAE_all": 0.1775462105870247, "eval_compot_MAE_h": 0.12493967264890671, "eval_compot_MAE_w": 0.11044013127684593, "eval_compot_MAE_x_boxes": 0.25486552715301514, "eval_compot_MAE_y_boxes": 0.10714587941765785, "eval_compot_NUM_probability": 0.9995182752609253, "eval_compot_inside_bbox": 0.2048611119389534, "eval_compot_loss": 3.0232787132263184, "eval_compot_loss_ce": 0.008488157531246543, "eval_compot_loss_iou": 1.064208984375, "eval_compot_loss_num": 0.194061279296875, "eval_compot_loss_xval": 3.0986328125, "eval_compot_runtime": 64.4201, "eval_compot_samples_per_second": 0.776, "eval_compot_steps_per_second": 0.031, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14040342584359058, "eval_custom_ui_MAE_all": 0.12771786376833916, "eval_custom_ui_MAE_x": 0.11793666705489159, "eval_custom_ui_MAE_y": 0.13749905303120613, "eval_custom_ui_NUM_probability": 0.9999288022518158, "eval_custom_ui_loss": 0.7349903583526611, "eval_custom_ui_loss_ce": 0.1220637708902359, "eval_custom_ui_loss_num": 0.1292266845703125, "eval_custom_ui_loss_xval": 0.646240234375, "eval_custom_ui_runtime": 56.8825, "eval_custom_ui_samples_per_second": 0.879, "eval_custom_ui_steps_per_second": 0.035, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14040342584359058, "loss": 0.7274760007858276, "loss_ce": 0.12981978058815002, "loss_iou": 0.0, "loss_num": 0.11962890625, "loss_xval": 0.59765625, "num_input_tokens_seen": 99349752, "step": 1500 }, { "epoch": 0.14049702812748632, "grad_norm": 24.231904983520508, "learning_rate": 5e-05, "loss": 1.2735, "num_input_tokens_seen": 99416176, "step": 1501 }, { "epoch": 0.14049702812748632, "loss": 1.2148327827453613, "loss_ce": 0.008076216094195843, "loss_iou": 0.5234375, "loss_num": 0.031494140625, "loss_xval": 1.203125, "num_input_tokens_seen": 99416176, "step": 1501 }, { "epoch": 0.14059063041138203, "grad_norm": 55.206363677978516, "learning_rate": 5e-05, "loss": 1.3824, "num_input_tokens_seen": 99482524, "step": 1502 }, { "epoch": 0.14059063041138203, "loss": 1.4969196319580078, "loss_ce": 0.006685256026685238, "loss_iou": 0.62109375, "loss_num": 0.048828125, "loss_xval": 1.4921875, "num_input_tokens_seen": 99482524, "step": 1502 }, { "epoch": 0.14068423269527777, "grad_norm": 31.431215286254883, "learning_rate": 5e-05, "loss": 1.6985, "num_input_tokens_seen": 99549584, "step": 1503 }, { "epoch": 0.14068423269527777, "loss": 1.6617836952209473, "loss_ce": 0.005533731542527676, "loss_iou": 0.73828125, "loss_num": 0.0361328125, "loss_xval": 1.65625, "num_input_tokens_seen": 99549584, "step": 1503 }, { "epoch": 0.14077783497917348, "grad_norm": 22.039602279663086, "learning_rate": 5e-05, "loss": 1.8213, "num_input_tokens_seen": 99615260, "step": 1504 }, { "epoch": 0.14077783497917348, "loss": 1.7598512172698975, "loss_ce": 0.0059449695982038975, "loss_iou": 0.75, "loss_num": 0.051513671875, "loss_xval": 1.75, "num_input_tokens_seen": 99615260, "step": 1504 }, { "epoch": 0.14087143726306922, "grad_norm": 20.392858505249023, "learning_rate": 5e-05, "loss": 1.4993, "num_input_tokens_seen": 99681472, "step": 1505 }, { "epoch": 0.14087143726306922, "loss": 1.5586025714874268, "loss_ce": 0.004891672637313604, "loss_iou": 0.66796875, "loss_num": 0.042724609375, "loss_xval": 1.5546875, "num_input_tokens_seen": 99681472, "step": 1505 }, { "epoch": 0.14096503954696496, "grad_norm": 28.995214462280273, "learning_rate": 5e-05, "loss": 1.4371, "num_input_tokens_seen": 99747096, "step": 1506 }, { "epoch": 0.14096503954696496, "loss": 1.4462305307388306, "loss_ce": 0.0014062949921935797, "loss_iou": 0.6328125, "loss_num": 0.035888671875, "loss_xval": 1.4453125, "num_input_tokens_seen": 99747096, "step": 1506 }, { "epoch": 0.14105864183086067, "grad_norm": 21.191801071166992, "learning_rate": 5e-05, "loss": 1.5062, "num_input_tokens_seen": 99813116, "step": 1507 }, { "epoch": 0.14105864183086067, "loss": 1.6104576587677002, "loss_ce": 0.006453688256442547, "loss_iou": 0.671875, "loss_num": 0.052734375, "loss_xval": 1.6015625, "num_input_tokens_seen": 99813116, "step": 1507 }, { "epoch": 0.1411522441147564, "grad_norm": 24.812150955200195, "learning_rate": 5e-05, "loss": 1.2635, "num_input_tokens_seen": 99880304, "step": 1508 }, { "epoch": 0.1411522441147564, "loss": 1.4335097074508667, "loss_ce": 0.005287112668156624, "loss_iou": 0.59375, "loss_num": 0.0478515625, "loss_xval": 1.4296875, "num_input_tokens_seen": 99880304, "step": 1508 }, { "epoch": 0.14124584639865212, "grad_norm": 29.584083557128906, "learning_rate": 5e-05, "loss": 1.8114, "num_input_tokens_seen": 99946060, "step": 1509 }, { "epoch": 0.14124584639865212, "loss": 1.7381325960159302, "loss_ce": 0.00864044763147831, "loss_iou": 0.71875, "loss_num": 0.058349609375, "loss_xval": 1.7265625, "num_input_tokens_seen": 99946060, "step": 1509 }, { "epoch": 0.14133944868254786, "grad_norm": 17.324613571166992, "learning_rate": 5e-05, "loss": 1.3028, "num_input_tokens_seen": 100011568, "step": 1510 }, { "epoch": 0.14133944868254786, "loss": 1.3398258686065674, "loss_ce": 0.008771165274083614, "loss_iou": 0.54296875, "loss_num": 0.049560546875, "loss_xval": 1.328125, "num_input_tokens_seen": 100011568, "step": 1510 }, { "epoch": 0.14143305096644357, "grad_norm": 23.694305419921875, "learning_rate": 5e-05, "loss": 1.481, "num_input_tokens_seen": 100078336, "step": 1511 }, { "epoch": 0.14143305096644357, "loss": 1.3688184022903442, "loss_ce": 0.004560520872473717, "loss_iou": 0.60546875, "loss_num": 0.030029296875, "loss_xval": 1.3671875, "num_input_tokens_seen": 100078336, "step": 1511 }, { "epoch": 0.1415266532503393, "grad_norm": 38.424346923828125, "learning_rate": 5e-05, "loss": 1.6412, "num_input_tokens_seen": 100145548, "step": 1512 }, { "epoch": 0.1415266532503393, "loss": 1.7066707611083984, "loss_ce": 0.004522241652011871, "loss_iou": 0.73046875, "loss_num": 0.048583984375, "loss_xval": 1.703125, "num_input_tokens_seen": 100145548, "step": 1512 }, { "epoch": 0.14162025553423505, "grad_norm": 21.91921043395996, "learning_rate": 5e-05, "loss": 1.7673, "num_input_tokens_seen": 100210616, "step": 1513 }, { "epoch": 0.14162025553423505, "loss": 1.8947184085845947, "loss_ce": 0.007999643683433533, "loss_iou": 0.8125, "loss_num": 0.05322265625, "loss_xval": 1.890625, "num_input_tokens_seen": 100210616, "step": 1513 }, { "epoch": 0.14171385781813076, "grad_norm": 38.25762176513672, "learning_rate": 5e-05, "loss": 1.4429, "num_input_tokens_seen": 100275608, "step": 1514 }, { "epoch": 0.14171385781813076, "loss": 1.380049467086792, "loss_ce": 0.002119877375662327, "loss_iou": 0.515625, "loss_num": 0.06884765625, "loss_xval": 1.375, "num_input_tokens_seen": 100275608, "step": 1514 }, { "epoch": 0.1418074601020265, "grad_norm": 10.709073066711426, "learning_rate": 5e-05, "loss": 1.4471, "num_input_tokens_seen": 100341516, "step": 1515 }, { "epoch": 0.1418074601020265, "loss": 1.5805141925811768, "loss_ce": 0.0062953149899840355, "loss_iou": 0.671875, "loss_num": 0.046630859375, "loss_xval": 1.578125, "num_input_tokens_seen": 100341516, "step": 1515 }, { "epoch": 0.1419010623859222, "grad_norm": 20.05232048034668, "learning_rate": 5e-05, "loss": 1.4526, "num_input_tokens_seen": 100408124, "step": 1516 }, { "epoch": 0.1419010623859222, "loss": 1.5399630069732666, "loss_ce": 0.004318465478718281, "loss_iou": 0.62109375, "loss_num": 0.058837890625, "loss_xval": 1.5390625, "num_input_tokens_seen": 100408124, "step": 1516 }, { "epoch": 0.14199466466981794, "grad_norm": 32.801998138427734, "learning_rate": 5e-05, "loss": 1.4752, "num_input_tokens_seen": 100474908, "step": 1517 }, { "epoch": 0.14199466466981794, "loss": 1.391688585281372, "loss_ce": 0.0025284471921622753, "loss_iou": 0.6171875, "loss_num": 0.03076171875, "loss_xval": 1.390625, "num_input_tokens_seen": 100474908, "step": 1517 }, { "epoch": 0.14208826695371368, "grad_norm": 23.03963279724121, "learning_rate": 5e-05, "loss": 1.4937, "num_input_tokens_seen": 100541184, "step": 1518 }, { "epoch": 0.14208826695371368, "loss": 1.4620805978775024, "loss_ce": 0.009932130575180054, "loss_iou": 0.64453125, "loss_num": 0.032958984375, "loss_xval": 1.453125, "num_input_tokens_seen": 100541184, "step": 1518 }, { "epoch": 0.1421818692376094, "grad_norm": 26.238849639892578, "learning_rate": 5e-05, "loss": 1.5887, "num_input_tokens_seen": 100607804, "step": 1519 }, { "epoch": 0.1421818692376094, "loss": 1.434612512588501, "loss_ce": 0.0057337647303938866, "loss_iou": 0.61328125, "loss_num": 0.039794921875, "loss_xval": 1.4296875, "num_input_tokens_seen": 100607804, "step": 1519 }, { "epoch": 0.14227547152150513, "grad_norm": 17.839048385620117, "learning_rate": 5e-05, "loss": 1.2647, "num_input_tokens_seen": 100673700, "step": 1520 }, { "epoch": 0.14227547152150513, "loss": 1.2322412729263306, "loss_ce": 0.006166993640363216, "loss_iou": 0.5078125, "loss_num": 0.042236328125, "loss_xval": 1.2265625, "num_input_tokens_seen": 100673700, "step": 1520 }, { "epoch": 0.14236907380540084, "grad_norm": 18.898719787597656, "learning_rate": 5e-05, "loss": 1.3108, "num_input_tokens_seen": 100741140, "step": 1521 }, { "epoch": 0.14236907380540084, "loss": 1.2941036224365234, "loss_ce": 0.002599644009023905, "loss_iou": 0.58984375, "loss_num": 0.022705078125, "loss_xval": 1.2890625, "num_input_tokens_seen": 100741140, "step": 1521 }, { "epoch": 0.14246267608929658, "grad_norm": 27.399078369140625, "learning_rate": 5e-05, "loss": 1.6027, "num_input_tokens_seen": 100807032, "step": 1522 }, { "epoch": 0.14246267608929658, "loss": 1.7253706455230713, "loss_ce": 0.007597208954393864, "loss_iou": 0.69140625, "loss_num": 0.0673828125, "loss_xval": 1.71875, "num_input_tokens_seen": 100807032, "step": 1522 }, { "epoch": 0.14255627837319232, "grad_norm": 40.45325469970703, "learning_rate": 5e-05, "loss": 1.4111, "num_input_tokens_seen": 100874484, "step": 1523 }, { "epoch": 0.14255627837319232, "loss": 1.465791940689087, "loss_ce": 0.006319269072264433, "loss_iou": 0.6640625, "loss_num": 0.02587890625, "loss_xval": 1.4609375, "num_input_tokens_seen": 100874484, "step": 1523 }, { "epoch": 0.14264988065708803, "grad_norm": 23.753206253051758, "learning_rate": 5e-05, "loss": 1.6924, "num_input_tokens_seen": 100940064, "step": 1524 }, { "epoch": 0.14264988065708803, "loss": 1.8300961256027222, "loss_ce": 0.0034359393175691366, "loss_iou": 0.75, "loss_num": 0.0654296875, "loss_xval": 1.828125, "num_input_tokens_seen": 100940064, "step": 1524 }, { "epoch": 0.14274348294098377, "grad_norm": 12.655919075012207, "learning_rate": 5e-05, "loss": 1.168, "num_input_tokens_seen": 101006424, "step": 1525 }, { "epoch": 0.14274348294098377, "loss": 1.132278323173523, "loss_ce": 0.0023954908829182386, "loss_iou": 0.48828125, "loss_num": 0.03076171875, "loss_xval": 1.1328125, "num_input_tokens_seen": 101006424, "step": 1525 }, { "epoch": 0.14283708522487948, "grad_norm": 23.536968231201172, "learning_rate": 5e-05, "loss": 1.4492, "num_input_tokens_seen": 101071936, "step": 1526 }, { "epoch": 0.14283708522487948, "loss": 1.3138073682785034, "loss_ce": 0.005701958201825619, "loss_iou": 0.57421875, "loss_num": 0.0322265625, "loss_xval": 1.3046875, "num_input_tokens_seen": 101071936, "step": 1526 }, { "epoch": 0.14293068750877522, "grad_norm": 20.349735260009766, "learning_rate": 5e-05, "loss": 1.4725, "num_input_tokens_seen": 101137240, "step": 1527 }, { "epoch": 0.14293068750877522, "loss": 1.4538692235946655, "loss_ce": 0.004162219353020191, "loss_iou": 0.65234375, "loss_num": 0.0283203125, "loss_xval": 1.453125, "num_input_tokens_seen": 101137240, "step": 1527 }, { "epoch": 0.14302428979267093, "grad_norm": 34.49251174926758, "learning_rate": 5e-05, "loss": 1.6424, "num_input_tokens_seen": 101202700, "step": 1528 }, { "epoch": 0.14302428979267093, "loss": 1.6864709854125977, "loss_ce": 0.008736604824662209, "loss_iou": 0.6953125, "loss_num": 0.05712890625, "loss_xval": 1.6796875, "num_input_tokens_seen": 101202700, "step": 1528 }, { "epoch": 0.14311789207656667, "grad_norm": 24.006309509277344, "learning_rate": 5e-05, "loss": 1.6988, "num_input_tokens_seen": 101269828, "step": 1529 }, { "epoch": 0.14311789207656667, "loss": 1.5257787704467773, "loss_ce": 0.0033177491277456284, "loss_iou": 0.68359375, "loss_num": 0.0303955078125, "loss_xval": 1.5234375, "num_input_tokens_seen": 101269828, "step": 1529 }, { "epoch": 0.1432114943604624, "grad_norm": 50.223751068115234, "learning_rate": 5e-05, "loss": 1.524, "num_input_tokens_seen": 101335964, "step": 1530 }, { "epoch": 0.1432114943604624, "loss": 1.5145683288574219, "loss_ce": 0.0038261013105511665, "loss_iou": 0.64453125, "loss_num": 0.0439453125, "loss_xval": 1.5078125, "num_input_tokens_seen": 101335964, "step": 1530 }, { "epoch": 0.14330509664435812, "grad_norm": 16.774459838867188, "learning_rate": 5e-05, "loss": 1.6343, "num_input_tokens_seen": 101401128, "step": 1531 }, { "epoch": 0.14330509664435812, "loss": 1.313849687576294, "loss_ce": 0.0026925490237772465, "loss_iou": 0.6015625, "loss_num": 0.02197265625, "loss_xval": 1.3125, "num_input_tokens_seen": 101401128, "step": 1531 }, { "epoch": 0.14339869892825385, "grad_norm": 18.925683975219727, "learning_rate": 5e-05, "loss": 1.3889, "num_input_tokens_seen": 101468268, "step": 1532 }, { "epoch": 0.14339869892825385, "loss": 1.3102495670318604, "loss_ce": 0.0070268744602799416, "loss_iou": 0.58203125, "loss_num": 0.0279541015625, "loss_xval": 1.3046875, "num_input_tokens_seen": 101468268, "step": 1532 }, { "epoch": 0.14349230121214956, "grad_norm": 18.31987762451172, "learning_rate": 5e-05, "loss": 1.56, "num_input_tokens_seen": 101533604, "step": 1533 }, { "epoch": 0.14349230121214956, "loss": 1.6439417600631714, "loss_ce": 0.007223045919090509, "loss_iou": 0.67578125, "loss_num": 0.05712890625, "loss_xval": 1.640625, "num_input_tokens_seen": 101533604, "step": 1533 }, { "epoch": 0.1435859034960453, "grad_norm": 40.049530029296875, "learning_rate": 5e-05, "loss": 1.4117, "num_input_tokens_seen": 101598752, "step": 1534 }, { "epoch": 0.1435859034960453, "loss": 1.3404393196105957, "loss_ce": 0.009323660284280777, "loss_iou": 0.53125, "loss_num": 0.05322265625, "loss_xval": 1.328125, "num_input_tokens_seen": 101598752, "step": 1534 }, { "epoch": 0.14367950577994104, "grad_norm": 20.997421264648438, "learning_rate": 5e-05, "loss": 1.2269, "num_input_tokens_seen": 101663404, "step": 1535 }, { "epoch": 0.14367950577994104, "loss": 0.9025107622146606, "loss_ce": 0.003432357916608453, "loss_iou": 0.3828125, "loss_num": 0.0269775390625, "loss_xval": 0.8984375, "num_input_tokens_seen": 101663404, "step": 1535 }, { "epoch": 0.14377310806383675, "grad_norm": 24.92330551147461, "learning_rate": 5e-05, "loss": 1.3334, "num_input_tokens_seen": 101729156, "step": 1536 }, { "epoch": 0.14377310806383675, "loss": 1.4888564348220825, "loss_ce": 0.005458023399114609, "loss_iou": 0.63671875, "loss_num": 0.041748046875, "loss_xval": 1.484375, "num_input_tokens_seen": 101729156, "step": 1536 }, { "epoch": 0.1438667103477325, "grad_norm": 38.17001724243164, "learning_rate": 5e-05, "loss": 1.4925, "num_input_tokens_seen": 101795184, "step": 1537 }, { "epoch": 0.1438667103477325, "loss": 1.4716002941131592, "loss_ce": 0.0033385634887963533, "loss_iou": 0.62109375, "loss_num": 0.04443359375, "loss_xval": 1.46875, "num_input_tokens_seen": 101795184, "step": 1537 }, { "epoch": 0.1439603126316282, "grad_norm": 52.16805648803711, "learning_rate": 5e-05, "loss": 1.5786, "num_input_tokens_seen": 101860244, "step": 1538 }, { "epoch": 0.1439603126316282, "loss": 1.6646236181259155, "loss_ce": 0.0036128046922385693, "loss_iou": 0.65234375, "loss_num": 0.0712890625, "loss_xval": 1.6640625, "num_input_tokens_seen": 101860244, "step": 1538 }, { "epoch": 0.14405391491552394, "grad_norm": 18.758058547973633, "learning_rate": 5e-05, "loss": 1.7187, "num_input_tokens_seen": 101926872, "step": 1539 }, { "epoch": 0.14405391491552394, "loss": 1.78778076171875, "loss_ce": 0.004577621817588806, "loss_iou": 0.73828125, "loss_num": 0.06103515625, "loss_xval": 1.78125, "num_input_tokens_seen": 101926872, "step": 1539 }, { "epoch": 0.14414751719941968, "grad_norm": 33.0322151184082, "learning_rate": 5e-05, "loss": 1.4377, "num_input_tokens_seen": 101992900, "step": 1540 }, { "epoch": 0.14414751719941968, "loss": 1.5978648662567139, "loss_ce": 0.003138265572488308, "loss_iou": 0.6484375, "loss_num": 0.06005859375, "loss_xval": 1.59375, "num_input_tokens_seen": 101992900, "step": 1540 }, { "epoch": 0.1442411194833154, "grad_norm": 26.099706649780273, "learning_rate": 5e-05, "loss": 1.4051, "num_input_tokens_seen": 102058808, "step": 1541 }, { "epoch": 0.1442411194833154, "loss": 1.4880197048187256, "loss_ce": 0.0031562973745167255, "loss_iou": 0.63671875, "loss_num": 0.0419921875, "loss_xval": 1.484375, "num_input_tokens_seen": 102058808, "step": 1541 }, { "epoch": 0.14433472176721113, "grad_norm": 22.058637619018555, "learning_rate": 5e-05, "loss": 1.5436, "num_input_tokens_seen": 102125724, "step": 1542 }, { "epoch": 0.14433472176721113, "loss": 1.756797194480896, "loss_ce": 0.003867490915581584, "loss_iou": 0.76171875, "loss_num": 0.045654296875, "loss_xval": 1.75, "num_input_tokens_seen": 102125724, "step": 1542 }, { "epoch": 0.14442832405110684, "grad_norm": 19.189023971557617, "learning_rate": 5e-05, "loss": 1.3314, "num_input_tokens_seen": 102191628, "step": 1543 }, { "epoch": 0.14442832405110684, "loss": 1.3758902549743652, "loss_ce": 0.0018668891862034798, "loss_iou": 0.58984375, "loss_num": 0.03955078125, "loss_xval": 1.375, "num_input_tokens_seen": 102191628, "step": 1543 }, { "epoch": 0.14452192633500258, "grad_norm": 18.584917068481445, "learning_rate": 5e-05, "loss": 1.6219, "num_input_tokens_seen": 102256720, "step": 1544 }, { "epoch": 0.14452192633500258, "loss": 1.7577810287475586, "loss_ce": 0.0019217421067878604, "loss_iou": 0.7421875, "loss_num": 0.0537109375, "loss_xval": 1.7578125, "num_input_tokens_seen": 102256720, "step": 1544 }, { "epoch": 0.14461552861889831, "grad_norm": 14.617402076721191, "learning_rate": 5e-05, "loss": 1.5229, "num_input_tokens_seen": 102322440, "step": 1545 }, { "epoch": 0.14461552861889831, "loss": 1.4960970878601074, "loss_ce": 0.004397881217300892, "loss_iou": 0.62109375, "loss_num": 0.05029296875, "loss_xval": 1.4921875, "num_input_tokens_seen": 102322440, "step": 1545 }, { "epoch": 0.14470913090279403, "grad_norm": 21.33815574645996, "learning_rate": 5e-05, "loss": 1.3602, "num_input_tokens_seen": 102388084, "step": 1546 }, { "epoch": 0.14470913090279403, "loss": 1.1827688217163086, "loss_ce": 0.005522767081856728, "loss_iou": 0.484375, "loss_num": 0.04150390625, "loss_xval": 1.1796875, "num_input_tokens_seen": 102388084, "step": 1546 }, { "epoch": 0.14480273318668976, "grad_norm": 42.99497604370117, "learning_rate": 5e-05, "loss": 1.6943, "num_input_tokens_seen": 102454208, "step": 1547 }, { "epoch": 0.14480273318668976, "loss": 1.6959104537963867, "loss_ce": 0.003527591936290264, "loss_iou": 0.71484375, "loss_num": 0.052978515625, "loss_xval": 1.6953125, "num_input_tokens_seen": 102454208, "step": 1547 }, { "epoch": 0.14489633547058547, "grad_norm": 33.810691833496094, "learning_rate": 5e-05, "loss": 1.7902, "num_input_tokens_seen": 102519784, "step": 1548 }, { "epoch": 0.14489633547058547, "loss": 1.601817011833191, "loss_ce": 0.004649089649319649, "loss_iou": 0.6953125, "loss_num": 0.041748046875, "loss_xval": 1.59375, "num_input_tokens_seen": 102519784, "step": 1548 }, { "epoch": 0.1449899377544812, "grad_norm": 20.305252075195312, "learning_rate": 5e-05, "loss": 1.5246, "num_input_tokens_seen": 102586960, "step": 1549 }, { "epoch": 0.1449899377544812, "loss": 1.528454065322876, "loss_ce": 0.0050166333094239235, "loss_iou": 0.6875, "loss_num": 0.0302734375, "loss_xval": 1.5234375, "num_input_tokens_seen": 102586960, "step": 1549 }, { "epoch": 0.14508354003837692, "grad_norm": 15.906241416931152, "learning_rate": 5e-05, "loss": 1.3853, "num_input_tokens_seen": 102654008, "step": 1550 }, { "epoch": 0.14508354003837692, "loss": 1.4486968517303467, "loss_ce": 0.0014311351114884019, "loss_iou": 0.6171875, "loss_num": 0.04248046875, "loss_xval": 1.4453125, "num_input_tokens_seen": 102654008, "step": 1550 }, { "epoch": 0.14517714232227266, "grad_norm": 18.253984451293945, "learning_rate": 5e-05, "loss": 1.2605, "num_input_tokens_seen": 102720632, "step": 1551 }, { "epoch": 0.14517714232227266, "loss": 1.1389744281768799, "loss_ce": 0.0037205801345407963, "loss_iou": 0.50390625, "loss_num": 0.025146484375, "loss_xval": 1.1328125, "num_input_tokens_seen": 102720632, "step": 1551 }, { "epoch": 0.1452707446061684, "grad_norm": 19.899873733520508, "learning_rate": 5e-05, "loss": 1.5484, "num_input_tokens_seen": 102787532, "step": 1552 }, { "epoch": 0.1452707446061684, "loss": 1.6164824962615967, "loss_ce": 0.004177856259047985, "loss_iou": 0.69921875, "loss_num": 0.042236328125, "loss_xval": 1.609375, "num_input_tokens_seen": 102787532, "step": 1552 }, { "epoch": 0.1453643468900641, "grad_norm": 25.263181686401367, "learning_rate": 5e-05, "loss": 1.7018, "num_input_tokens_seen": 102854264, "step": 1553 }, { "epoch": 0.1453643468900641, "loss": 1.6837835311889648, "loss_ce": 0.003119437489658594, "loss_iou": 0.72265625, "loss_num": 0.04736328125, "loss_xval": 1.6796875, "num_input_tokens_seen": 102854264, "step": 1553 }, { "epoch": 0.14545794917395985, "grad_norm": 20.906625747680664, "learning_rate": 5e-05, "loss": 1.5819, "num_input_tokens_seen": 102921292, "step": 1554 }, { "epoch": 0.14545794917395985, "loss": 1.5744715929031372, "loss_ce": 0.005135647486895323, "loss_iou": 0.6953125, "loss_num": 0.03662109375, "loss_xval": 1.5703125, "num_input_tokens_seen": 102921292, "step": 1554 }, { "epoch": 0.14555155145785556, "grad_norm": 28.993274688720703, "learning_rate": 5e-05, "loss": 1.5309, "num_input_tokens_seen": 102987228, "step": 1555 }, { "epoch": 0.14555155145785556, "loss": 1.6445033550262451, "loss_ce": 0.005343172233551741, "loss_iou": 0.70703125, "loss_num": 0.045166015625, "loss_xval": 1.640625, "num_input_tokens_seen": 102987228, "step": 1555 }, { "epoch": 0.1456451537417513, "grad_norm": 20.963346481323242, "learning_rate": 5e-05, "loss": 1.6217, "num_input_tokens_seen": 103053636, "step": 1556 }, { "epoch": 0.1456451537417513, "loss": 1.5372929573059082, "loss_ce": 0.00604294566437602, "loss_iou": 0.69140625, "loss_num": 0.0303955078125, "loss_xval": 1.53125, "num_input_tokens_seen": 103053636, "step": 1556 }, { "epoch": 0.14573875602564704, "grad_norm": 19.246431350708008, "learning_rate": 5e-05, "loss": 1.4329, "num_input_tokens_seen": 103119868, "step": 1557 }, { "epoch": 0.14573875602564704, "loss": 1.3317689895629883, "loss_ce": 0.005108893848955631, "loss_iou": 0.58984375, "loss_num": 0.0299072265625, "loss_xval": 1.328125, "num_input_tokens_seen": 103119868, "step": 1557 }, { "epoch": 0.14583235830954275, "grad_norm": 23.084415435791016, "learning_rate": 5e-05, "loss": 1.2641, "num_input_tokens_seen": 103185364, "step": 1558 }, { "epoch": 0.14583235830954275, "loss": 1.3581790924072266, "loss_ce": 0.006616611499339342, "loss_iou": 0.59375, "loss_num": 0.033447265625, "loss_xval": 1.3515625, "num_input_tokens_seen": 103185364, "step": 1558 }, { "epoch": 0.1459259605934385, "grad_norm": 23.241357803344727, "learning_rate": 5e-05, "loss": 1.5636, "num_input_tokens_seen": 103250600, "step": 1559 }, { "epoch": 0.1459259605934385, "loss": 1.4889763593673706, "loss_ce": 0.0070427171885967255, "loss_iou": 0.6015625, "loss_num": 0.05615234375, "loss_xval": 1.484375, "num_input_tokens_seen": 103250600, "step": 1559 }, { "epoch": 0.1460195628773342, "grad_norm": 27.193466186523438, "learning_rate": 5e-05, "loss": 1.3414, "num_input_tokens_seen": 103317144, "step": 1560 }, { "epoch": 0.1460195628773342, "loss": 1.224779486656189, "loss_ce": 0.002123260870575905, "loss_iou": 0.578125, "loss_num": 0.013427734375, "loss_xval": 1.21875, "num_input_tokens_seen": 103317144, "step": 1560 }, { "epoch": 0.14611316516122994, "grad_norm": 26.30657196044922, "learning_rate": 5e-05, "loss": 1.7686, "num_input_tokens_seen": 103383100, "step": 1561 }, { "epoch": 0.14611316516122994, "loss": 1.762037754058838, "loss_ce": 0.002272146288305521, "loss_iou": 0.75, "loss_num": 0.0517578125, "loss_xval": 1.7578125, "num_input_tokens_seen": 103383100, "step": 1561 }, { "epoch": 0.14620676744512567, "grad_norm": 10.657283782958984, "learning_rate": 5e-05, "loss": 1.5322, "num_input_tokens_seen": 103449580, "step": 1562 }, { "epoch": 0.14620676744512567, "loss": 1.514688491821289, "loss_ce": 0.004922892898321152, "loss_iou": 0.60546875, "loss_num": 0.059326171875, "loss_xval": 1.5078125, "num_input_tokens_seen": 103449580, "step": 1562 }, { "epoch": 0.14630036972902138, "grad_norm": 17.780200958251953, "learning_rate": 5e-05, "loss": 1.3093, "num_input_tokens_seen": 103515492, "step": 1563 }, { "epoch": 0.14630036972902138, "loss": 1.493285894393921, "loss_ce": 0.0020749655086547136, "loss_iou": 0.6484375, "loss_num": 0.0390625, "loss_xval": 1.4921875, "num_input_tokens_seen": 103515492, "step": 1563 }, { "epoch": 0.14639397201291712, "grad_norm": 16.034446716308594, "learning_rate": 5e-05, "loss": 1.5579, "num_input_tokens_seen": 103582312, "step": 1564 }, { "epoch": 0.14639397201291712, "loss": 1.7352726459503174, "loss_ce": 0.002850731136277318, "loss_iou": 0.7578125, "loss_num": 0.04296875, "loss_xval": 1.734375, "num_input_tokens_seen": 103582312, "step": 1564 }, { "epoch": 0.14648757429681283, "grad_norm": 21.146360397338867, "learning_rate": 5e-05, "loss": 1.5004, "num_input_tokens_seen": 103648172, "step": 1565 }, { "epoch": 0.14648757429681283, "loss": 1.412261962890625, "loss_ce": 0.006988446228206158, "loss_iou": 0.61328125, "loss_num": 0.034912109375, "loss_xval": 1.40625, "num_input_tokens_seen": 103648172, "step": 1565 }, { "epoch": 0.14658117658070857, "grad_norm": 84.80882263183594, "learning_rate": 5e-05, "loss": 1.4568, "num_input_tokens_seen": 103713988, "step": 1566 }, { "epoch": 0.14658117658070857, "loss": 1.646254539489746, "loss_ce": 0.0036764289252460003, "loss_iou": 0.7109375, "loss_num": 0.04345703125, "loss_xval": 1.640625, "num_input_tokens_seen": 103713988, "step": 1566 }, { "epoch": 0.14667477886460428, "grad_norm": 15.519054412841797, "learning_rate": 5e-05, "loss": 1.5439, "num_input_tokens_seen": 103780696, "step": 1567 }, { "epoch": 0.14667477886460428, "loss": 1.5708928108215332, "loss_ce": 0.006927861366420984, "loss_iou": 0.62109375, "loss_num": 0.064453125, "loss_xval": 1.5625, "num_input_tokens_seen": 103780696, "step": 1567 }, { "epoch": 0.14676838114850002, "grad_norm": 18.43244171142578, "learning_rate": 5e-05, "loss": 1.2816, "num_input_tokens_seen": 103846680, "step": 1568 }, { "epoch": 0.14676838114850002, "loss": 1.2332091331481934, "loss_ce": 0.0032285857014358044, "loss_iou": 0.5546875, "loss_num": 0.0238037109375, "loss_xval": 1.2265625, "num_input_tokens_seen": 103846680, "step": 1568 }, { "epoch": 0.14686198343239576, "grad_norm": 21.82929801940918, "learning_rate": 5e-05, "loss": 1.3426, "num_input_tokens_seen": 103912976, "step": 1569 }, { "epoch": 0.14686198343239576, "loss": 1.288723111152649, "loss_ce": 0.005642055068165064, "loss_iou": 0.55859375, "loss_num": 0.033203125, "loss_xval": 1.28125, "num_input_tokens_seen": 103912976, "step": 1569 }, { "epoch": 0.14695558571629147, "grad_norm": 38.64533996582031, "learning_rate": 5e-05, "loss": 1.554, "num_input_tokens_seen": 103980544, "step": 1570 }, { "epoch": 0.14695558571629147, "loss": 1.687143325805664, "loss_ce": 0.0025730598717927933, "loss_iou": 0.74609375, "loss_num": 0.0390625, "loss_xval": 1.6875, "num_input_tokens_seen": 103980544, "step": 1570 }, { "epoch": 0.1470491880001872, "grad_norm": 16.56947135925293, "learning_rate": 5e-05, "loss": 1.6906, "num_input_tokens_seen": 104046316, "step": 1571 }, { "epoch": 0.1470491880001872, "loss": 1.5583475828170776, "loss_ce": 0.007078057155013084, "loss_iou": 0.6640625, "loss_num": 0.04345703125, "loss_xval": 1.5546875, "num_input_tokens_seen": 104046316, "step": 1571 }, { "epoch": 0.14714279028408292, "grad_norm": 44.03985595703125, "learning_rate": 5e-05, "loss": 1.4482, "num_input_tokens_seen": 104112700, "step": 1572 }, { "epoch": 0.14714279028408292, "loss": 1.4707307815551758, "loss_ce": 0.0017366930842399597, "loss_iou": 0.6015625, "loss_num": 0.052978515625, "loss_xval": 1.46875, "num_input_tokens_seen": 104112700, "step": 1572 }, { "epoch": 0.14723639256797866, "grad_norm": 22.382993698120117, "learning_rate": 5e-05, "loss": 1.4796, "num_input_tokens_seen": 104179164, "step": 1573 }, { "epoch": 0.14723639256797866, "loss": 1.465185523033142, "loss_ce": 0.004736325703561306, "loss_iou": 0.609375, "loss_num": 0.049072265625, "loss_xval": 1.4609375, "num_input_tokens_seen": 104179164, "step": 1573 }, { "epoch": 0.1473299948518744, "grad_norm": 31.945423126220703, "learning_rate": 5e-05, "loss": 1.8342, "num_input_tokens_seen": 104245728, "step": 1574 }, { "epoch": 0.1473299948518744, "loss": 1.7808973789215088, "loss_ce": 0.0035536657087504864, "loss_iou": 0.75, "loss_num": 0.0556640625, "loss_xval": 1.78125, "num_input_tokens_seen": 104245728, "step": 1574 }, { "epoch": 0.1474235971357701, "grad_norm": 165.14805603027344, "learning_rate": 5e-05, "loss": 1.1913, "num_input_tokens_seen": 104311864, "step": 1575 }, { "epoch": 0.1474235971357701, "loss": 1.2625303268432617, "loss_ce": 0.007708542514592409, "loss_iou": 0.48828125, "loss_num": 0.055908203125, "loss_xval": 1.2578125, "num_input_tokens_seen": 104311864, "step": 1575 }, { "epoch": 0.14751719941966585, "grad_norm": 30.921964645385742, "learning_rate": 5e-05, "loss": 1.2002, "num_input_tokens_seen": 104378064, "step": 1576 }, { "epoch": 0.14751719941966585, "loss": 1.21901273727417, "loss_ce": 0.002215783577412367, "loss_iou": 0.51171875, "loss_num": 0.039306640625, "loss_xval": 1.21875, "num_input_tokens_seen": 104378064, "step": 1576 }, { "epoch": 0.14761080170356156, "grad_norm": 29.67722511291504, "learning_rate": 5e-05, "loss": 1.4842, "num_input_tokens_seen": 104444520, "step": 1577 }, { "epoch": 0.14761080170356156, "loss": 1.4530389308929443, "loss_ce": 0.006749823689460754, "loss_iou": 0.58984375, "loss_num": 0.052978515625, "loss_xval": 1.4453125, "num_input_tokens_seen": 104444520, "step": 1577 }, { "epoch": 0.1477044039874573, "grad_norm": 33.292762756347656, "learning_rate": 5e-05, "loss": 1.4454, "num_input_tokens_seen": 104510820, "step": 1578 }, { "epoch": 0.1477044039874573, "loss": 1.3860068321228027, "loss_ce": 0.004048776812851429, "loss_iou": 0.62890625, "loss_num": 0.0255126953125, "loss_xval": 1.3828125, "num_input_tokens_seen": 104510820, "step": 1578 }, { "epoch": 0.14779800627135303, "grad_norm": 27.22444725036621, "learning_rate": 5e-05, "loss": 1.6401, "num_input_tokens_seen": 104576180, "step": 1579 }, { "epoch": 0.14779800627135303, "loss": 1.614316463470459, "loss_ce": 0.003720806445926428, "loss_iou": 0.6796875, "loss_num": 0.051513671875, "loss_xval": 1.609375, "num_input_tokens_seen": 104576180, "step": 1579 }, { "epoch": 0.14789160855524874, "grad_norm": 18.634841918945312, "learning_rate": 5e-05, "loss": 1.6994, "num_input_tokens_seen": 104642568, "step": 1580 }, { "epoch": 0.14789160855524874, "loss": 1.7484934329986572, "loss_ce": 0.004352916032075882, "loss_iou": 0.72265625, "loss_num": 0.060302734375, "loss_xval": 1.7421875, "num_input_tokens_seen": 104642568, "step": 1580 }, { "epoch": 0.14798521083914448, "grad_norm": 17.323060989379883, "learning_rate": 5e-05, "loss": 1.4623, "num_input_tokens_seen": 104709464, "step": 1581 }, { "epoch": 0.14798521083914448, "loss": 1.5736188888549805, "loss_ce": 0.005259455647319555, "loss_iou": 0.65625, "loss_num": 0.051513671875, "loss_xval": 1.5703125, "num_input_tokens_seen": 104709464, "step": 1581 }, { "epoch": 0.1480788131230402, "grad_norm": 23.767314910888672, "learning_rate": 5e-05, "loss": 1.6097, "num_input_tokens_seen": 104776000, "step": 1582 }, { "epoch": 0.1480788131230402, "loss": 1.6807904243469238, "loss_ce": 0.003056036541238427, "loss_iou": 0.70703125, "loss_num": 0.052978515625, "loss_xval": 1.6796875, "num_input_tokens_seen": 104776000, "step": 1582 }, { "epoch": 0.14817241540693593, "grad_norm": 23.845067977905273, "learning_rate": 5e-05, "loss": 1.7156, "num_input_tokens_seen": 104842072, "step": 1583 }, { "epoch": 0.14817241540693593, "loss": 1.6568188667297363, "loss_ce": 0.004475062713027, "loss_iou": 0.671875, "loss_num": 0.061767578125, "loss_xval": 1.65625, "num_input_tokens_seen": 104842072, "step": 1583 }, { "epoch": 0.14826601769083167, "grad_norm": 14.527493476867676, "learning_rate": 5e-05, "loss": 1.7741, "num_input_tokens_seen": 104908084, "step": 1584 }, { "epoch": 0.14826601769083167, "loss": 2.057866334915161, "loss_ce": 0.004155375994741917, "loss_iou": 0.80859375, "loss_num": 0.0869140625, "loss_xval": 2.046875, "num_input_tokens_seen": 104908084, "step": 1584 }, { "epoch": 0.14835961997472738, "grad_norm": 18.82594871520996, "learning_rate": 5e-05, "loss": 1.4649, "num_input_tokens_seen": 104973872, "step": 1585 }, { "epoch": 0.14835961997472738, "loss": 1.4868476390838623, "loss_ce": 0.008698174729943275, "loss_iou": 0.60546875, "loss_num": 0.052978515625, "loss_xval": 1.4765625, "num_input_tokens_seen": 104973872, "step": 1585 }, { "epoch": 0.14845322225862312, "grad_norm": 35.864688873291016, "learning_rate": 5e-05, "loss": 1.5298, "num_input_tokens_seen": 105039740, "step": 1586 }, { "epoch": 0.14845322225862312, "loss": 1.4475951194763184, "loss_ce": 0.002038555219769478, "loss_iou": 0.6171875, "loss_num": 0.04296875, "loss_xval": 1.4453125, "num_input_tokens_seen": 105039740, "step": 1586 }, { "epoch": 0.14854682454251883, "grad_norm": 25.220077514648438, "learning_rate": 5e-05, "loss": 1.9283, "num_input_tokens_seen": 105105680, "step": 1587 }, { "epoch": 0.14854682454251883, "loss": 1.9239721298217773, "loss_ce": 0.003073735162615776, "loss_iou": 0.78515625, "loss_num": 0.06982421875, "loss_xval": 1.921875, "num_input_tokens_seen": 105105680, "step": 1587 }, { "epoch": 0.14864042682641457, "grad_norm": 28.7579288482666, "learning_rate": 5e-05, "loss": 1.4026, "num_input_tokens_seen": 105172340, "step": 1588 }, { "epoch": 0.14864042682641457, "loss": 1.445113182067871, "loss_ce": 0.0037068475503474474, "loss_iou": 0.5859375, "loss_num": 0.05419921875, "loss_xval": 1.4375, "num_input_tokens_seen": 105172340, "step": 1588 }, { "epoch": 0.14873402911031028, "grad_norm": 22.010601043701172, "learning_rate": 5e-05, "loss": 1.339, "num_input_tokens_seen": 105238736, "step": 1589 }, { "epoch": 0.14873402911031028, "loss": 1.2648894786834717, "loss_ce": 0.004635551478713751, "loss_iou": 0.53515625, "loss_num": 0.038330078125, "loss_xval": 1.2578125, "num_input_tokens_seen": 105238736, "step": 1589 }, { "epoch": 0.14882763139420602, "grad_norm": 21.327199935913086, "learning_rate": 5e-05, "loss": 1.7862, "num_input_tokens_seen": 105305376, "step": 1590 }, { "epoch": 0.14882763139420602, "loss": 1.929335594177246, "loss_ce": 0.0055074915289878845, "loss_iou": 0.78515625, "loss_num": 0.0712890625, "loss_xval": 1.921875, "num_input_tokens_seen": 105305376, "step": 1590 }, { "epoch": 0.14892123367810176, "grad_norm": 32.1934928894043, "learning_rate": 5e-05, "loss": 1.2378, "num_input_tokens_seen": 105370324, "step": 1591 }, { "epoch": 0.14892123367810176, "loss": 1.1679826974868774, "loss_ce": 0.007628083229064941, "loss_iou": 0.4609375, "loss_num": 0.04736328125, "loss_xval": 1.1640625, "num_input_tokens_seen": 105370324, "step": 1591 }, { "epoch": 0.14901483596199747, "grad_norm": 16.672101974487305, "learning_rate": 5e-05, "loss": 1.7804, "num_input_tokens_seen": 105437024, "step": 1592 }, { "epoch": 0.14901483596199747, "loss": 1.6345713138580322, "loss_ce": 0.010547962039709091, "loss_iou": 0.71875, "loss_num": 0.037841796875, "loss_xval": 1.625, "num_input_tokens_seen": 105437024, "step": 1592 }, { "epoch": 0.1491084382458932, "grad_norm": 20.593778610229492, "learning_rate": 5e-05, "loss": 1.4119, "num_input_tokens_seen": 105502924, "step": 1593 }, { "epoch": 0.1491084382458932, "loss": 1.3221817016601562, "loss_ce": 0.004798830486834049, "loss_iou": 0.5625, "loss_num": 0.03857421875, "loss_xval": 1.3203125, "num_input_tokens_seen": 105502924, "step": 1593 }, { "epoch": 0.14920204052978892, "grad_norm": 41.04405975341797, "learning_rate": 5e-05, "loss": 1.3331, "num_input_tokens_seen": 105569204, "step": 1594 }, { "epoch": 0.14920204052978892, "loss": 1.1906969547271729, "loss_ce": 0.003196998964995146, "loss_iou": 0.51171875, "loss_num": 0.032470703125, "loss_xval": 1.1875, "num_input_tokens_seen": 105569204, "step": 1594 }, { "epoch": 0.14929564281368465, "grad_norm": 16.740083694458008, "learning_rate": 5e-05, "loss": 1.9196, "num_input_tokens_seen": 105634912, "step": 1595 }, { "epoch": 0.14929564281368465, "loss": 1.811830759048462, "loss_ce": 0.00226047751493752, "loss_iou": 0.8125, "loss_num": 0.0361328125, "loss_xval": 1.8125, "num_input_tokens_seen": 105634912, "step": 1595 }, { "epoch": 0.1493892450975804, "grad_norm": 20.989532470703125, "learning_rate": 5e-05, "loss": 1.5249, "num_input_tokens_seen": 105701448, "step": 1596 }, { "epoch": 0.1493892450975804, "loss": 1.7201259136199951, "loss_ce": 0.01016505528241396, "loss_iou": 0.6640625, "loss_num": 0.07666015625, "loss_xval": 1.7109375, "num_input_tokens_seen": 105701448, "step": 1596 }, { "epoch": 0.1494828473814761, "grad_norm": 25.692968368530273, "learning_rate": 5e-05, "loss": 1.352, "num_input_tokens_seen": 105768228, "step": 1597 }, { "epoch": 0.1494828473814761, "loss": 1.401670217514038, "loss_ce": 0.0022561508230865, "loss_iou": 0.58984375, "loss_num": 0.043701171875, "loss_xval": 1.3984375, "num_input_tokens_seen": 105768228, "step": 1597 }, { "epoch": 0.14957644966537184, "grad_norm": 85.5755386352539, "learning_rate": 5e-05, "loss": 1.4141, "num_input_tokens_seen": 105834624, "step": 1598 }, { "epoch": 0.14957644966537184, "loss": 1.4686968326568604, "loss_ce": 0.0058061955496668816, "loss_iou": 0.6484375, "loss_num": 0.03369140625, "loss_xval": 1.4609375, "num_input_tokens_seen": 105834624, "step": 1598 }, { "epoch": 0.14967005194926755, "grad_norm": 15.782243728637695, "learning_rate": 5e-05, "loss": 1.3049, "num_input_tokens_seen": 105899836, "step": 1599 }, { "epoch": 0.14967005194926755, "loss": 1.0485551357269287, "loss_ce": 0.005464324727654457, "loss_iou": 0.408203125, "loss_num": 0.045654296875, "loss_xval": 1.046875, "num_input_tokens_seen": 105899836, "step": 1599 }, { "epoch": 0.1497636542331633, "grad_norm": 102.48106384277344, "learning_rate": 5e-05, "loss": 1.4776, "num_input_tokens_seen": 105965848, "step": 1600 }, { "epoch": 0.1497636542331633, "loss": 1.5239989757537842, "loss_ce": 0.004467761144042015, "loss_iou": 0.640625, "loss_num": 0.046630859375, "loss_xval": 1.515625, "num_input_tokens_seen": 105965848, "step": 1600 }, { "epoch": 0.14985725651705903, "grad_norm": 29.419294357299805, "learning_rate": 5e-05, "loss": 1.4311, "num_input_tokens_seen": 106032240, "step": 1601 }, { "epoch": 0.14985725651705903, "loss": 1.4856876134872437, "loss_ce": 0.00424232380464673, "loss_iou": 0.6328125, "loss_num": 0.043701171875, "loss_xval": 1.484375, "num_input_tokens_seen": 106032240, "step": 1601 }, { "epoch": 0.14995085880095474, "grad_norm": 18.31100845336914, "learning_rate": 5e-05, "loss": 1.4293, "num_input_tokens_seen": 106097984, "step": 1602 }, { "epoch": 0.14995085880095474, "loss": 1.3015904426574707, "loss_ce": 0.005692100618034601, "loss_iou": 0.58203125, "loss_num": 0.025634765625, "loss_xval": 1.296875, "num_input_tokens_seen": 106097984, "step": 1602 }, { "epoch": 0.15004446108485048, "grad_norm": 16.438608169555664, "learning_rate": 5e-05, "loss": 1.3127, "num_input_tokens_seen": 106163944, "step": 1603 }, { "epoch": 0.15004446108485048, "loss": 1.2673999071121216, "loss_ce": 0.008610883727669716, "loss_iou": 0.490234375, "loss_num": 0.0556640625, "loss_xval": 1.2578125, "num_input_tokens_seen": 106163944, "step": 1603 }, { "epoch": 0.1501380633687462, "grad_norm": 19.66848373413086, "learning_rate": 5e-05, "loss": 1.44, "num_input_tokens_seen": 106230308, "step": 1604 }, { "epoch": 0.1501380633687462, "loss": 1.5320746898651123, "loss_ce": 0.001801289152354002, "loss_iou": 0.65625, "loss_num": 0.04443359375, "loss_xval": 1.53125, "num_input_tokens_seen": 106230308, "step": 1604 }, { "epoch": 0.15023166565264193, "grad_norm": 24.82718276977539, "learning_rate": 5e-05, "loss": 1.6418, "num_input_tokens_seen": 106296544, "step": 1605 }, { "epoch": 0.15023166565264193, "loss": 1.6388241052627563, "loss_ce": 0.006988205946981907, "loss_iou": 0.7109375, "loss_num": 0.042724609375, "loss_xval": 1.6328125, "num_input_tokens_seen": 106296544, "step": 1605 }, { "epoch": 0.15032526793653767, "grad_norm": 21.777324676513672, "learning_rate": 5e-05, "loss": 1.6035, "num_input_tokens_seen": 106363548, "step": 1606 }, { "epoch": 0.15032526793653767, "loss": 1.555410385131836, "loss_ce": 0.003652624785900116, "loss_iou": 0.69140625, "loss_num": 0.03369140625, "loss_xval": 1.5546875, "num_input_tokens_seen": 106363548, "step": 1606 }, { "epoch": 0.15041887022043338, "grad_norm": 55.833648681640625, "learning_rate": 5e-05, "loss": 1.3487, "num_input_tokens_seen": 106428844, "step": 1607 }, { "epoch": 0.15041887022043338, "loss": 1.3498376607894897, "loss_ce": 0.00279181613586843, "loss_iou": 0.5625, "loss_num": 0.044921875, "loss_xval": 1.34375, "num_input_tokens_seen": 106428844, "step": 1607 }, { "epoch": 0.15051247250432911, "grad_norm": 27.31374168395996, "learning_rate": 5e-05, "loss": 1.6015, "num_input_tokens_seen": 106494660, "step": 1608 }, { "epoch": 0.15051247250432911, "loss": 1.735952377319336, "loss_ce": 0.005483636632561684, "loss_iou": 0.7109375, "loss_num": 0.0615234375, "loss_xval": 1.734375, "num_input_tokens_seen": 106494660, "step": 1608 }, { "epoch": 0.15060607478822483, "grad_norm": 38.36038589477539, "learning_rate": 5e-05, "loss": 1.4511, "num_input_tokens_seen": 106560668, "step": 1609 }, { "epoch": 0.15060607478822483, "loss": 1.3938336372375488, "loss_ce": 0.006870659068226814, "loss_iou": 0.56640625, "loss_num": 0.05078125, "loss_xval": 1.390625, "num_input_tokens_seen": 106560668, "step": 1609 }, { "epoch": 0.15069967707212056, "grad_norm": 18.214885711669922, "learning_rate": 5e-05, "loss": 1.7583, "num_input_tokens_seen": 106627344, "step": 1610 }, { "epoch": 0.15069967707212056, "loss": 1.8072059154510498, "loss_ce": 0.002518388442695141, "loss_iou": 0.8046875, "loss_num": 0.038818359375, "loss_xval": 1.8046875, "num_input_tokens_seen": 106627344, "step": 1610 }, { "epoch": 0.15079327935601627, "grad_norm": 13.350485801696777, "learning_rate": 5e-05, "loss": 1.4211, "num_input_tokens_seen": 106693476, "step": 1611 }, { "epoch": 0.15079327935601627, "loss": 1.3665543794631958, "loss_ce": 0.006691177375614643, "loss_iou": 0.5703125, "loss_num": 0.043701171875, "loss_xval": 1.359375, "num_input_tokens_seen": 106693476, "step": 1611 }, { "epoch": 0.150886881639912, "grad_norm": 17.282957077026367, "learning_rate": 5e-05, "loss": 1.4673, "num_input_tokens_seen": 106760160, "step": 1612 }, { "epoch": 0.150886881639912, "loss": 1.548656702041626, "loss_ce": 0.004711356945335865, "loss_iou": 0.6328125, "loss_num": 0.054931640625, "loss_xval": 1.546875, "num_input_tokens_seen": 106760160, "step": 1612 }, { "epoch": 0.15098048392380775, "grad_norm": 23.995521545410156, "learning_rate": 5e-05, "loss": 1.6054, "num_input_tokens_seen": 106825812, "step": 1613 }, { "epoch": 0.15098048392380775, "loss": 1.6569788455963135, "loss_ce": 0.005611698143184185, "loss_iou": 0.6796875, "loss_num": 0.05859375, "loss_xval": 1.6484375, "num_input_tokens_seen": 106825812, "step": 1613 }, { "epoch": 0.15107408620770346, "grad_norm": 55.08814239501953, "learning_rate": 5e-05, "loss": 1.5432, "num_input_tokens_seen": 106892084, "step": 1614 }, { "epoch": 0.15107408620770346, "loss": 1.3321701288223267, "loss_ce": 0.002091982401907444, "loss_iou": 0.6015625, "loss_num": 0.0247802734375, "loss_xval": 1.328125, "num_input_tokens_seen": 106892084, "step": 1614 }, { "epoch": 0.1511676884915992, "grad_norm": 17.88591766357422, "learning_rate": 5e-05, "loss": 1.5182, "num_input_tokens_seen": 106958200, "step": 1615 }, { "epoch": 0.1511676884915992, "loss": 1.5853595733642578, "loss_ce": 0.0052814362570643425, "loss_iou": 0.6171875, "loss_num": 0.068359375, "loss_xval": 1.578125, "num_input_tokens_seen": 106958200, "step": 1615 }, { "epoch": 0.1512612907754949, "grad_norm": 30.752655029296875, "learning_rate": 5e-05, "loss": 1.4185, "num_input_tokens_seen": 107023892, "step": 1616 }, { "epoch": 0.1512612907754949, "loss": 1.542198896408081, "loss_ce": 0.008019291795790195, "loss_iou": 0.640625, "loss_num": 0.05078125, "loss_xval": 1.53125, "num_input_tokens_seen": 107023892, "step": 1616 }, { "epoch": 0.15135489305939065, "grad_norm": 22.571989059448242, "learning_rate": 5e-05, "loss": 1.4278, "num_input_tokens_seen": 107091452, "step": 1617 }, { "epoch": 0.15135489305939065, "loss": 1.383119821548462, "loss_ce": 0.005922477692365646, "loss_iou": 0.58984375, "loss_num": 0.039794921875, "loss_xval": 1.375, "num_input_tokens_seen": 107091452, "step": 1617 }, { "epoch": 0.1514484953432864, "grad_norm": 20.350555419921875, "learning_rate": 5e-05, "loss": 1.4435, "num_input_tokens_seen": 107157924, "step": 1618 }, { "epoch": 0.1514484953432864, "loss": 1.6578073501586914, "loss_ce": 0.003998872824013233, "loss_iou": 0.69921875, "loss_num": 0.05126953125, "loss_xval": 1.65625, "num_input_tokens_seen": 107157924, "step": 1618 }, { "epoch": 0.1515420976271821, "grad_norm": 33.785484313964844, "learning_rate": 5e-05, "loss": 1.5386, "num_input_tokens_seen": 107224524, "step": 1619 }, { "epoch": 0.1515420976271821, "loss": 1.577425479888916, "loss_ce": 0.00808954332023859, "loss_iou": 0.65234375, "loss_num": 0.052734375, "loss_xval": 1.5703125, "num_input_tokens_seen": 107224524, "step": 1619 }, { "epoch": 0.15163569991107784, "grad_norm": 20.362022399902344, "learning_rate": 5e-05, "loss": 1.6265, "num_input_tokens_seen": 107290996, "step": 1620 }, { "epoch": 0.15163569991107784, "loss": 1.378639578819275, "loss_ce": 0.00315127894282341, "loss_iou": 0.63671875, "loss_num": 0.02099609375, "loss_xval": 1.375, "num_input_tokens_seen": 107290996, "step": 1620 }, { "epoch": 0.15172930219497355, "grad_norm": 30.693668365478516, "learning_rate": 5e-05, "loss": 1.2258, "num_input_tokens_seen": 107357484, "step": 1621 }, { "epoch": 0.15172930219497355, "loss": 1.1909761428833008, "loss_ce": 0.005429234355688095, "loss_iou": 0.5234375, "loss_num": 0.0283203125, "loss_xval": 1.1875, "num_input_tokens_seen": 107357484, "step": 1621 }, { "epoch": 0.15182290447886929, "grad_norm": 26.40045738220215, "learning_rate": 5e-05, "loss": 1.4304, "num_input_tokens_seen": 107424000, "step": 1622 }, { "epoch": 0.15182290447886929, "loss": 1.2539054155349731, "loss_ce": 0.006163769401609898, "loss_iou": 0.46875, "loss_num": 0.06201171875, "loss_xval": 1.25, "num_input_tokens_seen": 107424000, "step": 1622 }, { "epoch": 0.15191650676276502, "grad_norm": 17.566761016845703, "learning_rate": 5e-05, "loss": 1.2579, "num_input_tokens_seen": 107490224, "step": 1623 }, { "epoch": 0.15191650676276502, "loss": 1.2542431354522705, "loss_ce": 0.004243158735334873, "loss_iou": 0.53515625, "loss_num": 0.035400390625, "loss_xval": 1.25, "num_input_tokens_seen": 107490224, "step": 1623 }, { "epoch": 0.15201010904666074, "grad_norm": 27.513626098632812, "learning_rate": 5e-05, "loss": 1.4749, "num_input_tokens_seen": 107556124, "step": 1624 }, { "epoch": 0.15201010904666074, "loss": 1.5368672609329224, "loss_ce": 0.006593812257051468, "loss_iou": 0.65625, "loss_num": 0.0439453125, "loss_xval": 1.53125, "num_input_tokens_seen": 107556124, "step": 1624 }, { "epoch": 0.15210371133055647, "grad_norm": 41.309940338134766, "learning_rate": 5e-05, "loss": 1.357, "num_input_tokens_seen": 107621688, "step": 1625 }, { "epoch": 0.15210371133055647, "loss": 1.1321182250976562, "loss_ce": 0.0032120319083333015, "loss_iou": 0.50390625, "loss_num": 0.024169921875, "loss_xval": 1.125, "num_input_tokens_seen": 107621688, "step": 1625 }, { "epoch": 0.15219731361445218, "grad_norm": 16.765777587890625, "learning_rate": 5e-05, "loss": 1.6151, "num_input_tokens_seen": 107687820, "step": 1626 }, { "epoch": 0.15219731361445218, "loss": 1.7323691844940186, "loss_ce": 0.0053183650597929955, "loss_iou": 0.73828125, "loss_num": 0.04931640625, "loss_xval": 1.7265625, "num_input_tokens_seen": 107687820, "step": 1626 }, { "epoch": 0.15229091589834792, "grad_norm": 17.608959197998047, "learning_rate": 5e-05, "loss": 1.448, "num_input_tokens_seen": 107753892, "step": 1627 }, { "epoch": 0.15229091589834792, "loss": 1.3993690013885498, "loss_ce": 0.0014197976561263204, "loss_iou": 0.609375, "loss_num": 0.03564453125, "loss_xval": 1.3984375, "num_input_tokens_seen": 107753892, "step": 1627 }, { "epoch": 0.15238451818224363, "grad_norm": 18.415164947509766, "learning_rate": 5e-05, "loss": 1.4588, "num_input_tokens_seen": 107818704, "step": 1628 }, { "epoch": 0.15238451818224363, "loss": 1.503772497177124, "loss_ce": 0.004505004268139601, "loss_iou": 0.59765625, "loss_num": 0.06005859375, "loss_xval": 1.5, "num_input_tokens_seen": 107818704, "step": 1628 }, { "epoch": 0.15247812046613937, "grad_norm": 29.39680290222168, "learning_rate": 5e-05, "loss": 1.5631, "num_input_tokens_seen": 107885208, "step": 1629 }, { "epoch": 0.15247812046613937, "loss": 1.4180433750152588, "loss_ce": 0.009107757359743118, "loss_iou": 0.59375, "loss_num": 0.044921875, "loss_xval": 1.40625, "num_input_tokens_seen": 107885208, "step": 1629 }, { "epoch": 0.1525717227500351, "grad_norm": 13.992585182189941, "learning_rate": 5e-05, "loss": 1.343, "num_input_tokens_seen": 107949688, "step": 1630 }, { "epoch": 0.1525717227500351, "loss": 1.2554576396942139, "loss_ce": 0.003992866724729538, "loss_iou": 0.5390625, "loss_num": 0.03466796875, "loss_xval": 1.25, "num_input_tokens_seen": 107949688, "step": 1630 }, { "epoch": 0.15266532503393082, "grad_norm": 21.308378219604492, "learning_rate": 5e-05, "loss": 1.4879, "num_input_tokens_seen": 108015600, "step": 1631 }, { "epoch": 0.15266532503393082, "loss": 1.3239504098892212, "loss_ce": 0.0021730849985033274, "loss_iou": 0.58203125, "loss_num": 0.031005859375, "loss_xval": 1.3203125, "num_input_tokens_seen": 108015600, "step": 1631 }, { "epoch": 0.15275892731782656, "grad_norm": 28.54804801940918, "learning_rate": 5e-05, "loss": 1.5631, "num_input_tokens_seen": 108081240, "step": 1632 }, { "epoch": 0.15275892731782656, "loss": 1.6782734394073486, "loss_ce": 0.006398522295057774, "loss_iou": 0.7109375, "loss_num": 0.0498046875, "loss_xval": 1.671875, "num_input_tokens_seen": 108081240, "step": 1632 }, { "epoch": 0.15285252960172227, "grad_norm": 34.5693473815918, "learning_rate": 5e-05, "loss": 1.6822, "num_input_tokens_seen": 108148780, "step": 1633 }, { "epoch": 0.15285252960172227, "loss": 1.7861237525939941, "loss_ce": 0.006826853379607201, "loss_iou": 0.734375, "loss_num": 0.06201171875, "loss_xval": 1.78125, "num_input_tokens_seen": 108148780, "step": 1633 }, { "epoch": 0.152946131885618, "grad_norm": 17.04439353942871, "learning_rate": 5e-05, "loss": 1.5437, "num_input_tokens_seen": 108215492, "step": 1634 }, { "epoch": 0.152946131885618, "loss": 1.4546854496002197, "loss_ce": 0.00351364491507411, "loss_iou": 0.60546875, "loss_num": 0.047119140625, "loss_xval": 1.453125, "num_input_tokens_seen": 108215492, "step": 1634 }, { "epoch": 0.15303973416951375, "grad_norm": 21.455020904541016, "learning_rate": 5e-05, "loss": 1.5011, "num_input_tokens_seen": 108281208, "step": 1635 }, { "epoch": 0.15303973416951375, "loss": 1.515901803970337, "loss_ce": 0.005037506110966206, "loss_iou": 0.65234375, "loss_num": 0.04150390625, "loss_xval": 1.5078125, "num_input_tokens_seen": 108281208, "step": 1635 }, { "epoch": 0.15313333645340946, "grad_norm": 17.235258102416992, "learning_rate": 5e-05, "loss": 1.5224, "num_input_tokens_seen": 108346920, "step": 1636 }, { "epoch": 0.15313333645340946, "loss": 1.5439879894256592, "loss_ce": 0.0019958079792559147, "loss_iou": 0.62890625, "loss_num": 0.057373046875, "loss_xval": 1.5390625, "num_input_tokens_seen": 108346920, "step": 1636 }, { "epoch": 0.1532269387373052, "grad_norm": 18.620927810668945, "learning_rate": 5e-05, "loss": 1.2254, "num_input_tokens_seen": 108413068, "step": 1637 }, { "epoch": 0.1532269387373052, "loss": 1.2213191986083984, "loss_ce": 0.0035457073245197535, "loss_iou": 0.51953125, "loss_num": 0.035400390625, "loss_xval": 1.21875, "num_input_tokens_seen": 108413068, "step": 1637 }, { "epoch": 0.1533205410212009, "grad_norm": 19.51734733581543, "learning_rate": 5e-05, "loss": 1.5715, "num_input_tokens_seen": 108478264, "step": 1638 }, { "epoch": 0.1533205410212009, "loss": 1.7905712127685547, "loss_ce": 0.005414885934442282, "loss_iou": 0.765625, "loss_num": 0.0517578125, "loss_xval": 1.78125, "num_input_tokens_seen": 108478264, "step": 1638 }, { "epoch": 0.15341414330509665, "grad_norm": 21.999000549316406, "learning_rate": 5e-05, "loss": 1.3867, "num_input_tokens_seen": 108543548, "step": 1639 }, { "epoch": 0.15341414330509665, "loss": 1.2881088256835938, "loss_ce": 0.00930020771920681, "loss_iou": 0.5078125, "loss_num": 0.052978515625, "loss_xval": 1.28125, "num_input_tokens_seen": 108543548, "step": 1639 }, { "epoch": 0.15350774558899238, "grad_norm": 33.908164978027344, "learning_rate": 5e-05, "loss": 1.3126, "num_input_tokens_seen": 108609232, "step": 1640 }, { "epoch": 0.15350774558899238, "loss": 1.3487985134124756, "loss_ce": 0.006024984642863274, "loss_iou": 0.58984375, "loss_num": 0.03173828125, "loss_xval": 1.34375, "num_input_tokens_seen": 108609232, "step": 1640 }, { "epoch": 0.1536013478728881, "grad_norm": 38.1673698425293, "learning_rate": 5e-05, "loss": 1.7509, "num_input_tokens_seen": 108675260, "step": 1641 }, { "epoch": 0.1536013478728881, "loss": 1.6148765087127686, "loss_ce": 0.006478075869381428, "loss_iou": 0.7265625, "loss_num": 0.03173828125, "loss_xval": 1.609375, "num_input_tokens_seen": 108675260, "step": 1641 }, { "epoch": 0.15369495015678383, "grad_norm": 21.1893253326416, "learning_rate": 5e-05, "loss": 1.2581, "num_input_tokens_seen": 108741472, "step": 1642 }, { "epoch": 0.15369495015678383, "loss": 1.244871735572815, "loss_ce": 0.0012193932197988033, "loss_iou": 0.55859375, "loss_num": 0.0252685546875, "loss_xval": 1.2421875, "num_input_tokens_seen": 108741472, "step": 1642 }, { "epoch": 0.15378855244067954, "grad_norm": 31.65707015991211, "learning_rate": 5e-05, "loss": 1.3797, "num_input_tokens_seen": 108807840, "step": 1643 }, { "epoch": 0.15378855244067954, "loss": 1.431541919708252, "loss_ce": 0.004784135147929192, "loss_iou": 0.6171875, "loss_num": 0.0390625, "loss_xval": 1.4296875, "num_input_tokens_seen": 108807840, "step": 1643 }, { "epoch": 0.15388215472457528, "grad_norm": 34.68901824951172, "learning_rate": 5e-05, "loss": 1.4691, "num_input_tokens_seen": 108874196, "step": 1644 }, { "epoch": 0.15388215472457528, "loss": 1.3026742935180664, "loss_ce": 0.007264128886163235, "loss_iou": 0.5703125, "loss_num": 0.031494140625, "loss_xval": 1.296875, "num_input_tokens_seen": 108874196, "step": 1644 }, { "epoch": 0.15397575700847102, "grad_norm": 17.457021713256836, "learning_rate": 5e-05, "loss": 1.6578, "num_input_tokens_seen": 108940340, "step": 1645 }, { "epoch": 0.15397575700847102, "loss": 1.639525294303894, "loss_ce": 0.0042713712900877, "loss_iou": 0.69140625, "loss_num": 0.051025390625, "loss_xval": 1.6328125, "num_input_tokens_seen": 108940340, "step": 1645 }, { "epoch": 0.15406935929236673, "grad_norm": 18.910030364990234, "learning_rate": 5e-05, "loss": 1.3429, "num_input_tokens_seen": 109005824, "step": 1646 }, { "epoch": 0.15406935929236673, "loss": 1.312802791595459, "loss_ce": 0.004453194327652454, "loss_iou": 0.5546875, "loss_num": 0.039794921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 109005824, "step": 1646 }, { "epoch": 0.15416296157626247, "grad_norm": 22.063074111938477, "learning_rate": 5e-05, "loss": 1.3949, "num_input_tokens_seen": 109071708, "step": 1647 }, { "epoch": 0.15416296157626247, "loss": 1.3934065103530884, "loss_ce": 0.0057111382484436035, "loss_iou": 0.6015625, "loss_num": 0.037353515625, "loss_xval": 1.390625, "num_input_tokens_seen": 109071708, "step": 1647 }, { "epoch": 0.15425656386015818, "grad_norm": 19.004655838012695, "learning_rate": 5e-05, "loss": 1.6076, "num_input_tokens_seen": 109138780, "step": 1648 }, { "epoch": 0.15425656386015818, "loss": 1.6425758600234985, "loss_ce": 0.0063453614711761475, "loss_iou": 0.671875, "loss_num": 0.05908203125, "loss_xval": 1.6328125, "num_input_tokens_seen": 109138780, "step": 1648 }, { "epoch": 0.15435016614405392, "grad_norm": 23.013456344604492, "learning_rate": 5e-05, "loss": 1.7906, "num_input_tokens_seen": 109204364, "step": 1649 }, { "epoch": 0.15435016614405392, "loss": 1.803839087486267, "loss_ce": 0.007940629497170448, "loss_iou": 0.7421875, "loss_num": 0.062255859375, "loss_xval": 1.796875, "num_input_tokens_seen": 109204364, "step": 1649 }, { "epoch": 0.15444376842794963, "grad_norm": 20.71993064880371, "learning_rate": 5e-05, "loss": 1.3997, "num_input_tokens_seen": 109270792, "step": 1650 }, { "epoch": 0.15444376842794963, "loss": 1.245593547821045, "loss_ce": 0.008777073584496975, "loss_iou": 0.53125, "loss_num": 0.034423828125, "loss_xval": 1.234375, "num_input_tokens_seen": 109270792, "step": 1650 }, { "epoch": 0.15453737071184537, "grad_norm": 24.68073081970215, "learning_rate": 5e-05, "loss": 1.5782, "num_input_tokens_seen": 109336160, "step": 1651 }, { "epoch": 0.15453737071184537, "loss": 1.6002860069274902, "loss_ce": 0.0073905158787965775, "loss_iou": 0.6484375, "loss_num": 0.06005859375, "loss_xval": 1.59375, "num_input_tokens_seen": 109336160, "step": 1651 }, { "epoch": 0.1546309729957411, "grad_norm": 23.734413146972656, "learning_rate": 5e-05, "loss": 1.5779, "num_input_tokens_seen": 109402588, "step": 1652 }, { "epoch": 0.1546309729957411, "loss": 1.5255277156829834, "loss_ce": 0.005019812844693661, "loss_iou": 0.65234375, "loss_num": 0.043701171875, "loss_xval": 1.5234375, "num_input_tokens_seen": 109402588, "step": 1652 }, { "epoch": 0.15472457527963682, "grad_norm": 34.1220817565918, "learning_rate": 5e-05, "loss": 1.5569, "num_input_tokens_seen": 109469492, "step": 1653 }, { "epoch": 0.15472457527963682, "loss": 1.609342098236084, "loss_ce": 0.004849947988986969, "loss_iou": 0.63671875, "loss_num": 0.06591796875, "loss_xval": 1.6015625, "num_input_tokens_seen": 109469492, "step": 1653 }, { "epoch": 0.15481817756353256, "grad_norm": 46.722110748291016, "learning_rate": 5e-05, "loss": 1.4057, "num_input_tokens_seen": 109535360, "step": 1654 }, { "epoch": 0.15481817756353256, "loss": 1.1857918500900269, "loss_ce": 0.003174723358824849, "loss_iou": 0.46484375, "loss_num": 0.05078125, "loss_xval": 1.1796875, "num_input_tokens_seen": 109535360, "step": 1654 }, { "epoch": 0.15491177984742827, "grad_norm": 21.381216049194336, "learning_rate": 5e-05, "loss": 1.443, "num_input_tokens_seen": 109602452, "step": 1655 }, { "epoch": 0.15491177984742827, "loss": 1.32462739944458, "loss_ce": 0.0018735050689429045, "loss_iou": 0.59765625, "loss_num": 0.0262451171875, "loss_xval": 1.3203125, "num_input_tokens_seen": 109602452, "step": 1655 }, { "epoch": 0.155005382131324, "grad_norm": 23.39960479736328, "learning_rate": 5e-05, "loss": 1.7426, "num_input_tokens_seen": 109669016, "step": 1656 }, { "epoch": 0.155005382131324, "loss": 1.518391489982605, "loss_ce": 0.005207954440265894, "loss_iou": 0.62109375, "loss_num": 0.0537109375, "loss_xval": 1.515625, "num_input_tokens_seen": 109669016, "step": 1656 }, { "epoch": 0.15509898441521974, "grad_norm": 413.7605285644531, "learning_rate": 5e-05, "loss": 1.3168, "num_input_tokens_seen": 109735364, "step": 1657 }, { "epoch": 0.15509898441521974, "loss": 1.405256748199463, "loss_ce": 0.0026689593214541674, "loss_iou": 0.57421875, "loss_num": 0.05078125, "loss_xval": 1.40625, "num_input_tokens_seen": 109735364, "step": 1657 }, { "epoch": 0.15519258669911545, "grad_norm": 24.647920608520508, "learning_rate": 5e-05, "loss": 1.5683, "num_input_tokens_seen": 109802096, "step": 1658 }, { "epoch": 0.15519258669911545, "loss": 1.470474123954773, "loss_ce": 0.0046538179740309715, "loss_iou": 0.5703125, "loss_num": 0.0654296875, "loss_xval": 1.46875, "num_input_tokens_seen": 109802096, "step": 1658 }, { "epoch": 0.1552861889830112, "grad_norm": 19.028324127197266, "learning_rate": 5e-05, "loss": 1.4417, "num_input_tokens_seen": 109868356, "step": 1659 }, { "epoch": 0.1552861889830112, "loss": 1.4208152294158936, "loss_ce": 0.008217504248023033, "loss_iou": 0.5859375, "loss_num": 0.04931640625, "loss_xval": 1.4140625, "num_input_tokens_seen": 109868356, "step": 1659 }, { "epoch": 0.1553797912669069, "grad_norm": 22.659461975097656, "learning_rate": 5e-05, "loss": 1.4222, "num_input_tokens_seen": 109933888, "step": 1660 }, { "epoch": 0.1553797912669069, "loss": 1.5754845142364502, "loss_ce": 0.0022424214985221624, "loss_iou": 0.66015625, "loss_num": 0.051513671875, "loss_xval": 1.5703125, "num_input_tokens_seen": 109933888, "step": 1660 }, { "epoch": 0.15547339355080264, "grad_norm": 35.12961196899414, "learning_rate": 5e-05, "loss": 1.523, "num_input_tokens_seen": 109999532, "step": 1661 }, { "epoch": 0.15547339355080264, "loss": 1.4882129430770874, "loss_ce": 0.006279327906668186, "loss_iou": 0.5703125, "loss_num": 0.0693359375, "loss_xval": 1.484375, "num_input_tokens_seen": 109999532, "step": 1661 }, { "epoch": 0.15556699583469838, "grad_norm": 27.46817970275879, "learning_rate": 5e-05, "loss": 1.7694, "num_input_tokens_seen": 110066912, "step": 1662 }, { "epoch": 0.15556699583469838, "loss": 1.6405129432678223, "loss_ce": 0.0037942719645798206, "loss_iou": 0.7109375, "loss_num": 0.0439453125, "loss_xval": 1.640625, "num_input_tokens_seen": 110066912, "step": 1662 }, { "epoch": 0.1556605981185941, "grad_norm": 21.02796173095703, "learning_rate": 5e-05, "loss": 1.4437, "num_input_tokens_seen": 110133612, "step": 1663 }, { "epoch": 0.1556605981185941, "loss": 1.339165210723877, "loss_ce": 0.0022510988637804985, "loss_iou": 0.6015625, "loss_num": 0.026611328125, "loss_xval": 1.3359375, "num_input_tokens_seen": 110133612, "step": 1663 }, { "epoch": 0.15575420040248983, "grad_norm": 37.47615051269531, "learning_rate": 5e-05, "loss": 1.2583, "num_input_tokens_seen": 110199372, "step": 1664 }, { "epoch": 0.15575420040248983, "loss": 1.3542975187301636, "loss_ce": 0.009082659147679806, "loss_iou": 0.5859375, "loss_num": 0.033935546875, "loss_xval": 1.34375, "num_input_tokens_seen": 110199372, "step": 1664 }, { "epoch": 0.15584780268638554, "grad_norm": 18.19861602783203, "learning_rate": 5e-05, "loss": 1.614, "num_input_tokens_seen": 110264840, "step": 1665 }, { "epoch": 0.15584780268638554, "loss": 1.5534906387329102, "loss_ce": 0.0066156648099422455, "loss_iou": 0.68359375, "loss_num": 0.036376953125, "loss_xval": 1.546875, "num_input_tokens_seen": 110264840, "step": 1665 }, { "epoch": 0.15594140497028128, "grad_norm": 22.97083282470703, "learning_rate": 5e-05, "loss": 1.3964, "num_input_tokens_seen": 110331480, "step": 1666 }, { "epoch": 0.15594140497028128, "loss": 1.3985646963119507, "loss_ce": 0.0074514285661280155, "loss_iou": 0.62109375, "loss_num": 0.0301513671875, "loss_xval": 1.390625, "num_input_tokens_seen": 110331480, "step": 1666 }, { "epoch": 0.156035007254177, "grad_norm": 37.475643157958984, "learning_rate": 5e-05, "loss": 1.4204, "num_input_tokens_seen": 110396916, "step": 1667 }, { "epoch": 0.156035007254177, "loss": 1.2760589122772217, "loss_ce": 0.008114602416753769, "loss_iou": 0.52734375, "loss_num": 0.0419921875, "loss_xval": 1.265625, "num_input_tokens_seen": 110396916, "step": 1667 }, { "epoch": 0.15612860953807273, "grad_norm": 18.313720703125, "learning_rate": 5e-05, "loss": 1.5083, "num_input_tokens_seen": 110462724, "step": 1668 }, { "epoch": 0.15612860953807273, "loss": 1.5050681829452515, "loss_ce": 0.006044772453606129, "loss_iou": 0.5859375, "loss_num": 0.06591796875, "loss_xval": 1.5, "num_input_tokens_seen": 110462724, "step": 1668 }, { "epoch": 0.15622221182196847, "grad_norm": 20.74662208557129, "learning_rate": 5e-05, "loss": 1.4344, "num_input_tokens_seen": 110529368, "step": 1669 }, { "epoch": 0.15622221182196847, "loss": 1.3420653343200684, "loss_ce": 0.0041747004725039005, "loss_iou": 0.56640625, "loss_num": 0.041259765625, "loss_xval": 1.3359375, "num_input_tokens_seen": 110529368, "step": 1669 }, { "epoch": 0.15631581410586418, "grad_norm": 26.993072509765625, "learning_rate": 5e-05, "loss": 1.4631, "num_input_tokens_seen": 110594948, "step": 1670 }, { "epoch": 0.15631581410586418, "loss": 1.4857993125915527, "loss_ce": 0.0038657882250845432, "loss_iou": 0.62890625, "loss_num": 0.04443359375, "loss_xval": 1.484375, "num_input_tokens_seen": 110594948, "step": 1670 }, { "epoch": 0.15640941638975991, "grad_norm": 26.8720760345459, "learning_rate": 5e-05, "loss": 1.3475, "num_input_tokens_seen": 110661728, "step": 1671 }, { "epoch": 0.15640941638975991, "loss": 1.3804752826690674, "loss_ce": 0.004010488279163837, "loss_iou": 0.625, "loss_num": 0.0252685546875, "loss_xval": 1.375, "num_input_tokens_seen": 110661728, "step": 1671 }, { "epoch": 0.15650301867365563, "grad_norm": 18.590309143066406, "learning_rate": 5e-05, "loss": 1.584, "num_input_tokens_seen": 110726932, "step": 1672 }, { "epoch": 0.15650301867365563, "loss": 1.5854074954986572, "loss_ce": 0.009235595352947712, "loss_iou": 0.671875, "loss_num": 0.046142578125, "loss_xval": 1.578125, "num_input_tokens_seen": 110726932, "step": 1672 }, { "epoch": 0.15659662095755136, "grad_norm": 16.561054229736328, "learning_rate": 5e-05, "loss": 1.137, "num_input_tokens_seen": 110793776, "step": 1673 }, { "epoch": 0.15659662095755136, "loss": 1.1318494081497192, "loss_ce": 0.0029431560542434454, "loss_iou": 0.5078125, "loss_num": 0.0224609375, "loss_xval": 1.125, "num_input_tokens_seen": 110793776, "step": 1673 }, { "epoch": 0.1566902232414471, "grad_norm": 23.643726348876953, "learning_rate": 5e-05, "loss": 1.4951, "num_input_tokens_seen": 110860072, "step": 1674 }, { "epoch": 0.1566902232414471, "loss": 1.722797155380249, "loss_ce": 0.004047118593007326, "loss_iou": 0.7109375, "loss_num": 0.05859375, "loss_xval": 1.71875, "num_input_tokens_seen": 110860072, "step": 1674 }, { "epoch": 0.1567838255253428, "grad_norm": 39.8719596862793, "learning_rate": 5e-05, "loss": 1.4484, "num_input_tokens_seen": 110926548, "step": 1675 }, { "epoch": 0.1567838255253428, "loss": 1.5741641521453857, "loss_ce": 0.004828257951885462, "loss_iou": 0.6796875, "loss_num": 0.0419921875, "loss_xval": 1.5703125, "num_input_tokens_seen": 110926548, "step": 1675 }, { "epoch": 0.15687742780923855, "grad_norm": 16.57904624938965, "learning_rate": 5e-05, "loss": 1.7538, "num_input_tokens_seen": 110993156, "step": 1676 }, { "epoch": 0.15687742780923855, "loss": 1.8768911361694336, "loss_ce": 0.0038443254306912422, "loss_iou": 0.7890625, "loss_num": 0.05908203125, "loss_xval": 1.875, "num_input_tokens_seen": 110993156, "step": 1676 }, { "epoch": 0.15697103009313426, "grad_norm": 19.50870704650879, "learning_rate": 5e-05, "loss": 1.2599, "num_input_tokens_seen": 111058216, "step": 1677 }, { "epoch": 0.15697103009313426, "loss": 1.0795800685882568, "loss_ce": 0.004872962832450867, "loss_iou": 0.447265625, "loss_num": 0.0361328125, "loss_xval": 1.078125, "num_input_tokens_seen": 111058216, "step": 1677 }, { "epoch": 0.15706463237703, "grad_norm": 12.743280410766602, "learning_rate": 5e-05, "loss": 1.3816, "num_input_tokens_seen": 111124028, "step": 1678 }, { "epoch": 0.15706463237703, "loss": 1.6295316219329834, "loss_ce": 0.006973108276724815, "loss_iou": 0.63671875, "loss_num": 0.06982421875, "loss_xval": 1.625, "num_input_tokens_seen": 111124028, "step": 1678 }, { "epoch": 0.15715823466092574, "grad_norm": 11.009029388427734, "learning_rate": 5e-05, "loss": 1.4818, "num_input_tokens_seen": 111190800, "step": 1679 }, { "epoch": 0.15715823466092574, "loss": 1.6931604146957397, "loss_ce": 0.009566588327288628, "loss_iou": 0.6796875, "loss_num": 0.064453125, "loss_xval": 1.6875, "num_input_tokens_seen": 111190800, "step": 1679 }, { "epoch": 0.15725183694482145, "grad_norm": 14.895833969116211, "learning_rate": 5e-05, "loss": 1.4207, "num_input_tokens_seen": 111257668, "step": 1680 }, { "epoch": 0.15725183694482145, "loss": 1.3342525959014893, "loss_ce": 0.0022212760522961617, "loss_iou": 0.578125, "loss_num": 0.03564453125, "loss_xval": 1.328125, "num_input_tokens_seen": 111257668, "step": 1680 }, { "epoch": 0.1573454392287172, "grad_norm": 28.282615661621094, "learning_rate": 5e-05, "loss": 1.6942, "num_input_tokens_seen": 111323136, "step": 1681 }, { "epoch": 0.1573454392287172, "loss": 1.8464713096618652, "loss_ce": 0.005651105660945177, "loss_iou": 0.69140625, "loss_num": 0.09228515625, "loss_xval": 1.84375, "num_input_tokens_seen": 111323136, "step": 1681 }, { "epoch": 0.1574390415126129, "grad_norm": 17.331335067749023, "learning_rate": 5e-05, "loss": 1.342, "num_input_tokens_seen": 111390316, "step": 1682 }, { "epoch": 0.1574390415126129, "loss": 1.3260105848312378, "loss_ce": 0.004233264364302158, "loss_iou": 0.53125, "loss_num": 0.051513671875, "loss_xval": 1.3203125, "num_input_tokens_seen": 111390316, "step": 1682 }, { "epoch": 0.15753264379650864, "grad_norm": 21.25231170654297, "learning_rate": 5e-05, "loss": 1.3749, "num_input_tokens_seen": 111455756, "step": 1683 }, { "epoch": 0.15753264379650864, "loss": 1.3411973714828491, "loss_ce": 0.0042833369225263596, "loss_iou": 0.5703125, "loss_num": 0.039794921875, "loss_xval": 1.3359375, "num_input_tokens_seen": 111455756, "step": 1683 }, { "epoch": 0.15762624608040438, "grad_norm": 27.89680290222168, "learning_rate": 5e-05, "loss": 1.51, "num_input_tokens_seen": 111521896, "step": 1684 }, { "epoch": 0.15762624608040438, "loss": 1.5295137166976929, "loss_ce": 0.006076245103031397, "loss_iou": 0.61328125, "loss_num": 0.05859375, "loss_xval": 1.5234375, "num_input_tokens_seen": 111521896, "step": 1684 }, { "epoch": 0.15771984836430009, "grad_norm": 32.397132873535156, "learning_rate": 5e-05, "loss": 1.4896, "num_input_tokens_seen": 111588764, "step": 1685 }, { "epoch": 0.15771984836430009, "loss": 1.2692574262619019, "loss_ce": 0.007538630161434412, "loss_iou": 0.5703125, "loss_num": 0.0247802734375, "loss_xval": 1.265625, "num_input_tokens_seen": 111588764, "step": 1685 }, { "epoch": 0.15781345064819582, "grad_norm": 19.623626708984375, "learning_rate": 5e-05, "loss": 1.668, "num_input_tokens_seen": 111655256, "step": 1686 }, { "epoch": 0.15781345064819582, "loss": 1.6317683458328247, "loss_ce": 0.0021296862978488207, "loss_iou": 0.6953125, "loss_num": 0.04833984375, "loss_xval": 1.6328125, "num_input_tokens_seen": 111655256, "step": 1686 }, { "epoch": 0.15790705293209154, "grad_norm": 20.810579299926758, "learning_rate": 5e-05, "loss": 1.516, "num_input_tokens_seen": 111721760, "step": 1687 }, { "epoch": 0.15790705293209154, "loss": 1.3997474908828735, "loss_ce": 0.0061928387731313705, "loss_iou": 0.625, "loss_num": 0.0289306640625, "loss_xval": 1.390625, "num_input_tokens_seen": 111721760, "step": 1687 }, { "epoch": 0.15800065521598727, "grad_norm": 19.822107315063477, "learning_rate": 5e-05, "loss": 1.2058, "num_input_tokens_seen": 111789768, "step": 1688 }, { "epoch": 0.15800065521598727, "loss": 1.2267491817474365, "loss_ce": 0.00555785745382309, "loss_iou": 0.56640625, "loss_num": 0.0177001953125, "loss_xval": 1.21875, "num_input_tokens_seen": 111789768, "step": 1688 }, { "epoch": 0.15809425749988298, "grad_norm": 19.842737197875977, "learning_rate": 5e-05, "loss": 1.4674, "num_input_tokens_seen": 111856788, "step": 1689 }, { "epoch": 0.15809425749988298, "loss": 1.2636029720306396, "loss_ce": 0.0018842138815671206, "loss_iou": 0.53515625, "loss_num": 0.0390625, "loss_xval": 1.265625, "num_input_tokens_seen": 111856788, "step": 1689 }, { "epoch": 0.15818785978377872, "grad_norm": 26.088294982910156, "learning_rate": 5e-05, "loss": 1.0922, "num_input_tokens_seen": 111922244, "step": 1690 }, { "epoch": 0.15818785978377872, "loss": 1.1618432998657227, "loss_ce": 0.0031518477480858564, "loss_iou": 0.49609375, "loss_num": 0.033447265625, "loss_xval": 1.15625, "num_input_tokens_seen": 111922244, "step": 1690 }, { "epoch": 0.15828146206767446, "grad_norm": 19.719554901123047, "learning_rate": 5e-05, "loss": 1.5984, "num_input_tokens_seen": 111987568, "step": 1691 }, { "epoch": 0.15828146206767446, "loss": 1.5319783687591553, "loss_ce": 0.004817795939743519, "loss_iou": 0.6484375, "loss_num": 0.04638671875, "loss_xval": 1.5234375, "num_input_tokens_seen": 111987568, "step": 1691 }, { "epoch": 0.15837506435157017, "grad_norm": 22.947669982910156, "learning_rate": 5e-05, "loss": 1.1232, "num_input_tokens_seen": 112053276, "step": 1692 }, { "epoch": 0.15837506435157017, "loss": 1.0806947946548462, "loss_ce": 0.004522897768765688, "loss_iou": 0.4609375, "loss_num": 0.0303955078125, "loss_xval": 1.078125, "num_input_tokens_seen": 112053276, "step": 1692 }, { "epoch": 0.1584686666354659, "grad_norm": 22.45537567138672, "learning_rate": 5e-05, "loss": 1.6815, "num_input_tokens_seen": 112119604, "step": 1693 }, { "epoch": 0.1584686666354659, "loss": 1.701782464981079, "loss_ce": 0.004516840912401676, "loss_iou": 0.69921875, "loss_num": 0.05908203125, "loss_xval": 1.6953125, "num_input_tokens_seen": 112119604, "step": 1693 }, { "epoch": 0.15856226891936162, "grad_norm": 43.1389274597168, "learning_rate": 5e-05, "loss": 1.572, "num_input_tokens_seen": 112185800, "step": 1694 }, { "epoch": 0.15856226891936162, "loss": 1.6698806285858154, "loss_ce": 0.007771380711346865, "loss_iou": 0.6875, "loss_num": 0.0576171875, "loss_xval": 1.6640625, "num_input_tokens_seen": 112185800, "step": 1694 }, { "epoch": 0.15865587120325736, "grad_norm": 18.637434005737305, "learning_rate": 5e-05, "loss": 1.7675, "num_input_tokens_seen": 112252176, "step": 1695 }, { "epoch": 0.15865587120325736, "loss": 1.7455856800079346, "loss_ce": 0.0033982768654823303, "loss_iou": 0.734375, "loss_num": 0.0556640625, "loss_xval": 1.7421875, "num_input_tokens_seen": 112252176, "step": 1695 }, { "epoch": 0.1587494734871531, "grad_norm": 18.56271743774414, "learning_rate": 5e-05, "loss": 1.4154, "num_input_tokens_seen": 112319724, "step": 1696 }, { "epoch": 0.1587494734871531, "loss": 1.4192160367965698, "loss_ce": 0.0022238276433199644, "loss_iou": 0.58984375, "loss_num": 0.0478515625, "loss_xval": 1.4140625, "num_input_tokens_seen": 112319724, "step": 1696 }, { "epoch": 0.1588430757710488, "grad_norm": 22.364852905273438, "learning_rate": 5e-05, "loss": 1.4672, "num_input_tokens_seen": 112385784, "step": 1697 }, { "epoch": 0.1588430757710488, "loss": 1.256899118423462, "loss_ce": 0.004335670731961727, "loss_iou": 0.53125, "loss_num": 0.038330078125, "loss_xval": 1.25, "num_input_tokens_seen": 112385784, "step": 1697 }, { "epoch": 0.15893667805494455, "grad_norm": 37.38805389404297, "learning_rate": 5e-05, "loss": 1.8337, "num_input_tokens_seen": 112452792, "step": 1698 }, { "epoch": 0.15893667805494455, "loss": 1.7924762964248657, "loss_ce": 0.003413844620808959, "loss_iou": 0.7734375, "loss_num": 0.048828125, "loss_xval": 1.7890625, "num_input_tokens_seen": 112452792, "step": 1698 }, { "epoch": 0.15903028033884026, "grad_norm": 21.084718704223633, "learning_rate": 5e-05, "loss": 1.5781, "num_input_tokens_seen": 112519992, "step": 1699 }, { "epoch": 0.15903028033884026, "loss": 1.5638010501861572, "loss_ce": 0.007160455919802189, "loss_iou": 0.671875, "loss_num": 0.04248046875, "loss_xval": 1.5546875, "num_input_tokens_seen": 112519992, "step": 1699 }, { "epoch": 0.159123882622736, "grad_norm": 20.720853805541992, "learning_rate": 5e-05, "loss": 1.3595, "num_input_tokens_seen": 112585208, "step": 1700 }, { "epoch": 0.159123882622736, "loss": 1.4855835437774658, "loss_ce": 0.004138189367949963, "loss_iou": 0.6171875, "loss_num": 0.04931640625, "loss_xval": 1.484375, "num_input_tokens_seen": 112585208, "step": 1700 }, { "epoch": 0.15921748490663173, "grad_norm": 35.28002166748047, "learning_rate": 5e-05, "loss": 1.4608, "num_input_tokens_seen": 112650564, "step": 1701 }, { "epoch": 0.15921748490663173, "loss": 1.1329504251480103, "loss_ce": 0.0030675572343170643, "loss_iou": 0.490234375, "loss_num": 0.0301513671875, "loss_xval": 1.1328125, "num_input_tokens_seen": 112650564, "step": 1701 }, { "epoch": 0.15931108719052745, "grad_norm": 20.50391387939453, "learning_rate": 5e-05, "loss": 1.3924, "num_input_tokens_seen": 112717524, "step": 1702 }, { "epoch": 0.15931108719052745, "loss": 1.4781479835510254, "loss_ce": 0.005491648800671101, "loss_iou": 0.65234375, "loss_num": 0.03369140625, "loss_xval": 1.46875, "num_input_tokens_seen": 112717524, "step": 1702 }, { "epoch": 0.15940468947442318, "grad_norm": 19.996809005737305, "learning_rate": 5e-05, "loss": 1.3563, "num_input_tokens_seen": 112783004, "step": 1703 }, { "epoch": 0.15940468947442318, "loss": 1.3293205499649048, "loss_ce": 0.011449402198195457, "loss_iou": 0.58203125, "loss_num": 0.030517578125, "loss_xval": 1.3203125, "num_input_tokens_seen": 112783004, "step": 1703 }, { "epoch": 0.1594982917583189, "grad_norm": 37.05931091308594, "learning_rate": 5e-05, "loss": 1.5565, "num_input_tokens_seen": 112848828, "step": 1704 }, { "epoch": 0.1594982917583189, "loss": 1.5927069187164307, "loss_ce": 0.0016424510395154357, "loss_iou": 0.69140625, "loss_num": 0.041748046875, "loss_xval": 1.59375, "num_input_tokens_seen": 112848828, "step": 1704 }, { "epoch": 0.15959189404221463, "grad_norm": 13.735276222229004, "learning_rate": 5e-05, "loss": 1.7309, "num_input_tokens_seen": 112915824, "step": 1705 }, { "epoch": 0.15959189404221463, "loss": 1.5490221977233887, "loss_ce": 0.007182505913078785, "loss_iou": 0.65625, "loss_num": 0.0458984375, "loss_xval": 1.5390625, "num_input_tokens_seen": 112915824, "step": 1705 }, { "epoch": 0.15968549632611034, "grad_norm": 264.939208984375, "learning_rate": 5e-05, "loss": 1.7414, "num_input_tokens_seen": 112982292, "step": 1706 }, { "epoch": 0.15968549632611034, "loss": 1.9344041347503662, "loss_ce": 0.002763570984825492, "loss_iou": 0.8125, "loss_num": 0.06201171875, "loss_xval": 1.9296875, "num_input_tokens_seen": 112982292, "step": 1706 }, { "epoch": 0.15977909861000608, "grad_norm": 18.73700714111328, "learning_rate": 5e-05, "loss": 1.3528, "num_input_tokens_seen": 113047952, "step": 1707 }, { "epoch": 0.15977909861000608, "loss": 1.340691328048706, "loss_ce": 0.006218681577593088, "loss_iou": 0.5546875, "loss_num": 0.045166015625, "loss_xval": 1.3359375, "num_input_tokens_seen": 113047952, "step": 1707 }, { "epoch": 0.15987270089390182, "grad_norm": 20.325817108154297, "learning_rate": 5e-05, "loss": 1.4621, "num_input_tokens_seen": 113113652, "step": 1708 }, { "epoch": 0.15987270089390182, "loss": 1.5143709182739258, "loss_ce": 0.00558188371360302, "loss_iou": 0.640625, "loss_num": 0.0458984375, "loss_xval": 1.5078125, "num_input_tokens_seen": 113113652, "step": 1708 }, { "epoch": 0.15996630317779753, "grad_norm": 27.74958038330078, "learning_rate": 5e-05, "loss": 1.4734, "num_input_tokens_seen": 113179736, "step": 1709 }, { "epoch": 0.15996630317779753, "loss": 1.6561379432678223, "loss_ce": 0.0028177243657410145, "loss_iou": 0.69140625, "loss_num": 0.053955078125, "loss_xval": 1.65625, "num_input_tokens_seen": 113179736, "step": 1709 }, { "epoch": 0.16005990546169327, "grad_norm": 19.84098243713379, "learning_rate": 5e-05, "loss": 1.4291, "num_input_tokens_seen": 113246760, "step": 1710 }, { "epoch": 0.16005990546169327, "loss": 1.3039138317108154, "loss_ce": 0.00801539421081543, "loss_iou": 0.54296875, "loss_num": 0.04150390625, "loss_xval": 1.296875, "num_input_tokens_seen": 113246760, "step": 1710 }, { "epoch": 0.16015350774558898, "grad_norm": 21.781787872314453, "learning_rate": 5e-05, "loss": 1.3981, "num_input_tokens_seen": 113312584, "step": 1711 }, { "epoch": 0.16015350774558898, "loss": 1.2843029499053955, "loss_ce": 0.005219653248786926, "loss_iou": 0.5390625, "loss_num": 0.0400390625, "loss_xval": 1.28125, "num_input_tokens_seen": 113312584, "step": 1711 }, { "epoch": 0.16024711002948472, "grad_norm": 37.196868896484375, "learning_rate": 5e-05, "loss": 1.5081, "num_input_tokens_seen": 113378868, "step": 1712 }, { "epoch": 0.16024711002948472, "loss": 1.3969182968139648, "loss_ce": 0.007361485622823238, "loss_iou": 0.6015625, "loss_num": 0.037353515625, "loss_xval": 1.390625, "num_input_tokens_seen": 113378868, "step": 1712 }, { "epoch": 0.16034071231338046, "grad_norm": 18.481407165527344, "learning_rate": 5e-05, "loss": 1.9191, "num_input_tokens_seen": 113444572, "step": 1713 }, { "epoch": 0.16034071231338046, "loss": 1.9023736715316772, "loss_ce": 0.005889302119612694, "loss_iou": 0.765625, "loss_num": 0.0732421875, "loss_xval": 1.8984375, "num_input_tokens_seen": 113444572, "step": 1713 }, { "epoch": 0.16043431459727617, "grad_norm": 17.455732345581055, "learning_rate": 5e-05, "loss": 1.5026, "num_input_tokens_seen": 113510596, "step": 1714 }, { "epoch": 0.16043431459727617, "loss": 1.6242791414260864, "loss_ce": 0.005138522945344448, "loss_iou": 0.6640625, "loss_num": 0.058349609375, "loss_xval": 1.6171875, "num_input_tokens_seen": 113510596, "step": 1714 }, { "epoch": 0.1605279168811719, "grad_norm": 16.29709243774414, "learning_rate": 5e-05, "loss": 1.2475, "num_input_tokens_seen": 113576824, "step": 1715 }, { "epoch": 0.1605279168811719, "loss": 1.4377756118774414, "loss_ce": 0.0022287005558609962, "loss_iou": 0.5859375, "loss_num": 0.05322265625, "loss_xval": 1.4375, "num_input_tokens_seen": 113576824, "step": 1715 }, { "epoch": 0.16062151916506762, "grad_norm": 24.637439727783203, "learning_rate": 5e-05, "loss": 1.3664, "num_input_tokens_seen": 113643476, "step": 1716 }, { "epoch": 0.16062151916506762, "loss": 1.0036041736602783, "loss_ce": 0.007998719811439514, "loss_iou": 0.42578125, "loss_num": 0.028564453125, "loss_xval": 0.99609375, "num_input_tokens_seen": 113643476, "step": 1716 }, { "epoch": 0.16071512144896336, "grad_norm": 22.650537490844727, "learning_rate": 5e-05, "loss": 1.8128, "num_input_tokens_seen": 113708928, "step": 1717 }, { "epoch": 0.16071512144896336, "loss": 1.9136106967926025, "loss_ce": 0.00833725742995739, "loss_iou": 0.7734375, "loss_num": 0.0712890625, "loss_xval": 1.90625, "num_input_tokens_seen": 113708928, "step": 1717 }, { "epoch": 0.1608087237328591, "grad_norm": 24.803722381591797, "learning_rate": 5e-05, "loss": 1.3218, "num_input_tokens_seen": 113774932, "step": 1718 }, { "epoch": 0.1608087237328591, "loss": 1.2616074085235596, "loss_ce": 0.0037948982790112495, "loss_iou": 0.54296875, "loss_num": 0.034912109375, "loss_xval": 1.2578125, "num_input_tokens_seen": 113774932, "step": 1718 }, { "epoch": 0.1609023260167548, "grad_norm": 33.39313507080078, "learning_rate": 5e-05, "loss": 1.5498, "num_input_tokens_seen": 113841396, "step": 1719 }, { "epoch": 0.1609023260167548, "loss": 1.6364741325378418, "loss_ce": 0.0031732642091810703, "loss_iou": 0.70703125, "loss_num": 0.0439453125, "loss_xval": 1.6328125, "num_input_tokens_seen": 113841396, "step": 1719 }, { "epoch": 0.16099592830065054, "grad_norm": 19.37911605834961, "learning_rate": 5e-05, "loss": 1.6403, "num_input_tokens_seen": 113907016, "step": 1720 }, { "epoch": 0.16099592830065054, "loss": 1.7368113994598389, "loss_ce": 0.00731926504522562, "loss_iou": 0.734375, "loss_num": 0.05224609375, "loss_xval": 1.7265625, "num_input_tokens_seen": 113907016, "step": 1720 }, { "epoch": 0.16108953058454625, "grad_norm": 15.980087280273438, "learning_rate": 5e-05, "loss": 1.5086, "num_input_tokens_seen": 113973288, "step": 1721 }, { "epoch": 0.16108953058454625, "loss": 1.6609373092651367, "loss_ce": 0.002734190784394741, "loss_iou": 0.7109375, "loss_num": 0.046875, "loss_xval": 1.65625, "num_input_tokens_seen": 113973288, "step": 1721 }, { "epoch": 0.161183132868442, "grad_norm": 16.575654983520508, "learning_rate": 5e-05, "loss": 1.3348, "num_input_tokens_seen": 114039256, "step": 1722 }, { "epoch": 0.161183132868442, "loss": 1.1895720958709717, "loss_ce": 0.009396284818649292, "loss_iou": 0.5078125, "loss_num": 0.032470703125, "loss_xval": 1.1796875, "num_input_tokens_seen": 114039256, "step": 1722 }, { "epoch": 0.16127673515233773, "grad_norm": 21.366851806640625, "learning_rate": 5e-05, "loss": 1.4415, "num_input_tokens_seen": 114105624, "step": 1723 }, { "epoch": 0.16127673515233773, "loss": 1.335167646408081, "loss_ce": 0.005577889271080494, "loss_iou": 0.58203125, "loss_num": 0.033935546875, "loss_xval": 1.328125, "num_input_tokens_seen": 114105624, "step": 1723 }, { "epoch": 0.16137033743623344, "grad_norm": 49.82637023925781, "learning_rate": 5e-05, "loss": 1.5265, "num_input_tokens_seen": 114172212, "step": 1724 }, { "epoch": 0.16137033743623344, "loss": 1.6437671184539795, "loss_ce": 0.00314208772033453, "loss_iou": 0.69921875, "loss_num": 0.048095703125, "loss_xval": 1.640625, "num_input_tokens_seen": 114172212, "step": 1724 }, { "epoch": 0.16146393972012918, "grad_norm": 21.099742889404297, "learning_rate": 5e-05, "loss": 1.3256, "num_input_tokens_seen": 114238672, "step": 1725 }, { "epoch": 0.16146393972012918, "loss": 1.3868200778961182, "loss_ce": 0.00156625104136765, "loss_iou": 0.60546875, "loss_num": 0.034912109375, "loss_xval": 1.3828125, "num_input_tokens_seen": 114238672, "step": 1725 }, { "epoch": 0.1615575420040249, "grad_norm": 34.11566925048828, "learning_rate": 5e-05, "loss": 1.561, "num_input_tokens_seen": 114304536, "step": 1726 }, { "epoch": 0.1615575420040249, "loss": 1.4601449966430664, "loss_ce": 0.00787458848208189, "loss_iou": 0.59375, "loss_num": 0.052978515625, "loss_xval": 1.453125, "num_input_tokens_seen": 114304536, "step": 1726 }, { "epoch": 0.16165114428792063, "grad_norm": 46.72332763671875, "learning_rate": 5e-05, "loss": 1.5351, "num_input_tokens_seen": 114369684, "step": 1727 }, { "epoch": 0.16165114428792063, "loss": 1.58083176612854, "loss_ce": 0.007589599583297968, "loss_iou": 0.65625, "loss_num": 0.05224609375, "loss_xval": 1.5703125, "num_input_tokens_seen": 114369684, "step": 1727 }, { "epoch": 0.16174474657181634, "grad_norm": 12.304830551147461, "learning_rate": 5e-05, "loss": 1.6069, "num_input_tokens_seen": 114435812, "step": 1728 }, { "epoch": 0.16174474657181634, "loss": 1.6097861528396606, "loss_ce": 0.005293989088386297, "loss_iou": 0.65234375, "loss_num": 0.06005859375, "loss_xval": 1.6015625, "num_input_tokens_seen": 114435812, "step": 1728 }, { "epoch": 0.16183834885571208, "grad_norm": 19.8958683013916, "learning_rate": 5e-05, "loss": 1.5483, "num_input_tokens_seen": 114501696, "step": 1729 }, { "epoch": 0.16183834885571208, "loss": 1.3685400485992432, "loss_ce": 0.0033057434484362602, "loss_iou": 0.57421875, "loss_num": 0.043212890625, "loss_xval": 1.3671875, "num_input_tokens_seen": 114501696, "step": 1729 }, { "epoch": 0.16193195113960782, "grad_norm": 30.420421600341797, "learning_rate": 5e-05, "loss": 1.5928, "num_input_tokens_seen": 114565812, "step": 1730 }, { "epoch": 0.16193195113960782, "loss": 1.695379614830017, "loss_ce": 0.005926554091274738, "loss_iou": 0.62890625, "loss_num": 0.0859375, "loss_xval": 1.6875, "num_input_tokens_seen": 114565812, "step": 1730 }, { "epoch": 0.16202555342350353, "grad_norm": 20.304121017456055, "learning_rate": 5e-05, "loss": 1.6293, "num_input_tokens_seen": 114631956, "step": 1731 }, { "epoch": 0.16202555342350353, "loss": 1.8273935317993164, "loss_ce": 0.006104365922510624, "loss_iou": 0.74609375, "loss_num": 0.06640625, "loss_xval": 1.8203125, "num_input_tokens_seen": 114631956, "step": 1731 }, { "epoch": 0.16211915570739927, "grad_norm": 19.808990478515625, "learning_rate": 5e-05, "loss": 1.4501, "num_input_tokens_seen": 114698872, "step": 1732 }, { "epoch": 0.16211915570739927, "loss": 1.4610393047332764, "loss_ce": 0.004008077085018158, "loss_iou": 0.6015625, "loss_num": 0.050537109375, "loss_xval": 1.453125, "num_input_tokens_seen": 114698872, "step": 1732 }, { "epoch": 0.16221275799129498, "grad_norm": 21.50043487548828, "learning_rate": 5e-05, "loss": 1.432, "num_input_tokens_seen": 114764944, "step": 1733 }, { "epoch": 0.16221275799129498, "loss": 1.4860564470291138, "loss_ce": 0.00412286352366209, "loss_iou": 0.625, "loss_num": 0.046875, "loss_xval": 1.484375, "num_input_tokens_seen": 114764944, "step": 1733 }, { "epoch": 0.16230636027519071, "grad_norm": 50.31922149658203, "learning_rate": 5e-05, "loss": 1.5526, "num_input_tokens_seen": 114830980, "step": 1734 }, { "epoch": 0.16230636027519071, "loss": 1.5092554092407227, "loss_ce": 0.0068140276707708836, "loss_iou": 0.58203125, "loss_num": 0.068359375, "loss_xval": 1.5, "num_input_tokens_seen": 114830980, "step": 1734 }, { "epoch": 0.16239996255908645, "grad_norm": 31.06298065185547, "learning_rate": 5e-05, "loss": 1.5539, "num_input_tokens_seen": 114896520, "step": 1735 }, { "epoch": 0.16239996255908645, "loss": 1.5440752506256104, "loss_ce": 0.007942529395222664, "loss_iou": 0.64453125, "loss_num": 0.050048828125, "loss_xval": 1.5390625, "num_input_tokens_seen": 114896520, "step": 1735 }, { "epoch": 0.16249356484298216, "grad_norm": 22.209434509277344, "learning_rate": 5e-05, "loss": 1.5908, "num_input_tokens_seen": 114962788, "step": 1736 }, { "epoch": 0.16249356484298216, "loss": 1.6674981117248535, "loss_ce": 0.01027146726846695, "loss_iou": 0.7265625, "loss_num": 0.04052734375, "loss_xval": 1.65625, "num_input_tokens_seen": 114962788, "step": 1736 }, { "epoch": 0.1625871671268779, "grad_norm": 26.099212646484375, "learning_rate": 5e-05, "loss": 1.7138, "num_input_tokens_seen": 115028892, "step": 1737 }, { "epoch": 0.1625871671268779, "loss": 1.9323307275772095, "loss_ce": 0.005572822876274586, "loss_iou": 0.78515625, "loss_num": 0.0712890625, "loss_xval": 1.9296875, "num_input_tokens_seen": 115028892, "step": 1737 }, { "epoch": 0.1626807694107736, "grad_norm": 23.11241912841797, "learning_rate": 5e-05, "loss": 1.6092, "num_input_tokens_seen": 115094980, "step": 1738 }, { "epoch": 0.1626807694107736, "loss": 1.8186943531036377, "loss_ce": 0.006194377318024635, "loss_iou": 0.7578125, "loss_num": 0.058837890625, "loss_xval": 1.8125, "num_input_tokens_seen": 115094980, "step": 1738 }, { "epoch": 0.16277437169466935, "grad_norm": 15.767333030700684, "learning_rate": 5e-05, "loss": 1.3477, "num_input_tokens_seen": 115160364, "step": 1739 }, { "epoch": 0.16277437169466935, "loss": 1.303972840309143, "loss_ce": 0.004656454082578421, "loss_iou": 0.55078125, "loss_num": 0.0400390625, "loss_xval": 1.296875, "num_input_tokens_seen": 115160364, "step": 1739 }, { "epoch": 0.1628679739785651, "grad_norm": 26.839588165283203, "learning_rate": 5e-05, "loss": 1.4647, "num_input_tokens_seen": 115226664, "step": 1740 }, { "epoch": 0.1628679739785651, "loss": 1.5073782205581665, "loss_ce": 0.009331364184617996, "loss_iou": 0.65625, "loss_num": 0.036376953125, "loss_xval": 1.5, "num_input_tokens_seen": 115226664, "step": 1740 }, { "epoch": 0.1629615762624608, "grad_norm": 22.037633895874023, "learning_rate": 5e-05, "loss": 1.4477, "num_input_tokens_seen": 115292240, "step": 1741 }, { "epoch": 0.1629615762624608, "loss": 1.5085651874542236, "loss_ce": 0.005635551176965237, "loss_iou": 0.640625, "loss_num": 0.044921875, "loss_xval": 1.5, "num_input_tokens_seen": 115292240, "step": 1741 }, { "epoch": 0.16305517854635654, "grad_norm": 24.764665603637695, "learning_rate": 5e-05, "loss": 1.4892, "num_input_tokens_seen": 115359484, "step": 1742 }, { "epoch": 0.16305517854635654, "loss": 1.4975062608718872, "loss_ce": 0.0058071063831448555, "loss_iou": 0.62109375, "loss_num": 0.050048828125, "loss_xval": 1.4921875, "num_input_tokens_seen": 115359484, "step": 1742 }, { "epoch": 0.16314878083025225, "grad_norm": 30.43408966064453, "learning_rate": 5e-05, "loss": 1.3994, "num_input_tokens_seen": 115424972, "step": 1743 }, { "epoch": 0.16314878083025225, "loss": 1.1698040962219238, "loss_ce": 0.009159505367279053, "loss_iou": 0.51953125, "loss_num": 0.0244140625, "loss_xval": 1.1640625, "num_input_tokens_seen": 115424972, "step": 1743 }, { "epoch": 0.163242383114148, "grad_norm": 14.067171096801758, "learning_rate": 5e-05, "loss": 1.6832, "num_input_tokens_seen": 115490808, "step": 1744 }, { "epoch": 0.163242383114148, "loss": 1.4237908124923706, "loss_ce": 0.004845545627176762, "loss_iou": 0.6171875, "loss_num": 0.0361328125, "loss_xval": 1.421875, "num_input_tokens_seen": 115490808, "step": 1744 }, { "epoch": 0.16333598539804373, "grad_norm": 25.986255645751953, "learning_rate": 5e-05, "loss": 1.4692, "num_input_tokens_seen": 115555808, "step": 1745 }, { "epoch": 0.16333598539804373, "loss": 1.392876386642456, "loss_ce": 0.006157543044537306, "loss_iou": 0.57421875, "loss_num": 0.04736328125, "loss_xval": 1.390625, "num_input_tokens_seen": 115555808, "step": 1745 }, { "epoch": 0.16342958768193944, "grad_norm": 26.338821411132812, "learning_rate": 5e-05, "loss": 1.2838, "num_input_tokens_seen": 115621296, "step": 1746 }, { "epoch": 0.16342958768193944, "loss": 1.0939245223999023, "loss_ce": 0.00432491535320878, "loss_iou": 0.470703125, "loss_num": 0.029296875, "loss_xval": 1.0859375, "num_input_tokens_seen": 115621296, "step": 1746 }, { "epoch": 0.16352318996583518, "grad_norm": 18.52241325378418, "learning_rate": 5e-05, "loss": 1.4664, "num_input_tokens_seen": 115688028, "step": 1747 }, { "epoch": 0.16352318996583518, "loss": 1.514211654663086, "loss_ce": 0.005422556772828102, "loss_iou": 0.61328125, "loss_num": 0.05615234375, "loss_xval": 1.5078125, "num_input_tokens_seen": 115688028, "step": 1747 }, { "epoch": 0.16361679224973089, "grad_norm": 27.766054153442383, "learning_rate": 5e-05, "loss": 1.445, "num_input_tokens_seen": 115754952, "step": 1748 }, { "epoch": 0.16361679224973089, "loss": 1.6001439094543457, "loss_ce": 0.0029758987948298454, "loss_iou": 0.6328125, "loss_num": 0.06689453125, "loss_xval": 1.59375, "num_input_tokens_seen": 115754952, "step": 1748 }, { "epoch": 0.16371039453362662, "grad_norm": 20.995222091674805, "learning_rate": 5e-05, "loss": 1.7868, "num_input_tokens_seen": 115822616, "step": 1749 }, { "epoch": 0.16371039453362662, "loss": 2.139638900756836, "loss_ce": 0.008779620751738548, "loss_iou": 0.8671875, "loss_num": 0.07861328125, "loss_xval": 2.125, "num_input_tokens_seen": 115822616, "step": 1749 }, { "epoch": 0.16380399681752233, "grad_norm": 36.072669982910156, "learning_rate": 5e-05, "loss": 1.4286, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16380399681752233, "eval_seeclick_CIoU": 0.13988454267382622, "eval_seeclick_GIoU": 0.1664675995707512, "eval_seeclick_IoU": 0.26141196489334106, "eval_seeclick_MAE_all": 0.15366214513778687, "eval_seeclick_MAE_h": 0.11682414263486862, "eval_seeclick_MAE_w": 0.11442156881093979, "eval_seeclick_MAE_x_boxes": 0.18956541270017624, "eval_seeclick_MAE_y_boxes": 0.12460571527481079, "eval_seeclick_NUM_probability": 0.9996358752250671, "eval_seeclick_inside_bbox": 0.35625000298023224, "eval_seeclick_loss": 2.53466796875, "eval_seeclick_loss_ce": 0.01495735952630639, "eval_seeclick_loss_iou": 0.87060546875, "eval_seeclick_loss_num": 0.1603240966796875, "eval_seeclick_loss_xval": 2.54296875, "eval_seeclick_runtime": 61.5896, "eval_seeclick_samples_per_second": 0.763, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16380399681752233, "eval_icons_CIoU": -0.04583752155303955, "eval_icons_GIoU": 0.0219585164450109, "eval_icons_IoU": 0.10735082998871803, "eval_icons_MAE_all": 0.13731835782527924, "eval_icons_MAE_h": 0.13323041796684265, "eval_icons_MAE_w": 0.12281985208392143, "eval_icons_MAE_x_boxes": 0.09434624761343002, "eval_icons_MAE_y_boxes": 0.09666785225272179, "eval_icons_NUM_probability": 0.999868243932724, "eval_icons_inside_bbox": 0.1493055559694767, "eval_icons_loss": 2.6144890785217285, "eval_icons_loss_ce": 0.00014377359912032261, "eval_icons_loss_iou": 0.982177734375, "eval_icons_loss_num": 0.1362457275390625, "eval_icons_loss_xval": 2.64501953125, "eval_icons_runtime": 64.0511, "eval_icons_samples_per_second": 0.781, "eval_icons_steps_per_second": 0.031, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16380399681752233, "eval_screenspot_CIoU": -0.0028064359600345292, "eval_screenspot_GIoU": 0.02295659513523181, "eval_screenspot_IoU": 0.16487163801987967, "eval_screenspot_MAE_all": 0.19828185935815176, "eval_screenspot_MAE_h": 0.19914381702740988, "eval_screenspot_MAE_w": 0.192783792813619, "eval_screenspot_MAE_x_boxes": 0.18208610514799753, "eval_screenspot_MAE_y_boxes": 0.12487444778283437, "eval_screenspot_NUM_probability": 0.9997270703315735, "eval_screenspot_inside_bbox": 0.3462499976158142, "eval_screenspot_loss": 2.9908530712127686, "eval_screenspot_loss_ce": 0.009655209258198738, "eval_screenspot_loss_iou": 0.9970703125, "eval_screenspot_loss_num": 0.20709228515625, "eval_screenspot_loss_xval": 3.0305989583333335, "eval_screenspot_runtime": 111.6207, "eval_screenspot_samples_per_second": 0.797, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16380399681752233, "eval_compot_CIoU": -0.06205618567764759, "eval_compot_GIoU": -0.04428771324455738, "eval_compot_IoU": 0.10950541123747826, "eval_compot_MAE_all": 0.17187052220106125, "eval_compot_MAE_h": 0.14416931197047234, "eval_compot_MAE_w": 0.15638110414147377, "eval_compot_MAE_x_boxes": 0.15225274860858917, "eval_compot_MAE_y_boxes": 0.12462204694747925, "eval_compot_NUM_probability": 0.9998358190059662, "eval_compot_inside_bbox": 0.2447916716337204, "eval_compot_loss": 3.0344960689544678, "eval_compot_loss_ce": 0.005371739389374852, "eval_compot_loss_iou": 1.078125, "eval_compot_loss_num": 0.19002532958984375, "eval_compot_loss_xval": 3.107421875, "eval_compot_runtime": 66.3066, "eval_compot_samples_per_second": 0.754, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16380399681752233, "eval_custom_ui_MAE_all": 0.13191691786050797, "eval_custom_ui_MAE_x": 0.12669065594673157, "eval_custom_ui_MAE_y": 0.13714319840073586, "eval_custom_ui_NUM_probability": 0.9998179078102112, "eval_custom_ui_loss": 0.760617196559906, "eval_custom_ui_loss_ce": 0.12496945261955261, "eval_custom_ui_loss_num": 0.1357574462890625, "eval_custom_ui_loss_xval": 0.6785888671875, "eval_custom_ui_runtime": 50.361, "eval_custom_ui_samples_per_second": 0.993, "eval_custom_ui_steps_per_second": 0.04, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16380399681752233, "loss": 0.8060125112533569, "loss_ce": 0.14121757447719574, "loss_iou": 0.0, "loss_num": 0.1328125, "loss_xval": 0.6640625, "num_input_tokens_seen": 115888940, "step": 1750 }, { "epoch": 0.16389759910141807, "grad_norm": 22.022716522216797, "learning_rate": 5e-05, "loss": 1.4951, "num_input_tokens_seen": 115954508, "step": 1751 }, { "epoch": 0.16389759910141807, "loss": 1.6610054969787598, "loss_ce": 0.002802323317155242, "loss_iou": 0.70703125, "loss_num": 0.04931640625, "loss_xval": 1.65625, "num_input_tokens_seen": 115954508, "step": 1751 }, { "epoch": 0.1639912013853138, "grad_norm": 26.0401554107666, "learning_rate": 5e-05, "loss": 1.3329, "num_input_tokens_seen": 116021228, "step": 1752 }, { "epoch": 0.1639912013853138, "loss": 1.2882084846496582, "loss_ce": 0.0010990931186825037, "loss_iou": 0.5390625, "loss_num": 0.041748046875, "loss_xval": 1.2890625, "num_input_tokens_seen": 116021228, "step": 1752 }, { "epoch": 0.16408480366920952, "grad_norm": 21.764360427856445, "learning_rate": 5e-05, "loss": 1.8411, "num_input_tokens_seen": 116087360, "step": 1753 }, { "epoch": 0.16408480366920952, "loss": 1.779345154762268, "loss_ce": 0.006884216796606779, "loss_iou": 0.7109375, "loss_num": 0.06982421875, "loss_xval": 1.7734375, "num_input_tokens_seen": 116087360, "step": 1753 }, { "epoch": 0.16417840595310526, "grad_norm": 14.626240730285645, "learning_rate": 5e-05, "loss": 1.2688, "num_input_tokens_seen": 116155684, "step": 1754 }, { "epoch": 0.16417840595310526, "loss": 1.3500797748565674, "loss_ce": 0.004864873364567757, "loss_iou": 0.578125, "loss_num": 0.038330078125, "loss_xval": 1.34375, "num_input_tokens_seen": 116155684, "step": 1754 }, { "epoch": 0.16427200823700097, "grad_norm": 55.89632797241211, "learning_rate": 5e-05, "loss": 1.2884, "num_input_tokens_seen": 116221772, "step": 1755 }, { "epoch": 0.16427200823700097, "loss": 1.161569356918335, "loss_ce": 0.006723121739923954, "loss_iou": 0.5, "loss_num": 0.0303955078125, "loss_xval": 1.15625, "num_input_tokens_seen": 116221772, "step": 1755 }, { "epoch": 0.1643656105208967, "grad_norm": 36.48343276977539, "learning_rate": 5e-05, "loss": 1.3916, "num_input_tokens_seen": 116288536, "step": 1756 }, { "epoch": 0.1643656105208967, "loss": 1.2644689083099365, "loss_ce": 0.004703253507614136, "loss_iou": 0.578125, "loss_num": 0.0198974609375, "loss_xval": 1.2578125, "num_input_tokens_seen": 116288536, "step": 1756 }, { "epoch": 0.16445921280479245, "grad_norm": 22.66897201538086, "learning_rate": 5e-05, "loss": 1.776, "num_input_tokens_seen": 116354792, "step": 1757 }, { "epoch": 0.16445921280479245, "loss": 1.6933791637420654, "loss_ce": 0.005299382843077183, "loss_iou": 0.6953125, "loss_num": 0.06005859375, "loss_xval": 1.6875, "num_input_tokens_seen": 116354792, "step": 1757 }, { "epoch": 0.16455281508868816, "grad_norm": 22.034452438354492, "learning_rate": 5e-05, "loss": 1.367, "num_input_tokens_seen": 116421524, "step": 1758 }, { "epoch": 0.16455281508868816, "loss": 1.366721272468567, "loss_ce": 0.005393100902438164, "loss_iou": 0.58203125, "loss_num": 0.03857421875, "loss_xval": 1.359375, "num_input_tokens_seen": 116421524, "step": 1758 }, { "epoch": 0.1646464173725839, "grad_norm": 42.0161247253418, "learning_rate": 5e-05, "loss": 1.348, "num_input_tokens_seen": 116487824, "step": 1759 }, { "epoch": 0.1646464173725839, "loss": 1.3555519580841064, "loss_ce": 0.007895732298493385, "loss_iou": 0.61328125, "loss_num": 0.02490234375, "loss_xval": 1.34375, "num_input_tokens_seen": 116487824, "step": 1759 }, { "epoch": 0.1647400196564796, "grad_norm": 26.5911865234375, "learning_rate": 5e-05, "loss": 1.5691, "num_input_tokens_seen": 116554052, "step": 1760 }, { "epoch": 0.1647400196564796, "loss": 1.5571060180664062, "loss_ce": 0.0048599690198898315, "loss_iou": 0.671875, "loss_num": 0.041015625, "loss_xval": 1.5546875, "num_input_tokens_seen": 116554052, "step": 1760 }, { "epoch": 0.16483362194037535, "grad_norm": 195.60792541503906, "learning_rate": 5e-05, "loss": 1.4447, "num_input_tokens_seen": 116619608, "step": 1761 }, { "epoch": 0.16483362194037535, "loss": 1.2059764862060547, "loss_ce": 0.0038279893342405558, "loss_iou": 0.51171875, "loss_num": 0.0361328125, "loss_xval": 1.203125, "num_input_tokens_seen": 116619608, "step": 1761 }, { "epoch": 0.16492722422427109, "grad_norm": 19.173919677734375, "learning_rate": 5e-05, "loss": 1.3819, "num_input_tokens_seen": 116685620, "step": 1762 }, { "epoch": 0.16492722422427109, "loss": 1.4086960554122925, "loss_ce": 0.0034227129071950912, "loss_iou": 0.578125, "loss_num": 0.050048828125, "loss_xval": 1.40625, "num_input_tokens_seen": 116685620, "step": 1762 }, { "epoch": 0.1650208265081668, "grad_norm": 27.1180477142334, "learning_rate": 5e-05, "loss": 1.2627, "num_input_tokens_seen": 116751244, "step": 1763 }, { "epoch": 0.1650208265081668, "loss": 1.0484132766723633, "loss_ce": 0.002881082706153393, "loss_iou": 0.458984375, "loss_num": 0.0255126953125, "loss_xval": 1.046875, "num_input_tokens_seen": 116751244, "step": 1763 }, { "epoch": 0.16511442879206253, "grad_norm": 30.75033950805664, "learning_rate": 5e-05, "loss": 1.3692, "num_input_tokens_seen": 116818284, "step": 1764 }, { "epoch": 0.16511442879206253, "loss": 1.188355803489685, "loss_ce": 0.005250256508588791, "loss_iou": 0.51953125, "loss_num": 0.0286865234375, "loss_xval": 1.1796875, "num_input_tokens_seen": 116818284, "step": 1764 }, { "epoch": 0.16520803107595824, "grad_norm": 24.517242431640625, "learning_rate": 5e-05, "loss": 1.6151, "num_input_tokens_seen": 116884392, "step": 1765 }, { "epoch": 0.16520803107595824, "loss": 1.4741705656051636, "loss_ce": 0.002490849932655692, "loss_iou": 0.625, "loss_num": 0.0439453125, "loss_xval": 1.46875, "num_input_tokens_seen": 116884392, "step": 1765 }, { "epoch": 0.16530163335985398, "grad_norm": 24.159650802612305, "learning_rate": 5e-05, "loss": 1.3617, "num_input_tokens_seen": 116950292, "step": 1766 }, { "epoch": 0.16530163335985398, "loss": 1.3831593990325928, "loss_ce": 0.006206326186656952, "loss_iou": 0.61328125, "loss_num": 0.02978515625, "loss_xval": 1.375, "num_input_tokens_seen": 116950292, "step": 1766 }, { "epoch": 0.1653952356437497, "grad_norm": 20.05497932434082, "learning_rate": 5e-05, "loss": 1.6595, "num_input_tokens_seen": 117018208, "step": 1767 }, { "epoch": 0.1653952356437497, "loss": 1.487356185913086, "loss_ce": 0.002981242025271058, "loss_iou": 0.640625, "loss_num": 0.040283203125, "loss_xval": 1.484375, "num_input_tokens_seen": 117018208, "step": 1767 }, { "epoch": 0.16548883792764543, "grad_norm": 22.533309936523438, "learning_rate": 5e-05, "loss": 1.4024, "num_input_tokens_seen": 117084768, "step": 1768 }, { "epoch": 0.16548883792764543, "loss": 1.3510992527008057, "loss_ce": 0.00734925689175725, "loss_iou": 0.62890625, "loss_num": 0.0174560546875, "loss_xval": 1.34375, "num_input_tokens_seen": 117084768, "step": 1768 }, { "epoch": 0.16558244021154117, "grad_norm": 18.84259605407715, "learning_rate": 5e-05, "loss": 1.6004, "num_input_tokens_seen": 117151304, "step": 1769 }, { "epoch": 0.16558244021154117, "loss": 1.465224266052246, "loss_ce": 0.004774982109665871, "loss_iou": 0.640625, "loss_num": 0.035400390625, "loss_xval": 1.4609375, "num_input_tokens_seen": 117151304, "step": 1769 }, { "epoch": 0.16567604249543688, "grad_norm": 16.174936294555664, "learning_rate": 5e-05, "loss": 1.4084, "num_input_tokens_seen": 117217136, "step": 1770 }, { "epoch": 0.16567604249543688, "loss": 1.5225234031677246, "loss_ce": 0.006898357067257166, "loss_iou": 0.640625, "loss_num": 0.046630859375, "loss_xval": 1.515625, "num_input_tokens_seen": 117217136, "step": 1770 }, { "epoch": 0.16576964477933262, "grad_norm": 22.07652473449707, "learning_rate": 5e-05, "loss": 1.3213, "num_input_tokens_seen": 117284092, "step": 1771 }, { "epoch": 0.16576964477933262, "loss": 1.464104413986206, "loss_ce": 0.008538098074495792, "loss_iou": 0.625, "loss_num": 0.041259765625, "loss_xval": 1.453125, "num_input_tokens_seen": 117284092, "step": 1771 }, { "epoch": 0.16586324706322833, "grad_norm": 33.298805236816406, "learning_rate": 5e-05, "loss": 1.3119, "num_input_tokens_seen": 117350076, "step": 1772 }, { "epoch": 0.16586324706322833, "loss": 1.1476900577545166, "loss_ce": 0.0020602052100002766, "loss_iou": 0.482421875, "loss_num": 0.035888671875, "loss_xval": 1.1484375, "num_input_tokens_seen": 117350076, "step": 1772 }, { "epoch": 0.16595684934712407, "grad_norm": 18.937501907348633, "learning_rate": 5e-05, "loss": 1.716, "num_input_tokens_seen": 117416696, "step": 1773 }, { "epoch": 0.16595684934712407, "loss": 1.636894941329956, "loss_ce": 0.0031060222536325455, "loss_iou": 0.71875, "loss_num": 0.038818359375, "loss_xval": 1.6328125, "num_input_tokens_seen": 117416696, "step": 1773 }, { "epoch": 0.1660504516310198, "grad_norm": 15.270478248596191, "learning_rate": 5e-05, "loss": 1.4745, "num_input_tokens_seen": 117483156, "step": 1774 }, { "epoch": 0.1660504516310198, "loss": 1.3174545764923096, "loss_ce": 0.0015367262531071901, "loss_iou": 0.5859375, "loss_num": 0.0284423828125, "loss_xval": 1.3125, "num_input_tokens_seen": 117483156, "step": 1774 }, { "epoch": 0.16614405391491552, "grad_norm": 26.782865524291992, "learning_rate": 5e-05, "loss": 1.3052, "num_input_tokens_seen": 117548936, "step": 1775 }, { "epoch": 0.16614405391491552, "loss": 1.3338900804519653, "loss_ce": 0.005276761949062347, "loss_iou": 0.56640625, "loss_num": 0.038818359375, "loss_xval": 1.328125, "num_input_tokens_seen": 117548936, "step": 1775 }, { "epoch": 0.16623765619881126, "grad_norm": 27.9973087310791, "learning_rate": 5e-05, "loss": 1.6991, "num_input_tokens_seen": 117614620, "step": 1776 }, { "epoch": 0.16623765619881126, "loss": 1.6971447467803955, "loss_ce": 0.004273728467524052, "loss_iou": 0.74609375, "loss_num": 0.040771484375, "loss_xval": 1.6953125, "num_input_tokens_seen": 117614620, "step": 1776 }, { "epoch": 0.16633125848270697, "grad_norm": 18.598751068115234, "learning_rate": 5e-05, "loss": 1.3747, "num_input_tokens_seen": 117681268, "step": 1777 }, { "epoch": 0.16633125848270697, "loss": 1.3994638919830322, "loss_ce": 0.003956090193241835, "loss_iou": 0.62109375, "loss_num": 0.031005859375, "loss_xval": 1.3984375, "num_input_tokens_seen": 117681268, "step": 1777 }, { "epoch": 0.1664248607666027, "grad_norm": 20.475177764892578, "learning_rate": 5e-05, "loss": 1.4817, "num_input_tokens_seen": 117747892, "step": 1778 }, { "epoch": 0.1664248607666027, "loss": 1.3213424682617188, "loss_ce": 0.002494772197678685, "loss_iou": 0.5234375, "loss_num": 0.05419921875, "loss_xval": 1.3203125, "num_input_tokens_seen": 117747892, "step": 1778 }, { "epoch": 0.16651846305049844, "grad_norm": 41.40374755859375, "learning_rate": 5e-05, "loss": 1.6044, "num_input_tokens_seen": 117813716, "step": 1779 }, { "epoch": 0.16651846305049844, "loss": 1.6195666790008545, "loss_ce": 0.006285447161644697, "loss_iou": 0.6796875, "loss_num": 0.050537109375, "loss_xval": 1.609375, "num_input_tokens_seen": 117813716, "step": 1779 }, { "epoch": 0.16661206533439415, "grad_norm": 17.386091232299805, "learning_rate": 5e-05, "loss": 1.7843, "num_input_tokens_seen": 117880480, "step": 1780 }, { "epoch": 0.16661206533439415, "loss": 2.0016939640045166, "loss_ce": 0.008529771119356155, "loss_iou": 0.8515625, "loss_num": 0.057373046875, "loss_xval": 1.9921875, "num_input_tokens_seen": 117880480, "step": 1780 }, { "epoch": 0.1667056676182899, "grad_norm": 95.06216430664062, "learning_rate": 5e-05, "loss": 1.634, "num_input_tokens_seen": 117946152, "step": 1781 }, { "epoch": 0.1667056676182899, "loss": 1.7567811012268066, "loss_ce": 0.009710765443742275, "loss_iou": 0.71484375, "loss_num": 0.0625, "loss_xval": 1.75, "num_input_tokens_seen": 117946152, "step": 1781 }, { "epoch": 0.1667992699021856, "grad_norm": 23.291643142700195, "learning_rate": 5e-05, "loss": 1.1976, "num_input_tokens_seen": 118012344, "step": 1782 }, { "epoch": 0.1667992699021856, "loss": 1.3272414207458496, "loss_ce": 0.002046172972768545, "loss_iou": 0.56640625, "loss_num": 0.038818359375, "loss_xval": 1.328125, "num_input_tokens_seen": 118012344, "step": 1782 }, { "epoch": 0.16689287218608134, "grad_norm": 22.385786056518555, "learning_rate": 5e-05, "loss": 1.7416, "num_input_tokens_seen": 118078248, "step": 1783 }, { "epoch": 0.16689287218608134, "loss": 1.5646185874938965, "loss_ce": 0.001141960616223514, "loss_iou": 0.6953125, "loss_num": 0.03515625, "loss_xval": 1.5625, "num_input_tokens_seen": 118078248, "step": 1783 }, { "epoch": 0.16698647446997708, "grad_norm": 17.838022232055664, "learning_rate": 5e-05, "loss": 1.2839, "num_input_tokens_seen": 118144696, "step": 1784 }, { "epoch": 0.16698647446997708, "loss": 1.2266840934753418, "loss_ce": 0.004577185958623886, "loss_iou": 0.5, "loss_num": 0.0439453125, "loss_xval": 1.21875, "num_input_tokens_seen": 118144696, "step": 1784 }, { "epoch": 0.1670800767538728, "grad_norm": 24.808393478393555, "learning_rate": 5e-05, "loss": 1.6966, "num_input_tokens_seen": 118211268, "step": 1785 }, { "epoch": 0.1670800767538728, "loss": 1.9299710988998413, "loss_ce": 0.0012601525522768497, "loss_iou": 0.8203125, "loss_num": 0.057373046875, "loss_xval": 1.9296875, "num_input_tokens_seen": 118211268, "step": 1785 }, { "epoch": 0.16717367903776853, "grad_norm": 42.57669448852539, "learning_rate": 5e-05, "loss": 1.6544, "num_input_tokens_seen": 118276872, "step": 1786 }, { "epoch": 0.16717367903776853, "loss": 1.6943639516830444, "loss_ce": 0.004910839721560478, "loss_iou": 0.75, "loss_num": 0.037353515625, "loss_xval": 1.6875, "num_input_tokens_seen": 118276872, "step": 1786 }, { "epoch": 0.16726728132166424, "grad_norm": 21.89909553527832, "learning_rate": 5e-05, "loss": 1.8305, "num_input_tokens_seen": 118343504, "step": 1787 }, { "epoch": 0.16726728132166424, "loss": 1.8874485492706299, "loss_ce": 0.011472001671791077, "loss_iou": 0.82421875, "loss_num": 0.04541015625, "loss_xval": 1.875, "num_input_tokens_seen": 118343504, "step": 1787 }, { "epoch": 0.16736088360555998, "grad_norm": 18.595476150512695, "learning_rate": 5e-05, "loss": 1.6608, "num_input_tokens_seen": 118410004, "step": 1788 }, { "epoch": 0.16736088360555998, "loss": 1.6199846267700195, "loss_ce": 0.01012139581143856, "loss_iou": 0.66015625, "loss_num": 0.057861328125, "loss_xval": 1.609375, "num_input_tokens_seen": 118410004, "step": 1788 }, { "epoch": 0.1674544858894557, "grad_norm": 37.87059020996094, "learning_rate": 5e-05, "loss": 1.3597, "num_input_tokens_seen": 118476512, "step": 1789 }, { "epoch": 0.1674544858894557, "loss": 1.4484469890594482, "loss_ce": 0.005087633151561022, "loss_iou": 0.58203125, "loss_num": 0.056396484375, "loss_xval": 1.4453125, "num_input_tokens_seen": 118476512, "step": 1789 }, { "epoch": 0.16754808817335143, "grad_norm": 17.019468307495117, "learning_rate": 5e-05, "loss": 1.6132, "num_input_tokens_seen": 118542992, "step": 1790 }, { "epoch": 0.16754808817335143, "loss": 1.6254267692565918, "loss_ce": 0.0033564118202775717, "loss_iou": 0.65234375, "loss_num": 0.06298828125, "loss_xval": 1.625, "num_input_tokens_seen": 118542992, "step": 1790 }, { "epoch": 0.16764169045724717, "grad_norm": 15.186246871948242, "learning_rate": 5e-05, "loss": 1.3601, "num_input_tokens_seen": 118610108, "step": 1791 }, { "epoch": 0.16764169045724717, "loss": 1.2531754970550537, "loss_ce": 0.005128608085215092, "loss_iou": 0.5390625, "loss_num": 0.033935546875, "loss_xval": 1.25, "num_input_tokens_seen": 118610108, "step": 1791 }, { "epoch": 0.16773529274114288, "grad_norm": 21.040952682495117, "learning_rate": 5e-05, "loss": 1.5555, "num_input_tokens_seen": 118676632, "step": 1792 }, { "epoch": 0.16773529274114288, "loss": 1.5859944820404053, "loss_ce": 0.00396324135363102, "loss_iou": 0.640625, "loss_num": 0.060546875, "loss_xval": 1.578125, "num_input_tokens_seen": 118676632, "step": 1792 }, { "epoch": 0.16782889502503862, "grad_norm": 18.22553062438965, "learning_rate": 5e-05, "loss": 1.3909, "num_input_tokens_seen": 118743228, "step": 1793 }, { "epoch": 0.16782889502503862, "loss": 1.3683271408081055, "loss_ce": 0.0035810484550893307, "loss_iou": 0.578125, "loss_num": 0.041748046875, "loss_xval": 1.3671875, "num_input_tokens_seen": 118743228, "step": 1793 }, { "epoch": 0.16792249730893433, "grad_norm": 19.60796356201172, "learning_rate": 5e-05, "loss": 1.3766, "num_input_tokens_seen": 118809676, "step": 1794 }, { "epoch": 0.16792249730893433, "loss": 1.3190234899520874, "loss_ce": 0.004570364952087402, "loss_iou": 0.578125, "loss_num": 0.031982421875, "loss_xval": 1.3125, "num_input_tokens_seen": 118809676, "step": 1794 }, { "epoch": 0.16801609959283006, "grad_norm": 23.017669677734375, "learning_rate": 5e-05, "loss": 1.2183, "num_input_tokens_seen": 118875244, "step": 1795 }, { "epoch": 0.16801609959283006, "loss": 1.1294887065887451, "loss_ce": 0.012301236391067505, "loss_iou": 0.470703125, "loss_num": 0.034912109375, "loss_xval": 1.1171875, "num_input_tokens_seen": 118875244, "step": 1795 }, { "epoch": 0.1681097018767258, "grad_norm": 24.088245391845703, "learning_rate": 5e-05, "loss": 1.5871, "num_input_tokens_seen": 118940184, "step": 1796 }, { "epoch": 0.1681097018767258, "loss": 1.6394013166427612, "loss_ce": 0.004635668359696865, "loss_iou": 0.6953125, "loss_num": 0.048583984375, "loss_xval": 1.6328125, "num_input_tokens_seen": 118940184, "step": 1796 }, { "epoch": 0.16820330416062151, "grad_norm": 26.892854690551758, "learning_rate": 5e-05, "loss": 1.4716, "num_input_tokens_seen": 119006516, "step": 1797 }, { "epoch": 0.16820330416062151, "loss": 1.5686166286468506, "loss_ce": 0.006116572301834822, "loss_iou": 0.65234375, "loss_num": 0.051513671875, "loss_xval": 1.5625, "num_input_tokens_seen": 119006516, "step": 1797 }, { "epoch": 0.16829690644451725, "grad_norm": 21.019927978515625, "learning_rate": 5e-05, "loss": 1.5484, "num_input_tokens_seen": 119073728, "step": 1798 }, { "epoch": 0.16829690644451725, "loss": 1.622941255569458, "loss_ce": 0.0028240878600627184, "loss_iou": 0.6875, "loss_num": 0.048583984375, "loss_xval": 1.6171875, "num_input_tokens_seen": 119073728, "step": 1798 }, { "epoch": 0.16839050872841296, "grad_norm": 22.851734161376953, "learning_rate": 5e-05, "loss": 1.3609, "num_input_tokens_seen": 119140208, "step": 1799 }, { "epoch": 0.16839050872841296, "loss": 1.2709921598434448, "loss_ce": 0.003902365453541279, "loss_iou": 0.53515625, "loss_num": 0.039794921875, "loss_xval": 1.265625, "num_input_tokens_seen": 119140208, "step": 1799 }, { "epoch": 0.1684841110123087, "grad_norm": 46.07211685180664, "learning_rate": 5e-05, "loss": 1.5127, "num_input_tokens_seen": 119207072, "step": 1800 }, { "epoch": 0.1684841110123087, "loss": 1.3361321687698364, "loss_ce": 0.00873957946896553, "loss_iou": 0.55859375, "loss_num": 0.0419921875, "loss_xval": 1.328125, "num_input_tokens_seen": 119207072, "step": 1800 }, { "epoch": 0.16857771329620444, "grad_norm": 12.440570831298828, "learning_rate": 5e-05, "loss": 1.1381, "num_input_tokens_seen": 119273512, "step": 1801 }, { "epoch": 0.16857771329620444, "loss": 1.0420058965682983, "loss_ce": 0.0026992480270564556, "loss_iou": 0.451171875, "loss_num": 0.0274658203125, "loss_xval": 1.0390625, "num_input_tokens_seen": 119273512, "step": 1801 }, { "epoch": 0.16867131558010015, "grad_norm": 18.88530731201172, "learning_rate": 5e-05, "loss": 1.4581, "num_input_tokens_seen": 119339200, "step": 1802 }, { "epoch": 0.16867131558010015, "loss": 1.5958948135375977, "loss_ce": 0.003121304791420698, "loss_iou": 0.6796875, "loss_num": 0.046142578125, "loss_xval": 1.59375, "num_input_tokens_seen": 119339200, "step": 1802 }, { "epoch": 0.1687649178639959, "grad_norm": 25.671167373657227, "learning_rate": 5e-05, "loss": 1.285, "num_input_tokens_seen": 119405328, "step": 1803 }, { "epoch": 0.1687649178639959, "loss": 1.2431070804595947, "loss_ce": 0.004581674467772245, "loss_iou": 0.5234375, "loss_num": 0.038330078125, "loss_xval": 1.2421875, "num_input_tokens_seen": 119405328, "step": 1803 }, { "epoch": 0.1688585201478916, "grad_norm": 31.151432037353516, "learning_rate": 5e-05, "loss": 1.2524, "num_input_tokens_seen": 119471152, "step": 1804 }, { "epoch": 0.1688585201478916, "loss": 1.3681247234344482, "loss_ce": 0.006308273877948523, "loss_iou": 0.53125, "loss_num": 0.059326171875, "loss_xval": 1.359375, "num_input_tokens_seen": 119471152, "step": 1804 }, { "epoch": 0.16895212243178734, "grad_norm": 29.523393630981445, "learning_rate": 5e-05, "loss": 1.3213, "num_input_tokens_seen": 119536220, "step": 1805 }, { "epoch": 0.16895212243178734, "loss": 1.1777355670928955, "loss_ce": 0.004639958962798119, "loss_iou": 0.5078125, "loss_num": 0.031982421875, "loss_xval": 1.171875, "num_input_tokens_seen": 119536220, "step": 1805 }, { "epoch": 0.16904572471568305, "grad_norm": 22.126577377319336, "learning_rate": 5e-05, "loss": 1.555, "num_input_tokens_seen": 119602048, "step": 1806 }, { "epoch": 0.16904572471568305, "loss": 1.6079837083816528, "loss_ce": 0.006909474730491638, "loss_iou": 0.69140625, "loss_num": 0.04296875, "loss_xval": 1.6015625, "num_input_tokens_seen": 119602048, "step": 1806 }, { "epoch": 0.1691393269995788, "grad_norm": 9.718029022216797, "learning_rate": 5e-05, "loss": 1.0901, "num_input_tokens_seen": 119667488, "step": 1807 }, { "epoch": 0.1691393269995788, "loss": 1.0904566049575806, "loss_ce": 0.00891362875699997, "loss_iou": 0.455078125, "loss_num": 0.034423828125, "loss_xval": 1.078125, "num_input_tokens_seen": 119667488, "step": 1807 }, { "epoch": 0.16923292928347453, "grad_norm": 13.937957763671875, "learning_rate": 5e-05, "loss": 1.1378, "num_input_tokens_seen": 119733132, "step": 1808 }, { "epoch": 0.16923292928347453, "loss": 1.1642824411392212, "loss_ce": 0.010046683251857758, "loss_iou": 0.4609375, "loss_num": 0.046142578125, "loss_xval": 1.15625, "num_input_tokens_seen": 119733132, "step": 1808 }, { "epoch": 0.16932653156737024, "grad_norm": 19.477317810058594, "learning_rate": 5e-05, "loss": 1.3858, "num_input_tokens_seen": 119799404, "step": 1809 }, { "epoch": 0.16932653156737024, "loss": 1.250647783279419, "loss_ce": 0.0072395941242575645, "loss_iou": 0.53515625, "loss_num": 0.035400390625, "loss_xval": 1.2421875, "num_input_tokens_seen": 119799404, "step": 1809 }, { "epoch": 0.16942013385126597, "grad_norm": 27.120983123779297, "learning_rate": 5e-05, "loss": 1.375, "num_input_tokens_seen": 119866148, "step": 1810 }, { "epoch": 0.16942013385126597, "loss": 1.3680001497268677, "loss_ce": 0.0025826841592788696, "loss_iou": 0.56640625, "loss_num": 0.046142578125, "loss_xval": 1.3671875, "num_input_tokens_seen": 119866148, "step": 1810 }, { "epoch": 0.16951373613516169, "grad_norm": 19.939014434814453, "learning_rate": 5e-05, "loss": 1.3369, "num_input_tokens_seen": 119932400, "step": 1811 }, { "epoch": 0.16951373613516169, "loss": 1.4508637189865112, "loss_ce": 0.006527773104608059, "loss_iou": 0.64453125, "loss_num": 0.03173828125, "loss_xval": 1.4453125, "num_input_tokens_seen": 119932400, "step": 1811 }, { "epoch": 0.16960733841905742, "grad_norm": 47.441558837890625, "learning_rate": 5e-05, "loss": 1.3947, "num_input_tokens_seen": 119998880, "step": 1812 }, { "epoch": 0.16960733841905742, "loss": 1.3162851333618164, "loss_ce": 0.0028085997328162193, "loss_iou": 0.5625, "loss_num": 0.037841796875, "loss_xval": 1.3125, "num_input_tokens_seen": 119998880, "step": 1812 }, { "epoch": 0.16970094070295316, "grad_norm": 17.63170051574707, "learning_rate": 5e-05, "loss": 1.4194, "num_input_tokens_seen": 120064988, "step": 1813 }, { "epoch": 0.16970094070295316, "loss": 1.4789669513702393, "loss_ce": 0.005334258079528809, "loss_iou": 0.59375, "loss_num": 0.056640625, "loss_xval": 1.4765625, "num_input_tokens_seen": 120064988, "step": 1813 }, { "epoch": 0.16979454298684887, "grad_norm": 39.17177963256836, "learning_rate": 5e-05, "loss": 1.3649, "num_input_tokens_seen": 120131336, "step": 1814 }, { "epoch": 0.16979454298684887, "loss": 1.5063436031341553, "loss_ce": 0.003657993394881487, "loss_iou": 0.65234375, "loss_num": 0.0400390625, "loss_xval": 1.5, "num_input_tokens_seen": 120131336, "step": 1814 }, { "epoch": 0.1698881452707446, "grad_norm": 17.9035587310791, "learning_rate": 5e-05, "loss": 1.6655, "num_input_tokens_seen": 120197392, "step": 1815 }, { "epoch": 0.1698881452707446, "loss": 1.4572584629058838, "loss_ce": 0.0041333818808197975, "loss_iou": 0.62890625, "loss_num": 0.039306640625, "loss_xval": 1.453125, "num_input_tokens_seen": 120197392, "step": 1815 }, { "epoch": 0.16998174755464032, "grad_norm": 20.34004020690918, "learning_rate": 5e-05, "loss": 1.2102, "num_input_tokens_seen": 120263592, "step": 1816 }, { "epoch": 0.16998174755464032, "loss": 0.9859544038772583, "loss_ce": 0.0032884140964597464, "loss_iou": 0.43359375, "loss_num": 0.0228271484375, "loss_xval": 0.984375, "num_input_tokens_seen": 120263592, "step": 1816 }, { "epoch": 0.17007534983853606, "grad_norm": 41.65287399291992, "learning_rate": 5e-05, "loss": 1.5414, "num_input_tokens_seen": 120331300, "step": 1817 }, { "epoch": 0.17007534983853606, "loss": 1.572178840637207, "loss_ce": 0.003819531761109829, "loss_iou": 0.66796875, "loss_num": 0.047607421875, "loss_xval": 1.5703125, "num_input_tokens_seen": 120331300, "step": 1817 }, { "epoch": 0.1701689521224318, "grad_norm": 19.289501190185547, "learning_rate": 5e-05, "loss": 1.6966, "num_input_tokens_seen": 120396736, "step": 1818 }, { "epoch": 0.1701689521224318, "loss": 1.491598129272461, "loss_ce": 0.005269893445074558, "loss_iou": 0.6953125, "loss_num": 0.0189208984375, "loss_xval": 1.484375, "num_input_tokens_seen": 120396736, "step": 1818 }, { "epoch": 0.1702625544063275, "grad_norm": 25.355976104736328, "learning_rate": 5e-05, "loss": 1.6667, "num_input_tokens_seen": 120462384, "step": 1819 }, { "epoch": 0.1702625544063275, "loss": 1.6338341236114502, "loss_ce": 0.009322349913418293, "loss_iou": 0.62890625, "loss_num": 0.072265625, "loss_xval": 1.625, "num_input_tokens_seen": 120462384, "step": 1819 }, { "epoch": 0.17035615669022325, "grad_norm": 26.46224594116211, "learning_rate": 5e-05, "loss": 1.338, "num_input_tokens_seen": 120527944, "step": 1820 }, { "epoch": 0.17035615669022325, "loss": 1.0851386785507202, "loss_ce": 0.001886697020381689, "loss_iou": 0.47265625, "loss_num": 0.027587890625, "loss_xval": 1.0859375, "num_input_tokens_seen": 120527944, "step": 1820 }, { "epoch": 0.17044975897411896, "grad_norm": 21.288166046142578, "learning_rate": 5e-05, "loss": 1.5281, "num_input_tokens_seen": 120594564, "step": 1821 }, { "epoch": 0.17044975897411896, "loss": 1.4423449039459229, "loss_ce": 0.007042068988084793, "loss_iou": 0.6171875, "loss_num": 0.041015625, "loss_xval": 1.4375, "num_input_tokens_seen": 120594564, "step": 1821 }, { "epoch": 0.1705433612580147, "grad_norm": 27.84840965270996, "learning_rate": 5e-05, "loss": 1.5046, "num_input_tokens_seen": 120660840, "step": 1822 }, { "epoch": 0.1705433612580147, "loss": 1.4361482858657837, "loss_ce": 0.004995943978428841, "loss_iou": 0.59375, "loss_num": 0.048583984375, "loss_xval": 1.4296875, "num_input_tokens_seen": 120660840, "step": 1822 }, { "epoch": 0.17063696354191044, "grad_norm": 25.643396377563477, "learning_rate": 5e-05, "loss": 1.4685, "num_input_tokens_seen": 120726708, "step": 1823 }, { "epoch": 0.17063696354191044, "loss": 1.4662539958953857, "loss_ce": 0.005804789252579212, "loss_iou": 0.625, "loss_num": 0.042236328125, "loss_xval": 1.4609375, "num_input_tokens_seen": 120726708, "step": 1823 }, { "epoch": 0.17073056582580615, "grad_norm": 17.447202682495117, "learning_rate": 5e-05, "loss": 1.3424, "num_input_tokens_seen": 120792628, "step": 1824 }, { "epoch": 0.17073056582580615, "loss": 1.3110202550888062, "loss_ce": 0.005356179550290108, "loss_iou": 0.5234375, "loss_num": 0.052001953125, "loss_xval": 1.3046875, "num_input_tokens_seen": 120792628, "step": 1824 }, { "epoch": 0.17082416810970188, "grad_norm": 13.089118003845215, "learning_rate": 5e-05, "loss": 1.3288, "num_input_tokens_seen": 120857956, "step": 1825 }, { "epoch": 0.17082416810970188, "loss": 1.1957464218139648, "loss_ce": 0.002386966720223427, "loss_iou": 0.462890625, "loss_num": 0.053466796875, "loss_xval": 1.1953125, "num_input_tokens_seen": 120857956, "step": 1825 }, { "epoch": 0.1709177703935976, "grad_norm": 24.77213478088379, "learning_rate": 5e-05, "loss": 1.3773, "num_input_tokens_seen": 120924276, "step": 1826 }, { "epoch": 0.1709177703935976, "loss": 1.726757287979126, "loss_ce": 0.0031245131976902485, "loss_iou": 0.66796875, "loss_num": 0.0771484375, "loss_xval": 1.7265625, "num_input_tokens_seen": 120924276, "step": 1826 }, { "epoch": 0.17101137267749333, "grad_norm": 26.462329864501953, "learning_rate": 5e-05, "loss": 1.6389, "num_input_tokens_seen": 120991344, "step": 1827 }, { "epoch": 0.17101137267749333, "loss": 1.6456923484802246, "loss_ce": 0.002137661213055253, "loss_iou": 0.74609375, "loss_num": 0.031005859375, "loss_xval": 1.640625, "num_input_tokens_seen": 120991344, "step": 1827 }, { "epoch": 0.17110497496138904, "grad_norm": 21.92740821838379, "learning_rate": 5e-05, "loss": 1.5548, "num_input_tokens_seen": 121058888, "step": 1828 }, { "epoch": 0.17110497496138904, "loss": 1.5315115451812744, "loss_ce": 0.0051443311385810375, "loss_iou": 0.6875, "loss_num": 0.0299072265625, "loss_xval": 1.5234375, "num_input_tokens_seen": 121058888, "step": 1828 }, { "epoch": 0.17119857724528478, "grad_norm": 20.751903533935547, "learning_rate": 5e-05, "loss": 1.3661, "num_input_tokens_seen": 121124576, "step": 1829 }, { "epoch": 0.17119857724528478, "loss": 1.343775749206543, "loss_ce": 0.008326517418026924, "loss_iou": 0.51953125, "loss_num": 0.059326171875, "loss_xval": 1.3359375, "num_input_tokens_seen": 121124576, "step": 1829 }, { "epoch": 0.17129217952918052, "grad_norm": 79.73170471191406, "learning_rate": 5e-05, "loss": 1.6314, "num_input_tokens_seen": 121190060, "step": 1830 }, { "epoch": 0.17129217952918052, "loss": 1.396977186203003, "loss_ce": 0.005375603213906288, "loss_iou": 0.6328125, "loss_num": 0.0263671875, "loss_xval": 1.390625, "num_input_tokens_seen": 121190060, "step": 1830 }, { "epoch": 0.17138578181307623, "grad_norm": 14.038053512573242, "learning_rate": 5e-05, "loss": 1.2064, "num_input_tokens_seen": 121257456, "step": 1831 }, { "epoch": 0.17138578181307623, "loss": 1.0631022453308105, "loss_ce": 0.003532049711793661, "loss_iou": 0.44921875, "loss_num": 0.0322265625, "loss_xval": 1.0625, "num_input_tokens_seen": 121257456, "step": 1831 }, { "epoch": 0.17147938409697197, "grad_norm": 18.470840454101562, "learning_rate": 5e-05, "loss": 1.4822, "num_input_tokens_seen": 121323072, "step": 1832 }, { "epoch": 0.17147938409697197, "loss": 1.5085363388061523, "loss_ce": 0.003653568448498845, "loss_iou": 0.65625, "loss_num": 0.039306640625, "loss_xval": 1.5078125, "num_input_tokens_seen": 121323072, "step": 1832 }, { "epoch": 0.17157298638086768, "grad_norm": 39.324737548828125, "learning_rate": 5e-05, "loss": 1.5217, "num_input_tokens_seen": 121389772, "step": 1833 }, { "epoch": 0.17157298638086768, "loss": 1.4324517250061035, "loss_ce": 0.004717256408184767, "loss_iou": 0.6328125, "loss_num": 0.03271484375, "loss_xval": 1.4296875, "num_input_tokens_seen": 121389772, "step": 1833 }, { "epoch": 0.17166658866476342, "grad_norm": 16.263824462890625, "learning_rate": 5e-05, "loss": 1.7437, "num_input_tokens_seen": 121455420, "step": 1834 }, { "epoch": 0.17166658866476342, "loss": 1.6695587635040283, "loss_ce": 0.0025665597058832645, "loss_iou": 0.72265625, "loss_num": 0.0439453125, "loss_xval": 1.6640625, "num_input_tokens_seen": 121455420, "step": 1834 }, { "epoch": 0.17176019094865916, "grad_norm": 20.818655014038086, "learning_rate": 5e-05, "loss": 1.5081, "num_input_tokens_seen": 121521596, "step": 1835 }, { "epoch": 0.17176019094865916, "loss": 1.6034801006317139, "loss_ce": 0.006312092300504446, "loss_iou": 0.671875, "loss_num": 0.05029296875, "loss_xval": 1.59375, "num_input_tokens_seen": 121521596, "step": 1835 }, { "epoch": 0.17185379323255487, "grad_norm": 24.824081420898438, "learning_rate": 5e-05, "loss": 1.6034, "num_input_tokens_seen": 121584892, "step": 1836 }, { "epoch": 0.17185379323255487, "loss": 1.5662217140197754, "loss_ce": 0.005674734711647034, "loss_iou": 0.65625, "loss_num": 0.0498046875, "loss_xval": 1.5625, "num_input_tokens_seen": 121584892, "step": 1836 }, { "epoch": 0.1719473955164506, "grad_norm": 25.136316299438477, "learning_rate": 5e-05, "loss": 1.6328, "num_input_tokens_seen": 121652308, "step": 1837 }, { "epoch": 0.1719473955164506, "loss": 1.5209910869598389, "loss_ce": 0.005366027355194092, "loss_iou": 0.66015625, "loss_num": 0.039306640625, "loss_xval": 1.515625, "num_input_tokens_seen": 121652308, "step": 1837 }, { "epoch": 0.17204099780034632, "grad_norm": 32.42887496948242, "learning_rate": 5e-05, "loss": 1.2909, "num_input_tokens_seen": 121718660, "step": 1838 }, { "epoch": 0.17204099780034632, "loss": 1.1097770929336548, "loss_ce": 0.004705031868070364, "loss_iou": 0.4765625, "loss_num": 0.0302734375, "loss_xval": 1.1015625, "num_input_tokens_seen": 121718660, "step": 1838 }, { "epoch": 0.17213460008424206, "grad_norm": 17.299095153808594, "learning_rate": 5e-05, "loss": 1.8605, "num_input_tokens_seen": 121784776, "step": 1839 }, { "epoch": 0.17213460008424206, "loss": 1.9374628067016602, "loss_ce": 0.006798781454563141, "loss_iou": 0.79296875, "loss_num": 0.06884765625, "loss_xval": 1.9296875, "num_input_tokens_seen": 121784776, "step": 1839 }, { "epoch": 0.1722282023681378, "grad_norm": 21.14668083190918, "learning_rate": 5e-05, "loss": 1.2216, "num_input_tokens_seen": 121850540, "step": 1840 }, { "epoch": 0.1722282023681378, "loss": 1.1206915378570557, "loss_ce": 0.0027715619653463364, "loss_iou": 0.5078125, "loss_num": 0.019775390625, "loss_xval": 1.1171875, "num_input_tokens_seen": 121850540, "step": 1840 }, { "epoch": 0.1723218046520335, "grad_norm": 34.61922836303711, "learning_rate": 5e-05, "loss": 1.5579, "num_input_tokens_seen": 121916300, "step": 1841 }, { "epoch": 0.1723218046520335, "loss": 1.5194497108459473, "loss_ce": 0.002359856851398945, "loss_iou": 0.62890625, "loss_num": 0.052734375, "loss_xval": 1.515625, "num_input_tokens_seen": 121916300, "step": 1841 }, { "epoch": 0.17241540693592924, "grad_norm": 20.034870147705078, "learning_rate": 5e-05, "loss": 1.7421, "num_input_tokens_seen": 121982672, "step": 1842 }, { "epoch": 0.17241540693592924, "loss": 1.7515795230865479, "loss_ce": 0.005974119529128075, "loss_iou": 0.7109375, "loss_num": 0.064453125, "loss_xval": 1.7421875, "num_input_tokens_seen": 121982672, "step": 1842 }, { "epoch": 0.17250900921982495, "grad_norm": 20.295759201049805, "learning_rate": 5e-05, "loss": 1.3312, "num_input_tokens_seen": 122049272, "step": 1843 }, { "epoch": 0.17250900921982495, "loss": 1.3228318691253662, "loss_ce": 0.005449024029076099, "loss_iou": 0.57421875, "loss_num": 0.03369140625, "loss_xval": 1.3203125, "num_input_tokens_seen": 122049272, "step": 1843 }, { "epoch": 0.1726026115037207, "grad_norm": 30.42950439453125, "learning_rate": 5e-05, "loss": 1.4602, "num_input_tokens_seen": 122114820, "step": 1844 }, { "epoch": 0.1726026115037207, "loss": 1.266453742980957, "loss_ce": 0.005223327316343784, "loss_iou": 0.52734375, "loss_num": 0.041259765625, "loss_xval": 1.2578125, "num_input_tokens_seen": 122114820, "step": 1844 }, { "epoch": 0.17269621378761643, "grad_norm": 20.07485580444336, "learning_rate": 5e-05, "loss": 1.4999, "num_input_tokens_seen": 122180588, "step": 1845 }, { "epoch": 0.17269621378761643, "loss": 1.6367614269256592, "loss_ce": 0.002972241025418043, "loss_iou": 0.69140625, "loss_num": 0.05078125, "loss_xval": 1.6328125, "num_input_tokens_seen": 122180588, "step": 1845 }, { "epoch": 0.17278981607151214, "grad_norm": 26.721233367919922, "learning_rate": 5e-05, "loss": 1.232, "num_input_tokens_seen": 122247328, "step": 1846 }, { "epoch": 0.17278981607151214, "loss": 1.1850439310073853, "loss_ce": 0.0014501872938126326, "loss_iou": 0.5390625, "loss_num": 0.021484375, "loss_xval": 1.1875, "num_input_tokens_seen": 122247328, "step": 1846 }, { "epoch": 0.17288341835540788, "grad_norm": 21.758010864257812, "learning_rate": 5e-05, "loss": 1.5634, "num_input_tokens_seen": 122314604, "step": 1847 }, { "epoch": 0.17288341835540788, "loss": 1.5600001811981201, "loss_ce": 0.007265835534781218, "loss_iou": 0.6328125, "loss_num": 0.05712890625, "loss_xval": 1.5546875, "num_input_tokens_seen": 122314604, "step": 1847 }, { "epoch": 0.1729770206393036, "grad_norm": 29.43509864807129, "learning_rate": 5e-05, "loss": 1.6338, "num_input_tokens_seen": 122381048, "step": 1848 }, { "epoch": 0.1729770206393036, "loss": 1.8224163055419922, "loss_ce": 0.006010045763105154, "loss_iou": 0.7578125, "loss_num": 0.06005859375, "loss_xval": 1.8125, "num_input_tokens_seen": 122381048, "step": 1848 }, { "epoch": 0.17307062292319933, "grad_norm": 54.261634826660156, "learning_rate": 5e-05, "loss": 1.4317, "num_input_tokens_seen": 122448296, "step": 1849 }, { "epoch": 0.17307062292319933, "loss": 1.2774088382720947, "loss_ce": 0.008365956135094166, "loss_iou": 0.58203125, "loss_num": 0.0213623046875, "loss_xval": 1.265625, "num_input_tokens_seen": 122448296, "step": 1849 }, { "epoch": 0.17316422520709504, "grad_norm": 29.9807071685791, "learning_rate": 5e-05, "loss": 1.4884, "num_input_tokens_seen": 122514048, "step": 1850 }, { "epoch": 0.17316422520709504, "loss": 1.5707565546035767, "loss_ce": 0.006791671738028526, "loss_iou": 0.6328125, "loss_num": 0.060302734375, "loss_xval": 1.5625, "num_input_tokens_seen": 122514048, "step": 1850 }, { "epoch": 0.17325782749099078, "grad_norm": 27.879579544067383, "learning_rate": 5e-05, "loss": 1.7332, "num_input_tokens_seen": 122579968, "step": 1851 }, { "epoch": 0.17325782749099078, "loss": 1.716784954071045, "loss_ce": 0.006823934614658356, "loss_iou": 0.7421875, "loss_num": 0.045654296875, "loss_xval": 1.7109375, "num_input_tokens_seen": 122579968, "step": 1851 }, { "epoch": 0.17335142977488652, "grad_norm": 33.21861267089844, "learning_rate": 5e-05, "loss": 1.5843, "num_input_tokens_seen": 122646080, "step": 1852 }, { "epoch": 0.17335142977488652, "loss": 1.7422574758529663, "loss_ce": 0.0049528456293046474, "loss_iou": 0.7265625, "loss_num": 0.056884765625, "loss_xval": 1.734375, "num_input_tokens_seen": 122646080, "step": 1852 }, { "epoch": 0.17344503205878223, "grad_norm": 27.33934211730957, "learning_rate": 5e-05, "loss": 1.56, "num_input_tokens_seen": 122712512, "step": 1853 }, { "epoch": 0.17344503205878223, "loss": 1.5886988639831543, "loss_ce": 0.004714410752058029, "loss_iou": 0.6953125, "loss_num": 0.039306640625, "loss_xval": 1.5859375, "num_input_tokens_seen": 122712512, "step": 1853 }, { "epoch": 0.17353863434267797, "grad_norm": 24.189306259155273, "learning_rate": 5e-05, "loss": 1.3923, "num_input_tokens_seen": 122778824, "step": 1854 }, { "epoch": 0.17353863434267797, "loss": 1.3535451889038086, "loss_ce": 0.01174830086529255, "loss_iou": 0.55859375, "loss_num": 0.04443359375, "loss_xval": 1.34375, "num_input_tokens_seen": 122778824, "step": 1854 }, { "epoch": 0.17363223662657368, "grad_norm": 35.08366394042969, "learning_rate": 5e-05, "loss": 1.4697, "num_input_tokens_seen": 122844616, "step": 1855 }, { "epoch": 0.17363223662657368, "loss": 1.5058411359786987, "loss_ce": 0.006817711051553488, "loss_iou": 0.66015625, "loss_num": 0.035888671875, "loss_xval": 1.5, "num_input_tokens_seen": 122844616, "step": 1855 }, { "epoch": 0.17372583891046942, "grad_norm": 28.835338592529297, "learning_rate": 5e-05, "loss": 1.3623, "num_input_tokens_seen": 122909672, "step": 1856 }, { "epoch": 0.17372583891046942, "loss": 1.3965022563934326, "loss_ce": 0.003924140240997076, "loss_iou": 0.59375, "loss_num": 0.04150390625, "loss_xval": 1.390625, "num_input_tokens_seen": 122909672, "step": 1856 }, { "epoch": 0.17381944119436515, "grad_norm": 20.535924911499023, "learning_rate": 5e-05, "loss": 1.6023, "num_input_tokens_seen": 122975176, "step": 1857 }, { "epoch": 0.17381944119436515, "loss": 1.4852015972137451, "loss_ce": 0.0076626078225672245, "loss_iou": 0.640625, "loss_num": 0.03857421875, "loss_xval": 1.4765625, "num_input_tokens_seen": 122975176, "step": 1857 }, { "epoch": 0.17391304347826086, "grad_norm": 22.818687438964844, "learning_rate": 5e-05, "loss": 1.2961, "num_input_tokens_seen": 123041632, "step": 1858 }, { "epoch": 0.17391304347826086, "loss": 1.2844634056091309, "loss_ce": 0.006875484250485897, "loss_iou": 0.50390625, "loss_num": 0.0546875, "loss_xval": 1.28125, "num_input_tokens_seen": 123041632, "step": 1858 }, { "epoch": 0.1740066457621566, "grad_norm": 44.3472900390625, "learning_rate": 5e-05, "loss": 1.5116, "num_input_tokens_seen": 123107284, "step": 1859 }, { "epoch": 0.1740066457621566, "loss": 1.563816785812378, "loss_ce": 0.0052230944857001305, "loss_iou": 0.68359375, "loss_num": 0.037841796875, "loss_xval": 1.5625, "num_input_tokens_seen": 123107284, "step": 1859 }, { "epoch": 0.1741002480460523, "grad_norm": 25.473066329956055, "learning_rate": 5e-05, "loss": 1.7615, "num_input_tokens_seen": 123173360, "step": 1860 }, { "epoch": 0.1741002480460523, "loss": 1.6592191457748413, "loss_ce": 0.005898888222873211, "loss_iou": 0.75, "loss_num": 0.03125, "loss_xval": 1.65625, "num_input_tokens_seen": 123173360, "step": 1860 }, { "epoch": 0.17419385032994805, "grad_norm": 23.157337188720703, "learning_rate": 5e-05, "loss": 1.4354, "num_input_tokens_seen": 123239668, "step": 1861 }, { "epoch": 0.17419385032994805, "loss": 1.3325526714324951, "loss_ce": 0.0024744963739067316, "loss_iou": 0.578125, "loss_num": 0.0341796875, "loss_xval": 1.328125, "num_input_tokens_seen": 123239668, "step": 1861 }, { "epoch": 0.1742874526138438, "grad_norm": 23.07021141052246, "learning_rate": 5e-05, "loss": 1.3751, "num_input_tokens_seen": 123307004, "step": 1862 }, { "epoch": 0.1742874526138438, "loss": 1.138388991355896, "loss_ce": 0.0031351184006780386, "loss_iou": 0.5, "loss_num": 0.02734375, "loss_xval": 1.1328125, "num_input_tokens_seen": 123307004, "step": 1862 }, { "epoch": 0.1743810548977395, "grad_norm": 20.14305877685547, "learning_rate": 5e-05, "loss": 1.5347, "num_input_tokens_seen": 123372628, "step": 1863 }, { "epoch": 0.1743810548977395, "loss": 1.4409219026565552, "loss_ce": 0.005375009961426258, "loss_iou": 0.6171875, "loss_num": 0.03955078125, "loss_xval": 1.4375, "num_input_tokens_seen": 123372628, "step": 1863 }, { "epoch": 0.17447465718163524, "grad_norm": 21.856277465820312, "learning_rate": 5e-05, "loss": 1.5748, "num_input_tokens_seen": 123439596, "step": 1864 }, { "epoch": 0.17447465718163524, "loss": 1.5219848155975342, "loss_ce": 0.003918478265404701, "loss_iou": 0.64453125, "loss_num": 0.04638671875, "loss_xval": 1.515625, "num_input_tokens_seen": 123439596, "step": 1864 }, { "epoch": 0.17456825946553095, "grad_norm": 29.275348663330078, "learning_rate": 5e-05, "loss": 1.3077, "num_input_tokens_seen": 123506000, "step": 1865 }, { "epoch": 0.17456825946553095, "loss": 1.416922926902771, "loss_ce": 0.0038370315451174974, "loss_iou": 0.6328125, "loss_num": 0.029541015625, "loss_xval": 1.4140625, "num_input_tokens_seen": 123506000, "step": 1865 }, { "epoch": 0.1746618617494267, "grad_norm": 47.204654693603516, "learning_rate": 5e-05, "loss": 1.4407, "num_input_tokens_seen": 123571656, "step": 1866 }, { "epoch": 0.1746618617494267, "loss": 1.3536100387573242, "loss_ce": 0.0040006255730986595, "loss_iou": 0.59375, "loss_num": 0.032470703125, "loss_xval": 1.3515625, "num_input_tokens_seen": 123571656, "step": 1866 }, { "epoch": 0.1747554640333224, "grad_norm": 30.03777313232422, "learning_rate": 5e-05, "loss": 1.4577, "num_input_tokens_seen": 123637340, "step": 1867 }, { "epoch": 0.1747554640333224, "loss": 1.6500486135482788, "loss_ce": 0.0016110586002469063, "loss_iou": 0.69921875, "loss_num": 0.049072265625, "loss_xval": 1.6484375, "num_input_tokens_seen": 123637340, "step": 1867 }, { "epoch": 0.17484906631721814, "grad_norm": 27.495819091796875, "learning_rate": 5e-05, "loss": 1.6743, "num_input_tokens_seen": 123703956, "step": 1868 }, { "epoch": 0.17484906631721814, "loss": 1.6631691455841064, "loss_ce": 0.004966053646057844, "loss_iou": 0.73046875, "loss_num": 0.0390625, "loss_xval": 1.65625, "num_input_tokens_seen": 123703956, "step": 1868 }, { "epoch": 0.17494266860111388, "grad_norm": 22.417184829711914, "learning_rate": 5e-05, "loss": 1.2335, "num_input_tokens_seen": 123769896, "step": 1869 }, { "epoch": 0.17494266860111388, "loss": 1.2517322301864624, "loss_ce": 0.00515022873878479, "loss_iou": 0.546875, "loss_num": 0.03076171875, "loss_xval": 1.25, "num_input_tokens_seen": 123769896, "step": 1869 }, { "epoch": 0.1750362708850096, "grad_norm": 16.17466926574707, "learning_rate": 5e-05, "loss": 1.6097, "num_input_tokens_seen": 123836928, "step": 1870 }, { "epoch": 0.1750362708850096, "loss": 1.4315295219421387, "loss_ce": 0.0052599553018808365, "loss_iou": 0.58984375, "loss_num": 0.049560546875, "loss_xval": 1.4296875, "num_input_tokens_seen": 123836928, "step": 1870 }, { "epoch": 0.17512987316890533, "grad_norm": 16.639633178710938, "learning_rate": 5e-05, "loss": 1.3924, "num_input_tokens_seen": 123903860, "step": 1871 }, { "epoch": 0.17512987316890533, "loss": 1.237128496170044, "loss_ce": 0.0008003418333828449, "loss_iou": 0.55078125, "loss_num": 0.0269775390625, "loss_xval": 1.234375, "num_input_tokens_seen": 123903860, "step": 1871 }, { "epoch": 0.17522347545280104, "grad_norm": 15.891419410705566, "learning_rate": 5e-05, "loss": 1.1916, "num_input_tokens_seen": 123970420, "step": 1872 }, { "epoch": 0.17522347545280104, "loss": 1.1824283599853516, "loss_ce": 0.0051822601817548275, "loss_iou": 0.53125, "loss_num": 0.023193359375, "loss_xval": 1.1796875, "num_input_tokens_seen": 123970420, "step": 1872 }, { "epoch": 0.17531707773669677, "grad_norm": 28.685651779174805, "learning_rate": 5e-05, "loss": 1.6561, "num_input_tokens_seen": 124036516, "step": 1873 }, { "epoch": 0.17531707773669677, "loss": 1.8574066162109375, "loss_ce": 0.00242615956813097, "loss_iou": 0.765625, "loss_num": 0.06396484375, "loss_xval": 1.8515625, "num_input_tokens_seen": 124036516, "step": 1873 }, { "epoch": 0.1754106800205925, "grad_norm": 13.578001022338867, "learning_rate": 5e-05, "loss": 1.4323, "num_input_tokens_seen": 124102304, "step": 1874 }, { "epoch": 0.1754106800205925, "loss": 1.725637674331665, "loss_ce": 0.003957986831665039, "loss_iou": 0.7265625, "loss_num": 0.052734375, "loss_xval": 1.71875, "num_input_tokens_seen": 124102304, "step": 1874 }, { "epoch": 0.17550428230448822, "grad_norm": 13.442028045654297, "learning_rate": 5e-05, "loss": 1.5378, "num_input_tokens_seen": 124169080, "step": 1875 }, { "epoch": 0.17550428230448822, "loss": 1.678246021270752, "loss_ce": 0.010277180932462215, "loss_iou": 0.6953125, "loss_num": 0.05517578125, "loss_xval": 1.671875, "num_input_tokens_seen": 124169080, "step": 1875 }, { "epoch": 0.17559788458838396, "grad_norm": 39.67034912109375, "learning_rate": 5e-05, "loss": 1.3669, "num_input_tokens_seen": 124235620, "step": 1876 }, { "epoch": 0.17559788458838396, "loss": 1.5303378105163574, "loss_ce": 0.003970663528889418, "loss_iou": 0.62890625, "loss_num": 0.053466796875, "loss_xval": 1.5234375, "num_input_tokens_seen": 124235620, "step": 1876 }, { "epoch": 0.17569148687227967, "grad_norm": 18.186180114746094, "learning_rate": 5e-05, "loss": 1.253, "num_input_tokens_seen": 124302224, "step": 1877 }, { "epoch": 0.17569148687227967, "loss": 1.2854080200195312, "loss_ce": 0.004646319895982742, "loss_iou": 0.58203125, "loss_num": 0.0238037109375, "loss_xval": 1.28125, "num_input_tokens_seen": 124302224, "step": 1877 }, { "epoch": 0.1757850891561754, "grad_norm": 36.3538932800293, "learning_rate": 5e-05, "loss": 1.4756, "num_input_tokens_seen": 124368216, "step": 1878 }, { "epoch": 0.1757850891561754, "loss": 1.2984278202056885, "loss_ce": 0.006435736082494259, "loss_iou": 0.546875, "loss_num": 0.0400390625, "loss_xval": 1.2890625, "num_input_tokens_seen": 124368216, "step": 1878 }, { "epoch": 0.17587869144007115, "grad_norm": 21.211565017700195, "learning_rate": 5e-05, "loss": 1.6729, "num_input_tokens_seen": 124434272, "step": 1879 }, { "epoch": 0.17587869144007115, "loss": 1.5440338850021362, "loss_ce": 0.0037505985237658024, "loss_iou": 0.640625, "loss_num": 0.051025390625, "loss_xval": 1.5390625, "num_input_tokens_seen": 124434272, "step": 1879 }, { "epoch": 0.17597229372396686, "grad_norm": 22.88192367553711, "learning_rate": 5e-05, "loss": 1.4055, "num_input_tokens_seen": 124500280, "step": 1880 }, { "epoch": 0.17597229372396686, "loss": 1.4746317863464355, "loss_ce": 0.005393535830080509, "loss_iou": 0.5703125, "loss_num": 0.06591796875, "loss_xval": 1.46875, "num_input_tokens_seen": 124500280, "step": 1880 }, { "epoch": 0.1760658960078626, "grad_norm": 42.942787170410156, "learning_rate": 5e-05, "loss": 1.7371, "num_input_tokens_seen": 124566792, "step": 1881 }, { "epoch": 0.1760658960078626, "loss": 1.6196497678756714, "loss_ce": 0.0034388331696391106, "loss_iou": 0.703125, "loss_num": 0.04150390625, "loss_xval": 1.6171875, "num_input_tokens_seen": 124566792, "step": 1881 }, { "epoch": 0.1761594982917583, "grad_norm": 20.981191635131836, "learning_rate": 5e-05, "loss": 1.727, "num_input_tokens_seen": 124633316, "step": 1882 }, { "epoch": 0.1761594982917583, "loss": 1.6363775730133057, "loss_ce": 0.004785794764757156, "loss_iou": 0.6953125, "loss_num": 0.047607421875, "loss_xval": 1.6328125, "num_input_tokens_seen": 124633316, "step": 1882 }, { "epoch": 0.17625310057565405, "grad_norm": 43.8437614440918, "learning_rate": 5e-05, "loss": 1.4488, "num_input_tokens_seen": 124700520, "step": 1883 }, { "epoch": 0.17625310057565405, "loss": 1.471380591392517, "loss_ce": 0.0011657995637506247, "loss_iou": 0.62109375, "loss_num": 0.045654296875, "loss_xval": 1.46875, "num_input_tokens_seen": 124700520, "step": 1883 }, { "epoch": 0.1763467028595498, "grad_norm": 25.370458602905273, "learning_rate": 5e-05, "loss": 1.2918, "num_input_tokens_seen": 124767320, "step": 1884 }, { "epoch": 0.1763467028595498, "loss": 1.2371501922607422, "loss_ce": 0.006681433413177729, "loss_iou": 0.52734375, "loss_num": 0.035888671875, "loss_xval": 1.234375, "num_input_tokens_seen": 124767320, "step": 1884 }, { "epoch": 0.1764403051434455, "grad_norm": 20.513080596923828, "learning_rate": 5e-05, "loss": 1.5742, "num_input_tokens_seen": 124832456, "step": 1885 }, { "epoch": 0.1764403051434455, "loss": 1.465209722518921, "loss_ce": 0.005737133789807558, "loss_iou": 0.63671875, "loss_num": 0.037841796875, "loss_xval": 1.4609375, "num_input_tokens_seen": 124832456, "step": 1885 }, { "epoch": 0.17653390742734124, "grad_norm": 43.855838775634766, "learning_rate": 5e-05, "loss": 1.3493, "num_input_tokens_seen": 124899228, "step": 1886 }, { "epoch": 0.17653390742734124, "loss": 1.4110389947891235, "loss_ce": 0.006742175668478012, "loss_iou": 0.61328125, "loss_num": 0.035400390625, "loss_xval": 1.40625, "num_input_tokens_seen": 124899228, "step": 1886 }, { "epoch": 0.17662750971123695, "grad_norm": 12.336699485778809, "learning_rate": 5e-05, "loss": 1.3108, "num_input_tokens_seen": 124966564, "step": 1887 }, { "epoch": 0.17662750971123695, "loss": 1.3217785358428955, "loss_ce": 0.005372196435928345, "loss_iou": 0.56640625, "loss_num": 0.03662109375, "loss_xval": 1.3125, "num_input_tokens_seen": 124966564, "step": 1887 }, { "epoch": 0.17672111199513268, "grad_norm": 22.85047149658203, "learning_rate": 5e-05, "loss": 1.473, "num_input_tokens_seen": 125031436, "step": 1888 }, { "epoch": 0.17672111199513268, "loss": 1.7604337930679321, "loss_ce": 0.007504129782319069, "loss_iou": 0.7109375, "loss_num": 0.06640625, "loss_xval": 1.75, "num_input_tokens_seen": 125031436, "step": 1888 }, { "epoch": 0.1768147142790284, "grad_norm": 74.69544982910156, "learning_rate": 5e-05, "loss": 1.4932, "num_input_tokens_seen": 125098176, "step": 1889 }, { "epoch": 0.1768147142790284, "loss": 1.30013108253479, "loss_ce": 0.0032560420222580433, "loss_iou": 0.5625, "loss_num": 0.03369140625, "loss_xval": 1.296875, "num_input_tokens_seen": 125098176, "step": 1889 }, { "epoch": 0.17690831656292413, "grad_norm": 22.372865676879883, "learning_rate": 5e-05, "loss": 1.3025, "num_input_tokens_seen": 125165752, "step": 1890 }, { "epoch": 0.17690831656292413, "loss": 1.313957691192627, "loss_ce": 0.004387363791465759, "loss_iou": 0.55859375, "loss_num": 0.0390625, "loss_xval": 1.3125, "num_input_tokens_seen": 125165752, "step": 1890 }, { "epoch": 0.17700191884681987, "grad_norm": 33.97995376586914, "learning_rate": 5e-05, "loss": 1.2417, "num_input_tokens_seen": 125232416, "step": 1891 }, { "epoch": 0.17700191884681987, "loss": 1.2535755634307861, "loss_ce": 0.00650528259575367, "loss_iou": 0.5546875, "loss_num": 0.02783203125, "loss_xval": 1.25, "num_input_tokens_seen": 125232416, "step": 1891 }, { "epoch": 0.17709552113071558, "grad_norm": 31.841249465942383, "learning_rate": 5e-05, "loss": 1.5356, "num_input_tokens_seen": 125299152, "step": 1892 }, { "epoch": 0.17709552113071558, "loss": 1.565967321395874, "loss_ce": 0.004443918354809284, "loss_iou": 0.63671875, "loss_num": 0.057861328125, "loss_xval": 1.5625, "num_input_tokens_seen": 125299152, "step": 1892 }, { "epoch": 0.17718912341461132, "grad_norm": 19.4390926361084, "learning_rate": 5e-05, "loss": 1.6589, "num_input_tokens_seen": 125365168, "step": 1893 }, { "epoch": 0.17718912341461132, "loss": 1.5728219747543335, "loss_ce": 0.004462606273591518, "loss_iou": 0.671875, "loss_num": 0.04443359375, "loss_xval": 1.5703125, "num_input_tokens_seen": 125365168, "step": 1893 }, { "epoch": 0.17728272569850703, "grad_norm": 12.243718147277832, "learning_rate": 5e-05, "loss": 1.3969, "num_input_tokens_seen": 125431516, "step": 1894 }, { "epoch": 0.17728272569850703, "loss": 1.1028046607971191, "loss_ce": 0.006124977953732014, "loss_iou": 0.46484375, "loss_num": 0.032958984375, "loss_xval": 1.09375, "num_input_tokens_seen": 125431516, "step": 1894 }, { "epoch": 0.17737632798240277, "grad_norm": 18.179861068725586, "learning_rate": 5e-05, "loss": 1.3818, "num_input_tokens_seen": 125497240, "step": 1895 }, { "epoch": 0.17737632798240277, "loss": 1.4240645170211792, "loss_ce": 0.00463092653080821, "loss_iou": 0.546875, "loss_num": 0.06494140625, "loss_xval": 1.421875, "num_input_tokens_seen": 125497240, "step": 1895 }, { "epoch": 0.1774699302662985, "grad_norm": 17.810461044311523, "learning_rate": 5e-05, "loss": 1.6627, "num_input_tokens_seen": 125562696, "step": 1896 }, { "epoch": 0.1774699302662985, "loss": 1.6232903003692627, "loss_ce": 0.005187239032238722, "loss_iou": 0.66015625, "loss_num": 0.058837890625, "loss_xval": 1.6171875, "num_input_tokens_seen": 125562696, "step": 1896 }, { "epoch": 0.17756353255019422, "grad_norm": 19.378154754638672, "learning_rate": 5e-05, "loss": 1.4421, "num_input_tokens_seen": 125629324, "step": 1897 }, { "epoch": 0.17756353255019422, "loss": 1.4215199947357178, "loss_ce": 0.006480968091636896, "loss_iou": 0.5859375, "loss_num": 0.0478515625, "loss_xval": 1.4140625, "num_input_tokens_seen": 125629324, "step": 1897 }, { "epoch": 0.17765713483408996, "grad_norm": 22.260164260864258, "learning_rate": 5e-05, "loss": 1.6494, "num_input_tokens_seen": 125694512, "step": 1898 }, { "epoch": 0.17765713483408996, "loss": 1.6587601900100708, "loss_ce": 0.005439842119812965, "loss_iou": 0.69921875, "loss_num": 0.0517578125, "loss_xval": 1.65625, "num_input_tokens_seen": 125694512, "step": 1898 }, { "epoch": 0.17775073711798567, "grad_norm": 23.74010467529297, "learning_rate": 5e-05, "loss": 1.3235, "num_input_tokens_seen": 125760940, "step": 1899 }, { "epoch": 0.17775073711798567, "loss": 1.5131752490997314, "loss_ce": 0.007315901108086109, "loss_iou": 0.6328125, "loss_num": 0.047607421875, "loss_xval": 1.5078125, "num_input_tokens_seen": 125760940, "step": 1899 }, { "epoch": 0.1778443394018814, "grad_norm": 16.91720199584961, "learning_rate": 5e-05, "loss": 1.2475, "num_input_tokens_seen": 125825884, "step": 1900 }, { "epoch": 0.1778443394018814, "loss": 1.4790034294128418, "loss_ce": 0.0034785200841724873, "loss_iou": 0.63671875, "loss_num": 0.04052734375, "loss_xval": 1.4765625, "num_input_tokens_seen": 125825884, "step": 1900 }, { "epoch": 0.17793794168577715, "grad_norm": 18.59794807434082, "learning_rate": 5e-05, "loss": 1.1832, "num_input_tokens_seen": 125891372, "step": 1901 }, { "epoch": 0.17793794168577715, "loss": 1.0883831977844238, "loss_ce": 0.0037275196518749, "loss_iou": 0.462890625, "loss_num": 0.031982421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 125891372, "step": 1901 }, { "epoch": 0.17803154396967286, "grad_norm": 16.934354782104492, "learning_rate": 5e-05, "loss": 1.4211, "num_input_tokens_seen": 125957712, "step": 1902 }, { "epoch": 0.17803154396967286, "loss": 1.2986729145050049, "loss_ce": 0.0032626825850456953, "loss_iou": 0.5703125, "loss_num": 0.0308837890625, "loss_xval": 1.296875, "num_input_tokens_seen": 125957712, "step": 1902 }, { "epoch": 0.1781251462535686, "grad_norm": 17.2276668548584, "learning_rate": 5e-05, "loss": 1.2597, "num_input_tokens_seen": 126023736, "step": 1903 }, { "epoch": 0.1781251462535686, "loss": 1.0219440460205078, "loss_ce": 0.0024127333890646696, "loss_iou": 0.46875, "loss_num": 0.0167236328125, "loss_xval": 1.015625, "num_input_tokens_seen": 126023736, "step": 1903 }, { "epoch": 0.1782187485374643, "grad_norm": 21.607051849365234, "learning_rate": 5e-05, "loss": 1.5113, "num_input_tokens_seen": 126090644, "step": 1904 }, { "epoch": 0.1782187485374643, "loss": 1.3699097633361816, "loss_ce": 0.005163642577826977, "loss_iou": 0.54296875, "loss_num": 0.05517578125, "loss_xval": 1.3671875, "num_input_tokens_seen": 126090644, "step": 1904 }, { "epoch": 0.17831235082136004, "grad_norm": 36.899269104003906, "learning_rate": 5e-05, "loss": 1.6401, "num_input_tokens_seen": 126156352, "step": 1905 }, { "epoch": 0.17831235082136004, "loss": 1.6202868223190308, "loss_ce": 0.0030993036925792694, "loss_iou": 0.671875, "loss_num": 0.053955078125, "loss_xval": 1.6171875, "num_input_tokens_seen": 126156352, "step": 1905 }, { "epoch": 0.17840595310525575, "grad_norm": 16.8770751953125, "learning_rate": 5e-05, "loss": 1.6128, "num_input_tokens_seen": 126222444, "step": 1906 }, { "epoch": 0.17840595310525575, "loss": 1.82246732711792, "loss_ce": 0.00410798192024231, "loss_iou": 0.8125, "loss_num": 0.038818359375, "loss_xval": 1.8203125, "num_input_tokens_seen": 126222444, "step": 1906 }, { "epoch": 0.1784995553891515, "grad_norm": 21.94072723388672, "learning_rate": 5e-05, "loss": 1.4607, "num_input_tokens_seen": 126288764, "step": 1907 }, { "epoch": 0.1784995553891515, "loss": 1.4229366779327393, "loss_ce": 0.00447956845164299, "loss_iou": 0.609375, "loss_num": 0.040771484375, "loss_xval": 1.421875, "num_input_tokens_seen": 126288764, "step": 1907 }, { "epoch": 0.17859315767304723, "grad_norm": 38.17008972167969, "learning_rate": 5e-05, "loss": 1.4479, "num_input_tokens_seen": 126355220, "step": 1908 }, { "epoch": 0.17859315767304723, "loss": 1.4375016689300537, "loss_ce": 0.003419750602915883, "loss_iou": 0.625, "loss_num": 0.03759765625, "loss_xval": 1.4375, "num_input_tokens_seen": 126355220, "step": 1908 }, { "epoch": 0.17868675995694294, "grad_norm": 17.322668075561523, "learning_rate": 5e-05, "loss": 1.5267, "num_input_tokens_seen": 126422608, "step": 1909 }, { "epoch": 0.17868675995694294, "loss": 1.6921244859695435, "loss_ce": 0.00755410548299551, "loss_iou": 0.73828125, "loss_num": 0.0419921875, "loss_xval": 1.6875, "num_input_tokens_seen": 126422608, "step": 1909 }, { "epoch": 0.17878036224083868, "grad_norm": 22.217540740966797, "learning_rate": 5e-05, "loss": 1.4309, "num_input_tokens_seen": 126489788, "step": 1910 }, { "epoch": 0.17878036224083868, "loss": 1.5370368957519531, "loss_ce": 0.002857227809727192, "loss_iou": 0.6640625, "loss_num": 0.04150390625, "loss_xval": 1.53125, "num_input_tokens_seen": 126489788, "step": 1910 }, { "epoch": 0.1788739645247344, "grad_norm": 42.78955841064453, "learning_rate": 5e-05, "loss": 1.607, "num_input_tokens_seen": 126555532, "step": 1911 }, { "epoch": 0.1788739645247344, "loss": 1.694398283958435, "loss_ce": 0.003968559671193361, "loss_iou": 0.69921875, "loss_num": 0.058837890625, "loss_xval": 1.6875, "num_input_tokens_seen": 126555532, "step": 1911 }, { "epoch": 0.17896756680863013, "grad_norm": 19.902368545532227, "learning_rate": 5e-05, "loss": 1.6004, "num_input_tokens_seen": 126621056, "step": 1912 }, { "epoch": 0.17896756680863013, "loss": 1.6208617687225342, "loss_ce": 0.005627512466162443, "loss_iou": 0.6953125, "loss_num": 0.04443359375, "loss_xval": 1.6171875, "num_input_tokens_seen": 126621056, "step": 1912 }, { "epoch": 0.17906116909252587, "grad_norm": 16.653642654418945, "learning_rate": 5e-05, "loss": 1.2568, "num_input_tokens_seen": 126686680, "step": 1913 }, { "epoch": 0.17906116909252587, "loss": 1.3326518535614014, "loss_ce": 0.004526888951659203, "loss_iou": 0.578125, "loss_num": 0.033935546875, "loss_xval": 1.328125, "num_input_tokens_seen": 126686680, "step": 1913 }, { "epoch": 0.17915477137642158, "grad_norm": 16.447555541992188, "learning_rate": 5e-05, "loss": 1.3708, "num_input_tokens_seen": 126751944, "step": 1914 }, { "epoch": 0.17915477137642158, "loss": 1.5041611194610596, "loss_ce": 0.0031846007332205772, "loss_iou": 0.63671875, "loss_num": 0.045654296875, "loss_xval": 1.5, "num_input_tokens_seen": 126751944, "step": 1914 }, { "epoch": 0.17924837366031732, "grad_norm": 22.584463119506836, "learning_rate": 5e-05, "loss": 1.4771, "num_input_tokens_seen": 126817988, "step": 1915 }, { "epoch": 0.17924837366031732, "loss": 1.708423376083374, "loss_ce": 0.006274915765970945, "loss_iou": 0.71484375, "loss_num": 0.05419921875, "loss_xval": 1.703125, "num_input_tokens_seen": 126817988, "step": 1915 }, { "epoch": 0.17934197594421303, "grad_norm": 26.603622436523438, "learning_rate": 5e-05, "loss": 1.4977, "num_input_tokens_seen": 126884096, "step": 1916 }, { "epoch": 0.17934197594421303, "loss": 1.4041247367858887, "loss_ce": 0.005198855884373188, "loss_iou": 0.60546875, "loss_num": 0.03662109375, "loss_xval": 1.3984375, "num_input_tokens_seen": 126884096, "step": 1916 }, { "epoch": 0.17943557822810877, "grad_norm": 23.41399383544922, "learning_rate": 5e-05, "loss": 1.3756, "num_input_tokens_seen": 126950112, "step": 1917 }, { "epoch": 0.17943557822810877, "loss": 1.3776991367340088, "loss_ce": 0.007581881247460842, "loss_iou": 0.609375, "loss_num": 0.0299072265625, "loss_xval": 1.3671875, "num_input_tokens_seen": 126950112, "step": 1917 }, { "epoch": 0.1795291805120045, "grad_norm": 16.151899337768555, "learning_rate": 5e-05, "loss": 1.6672, "num_input_tokens_seen": 127016528, "step": 1918 }, { "epoch": 0.1795291805120045, "loss": 1.6332756280899048, "loss_ce": 0.0063225338235497475, "loss_iou": 0.7265625, "loss_num": 0.034912109375, "loss_xval": 1.625, "num_input_tokens_seen": 127016528, "step": 1918 }, { "epoch": 0.17962278279590022, "grad_norm": 43.21495056152344, "learning_rate": 5e-05, "loss": 1.3722, "num_input_tokens_seen": 127082776, "step": 1919 }, { "epoch": 0.17962278279590022, "loss": 1.519390344619751, "loss_ce": 0.004741910845041275, "loss_iou": 0.63671875, "loss_num": 0.04833984375, "loss_xval": 1.515625, "num_input_tokens_seen": 127082776, "step": 1919 }, { "epoch": 0.17971638507979595, "grad_norm": 82.2913589477539, "learning_rate": 5e-05, "loss": 1.2503, "num_input_tokens_seen": 127148604, "step": 1920 }, { "epoch": 0.17971638507979595, "loss": 1.1019983291625977, "loss_ce": 0.0053185950964689255, "loss_iou": 0.482421875, "loss_num": 0.0264892578125, "loss_xval": 1.09375, "num_input_tokens_seen": 127148604, "step": 1920 }, { "epoch": 0.17980998736369166, "grad_norm": 17.779369354248047, "learning_rate": 5e-05, "loss": 1.259, "num_input_tokens_seen": 127214744, "step": 1921 }, { "epoch": 0.17980998736369166, "loss": 1.3687243461608887, "loss_ce": 0.004466459155082703, "loss_iou": 0.5859375, "loss_num": 0.0380859375, "loss_xval": 1.3671875, "num_input_tokens_seen": 127214744, "step": 1921 }, { "epoch": 0.1799035896475874, "grad_norm": 33.66106414794922, "learning_rate": 5e-05, "loss": 1.627, "num_input_tokens_seen": 127281020, "step": 1922 }, { "epoch": 0.1799035896475874, "loss": 1.5347261428833008, "loss_ce": 0.004452696070075035, "loss_iou": 0.640625, "loss_num": 0.04931640625, "loss_xval": 1.53125, "num_input_tokens_seen": 127281020, "step": 1922 }, { "epoch": 0.17999719193148314, "grad_norm": 86.88664245605469, "learning_rate": 5e-05, "loss": 1.4684, "num_input_tokens_seen": 127347132, "step": 1923 }, { "epoch": 0.17999719193148314, "loss": 1.442563533782959, "loss_ce": 0.005551830865442753, "loss_iou": 0.5859375, "loss_num": 0.05322265625, "loss_xval": 1.4375, "num_input_tokens_seen": 127347132, "step": 1923 }, { "epoch": 0.18009079421537885, "grad_norm": 21.12995719909668, "learning_rate": 5e-05, "loss": 1.5456, "num_input_tokens_seen": 127412924, "step": 1924 }, { "epoch": 0.18009079421537885, "loss": 1.5041178464889526, "loss_ce": 0.0026530534960329533, "loss_iou": 0.60546875, "loss_num": 0.0576171875, "loss_xval": 1.5, "num_input_tokens_seen": 127412924, "step": 1924 }, { "epoch": 0.1801843964992746, "grad_norm": 39.95879364013672, "learning_rate": 5e-05, "loss": 1.4468, "num_input_tokens_seen": 127479000, "step": 1925 }, { "epoch": 0.1801843964992746, "loss": 1.4673662185668945, "loss_ce": 0.002522457158192992, "loss_iou": 0.65625, "loss_num": 0.0311279296875, "loss_xval": 1.46875, "num_input_tokens_seen": 127479000, "step": 1925 }, { "epoch": 0.1802779987831703, "grad_norm": 24.05704116821289, "learning_rate": 5e-05, "loss": 1.6672, "num_input_tokens_seen": 127543524, "step": 1926 }, { "epoch": 0.1802779987831703, "loss": 1.816376805305481, "loss_ce": 0.003876802045851946, "loss_iou": 0.7109375, "loss_num": 0.07763671875, "loss_xval": 1.8125, "num_input_tokens_seen": 127543524, "step": 1926 }, { "epoch": 0.18037160106706604, "grad_norm": 23.91031837463379, "learning_rate": 5e-05, "loss": 1.0682, "num_input_tokens_seen": 127609152, "step": 1927 }, { "epoch": 0.18037160106706604, "loss": 1.0998576879501343, "loss_ce": 0.004887010902166367, "loss_iou": 0.380859375, "loss_num": 0.06640625, "loss_xval": 1.09375, "num_input_tokens_seen": 127609152, "step": 1927 }, { "epoch": 0.18046520335096175, "grad_norm": 17.20271110534668, "learning_rate": 5e-05, "loss": 1.2571, "num_input_tokens_seen": 127676356, "step": 1928 }, { "epoch": 0.18046520335096175, "loss": 1.2858045101165771, "loss_ce": 0.00601929472759366, "loss_iou": 0.52734375, "loss_num": 0.04541015625, "loss_xval": 1.28125, "num_input_tokens_seen": 127676356, "step": 1928 }, { "epoch": 0.1805588056348575, "grad_norm": 25.051280975341797, "learning_rate": 5e-05, "loss": 1.706, "num_input_tokens_seen": 127742500, "step": 1929 }, { "epoch": 0.1805588056348575, "loss": 1.4824163913726807, "loss_ce": 0.003412485821172595, "loss_iou": 0.671875, "loss_num": 0.0263671875, "loss_xval": 1.4765625, "num_input_tokens_seen": 127742500, "step": 1929 }, { "epoch": 0.18065240791875323, "grad_norm": 28.843645095825195, "learning_rate": 5e-05, "loss": 1.1059, "num_input_tokens_seen": 127808460, "step": 1930 }, { "epoch": 0.18065240791875323, "loss": 1.1264175176620483, "loss_ce": 0.0072769299149513245, "loss_iou": 0.470703125, "loss_num": 0.035400390625, "loss_xval": 1.1171875, "num_input_tokens_seen": 127808460, "step": 1930 }, { "epoch": 0.18074601020264894, "grad_norm": 14.538792610168457, "learning_rate": 5e-05, "loss": 1.3473, "num_input_tokens_seen": 127874696, "step": 1931 }, { "epoch": 0.18074601020264894, "loss": 1.285771131515503, "loss_ce": 0.0054975831881165504, "loss_iou": 0.54296875, "loss_num": 0.0390625, "loss_xval": 1.28125, "num_input_tokens_seen": 127874696, "step": 1931 }, { "epoch": 0.18083961248654468, "grad_norm": 43.93027877807617, "learning_rate": 5e-05, "loss": 1.3949, "num_input_tokens_seen": 127940224, "step": 1932 }, { "epoch": 0.18083961248654468, "loss": 1.3779126405715942, "loss_ce": 0.006818891502916813, "loss_iou": 0.61328125, "loss_num": 0.0284423828125, "loss_xval": 1.375, "num_input_tokens_seen": 127940224, "step": 1932 }, { "epoch": 0.1809332147704404, "grad_norm": 19.06087875366211, "learning_rate": 5e-05, "loss": 1.6005, "num_input_tokens_seen": 128006372, "step": 1933 }, { "epoch": 0.1809332147704404, "loss": 1.6286779642105103, "loss_ce": 0.0017248571384698153, "loss_iou": 0.7265625, "loss_num": 0.035400390625, "loss_xval": 1.625, "num_input_tokens_seen": 128006372, "step": 1933 }, { "epoch": 0.18102681705433613, "grad_norm": 13.02169418334961, "learning_rate": 5e-05, "loss": 1.5185, "num_input_tokens_seen": 128071508, "step": 1934 }, { "epoch": 0.18102681705433613, "loss": 1.6327862739562988, "loss_ce": 0.0014387151459231973, "loss_iou": 0.69140625, "loss_num": 0.04931640625, "loss_xval": 1.6328125, "num_input_tokens_seen": 128071508, "step": 1934 }, { "epoch": 0.18112041933823186, "grad_norm": 24.095348358154297, "learning_rate": 5e-05, "loss": 1.3805, "num_input_tokens_seen": 128138652, "step": 1935 }, { "epoch": 0.18112041933823186, "loss": 1.2067148685455322, "loss_ce": 0.00249119708314538, "loss_iou": 0.5234375, "loss_num": 0.031494140625, "loss_xval": 1.203125, "num_input_tokens_seen": 128138652, "step": 1935 }, { "epoch": 0.18121402162212757, "grad_norm": 32.35599899291992, "learning_rate": 5e-05, "loss": 1.2376, "num_input_tokens_seen": 128204448, "step": 1936 }, { "epoch": 0.18121402162212757, "loss": 1.2077301740646362, "loss_ce": 0.006558326072990894, "loss_iou": 0.4609375, "loss_num": 0.05615234375, "loss_xval": 1.203125, "num_input_tokens_seen": 128204448, "step": 1936 }, { "epoch": 0.1813076239060233, "grad_norm": 22.592796325683594, "learning_rate": 5e-05, "loss": 1.4242, "num_input_tokens_seen": 128271508, "step": 1937 }, { "epoch": 0.1813076239060233, "loss": 1.327092170715332, "loss_ce": 0.0038501075468957424, "loss_iou": 0.58203125, "loss_num": 0.03173828125, "loss_xval": 1.3203125, "num_input_tokens_seen": 128271508, "step": 1937 }, { "epoch": 0.18140122618991902, "grad_norm": 27.72064781188965, "learning_rate": 5e-05, "loss": 1.5403, "num_input_tokens_seen": 128337312, "step": 1938 }, { "epoch": 0.18140122618991902, "loss": 1.6330958604812622, "loss_ce": 0.006142733618617058, "loss_iou": 0.6875, "loss_num": 0.051513671875, "loss_xval": 1.625, "num_input_tokens_seen": 128337312, "step": 1938 }, { "epoch": 0.18149482847381476, "grad_norm": 21.491222381591797, "learning_rate": 5e-05, "loss": 1.4238, "num_input_tokens_seen": 128404084, "step": 1939 }, { "epoch": 0.18149482847381476, "loss": 1.4202455282211304, "loss_ce": 0.0047181760892271996, "loss_iou": 0.5625, "loss_num": 0.05908203125, "loss_xval": 1.4140625, "num_input_tokens_seen": 128404084, "step": 1939 }, { "epoch": 0.1815884307577105, "grad_norm": 17.163345336914062, "learning_rate": 5e-05, "loss": 1.3903, "num_input_tokens_seen": 128470460, "step": 1940 }, { "epoch": 0.1815884307577105, "loss": 1.354461431503296, "loss_ce": 0.00827001966536045, "loss_iou": 0.546875, "loss_num": 0.05029296875, "loss_xval": 1.34375, "num_input_tokens_seen": 128470460, "step": 1940 }, { "epoch": 0.1816820330416062, "grad_norm": 27.74918556213379, "learning_rate": 5e-05, "loss": 1.4035, "num_input_tokens_seen": 128535812, "step": 1941 }, { "epoch": 0.1816820330416062, "loss": 1.4504563808441162, "loss_ce": 0.0051439437083899975, "loss_iou": 0.63671875, "loss_num": 0.03466796875, "loss_xval": 1.4453125, "num_input_tokens_seen": 128535812, "step": 1941 }, { "epoch": 0.18177563532550195, "grad_norm": 24.40339469909668, "learning_rate": 5e-05, "loss": 1.7923, "num_input_tokens_seen": 128601860, "step": 1942 }, { "epoch": 0.18177563532550195, "loss": 1.6550748348236084, "loss_ce": 0.005660798400640488, "loss_iou": 0.6875, "loss_num": 0.054443359375, "loss_xval": 1.6484375, "num_input_tokens_seen": 128601860, "step": 1942 }, { "epoch": 0.18186923760939766, "grad_norm": 17.7146053314209, "learning_rate": 5e-05, "loss": 1.346, "num_input_tokens_seen": 128666976, "step": 1943 }, { "epoch": 0.18186923760939766, "loss": 1.4426484107971191, "loss_ce": 0.008078088983893394, "loss_iou": 0.578125, "loss_num": 0.054931640625, "loss_xval": 1.4375, "num_input_tokens_seen": 128666976, "step": 1943 }, { "epoch": 0.1819628398932934, "grad_norm": 21.35053062438965, "learning_rate": 5e-05, "loss": 1.4457, "num_input_tokens_seen": 128733712, "step": 1944 }, { "epoch": 0.1819628398932934, "loss": 1.3560891151428223, "loss_ce": 0.0069680060259997845, "loss_iou": 0.5625, "loss_num": 0.04541015625, "loss_xval": 1.3515625, "num_input_tokens_seen": 128733712, "step": 1944 }, { "epoch": 0.18205644217718914, "grad_norm": 67.17298889160156, "learning_rate": 5e-05, "loss": 1.3814, "num_input_tokens_seen": 128799752, "step": 1945 }, { "epoch": 0.18205644217718914, "loss": 1.3860197067260742, "loss_ce": 0.00564863346517086, "loss_iou": 0.5859375, "loss_num": 0.0419921875, "loss_xval": 1.3828125, "num_input_tokens_seen": 128799752, "step": 1945 }, { "epoch": 0.18215004446108485, "grad_norm": 17.579940795898438, "learning_rate": 5e-05, "loss": 1.4157, "num_input_tokens_seen": 128865872, "step": 1946 }, { "epoch": 0.18215004446108485, "loss": 1.3579998016357422, "loss_ce": 0.006925633177161217, "loss_iou": 0.5859375, "loss_num": 0.0361328125, "loss_xval": 1.3515625, "num_input_tokens_seen": 128865872, "step": 1946 }, { "epoch": 0.1822436467449806, "grad_norm": 20.450754165649414, "learning_rate": 5e-05, "loss": 1.374, "num_input_tokens_seen": 128932280, "step": 1947 }, { "epoch": 0.1822436467449806, "loss": 1.4243559837341309, "loss_ce": 0.006387227214872837, "loss_iou": 0.5859375, "loss_num": 0.048828125, "loss_xval": 1.421875, "num_input_tokens_seen": 128932280, "step": 1947 }, { "epoch": 0.1823372490288763, "grad_norm": 46.6253547668457, "learning_rate": 5e-05, "loss": 1.44, "num_input_tokens_seen": 128997716, "step": 1948 }, { "epoch": 0.1823372490288763, "loss": 1.4486358165740967, "loss_ce": 0.006741163786500692, "loss_iou": 0.60546875, "loss_num": 0.045654296875, "loss_xval": 1.4453125, "num_input_tokens_seen": 128997716, "step": 1948 }, { "epoch": 0.18243085131277204, "grad_norm": 17.697044372558594, "learning_rate": 5e-05, "loss": 2.0093, "num_input_tokens_seen": 129063060, "step": 1949 }, { "epoch": 0.18243085131277204, "loss": 2.1196231842041016, "loss_ce": 0.006341718602925539, "loss_iou": 0.84375, "loss_num": 0.0849609375, "loss_xval": 2.109375, "num_input_tokens_seen": 129063060, "step": 1949 }, { "epoch": 0.18252445359666775, "grad_norm": 21.53302001953125, "learning_rate": 5e-05, "loss": 1.3981, "num_input_tokens_seen": 129129500, "step": 1950 }, { "epoch": 0.18252445359666775, "loss": 1.3187754154205322, "loss_ce": 0.003345734905451536, "loss_iou": 0.5703125, "loss_num": 0.034912109375, "loss_xval": 1.3125, "num_input_tokens_seen": 129129500, "step": 1950 }, { "epoch": 0.18261805588056348, "grad_norm": 19.902812957763672, "learning_rate": 5e-05, "loss": 1.4631, "num_input_tokens_seen": 129194880, "step": 1951 }, { "epoch": 0.18261805588056348, "loss": 1.3781917095184326, "loss_ce": 0.007586177904158831, "loss_iou": 0.5078125, "loss_num": 0.0712890625, "loss_xval": 1.3671875, "num_input_tokens_seen": 129194880, "step": 1951 }, { "epoch": 0.18271165816445922, "grad_norm": 23.150081634521484, "learning_rate": 5e-05, "loss": 1.2857, "num_input_tokens_seen": 129260348, "step": 1952 }, { "epoch": 0.18271165816445922, "loss": 1.0816574096679688, "loss_ce": 0.004020635969936848, "loss_iou": 0.462890625, "loss_num": 0.030517578125, "loss_xval": 1.078125, "num_input_tokens_seen": 129260348, "step": 1952 }, { "epoch": 0.18280526044835493, "grad_norm": 31.974945068359375, "learning_rate": 5e-05, "loss": 1.7205, "num_input_tokens_seen": 129326728, "step": 1953 }, { "epoch": 0.18280526044835493, "loss": 1.5764834880828857, "loss_ce": 0.003241327591240406, "loss_iou": 0.69140625, "loss_num": 0.037353515625, "loss_xval": 1.5703125, "num_input_tokens_seen": 129326728, "step": 1953 }, { "epoch": 0.18289886273225067, "grad_norm": 18.00637435913086, "learning_rate": 5e-05, "loss": 1.4208, "num_input_tokens_seen": 129392436, "step": 1954 }, { "epoch": 0.18289886273225067, "loss": 1.4153504371643066, "loss_ce": 0.0047058966010808945, "loss_iou": 0.6171875, "loss_num": 0.035400390625, "loss_xval": 1.4140625, "num_input_tokens_seen": 129392436, "step": 1954 }, { "epoch": 0.18299246501614638, "grad_norm": 20.823671340942383, "learning_rate": 5e-05, "loss": 1.3219, "num_input_tokens_seen": 129458680, "step": 1955 }, { "epoch": 0.18299246501614638, "loss": 1.3515501022338867, "loss_ce": 0.0019407474901527166, "loss_iou": 0.578125, "loss_num": 0.0390625, "loss_xval": 1.3515625, "num_input_tokens_seen": 129458680, "step": 1955 }, { "epoch": 0.18308606730004212, "grad_norm": 31.404987335205078, "learning_rate": 5e-05, "loss": 1.4798, "num_input_tokens_seen": 129525600, "step": 1956 }, { "epoch": 0.18308606730004212, "loss": 1.5005778074264526, "loss_ce": 0.00448403786867857, "loss_iou": 0.61328125, "loss_num": 0.0546875, "loss_xval": 1.5, "num_input_tokens_seen": 129525600, "step": 1956 }, { "epoch": 0.18317966958393786, "grad_norm": 18.450042724609375, "learning_rate": 5e-05, "loss": 1.4445, "num_input_tokens_seen": 129591224, "step": 1957 }, { "epoch": 0.18317966958393786, "loss": 1.4111019372940063, "loss_ce": 0.007293255068361759, "loss_iou": 0.5859375, "loss_num": 0.045654296875, "loss_xval": 1.40625, "num_input_tokens_seen": 129591224, "step": 1957 }, { "epoch": 0.18327327186783357, "grad_norm": 14.147624969482422, "learning_rate": 5e-05, "loss": 1.2483, "num_input_tokens_seen": 129657140, "step": 1958 }, { "epoch": 0.18327327186783357, "loss": 1.2138912677764893, "loss_ce": 0.0036861575208604336, "loss_iou": 0.51171875, "loss_num": 0.037109375, "loss_xval": 1.2109375, "num_input_tokens_seen": 129657140, "step": 1958 }, { "epoch": 0.1833668741517293, "grad_norm": 35.48431396484375, "learning_rate": 5e-05, "loss": 1.3859, "num_input_tokens_seen": 129723552, "step": 1959 }, { "epoch": 0.1833668741517293, "loss": 1.332700252532959, "loss_ce": 0.0035985566210001707, "loss_iou": 0.5703125, "loss_num": 0.037109375, "loss_xval": 1.328125, "num_input_tokens_seen": 129723552, "step": 1959 }, { "epoch": 0.18346047643562502, "grad_norm": 31.027301788330078, "learning_rate": 5e-05, "loss": 1.59, "num_input_tokens_seen": 129790336, "step": 1960 }, { "epoch": 0.18346047643562502, "loss": 1.6845269203186035, "loss_ce": 0.010210525244474411, "loss_iou": 0.703125, "loss_num": 0.054443359375, "loss_xval": 1.671875, "num_input_tokens_seen": 129790336, "step": 1960 }, { "epoch": 0.18355407871952076, "grad_norm": 16.66594123840332, "learning_rate": 5e-05, "loss": 1.3838, "num_input_tokens_seen": 129857304, "step": 1961 }, { "epoch": 0.18355407871952076, "loss": 1.312317132949829, "loss_ce": 0.00665307929739356, "loss_iou": 0.58984375, "loss_num": 0.0244140625, "loss_xval": 1.3046875, "num_input_tokens_seen": 129857304, "step": 1961 }, { "epoch": 0.1836476810034165, "grad_norm": 96.72372436523438, "learning_rate": 5e-05, "loss": 1.4029, "num_input_tokens_seen": 129923064, "step": 1962 }, { "epoch": 0.1836476810034165, "loss": 1.5631399154663086, "loss_ce": 0.004546206444501877, "loss_iou": 0.62109375, "loss_num": 0.06298828125, "loss_xval": 1.5625, "num_input_tokens_seen": 129923064, "step": 1962 }, { "epoch": 0.1837412832873122, "grad_norm": 21.35594367980957, "learning_rate": 5e-05, "loss": 1.5687, "num_input_tokens_seen": 129989276, "step": 1963 }, { "epoch": 0.1837412832873122, "loss": 1.50491464138031, "loss_ce": 0.0019849720411002636, "loss_iou": 0.62109375, "loss_num": 0.05224609375, "loss_xval": 1.5, "num_input_tokens_seen": 129989276, "step": 1963 }, { "epoch": 0.18383488557120795, "grad_norm": 25.7547664642334, "learning_rate": 5e-05, "loss": 1.499, "num_input_tokens_seen": 130055708, "step": 1964 }, { "epoch": 0.18383488557120795, "loss": 1.4371271133422852, "loss_ce": 0.006463091820478439, "loss_iou": 0.55859375, "loss_num": 0.06298828125, "loss_xval": 1.4296875, "num_input_tokens_seen": 130055708, "step": 1964 }, { "epoch": 0.18392848785510366, "grad_norm": 21.432514190673828, "learning_rate": 5e-05, "loss": 1.5024, "num_input_tokens_seen": 130122920, "step": 1965 }, { "epoch": 0.18392848785510366, "loss": 1.428673505783081, "loss_ce": 0.0038687740452587605, "loss_iou": 0.640625, "loss_num": 0.029052734375, "loss_xval": 1.421875, "num_input_tokens_seen": 130122920, "step": 1965 }, { "epoch": 0.1840220901389994, "grad_norm": 39.52536392211914, "learning_rate": 5e-05, "loss": 1.3123, "num_input_tokens_seen": 130189624, "step": 1966 }, { "epoch": 0.1840220901389994, "loss": 1.4153207540512085, "loss_ce": 0.004187947139143944, "loss_iou": 0.61328125, "loss_num": 0.036865234375, "loss_xval": 1.4140625, "num_input_tokens_seen": 130189624, "step": 1966 }, { "epoch": 0.1841156924228951, "grad_norm": 26.08038330078125, "learning_rate": 5e-05, "loss": 1.4598, "num_input_tokens_seen": 130255684, "step": 1967 }, { "epoch": 0.1841156924228951, "loss": 1.202392816543579, "loss_ce": 0.0043948134407401085, "loss_iou": 0.51171875, "loss_num": 0.034423828125, "loss_xval": 1.1953125, "num_input_tokens_seen": 130255684, "step": 1967 }, { "epoch": 0.18420929470679084, "grad_norm": 36.27613067626953, "learning_rate": 5e-05, "loss": 1.3115, "num_input_tokens_seen": 130321496, "step": 1968 }, { "epoch": 0.18420929470679084, "loss": 1.3748427629470825, "loss_ce": 0.004237297922372818, "loss_iou": 0.5703125, "loss_num": 0.04638671875, "loss_xval": 1.3671875, "num_input_tokens_seen": 130321496, "step": 1968 }, { "epoch": 0.18430289699068658, "grad_norm": 27.272716522216797, "learning_rate": 5e-05, "loss": 1.4727, "num_input_tokens_seen": 130387560, "step": 1969 }, { "epoch": 0.18430289699068658, "loss": 1.5415709018707275, "loss_ce": 0.006414575502276421, "loss_iou": 0.67578125, "loss_num": 0.037109375, "loss_xval": 1.53125, "num_input_tokens_seen": 130387560, "step": 1969 }, { "epoch": 0.1843964992745823, "grad_norm": 30.705875396728516, "learning_rate": 5e-05, "loss": 1.4431, "num_input_tokens_seen": 130453196, "step": 1970 }, { "epoch": 0.1843964992745823, "loss": 1.5475211143493652, "loss_ce": 0.0045523312874138355, "loss_iou": 0.6875, "loss_num": 0.034423828125, "loss_xval": 1.546875, "num_input_tokens_seen": 130453196, "step": 1970 }, { "epoch": 0.18449010155847803, "grad_norm": 18.834285736083984, "learning_rate": 5e-05, "loss": 1.5848, "num_input_tokens_seen": 130519368, "step": 1971 }, { "epoch": 0.18449010155847803, "loss": 1.6733832359313965, "loss_ce": 0.006391044706106186, "loss_iou": 0.71484375, "loss_num": 0.04736328125, "loss_xval": 1.6640625, "num_input_tokens_seen": 130519368, "step": 1971 }, { "epoch": 0.18458370384237374, "grad_norm": 14.840357780456543, "learning_rate": 5e-05, "loss": 1.3213, "num_input_tokens_seen": 130584576, "step": 1972 }, { "epoch": 0.18458370384237374, "loss": 1.263106346130371, "loss_ce": 0.007735134102404118, "loss_iou": 0.5234375, "loss_num": 0.041748046875, "loss_xval": 1.2578125, "num_input_tokens_seen": 130584576, "step": 1972 }, { "epoch": 0.18467730612626948, "grad_norm": 13.746054649353027, "learning_rate": 5e-05, "loss": 1.2833, "num_input_tokens_seen": 130650776, "step": 1973 }, { "epoch": 0.18467730612626948, "loss": 1.2599341869354248, "loss_ce": 0.0021216641180217266, "loss_iou": 0.5390625, "loss_num": 0.0361328125, "loss_xval": 1.2578125, "num_input_tokens_seen": 130650776, "step": 1973 }, { "epoch": 0.18477090841016522, "grad_norm": 14.861371994018555, "learning_rate": 5e-05, "loss": 1.1308, "num_input_tokens_seen": 130716800, "step": 1974 }, { "epoch": 0.18477090841016522, "loss": 1.0687583684921265, "loss_ce": 0.005525918677449226, "loss_iou": 0.45703125, "loss_num": 0.030029296875, "loss_xval": 1.0625, "num_input_tokens_seen": 130716800, "step": 1974 }, { "epoch": 0.18486451069406093, "grad_norm": 17.192245483398438, "learning_rate": 5e-05, "loss": 1.3216, "num_input_tokens_seen": 130782332, "step": 1975 }, { "epoch": 0.18486451069406093, "loss": 1.2852097749710083, "loss_ce": 0.0061570508405566216, "loss_iou": 0.54296875, "loss_num": 0.03857421875, "loss_xval": 1.28125, "num_input_tokens_seen": 130782332, "step": 1975 }, { "epoch": 0.18495811297795667, "grad_norm": 25.762100219726562, "learning_rate": 5e-05, "loss": 1.3482, "num_input_tokens_seen": 130847928, "step": 1976 }, { "epoch": 0.18495811297795667, "loss": 1.269754409790039, "loss_ce": 0.0016880759503692389, "loss_iou": 0.57421875, "loss_num": 0.024658203125, "loss_xval": 1.265625, "num_input_tokens_seen": 130847928, "step": 1976 }, { "epoch": 0.18505171526185238, "grad_norm": 119.86146545410156, "learning_rate": 5e-05, "loss": 1.7387, "num_input_tokens_seen": 130914068, "step": 1977 }, { "epoch": 0.18505171526185238, "loss": 2.0166561603546143, "loss_ce": 0.002984270453453064, "loss_iou": 0.8125, "loss_num": 0.0771484375, "loss_xval": 2.015625, "num_input_tokens_seen": 130914068, "step": 1977 }, { "epoch": 0.18514531754574812, "grad_norm": 12.953497886657715, "learning_rate": 5e-05, "loss": 1.3247, "num_input_tokens_seen": 130980532, "step": 1978 }, { "epoch": 0.18514531754574812, "loss": 1.5354790687561035, "loss_ce": 0.004229026846587658, "loss_iou": 0.62890625, "loss_num": 0.055419921875, "loss_xval": 1.53125, "num_input_tokens_seen": 130980532, "step": 1978 }, { "epoch": 0.18523891982964386, "grad_norm": 17.847932815551758, "learning_rate": 5e-05, "loss": 1.2552, "num_input_tokens_seen": 131043352, "step": 1979 }, { "epoch": 0.18523891982964386, "loss": 1.4122639894485474, "loss_ce": 0.004549109376966953, "loss_iou": 0.5390625, "loss_num": 0.06689453125, "loss_xval": 1.40625, "num_input_tokens_seen": 131043352, "step": 1979 }, { "epoch": 0.18533252211353957, "grad_norm": 18.029878616333008, "learning_rate": 5e-05, "loss": 1.5715, "num_input_tokens_seen": 131109000, "step": 1980 }, { "epoch": 0.18533252211353957, "loss": 1.5618164539337158, "loss_ce": 0.012011634185910225, "loss_iou": 0.59765625, "loss_num": 0.0703125, "loss_xval": 1.546875, "num_input_tokens_seen": 131109000, "step": 1980 }, { "epoch": 0.1854261243974353, "grad_norm": 39.68682098388672, "learning_rate": 5e-05, "loss": 1.2683, "num_input_tokens_seen": 131175640, "step": 1981 }, { "epoch": 0.1854261243974353, "loss": 1.1519880294799805, "loss_ce": 0.006968412082642317, "loss_iou": 0.484375, "loss_num": 0.034912109375, "loss_xval": 1.1484375, "num_input_tokens_seen": 131175640, "step": 1981 }, { "epoch": 0.18551972668133102, "grad_norm": 16.826982498168945, "learning_rate": 5e-05, "loss": 1.2313, "num_input_tokens_seen": 131242008, "step": 1982 }, { "epoch": 0.18551972668133102, "loss": 1.4770022630691528, "loss_ce": 0.007763974368572235, "loss_iou": 0.58984375, "loss_num": 0.0576171875, "loss_xval": 1.46875, "num_input_tokens_seen": 131242008, "step": 1982 }, { "epoch": 0.18561332896522675, "grad_norm": 43.08837890625, "learning_rate": 5e-05, "loss": 1.5267, "num_input_tokens_seen": 131308212, "step": 1983 }, { "epoch": 0.18561332896522675, "loss": 1.6214125156402588, "loss_ce": 0.005201567430049181, "loss_iou": 0.6484375, "loss_num": 0.0634765625, "loss_xval": 1.6171875, "num_input_tokens_seen": 131308212, "step": 1983 }, { "epoch": 0.1857069312491225, "grad_norm": 18.7119083404541, "learning_rate": 5e-05, "loss": 1.6566, "num_input_tokens_seen": 131376164, "step": 1984 }, { "epoch": 0.1857069312491225, "loss": 1.633873462677002, "loss_ce": 0.00399056589230895, "loss_iou": 0.6640625, "loss_num": 0.06103515625, "loss_xval": 1.6328125, "num_input_tokens_seen": 131376164, "step": 1984 }, { "epoch": 0.1858005335330182, "grad_norm": 66.57056427001953, "learning_rate": 5e-05, "loss": 1.3089, "num_input_tokens_seen": 131442472, "step": 1985 }, { "epoch": 0.1858005335330182, "loss": 1.2516813278198242, "loss_ce": 0.0038785517681390047, "loss_iou": 0.478515625, "loss_num": 0.05810546875, "loss_xval": 1.25, "num_input_tokens_seen": 131442472, "step": 1985 }, { "epoch": 0.18589413581691394, "grad_norm": 30.466259002685547, "learning_rate": 5e-05, "loss": 1.4092, "num_input_tokens_seen": 131508660, "step": 1986 }, { "epoch": 0.18589413581691394, "loss": 1.365430474281311, "loss_ce": 0.002637461293488741, "loss_iou": 0.55078125, "loss_num": 0.0517578125, "loss_xval": 1.359375, "num_input_tokens_seen": 131508660, "step": 1986 }, { "epoch": 0.18598773810080965, "grad_norm": 19.79939842224121, "learning_rate": 5e-05, "loss": 1.703, "num_input_tokens_seen": 131575048, "step": 1987 }, { "epoch": 0.18598773810080965, "loss": 1.8632733821868896, "loss_ce": 0.004386810585856438, "loss_iou": 0.71875, "loss_num": 0.083984375, "loss_xval": 1.859375, "num_input_tokens_seen": 131575048, "step": 1987 }, { "epoch": 0.1860813403847054, "grad_norm": 13.872488975524902, "learning_rate": 5e-05, "loss": 1.275, "num_input_tokens_seen": 131641564, "step": 1988 }, { "epoch": 0.1860813403847054, "loss": 1.226672649383545, "loss_ce": 0.006945985835045576, "loss_iou": 0.5234375, "loss_num": 0.034912109375, "loss_xval": 1.21875, "num_input_tokens_seen": 131641564, "step": 1988 }, { "epoch": 0.1861749426686011, "grad_norm": 24.429122924804688, "learning_rate": 5e-05, "loss": 1.1989, "num_input_tokens_seen": 131705996, "step": 1989 }, { "epoch": 0.1861749426686011, "loss": 1.265514850616455, "loss_ce": 0.003796099917963147, "loss_iou": 0.48828125, "loss_num": 0.056884765625, "loss_xval": 1.265625, "num_input_tokens_seen": 131705996, "step": 1989 }, { "epoch": 0.18626854495249684, "grad_norm": 16.61542320251465, "learning_rate": 5e-05, "loss": 1.2852, "num_input_tokens_seen": 131771648, "step": 1990 }, { "epoch": 0.18626854495249684, "loss": 1.1972167491912842, "loss_ce": 0.004833961371332407, "loss_iou": 0.5, "loss_num": 0.03759765625, "loss_xval": 1.1953125, "num_input_tokens_seen": 131771648, "step": 1990 }, { "epoch": 0.18636214723639258, "grad_norm": 20.98232650756836, "learning_rate": 5e-05, "loss": 1.3515, "num_input_tokens_seen": 131838160, "step": 1991 }, { "epoch": 0.18636214723639258, "loss": 1.2970855236053467, "loss_ce": 0.004116716329008341, "loss_iou": 0.55078125, "loss_num": 0.037841796875, "loss_xval": 1.296875, "num_input_tokens_seen": 131838160, "step": 1991 }, { "epoch": 0.1864557495202883, "grad_norm": 20.897262573242188, "learning_rate": 5e-05, "loss": 1.5175, "num_input_tokens_seen": 131903900, "step": 1992 }, { "epoch": 0.1864557495202883, "loss": 1.455235481262207, "loss_ce": 0.005040129646658897, "loss_iou": 0.62109375, "loss_num": 0.041748046875, "loss_xval": 1.453125, "num_input_tokens_seen": 131903900, "step": 1992 }, { "epoch": 0.18654935180418403, "grad_norm": 26.697927474975586, "learning_rate": 5e-05, "loss": 1.4688, "num_input_tokens_seen": 131970484, "step": 1993 }, { "epoch": 0.18654935180418403, "loss": 1.4565171003341675, "loss_ce": 0.011692902073264122, "loss_iou": 0.58203125, "loss_num": 0.055908203125, "loss_xval": 1.4453125, "num_input_tokens_seen": 131970484, "step": 1993 }, { "epoch": 0.18664295408807974, "grad_norm": 16.57448387145996, "learning_rate": 5e-05, "loss": 1.504, "num_input_tokens_seen": 132036380, "step": 1994 }, { "epoch": 0.18664295408807974, "loss": 1.5036506652832031, "loss_ce": 0.005603712517768145, "loss_iou": 0.66015625, "loss_num": 0.0361328125, "loss_xval": 1.5, "num_input_tokens_seen": 132036380, "step": 1994 }, { "epoch": 0.18673655637197548, "grad_norm": 13.335344314575195, "learning_rate": 5e-05, "loss": 1.5174, "num_input_tokens_seen": 132101980, "step": 1995 }, { "epoch": 0.18673655637197548, "loss": 1.5372298955917358, "loss_ce": 0.004515079781413078, "loss_iou": 0.60546875, "loss_num": 0.06396484375, "loss_xval": 1.53125, "num_input_tokens_seen": 132101980, "step": 1995 }, { "epoch": 0.18683015865587121, "grad_norm": 13.998605728149414, "learning_rate": 5e-05, "loss": 1.2338, "num_input_tokens_seen": 132167712, "step": 1996 }, { "epoch": 0.18683015865587121, "loss": 1.4663426876068115, "loss_ce": 0.004916940815746784, "loss_iou": 0.6015625, "loss_num": 0.052001953125, "loss_xval": 1.4609375, "num_input_tokens_seen": 132167712, "step": 1996 }, { "epoch": 0.18692376093976693, "grad_norm": 34.171142578125, "learning_rate": 5e-05, "loss": 1.2822, "num_input_tokens_seen": 132233348, "step": 1997 }, { "epoch": 0.18692376093976693, "loss": 1.3784549236297607, "loss_ce": 0.005408023484051228, "loss_iou": 0.59765625, "loss_num": 0.035888671875, "loss_xval": 1.375, "num_input_tokens_seen": 132233348, "step": 1997 }, { "epoch": 0.18701736322366266, "grad_norm": 42.80735397338867, "learning_rate": 5e-05, "loss": 1.6852, "num_input_tokens_seen": 132299644, "step": 1998 }, { "epoch": 0.18701736322366266, "loss": 1.6969330310821533, "loss_ce": 0.006503298878669739, "loss_iou": 0.69921875, "loss_num": 0.057861328125, "loss_xval": 1.6875, "num_input_tokens_seen": 132299644, "step": 1998 }, { "epoch": 0.18711096550755837, "grad_norm": 20.92772674560547, "learning_rate": 5e-05, "loss": 1.203, "num_input_tokens_seen": 132366452, "step": 1999 }, { "epoch": 0.18711096550755837, "loss": 1.2389694452285767, "loss_ce": 0.009355181828141212, "loss_iou": 0.50390625, "loss_num": 0.044189453125, "loss_xval": 1.2265625, "num_input_tokens_seen": 132366452, "step": 1999 }, { "epoch": 0.1872045677914541, "grad_norm": 30.069229125976562, "learning_rate": 5e-05, "loss": 1.5779, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.1872045677914541, "eval_seeclick_CIoU": 0.11575577221810818, "eval_seeclick_GIoU": 0.1071876734495163, "eval_seeclick_IoU": 0.23197105526924133, "eval_seeclick_MAE_all": 0.13012679666280746, "eval_seeclick_MAE_h": 0.06467602401971817, "eval_seeclick_MAE_w": 0.10543861240148544, "eval_seeclick_MAE_x_boxes": 0.2087840437889099, "eval_seeclick_MAE_y_boxes": 0.10963879711925983, "eval_seeclick_NUM_probability": 0.9996125400066376, "eval_seeclick_inside_bbox": 0.2760416716337204, "eval_seeclick_loss": 2.5171961784362793, "eval_seeclick_loss_ce": 0.013720860704779625, "eval_seeclick_loss_iou": 0.9281005859375, "eval_seeclick_loss_num": 0.12964630126953125, "eval_seeclick_loss_xval": 2.505859375, "eval_seeclick_runtime": 69.1578, "eval_seeclick_samples_per_second": 0.68, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.1872045677914541, "eval_icons_CIoU": -0.06847554817795753, "eval_icons_GIoU": -0.0004963139072060585, "eval_icons_IoU": 0.11401020735502243, "eval_icons_MAE_all": 0.172628253698349, "eval_icons_MAE_h": 0.24317608773708344, "eval_icons_MAE_w": 0.13153423741459846, "eval_icons_MAE_x_boxes": 0.1154375858604908, "eval_icons_MAE_y_boxes": 0.0758729837834835, "eval_icons_NUM_probability": 0.9998328685760498, "eval_icons_inside_bbox": 0.1770833358168602, "eval_icons_loss": 2.8988170623779297, "eval_icons_loss_ce": 6.705435225740075e-05, "eval_icons_loss_iou": 1.011962890625, "eval_icons_loss_num": 0.18157958984375, "eval_icons_loss_xval": 2.931640625, "eval_icons_runtime": 65.7634, "eval_icons_samples_per_second": 0.76, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.1872045677914541, "eval_screenspot_CIoU": -0.03418503267069658, "eval_screenspot_GIoU": -0.018533222377300262, "eval_screenspot_IoU": 0.15474786361058554, "eval_screenspot_MAE_all": 0.22027545173962912, "eval_screenspot_MAE_h": 0.23204520344734192, "eval_screenspot_MAE_w": 0.19964665671189627, "eval_screenspot_MAE_x_boxes": 0.26087622344493866, "eval_screenspot_MAE_y_boxes": 0.11870437612136205, "eval_screenspot_NUM_probability": 0.999854306379954, "eval_screenspot_inside_bbox": 0.3149999976158142, "eval_screenspot_loss": 3.1787924766540527, "eval_screenspot_loss_ce": 0.010288806942602, "eval_screenspot_loss_iou": 1.0364583333333333, "eval_screenspot_loss_num": 0.22825113932291666, "eval_screenspot_loss_xval": 3.21484375, "eval_screenspot_runtime": 110.6768, "eval_screenspot_samples_per_second": 0.804, "eval_screenspot_steps_per_second": 0.027, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.1872045677914541, "eval_compot_CIoU": -0.08787180855870247, "eval_compot_GIoU": -0.045600379817187786, "eval_compot_IoU": 0.09643351286649704, "eval_compot_MAE_all": 0.20011915266513824, "eval_compot_MAE_h": 0.1415417194366455, "eval_compot_MAE_w": 0.21793979406356812, "eval_compot_MAE_x_boxes": 0.20072585344314575, "eval_compot_MAE_y_boxes": 0.09426753595471382, "eval_compot_NUM_probability": 0.9998772144317627, "eval_compot_inside_bbox": 0.2170138955116272, "eval_compot_loss": 3.152869939804077, "eval_compot_loss_ce": 0.002876733778975904, "eval_compot_loss_iou": 1.078125, "eval_compot_loss_num": 0.204376220703125, "eval_compot_loss_xval": 3.177734375, "eval_compot_runtime": 77.9382, "eval_compot_samples_per_second": 0.642, "eval_compot_steps_per_second": 0.026, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.1872045677914541, "eval_custom_ui_MAE_all": 0.15138039737939835, "eval_custom_ui_MAE_x": 0.1302195005118847, "eval_custom_ui_MAE_y": 0.1725413054227829, "eval_custom_ui_NUM_probability": 0.9990260899066925, "eval_custom_ui_loss": 0.8326959013938904, "eval_custom_ui_loss_ce": 0.13630567491054535, "eval_custom_ui_loss_num": 0.149688720703125, "eval_custom_ui_loss_xval": 0.7489013671875, "eval_custom_ui_runtime": 51.0925, "eval_custom_ui_samples_per_second": 0.979, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.1872045677914541, "loss": 0.9159218072891235, "loss_ce": 0.14614643156528473, "loss_iou": 0.0, "loss_num": 0.154296875, "loss_xval": 0.76953125, "num_input_tokens_seen": 132432716, "step": 2000 }, { "epoch": 0.18729817007534985, "grad_norm": 36.8077392578125, "learning_rate": 5e-05, "loss": 1.5314, "num_input_tokens_seen": 132499200, "step": 2001 }, { "epoch": 0.18729817007534985, "loss": 1.541508436203003, "loss_ce": 0.0053756022825837135, "loss_iou": 0.640625, "loss_num": 0.051513671875, "loss_xval": 1.5390625, "num_input_tokens_seen": 132499200, "step": 2001 }, { "epoch": 0.18739177235924556, "grad_norm": 22.555620193481445, "learning_rate": 5e-05, "loss": 1.4048, "num_input_tokens_seen": 132566316, "step": 2002 }, { "epoch": 0.18739177235924556, "loss": 1.3378509283065796, "loss_ce": 0.0019134902395308018, "loss_iou": 0.55859375, "loss_num": 0.043701171875, "loss_xval": 1.3359375, "num_input_tokens_seen": 132566316, "step": 2002 }, { "epoch": 0.1874853746431413, "grad_norm": 18.991727828979492, "learning_rate": 5e-05, "loss": 1.2057, "num_input_tokens_seen": 132631852, "step": 2003 }, { "epoch": 0.1874853746431413, "loss": 1.2083985805511475, "loss_ce": 0.00612806249409914, "loss_iou": 0.498046875, "loss_num": 0.04150390625, "loss_xval": 1.203125, "num_input_tokens_seen": 132631852, "step": 2003 }, { "epoch": 0.187578976927037, "grad_norm": 26.7177677154541, "learning_rate": 5e-05, "loss": 1.5627, "num_input_tokens_seen": 132697524, "step": 2004 }, { "epoch": 0.187578976927037, "loss": 1.4155142307281494, "loss_ce": 0.006334573030471802, "loss_iou": 0.59375, "loss_num": 0.04345703125, "loss_xval": 1.40625, "num_input_tokens_seen": 132697524, "step": 2004 }, { "epoch": 0.18767257921093275, "grad_norm": 31.37240982055664, "learning_rate": 5e-05, "loss": 1.5654, "num_input_tokens_seen": 132762980, "step": 2005 }, { "epoch": 0.18767257921093275, "loss": 1.430168628692627, "loss_ce": 0.0034108958207070827, "loss_iou": 0.640625, "loss_num": 0.029541015625, "loss_xval": 1.4296875, "num_input_tokens_seen": 132762980, "step": 2005 }, { "epoch": 0.18776618149482846, "grad_norm": 22.789878845214844, "learning_rate": 5e-05, "loss": 1.5458, "num_input_tokens_seen": 132828428, "step": 2006 }, { "epoch": 0.18776618149482846, "loss": 1.5693515539169312, "loss_ce": 0.005386617965996265, "loss_iou": 0.6875, "loss_num": 0.037109375, "loss_xval": 1.5625, "num_input_tokens_seen": 132828428, "step": 2006 }, { "epoch": 0.1878597837787242, "grad_norm": 23.16802406311035, "learning_rate": 5e-05, "loss": 1.3025, "num_input_tokens_seen": 132894728, "step": 2007 }, { "epoch": 0.1878597837787242, "loss": 1.2459523677825928, "loss_ce": 0.004741411656141281, "loss_iou": 0.546875, "loss_num": 0.029541015625, "loss_xval": 1.2421875, "num_input_tokens_seen": 132894728, "step": 2007 }, { "epoch": 0.18795338606261994, "grad_norm": 18.789785385131836, "learning_rate": 5e-05, "loss": 1.3813, "num_input_tokens_seen": 132961028, "step": 2008 }, { "epoch": 0.18795338606261994, "loss": 1.3989194631576538, "loss_ce": 0.007317899260669947, "loss_iou": 0.6171875, "loss_num": 0.03125, "loss_xval": 1.390625, "num_input_tokens_seen": 132961028, "step": 2008 }, { "epoch": 0.18804698834651565, "grad_norm": 36.02462387084961, "learning_rate": 5e-05, "loss": 1.5948, "num_input_tokens_seen": 133027816, "step": 2009 }, { "epoch": 0.18804698834651565, "loss": 1.7780526876449585, "loss_ce": 0.011451136320829391, "loss_iou": 0.703125, "loss_num": 0.0732421875, "loss_xval": 1.765625, "num_input_tokens_seen": 133027816, "step": 2009 }, { "epoch": 0.1881405906304114, "grad_norm": 17.939682006835938, "learning_rate": 5e-05, "loss": 1.4303, "num_input_tokens_seen": 133093232, "step": 2010 }, { "epoch": 0.1881405906304114, "loss": 1.3121365308761597, "loss_ce": 0.008486653678119183, "loss_iou": 0.5546875, "loss_num": 0.038818359375, "loss_xval": 1.3046875, "num_input_tokens_seen": 133093232, "step": 2010 }, { "epoch": 0.1882341929143071, "grad_norm": 19.468914031982422, "learning_rate": 5e-05, "loss": 1.3176, "num_input_tokens_seen": 133159060, "step": 2011 }, { "epoch": 0.1882341929143071, "loss": 1.310737133026123, "loss_ce": 0.004096532706171274, "loss_iou": 0.55859375, "loss_num": 0.038330078125, "loss_xval": 1.3046875, "num_input_tokens_seen": 133159060, "step": 2011 }, { "epoch": 0.18832779519820284, "grad_norm": 26.613019943237305, "learning_rate": 5e-05, "loss": 1.4038, "num_input_tokens_seen": 133226052, "step": 2012 }, { "epoch": 0.18832779519820284, "loss": 1.391554355621338, "loss_ce": 0.00971841812133789, "loss_iou": 0.5703125, "loss_num": 0.048095703125, "loss_xval": 1.3828125, "num_input_tokens_seen": 133226052, "step": 2012 }, { "epoch": 0.18842139748209857, "grad_norm": 22.770835876464844, "learning_rate": 5e-05, "loss": 1.4995, "num_input_tokens_seen": 133292700, "step": 2013 }, { "epoch": 0.18842139748209857, "loss": 1.5055845975875854, "loss_ce": 0.007049466483294964, "loss_iou": 0.6171875, "loss_num": 0.052490234375, "loss_xval": 1.5, "num_input_tokens_seen": 133292700, "step": 2013 }, { "epoch": 0.18851499976599428, "grad_norm": 31.13936424255371, "learning_rate": 5e-05, "loss": 1.3374, "num_input_tokens_seen": 133358588, "step": 2014 }, { "epoch": 0.18851499976599428, "loss": 1.3625235557556152, "loss_ce": 0.006780206225812435, "loss_iou": 0.6015625, "loss_num": 0.030029296875, "loss_xval": 1.359375, "num_input_tokens_seen": 133358588, "step": 2014 }, { "epoch": 0.18860860204989002, "grad_norm": 21.765378952026367, "learning_rate": 5e-05, "loss": 1.6432, "num_input_tokens_seen": 133424892, "step": 2015 }, { "epoch": 0.18860860204989002, "loss": 1.6781728267669678, "loss_ce": 0.0044667646288871765, "loss_iou": 0.67578125, "loss_num": 0.0654296875, "loss_xval": 1.671875, "num_input_tokens_seen": 133424892, "step": 2015 }, { "epoch": 0.18870220433378573, "grad_norm": 21.015169143676758, "learning_rate": 5e-05, "loss": 1.4138, "num_input_tokens_seen": 133490260, "step": 2016 }, { "epoch": 0.18870220433378573, "loss": 1.2584657669067383, "loss_ce": 0.0017519108951091766, "loss_iou": 0.498046875, "loss_num": 0.05224609375, "loss_xval": 1.2578125, "num_input_tokens_seen": 133490260, "step": 2016 }, { "epoch": 0.18879580661768147, "grad_norm": 45.764705657958984, "learning_rate": 5e-05, "loss": 1.6168, "num_input_tokens_seen": 133557864, "step": 2017 }, { "epoch": 0.18879580661768147, "loss": 1.614818811416626, "loss_ce": 0.0015376550145447254, "loss_iou": 0.7265625, "loss_num": 0.03125, "loss_xval": 1.609375, "num_input_tokens_seen": 133557864, "step": 2017 }, { "epoch": 0.1888894089015772, "grad_norm": 20.390649795532227, "learning_rate": 5e-05, "loss": 1.7049, "num_input_tokens_seen": 133624080, "step": 2018 }, { "epoch": 0.1888894089015772, "loss": 1.7868666648864746, "loss_ce": 0.0036635464057326317, "loss_iou": 0.73828125, "loss_num": 0.061767578125, "loss_xval": 1.78125, "num_input_tokens_seen": 133624080, "step": 2018 }, { "epoch": 0.18898301118547292, "grad_norm": 23.811748504638672, "learning_rate": 5e-05, "loss": 1.4238, "num_input_tokens_seen": 133691348, "step": 2019 }, { "epoch": 0.18898301118547292, "loss": 1.4709348678588867, "loss_ce": 0.005602835211902857, "loss_iou": 0.64453125, "loss_num": 0.035400390625, "loss_xval": 1.46875, "num_input_tokens_seen": 133691348, "step": 2019 }, { "epoch": 0.18907661346936866, "grad_norm": 30.91826629638672, "learning_rate": 5e-05, "loss": 1.3329, "num_input_tokens_seen": 133758316, "step": 2020 }, { "epoch": 0.18907661346936866, "loss": 1.3241961002349854, "loss_ce": 0.0019304631277918816, "loss_iou": 0.5859375, "loss_num": 0.0303955078125, "loss_xval": 1.3203125, "num_input_tokens_seen": 133758316, "step": 2020 }, { "epoch": 0.18917021575326437, "grad_norm": 20.472759246826172, "learning_rate": 5e-05, "loss": 1.7706, "num_input_tokens_seen": 133826016, "step": 2021 }, { "epoch": 0.18917021575326437, "loss": 1.650133728981018, "loss_ce": 0.004625923000276089, "loss_iou": 0.70703125, "loss_num": 0.046142578125, "loss_xval": 1.6484375, "num_input_tokens_seen": 133826016, "step": 2021 }, { "epoch": 0.1892638180371601, "grad_norm": 28.499732971191406, "learning_rate": 5e-05, "loss": 1.4081, "num_input_tokens_seen": 133892244, "step": 2022 }, { "epoch": 0.1892638180371601, "loss": 1.5334014892578125, "loss_ce": 0.004104674328118563, "loss_iou": 0.640625, "loss_num": 0.050048828125, "loss_xval": 1.53125, "num_input_tokens_seen": 133892244, "step": 2022 }, { "epoch": 0.18935742032105585, "grad_norm": 43.2701301574707, "learning_rate": 5e-05, "loss": 1.3486, "num_input_tokens_seen": 133957804, "step": 2023 }, { "epoch": 0.18935742032105585, "loss": 1.3434921503067017, "loss_ce": 0.003160080872476101, "loss_iou": 0.5859375, "loss_num": 0.034423828125, "loss_xval": 1.34375, "num_input_tokens_seen": 133957804, "step": 2023 }, { "epoch": 0.18945102260495156, "grad_norm": 24.83196258544922, "learning_rate": 5e-05, "loss": 1.4019, "num_input_tokens_seen": 134024396, "step": 2024 }, { "epoch": 0.18945102260495156, "loss": 1.3687567710876465, "loss_ce": 0.0015692950692027807, "loss_iou": 0.59375, "loss_num": 0.035400390625, "loss_xval": 1.3671875, "num_input_tokens_seen": 134024396, "step": 2024 }, { "epoch": 0.1895446248888473, "grad_norm": 31.510297775268555, "learning_rate": 5e-05, "loss": 1.4844, "num_input_tokens_seen": 134091252, "step": 2025 }, { "epoch": 0.1895446248888473, "loss": 1.485026240348816, "loss_ce": 0.006510650739073753, "loss_iou": 0.625, "loss_num": 0.044677734375, "loss_xval": 1.4765625, "num_input_tokens_seen": 134091252, "step": 2025 }, { "epoch": 0.189638227172743, "grad_norm": 15.40998649597168, "learning_rate": 5e-05, "loss": 1.4777, "num_input_tokens_seen": 134158108, "step": 2026 }, { "epoch": 0.189638227172743, "loss": 0.9744458198547363, "loss_ce": 0.008137264288961887, "loss_iou": 0.416015625, "loss_num": 0.0264892578125, "loss_xval": 0.96484375, "num_input_tokens_seen": 134158108, "step": 2026 }, { "epoch": 0.18973182945663875, "grad_norm": 79.20665740966797, "learning_rate": 5e-05, "loss": 1.4997, "num_input_tokens_seen": 134225672, "step": 2027 }, { "epoch": 0.18973182945663875, "loss": 1.300979495048523, "loss_ce": 0.0021513812243938446, "loss_iou": 0.5703125, "loss_num": 0.03173828125, "loss_xval": 1.296875, "num_input_tokens_seen": 134225672, "step": 2027 }, { "epoch": 0.18982543174053446, "grad_norm": 22.0035457611084, "learning_rate": 5e-05, "loss": 1.2491, "num_input_tokens_seen": 134291564, "step": 2028 }, { "epoch": 0.18982543174053446, "loss": 1.1726512908935547, "loss_ce": 0.008924500085413456, "loss_iou": 0.494140625, "loss_num": 0.034912109375, "loss_xval": 1.1640625, "num_input_tokens_seen": 134291564, "step": 2028 }, { "epoch": 0.1899190340244302, "grad_norm": 24.686311721801758, "learning_rate": 5e-05, "loss": 1.3878, "num_input_tokens_seen": 134357992, "step": 2029 }, { "epoch": 0.1899190340244302, "loss": 1.517027497291565, "loss_ce": 0.0033556390553712845, "loss_iou": 0.65625, "loss_num": 0.041015625, "loss_xval": 1.515625, "num_input_tokens_seen": 134357992, "step": 2029 }, { "epoch": 0.19001263630832593, "grad_norm": 35.2572135925293, "learning_rate": 5e-05, "loss": 1.5802, "num_input_tokens_seen": 134424524, "step": 2030 }, { "epoch": 0.19001263630832593, "loss": 1.7430429458618164, "loss_ce": 0.003785067005082965, "loss_iou": 0.74609375, "loss_num": 0.048828125, "loss_xval": 1.7421875, "num_input_tokens_seen": 134424524, "step": 2030 }, { "epoch": 0.19010623859222164, "grad_norm": 22.11330795288086, "learning_rate": 5e-05, "loss": 1.5555, "num_input_tokens_seen": 134489824, "step": 2031 }, { "epoch": 0.19010623859222164, "loss": 1.5073288679122925, "loss_ce": 0.0014695466961711645, "loss_iou": 0.69140625, "loss_num": 0.024169921875, "loss_xval": 1.5078125, "num_input_tokens_seen": 134489824, "step": 2031 }, { "epoch": 0.19019984087611738, "grad_norm": 24.93625831604004, "learning_rate": 5e-05, "loss": 1.4622, "num_input_tokens_seen": 134556340, "step": 2032 }, { "epoch": 0.19019984087611738, "loss": 1.3464317321777344, "loss_ce": 0.003658217377960682, "loss_iou": 0.55859375, "loss_num": 0.044921875, "loss_xval": 1.34375, "num_input_tokens_seen": 134556340, "step": 2032 }, { "epoch": 0.1902934431600131, "grad_norm": 19.162181854248047, "learning_rate": 5e-05, "loss": 1.3348, "num_input_tokens_seen": 134622728, "step": 2033 }, { "epoch": 0.1902934431600131, "loss": 1.2705814838409424, "loss_ce": 0.004468115046620369, "loss_iou": 0.5390625, "loss_num": 0.036865234375, "loss_xval": 1.265625, "num_input_tokens_seen": 134622728, "step": 2033 }, { "epoch": 0.19038704544390883, "grad_norm": 76.74585723876953, "learning_rate": 5e-05, "loss": 1.5657, "num_input_tokens_seen": 134688572, "step": 2034 }, { "epoch": 0.19038704544390883, "loss": 1.6774089336395264, "loss_ce": 0.0035807411186397076, "loss_iou": 0.69140625, "loss_num": 0.0576171875, "loss_xval": 1.671875, "num_input_tokens_seen": 134688572, "step": 2034 }, { "epoch": 0.19048064772780457, "grad_norm": 19.1229248046875, "learning_rate": 5e-05, "loss": 1.5923, "num_input_tokens_seen": 134754856, "step": 2035 }, { "epoch": 0.19048064772780457, "loss": 1.5887892246246338, "loss_ce": 0.0052930498495697975, "loss_iou": 0.6640625, "loss_num": 0.052001953125, "loss_xval": 1.5859375, "num_input_tokens_seen": 134754856, "step": 2035 }, { "epoch": 0.19057425001170028, "grad_norm": 17.247560501098633, "learning_rate": 5e-05, "loss": 1.4603, "num_input_tokens_seen": 134821232, "step": 2036 }, { "epoch": 0.19057425001170028, "loss": 1.5276718139648438, "loss_ce": 0.0042343344539403915, "loss_iou": 0.63671875, "loss_num": 0.0498046875, "loss_xval": 1.5234375, "num_input_tokens_seen": 134821232, "step": 2036 }, { "epoch": 0.19066785229559602, "grad_norm": 45.40583038330078, "learning_rate": 5e-05, "loss": 1.4312, "num_input_tokens_seen": 134887156, "step": 2037 }, { "epoch": 0.19066785229559602, "loss": 1.5920612812042236, "loss_ce": 0.005147155839949846, "loss_iou": 0.65234375, "loss_num": 0.056884765625, "loss_xval": 1.5859375, "num_input_tokens_seen": 134887156, "step": 2037 }, { "epoch": 0.19076145457949173, "grad_norm": 24.952116012573242, "learning_rate": 5e-05, "loss": 1.7742, "num_input_tokens_seen": 134953656, "step": 2038 }, { "epoch": 0.19076145457949173, "loss": 1.8267306089401245, "loss_ce": 0.005441478453576565, "loss_iou": 0.75390625, "loss_num": 0.06298828125, "loss_xval": 1.8203125, "num_input_tokens_seen": 134953656, "step": 2038 }, { "epoch": 0.19085505686338747, "grad_norm": 31.87831687927246, "learning_rate": 5e-05, "loss": 1.2528, "num_input_tokens_seen": 135019064, "step": 2039 }, { "epoch": 0.19085505686338747, "loss": 1.0912940502166748, "loss_ce": 0.0029150533955544233, "loss_iou": 0.47265625, "loss_num": 0.0289306640625, "loss_xval": 1.0859375, "num_input_tokens_seen": 135019064, "step": 2039 }, { "epoch": 0.1909486591472832, "grad_norm": 30.30205726623535, "learning_rate": 5e-05, "loss": 1.4011, "num_input_tokens_seen": 135085700, "step": 2040 }, { "epoch": 0.1909486591472832, "loss": 1.2939293384552002, "loss_ce": 0.0029137025121599436, "loss_iou": 0.57421875, "loss_num": 0.0274658203125, "loss_xval": 1.2890625, "num_input_tokens_seen": 135085700, "step": 2040 }, { "epoch": 0.19104226143117892, "grad_norm": 18.505590438842773, "learning_rate": 5e-05, "loss": 1.5128, "num_input_tokens_seen": 135151936, "step": 2041 }, { "epoch": 0.19104226143117892, "loss": 1.4650330543518066, "loss_ce": 0.0026306798681616783, "loss_iou": 0.640625, "loss_num": 0.037109375, "loss_xval": 1.4609375, "num_input_tokens_seen": 135151936, "step": 2041 }, { "epoch": 0.19113586371507466, "grad_norm": 28.354467391967773, "learning_rate": 5e-05, "loss": 1.2614, "num_input_tokens_seen": 135217880, "step": 2042 }, { "epoch": 0.19113586371507466, "loss": 1.3484842777252197, "loss_ce": 0.003269465174525976, "loss_iou": 0.53125, "loss_num": 0.05712890625, "loss_xval": 1.34375, "num_input_tokens_seen": 135217880, "step": 2042 }, { "epoch": 0.19122946599897037, "grad_norm": 29.115802764892578, "learning_rate": 5e-05, "loss": 1.3967, "num_input_tokens_seen": 135284100, "step": 2043 }, { "epoch": 0.19122946599897037, "loss": 1.264920949935913, "loss_ce": 0.0066202254965901375, "loss_iou": 0.5859375, "loss_num": 0.016845703125, "loss_xval": 1.2578125, "num_input_tokens_seen": 135284100, "step": 2043 }, { "epoch": 0.1913230682828661, "grad_norm": 16.204648971557617, "learning_rate": 5e-05, "loss": 1.3888, "num_input_tokens_seen": 135349864, "step": 2044 }, { "epoch": 0.1913230682828661, "loss": 1.231990098953247, "loss_ce": 0.010432527400553226, "loss_iou": 0.5078125, "loss_num": 0.04150390625, "loss_xval": 1.21875, "num_input_tokens_seen": 135349864, "step": 2044 }, { "epoch": 0.19141667056676182, "grad_norm": 10.998099327087402, "learning_rate": 5e-05, "loss": 1.3206, "num_input_tokens_seen": 135416512, "step": 2045 }, { "epoch": 0.19141667056676182, "loss": 1.4172794818878174, "loss_ce": 0.008099868893623352, "loss_iou": 0.546875, "loss_num": 0.0634765625, "loss_xval": 1.40625, "num_input_tokens_seen": 135416512, "step": 2045 }, { "epoch": 0.19151027285065755, "grad_norm": 22.290937423706055, "learning_rate": 5e-05, "loss": 1.3779, "num_input_tokens_seen": 135481948, "step": 2046 }, { "epoch": 0.19151027285065755, "loss": 1.562142252922058, "loss_ce": 0.014046537689864635, "loss_iou": 0.5703125, "loss_num": 0.0810546875, "loss_xval": 1.546875, "num_input_tokens_seen": 135481948, "step": 2046 }, { "epoch": 0.1916038751345533, "grad_norm": 30.228641510009766, "learning_rate": 5e-05, "loss": 1.4567, "num_input_tokens_seen": 135547780, "step": 2047 }, { "epoch": 0.1916038751345533, "loss": 1.78278648853302, "loss_ce": 0.00544277299195528, "loss_iou": 0.71484375, "loss_num": 0.06884765625, "loss_xval": 1.78125, "num_input_tokens_seen": 135547780, "step": 2047 }, { "epoch": 0.191697477418449, "grad_norm": 27.665626525878906, "learning_rate": 5e-05, "loss": 1.6013, "num_input_tokens_seen": 135613412, "step": 2048 }, { "epoch": 0.191697477418449, "loss": 1.3266444206237793, "loss_ce": 0.005355388857424259, "loss_iou": 0.56640625, "loss_num": 0.037109375, "loss_xval": 1.3203125, "num_input_tokens_seen": 135613412, "step": 2048 }, { "epoch": 0.19179107970234474, "grad_norm": 129.3882293701172, "learning_rate": 5e-05, "loss": 1.3811, "num_input_tokens_seen": 135680668, "step": 2049 }, { "epoch": 0.19179107970234474, "loss": 1.3647184371948242, "loss_ce": 0.005343450233340263, "loss_iou": 0.625, "loss_num": 0.022705078125, "loss_xval": 1.359375, "num_input_tokens_seen": 135680668, "step": 2049 }, { "epoch": 0.19188468198624045, "grad_norm": 23.881717681884766, "learning_rate": 5e-05, "loss": 1.5054, "num_input_tokens_seen": 135747252, "step": 2050 }, { "epoch": 0.19188468198624045, "loss": 1.521451711654663, "loss_ce": 0.007779826410114765, "loss_iou": 0.6484375, "loss_num": 0.0439453125, "loss_xval": 1.515625, "num_input_tokens_seen": 135747252, "step": 2050 }, { "epoch": 0.1919782842701362, "grad_norm": 21.621906280517578, "learning_rate": 5e-05, "loss": 1.2942, "num_input_tokens_seen": 135813520, "step": 2051 }, { "epoch": 0.1919782842701362, "loss": 1.4511830806732178, "loss_ce": 0.003917410969734192, "loss_iou": 0.609375, "loss_num": 0.04638671875, "loss_xval": 1.4453125, "num_input_tokens_seen": 135813520, "step": 2051 }, { "epoch": 0.19207188655403193, "grad_norm": 43.121639251708984, "learning_rate": 5e-05, "loss": 1.4368, "num_input_tokens_seen": 135879308, "step": 2052 }, { "epoch": 0.19207188655403193, "loss": 1.5804208517074585, "loss_ce": 0.002295836340636015, "loss_iou": 0.68359375, "loss_num": 0.042236328125, "loss_xval": 1.578125, "num_input_tokens_seen": 135879308, "step": 2052 }, { "epoch": 0.19216548883792764, "grad_norm": 38.35646438598633, "learning_rate": 5e-05, "loss": 1.554, "num_input_tokens_seen": 135945264, "step": 2053 }, { "epoch": 0.19216548883792764, "loss": 1.4601187705993652, "loss_ce": 0.010167606174945831, "loss_iou": 0.60546875, "loss_num": 0.0478515625, "loss_xval": 1.453125, "num_input_tokens_seen": 135945264, "step": 2053 }, { "epoch": 0.19225909112182338, "grad_norm": 15.436667442321777, "learning_rate": 5e-05, "loss": 1.3526, "num_input_tokens_seen": 136012284, "step": 2054 }, { "epoch": 0.19225909112182338, "loss": 1.357641339302063, "loss_ce": 0.004125708714127541, "loss_iou": 0.5234375, "loss_num": 0.06201171875, "loss_xval": 1.3515625, "num_input_tokens_seen": 136012284, "step": 2054 }, { "epoch": 0.1923526934057191, "grad_norm": 35.86705017089844, "learning_rate": 5e-05, "loss": 1.2888, "num_input_tokens_seen": 136078704, "step": 2055 }, { "epoch": 0.1923526934057191, "loss": 1.2883079051971436, "loss_ce": 0.005776176694780588, "loss_iou": 0.53515625, "loss_num": 0.0419921875, "loss_xval": 1.28125, "num_input_tokens_seen": 136078704, "step": 2055 }, { "epoch": 0.19244629568961483, "grad_norm": 20.787574768066406, "learning_rate": 5e-05, "loss": 1.5442, "num_input_tokens_seen": 136145564, "step": 2056 }, { "epoch": 0.19244629568961483, "loss": 1.7299318313598633, "loss_ce": 0.0043459320440888405, "loss_iou": 0.71875, "loss_num": 0.058349609375, "loss_xval": 1.7265625, "num_input_tokens_seen": 136145564, "step": 2056 }, { "epoch": 0.19253989797351057, "grad_norm": 16.39914321899414, "learning_rate": 5e-05, "loss": 1.2208, "num_input_tokens_seen": 136212092, "step": 2057 }, { "epoch": 0.19253989797351057, "loss": 1.276814579963684, "loss_ce": 0.003377099521458149, "loss_iou": 0.5703125, "loss_num": 0.0257568359375, "loss_xval": 1.2734375, "num_input_tokens_seen": 136212092, "step": 2057 }, { "epoch": 0.19263350025740628, "grad_norm": 22.33026123046875, "learning_rate": 5e-05, "loss": 1.5323, "num_input_tokens_seen": 136278912, "step": 2058 }, { "epoch": 0.19263350025740628, "loss": 1.7549822330474854, "loss_ce": 0.005958713591098785, "loss_iou": 0.75390625, "loss_num": 0.048095703125, "loss_xval": 1.75, "num_input_tokens_seen": 136278912, "step": 2058 }, { "epoch": 0.19272710254130201, "grad_norm": 17.806638717651367, "learning_rate": 5e-05, "loss": 1.5435, "num_input_tokens_seen": 136345416, "step": 2059 }, { "epoch": 0.19272710254130201, "loss": 1.5639630556106567, "loss_ce": 0.006345891393721104, "loss_iou": 0.671875, "loss_num": 0.043212890625, "loss_xval": 1.5546875, "num_input_tokens_seen": 136345416, "step": 2059 }, { "epoch": 0.19282070482519773, "grad_norm": 17.42680549621582, "learning_rate": 5e-05, "loss": 1.3353, "num_input_tokens_seen": 136411592, "step": 2060 }, { "epoch": 0.19282070482519773, "loss": 1.1476075649261475, "loss_ce": 0.007470767013728619, "loss_iou": 0.5, "loss_num": 0.0274658203125, "loss_xval": 1.140625, "num_input_tokens_seen": 136411592, "step": 2060 }, { "epoch": 0.19291430710909346, "grad_norm": 18.45506477355957, "learning_rate": 5e-05, "loss": 1.3657, "num_input_tokens_seen": 136478500, "step": 2061 }, { "epoch": 0.19291430710909346, "loss": 1.2191590070724487, "loss_ce": 0.004803495481610298, "loss_iou": 0.5390625, "loss_num": 0.027587890625, "loss_xval": 1.2109375, "num_input_tokens_seen": 136478500, "step": 2061 }, { "epoch": 0.1930079093929892, "grad_norm": 23.658056259155273, "learning_rate": 5e-05, "loss": 1.5858, "num_input_tokens_seen": 136543556, "step": 2062 }, { "epoch": 0.1930079093929892, "loss": 1.5524146556854248, "loss_ce": 0.007004569284617901, "loss_iou": 0.640625, "loss_num": 0.052734375, "loss_xval": 1.546875, "num_input_tokens_seen": 136543556, "step": 2062 }, { "epoch": 0.1931015116768849, "grad_norm": 69.70921325683594, "learning_rate": 5e-05, "loss": 1.6602, "num_input_tokens_seen": 136609576, "step": 2063 }, { "epoch": 0.1931015116768849, "loss": 1.5370267629623413, "loss_ce": 0.005044293124228716, "loss_iou": 0.625, "loss_num": 0.056396484375, "loss_xval": 1.53125, "num_input_tokens_seen": 136609576, "step": 2063 }, { "epoch": 0.19319511396078065, "grad_norm": 21.903438568115234, "learning_rate": 5e-05, "loss": 1.561, "num_input_tokens_seen": 136676252, "step": 2064 }, { "epoch": 0.19319511396078065, "loss": 1.5233875513076782, "loss_ce": 0.004832881968468428, "loss_iou": 0.63671875, "loss_num": 0.049072265625, "loss_xval": 1.515625, "num_input_tokens_seen": 136676252, "step": 2064 }, { "epoch": 0.19328871624467636, "grad_norm": 11.89264965057373, "learning_rate": 5e-05, "loss": 1.5001, "num_input_tokens_seen": 136742748, "step": 2065 }, { "epoch": 0.19328871624467636, "loss": 1.5451369285583496, "loss_ce": 0.0021682712249457836, "loss_iou": 0.6640625, "loss_num": 0.04296875, "loss_xval": 1.546875, "num_input_tokens_seen": 136742748, "step": 2065 }, { "epoch": 0.1933823185285721, "grad_norm": 30.70446014404297, "learning_rate": 5e-05, "loss": 1.4357, "num_input_tokens_seen": 136809404, "step": 2066 }, { "epoch": 0.1933823185285721, "loss": 1.6584296226501465, "loss_ce": 0.0046210456639528275, "loss_iou": 0.66796875, "loss_num": 0.0634765625, "loss_xval": 1.65625, "num_input_tokens_seen": 136809404, "step": 2066 }, { "epoch": 0.1934759208124678, "grad_norm": 32.82989501953125, "learning_rate": 5e-05, "loss": 1.6026, "num_input_tokens_seen": 136875364, "step": 2067 }, { "epoch": 0.1934759208124678, "loss": 1.6275928020477295, "loss_ce": 0.004057623445987701, "loss_iou": 0.66015625, "loss_num": 0.06103515625, "loss_xval": 1.625, "num_input_tokens_seen": 136875364, "step": 2067 }, { "epoch": 0.19356952309636355, "grad_norm": 18.987201690673828, "learning_rate": 5e-05, "loss": 1.5606, "num_input_tokens_seen": 136942216, "step": 2068 }, { "epoch": 0.19356952309636355, "loss": 1.5228208303451538, "loss_ce": 0.0038388194516301155, "loss_iou": 0.6328125, "loss_num": 0.050048828125, "loss_xval": 1.515625, "num_input_tokens_seen": 136942216, "step": 2068 }, { "epoch": 0.1936631253802593, "grad_norm": 10.04777717590332, "learning_rate": 5e-05, "loss": 1.3714, "num_input_tokens_seen": 137008152, "step": 2069 }, { "epoch": 0.1936631253802593, "loss": 1.1534812450408936, "loss_ce": 0.004799516871571541, "loss_iou": 0.49609375, "loss_num": 0.031494140625, "loss_xval": 1.1484375, "num_input_tokens_seen": 137008152, "step": 2069 }, { "epoch": 0.193756727664155, "grad_norm": 15.951964378356934, "learning_rate": 5e-05, "loss": 1.1364, "num_input_tokens_seen": 137075148, "step": 2070 }, { "epoch": 0.193756727664155, "loss": 1.2243413925170898, "loss_ce": 0.006354369223117828, "loss_iou": 0.49609375, "loss_num": 0.044677734375, "loss_xval": 1.21875, "num_input_tokens_seen": 137075148, "step": 2070 }, { "epoch": 0.19385032994805074, "grad_norm": 77.72500610351562, "learning_rate": 5e-05, "loss": 1.4225, "num_input_tokens_seen": 137141752, "step": 2071 }, { "epoch": 0.19385032994805074, "loss": 1.2814772129058838, "loss_ce": 0.003156971652060747, "loss_iou": 0.57421875, "loss_num": 0.026611328125, "loss_xval": 1.28125, "num_input_tokens_seen": 137141752, "step": 2071 }, { "epoch": 0.19394393223194645, "grad_norm": 41.3846435546875, "learning_rate": 5e-05, "loss": 1.5205, "num_input_tokens_seen": 137208480, "step": 2072 }, { "epoch": 0.19394393223194645, "loss": 1.5058314800262451, "loss_ce": 0.007784511893987656, "loss_iou": 0.640625, "loss_num": 0.04296875, "loss_xval": 1.5, "num_input_tokens_seen": 137208480, "step": 2072 }, { "epoch": 0.19403753451584219, "grad_norm": 22.192081451416016, "learning_rate": 5e-05, "loss": 1.6468, "num_input_tokens_seen": 137274816, "step": 2073 }, { "epoch": 0.19403753451584219, "loss": 1.4129598140716553, "loss_ce": 0.007442166563123465, "loss_iou": 0.6015625, "loss_num": 0.0400390625, "loss_xval": 1.40625, "num_input_tokens_seen": 137274816, "step": 2073 }, { "epoch": 0.19413113679973792, "grad_norm": 19.99022674560547, "learning_rate": 5e-05, "loss": 1.3093, "num_input_tokens_seen": 137341384, "step": 2074 }, { "epoch": 0.19413113679973792, "loss": 1.2690801620483398, "loss_ce": 0.006384734064340591, "loss_iou": 0.53515625, "loss_num": 0.038818359375, "loss_xval": 1.265625, "num_input_tokens_seen": 137341384, "step": 2074 }, { "epoch": 0.19422473908363364, "grad_norm": 35.265750885009766, "learning_rate": 5e-05, "loss": 1.4349, "num_input_tokens_seen": 137408812, "step": 2075 }, { "epoch": 0.19422473908363364, "loss": 1.3765074014663696, "loss_ce": 0.004437153693288565, "loss_iou": 0.59765625, "loss_num": 0.03515625, "loss_xval": 1.375, "num_input_tokens_seen": 137408812, "step": 2075 }, { "epoch": 0.19431834136752937, "grad_norm": 22.01068115234375, "learning_rate": 5e-05, "loss": 1.7257, "num_input_tokens_seen": 137475192, "step": 2076 }, { "epoch": 0.19431834136752937, "loss": 1.6239678859710693, "loss_ce": 0.0038506705313920975, "loss_iou": 0.671875, "loss_num": 0.0546875, "loss_xval": 1.6171875, "num_input_tokens_seen": 137475192, "step": 2076 }, { "epoch": 0.19441194365142508, "grad_norm": 25.21246910095215, "learning_rate": 5e-05, "loss": 1.3686, "num_input_tokens_seen": 137542068, "step": 2077 }, { "epoch": 0.19441194365142508, "loss": 1.4951391220092773, "loss_ce": 0.005392924882471561, "loss_iou": 0.63671875, "loss_num": 0.043212890625, "loss_xval": 1.4921875, "num_input_tokens_seen": 137542068, "step": 2077 }, { "epoch": 0.19450554593532082, "grad_norm": 26.272916793823242, "learning_rate": 5e-05, "loss": 1.2421, "num_input_tokens_seen": 137608836, "step": 2078 }, { "epoch": 0.19450554593532082, "loss": 1.3518580198287964, "loss_ce": 0.005178361665457487, "loss_iou": 0.59765625, "loss_num": 0.030517578125, "loss_xval": 1.34375, "num_input_tokens_seen": 137608836, "step": 2078 }, { "epoch": 0.19459914821921656, "grad_norm": 19.860158920288086, "learning_rate": 5e-05, "loss": 1.2909, "num_input_tokens_seen": 137674152, "step": 2079 }, { "epoch": 0.19459914821921656, "loss": 1.1912102699279785, "loss_ce": 0.006884138099849224, "loss_iou": 0.515625, "loss_num": 0.030517578125, "loss_xval": 1.1875, "num_input_tokens_seen": 137674152, "step": 2079 }, { "epoch": 0.19469275050311227, "grad_norm": 19.28133773803711, "learning_rate": 5e-05, "loss": 1.3627, "num_input_tokens_seen": 137740688, "step": 2080 }, { "epoch": 0.19469275050311227, "loss": 1.4136195182800293, "loss_ce": 0.011275755241513252, "loss_iou": 0.59375, "loss_num": 0.042724609375, "loss_xval": 1.40625, "num_input_tokens_seen": 137740688, "step": 2080 }, { "epoch": 0.194786352787008, "grad_norm": 26.150165557861328, "learning_rate": 5e-05, "loss": 1.559, "num_input_tokens_seen": 137806812, "step": 2081 }, { "epoch": 0.194786352787008, "loss": 1.5323134660720825, "loss_ce": 0.0020399647764861584, "loss_iou": 0.63671875, "loss_num": 0.051025390625, "loss_xval": 1.53125, "num_input_tokens_seen": 137806812, "step": 2081 }, { "epoch": 0.19487995507090372, "grad_norm": 66.3988265991211, "learning_rate": 5e-05, "loss": 1.6166, "num_input_tokens_seen": 137872808, "step": 2082 }, { "epoch": 0.19487995507090372, "loss": 1.366492509841919, "loss_ce": 0.006629281677305698, "loss_iou": 0.578125, "loss_num": 0.040283203125, "loss_xval": 1.359375, "num_input_tokens_seen": 137872808, "step": 2082 }, { "epoch": 0.19497355735479946, "grad_norm": 23.48615074157715, "learning_rate": 5e-05, "loss": 1.4515, "num_input_tokens_seen": 137939268, "step": 2083 }, { "epoch": 0.19497355735479946, "loss": 1.219407320022583, "loss_ce": 0.004075351171195507, "loss_iou": 0.5234375, "loss_num": 0.032958984375, "loss_xval": 1.21875, "num_input_tokens_seen": 137939268, "step": 2083 }, { "epoch": 0.1950671596386952, "grad_norm": 26.136940002441406, "learning_rate": 5e-05, "loss": 1.2827, "num_input_tokens_seen": 138004908, "step": 2084 }, { "epoch": 0.1950671596386952, "loss": 1.1523009538650513, "loss_ce": 0.007281413301825523, "loss_iou": 0.49609375, "loss_num": 0.0301513671875, "loss_xval": 1.1484375, "num_input_tokens_seen": 138004908, "step": 2084 }, { "epoch": 0.1951607619225909, "grad_norm": 24.42344093322754, "learning_rate": 5e-05, "loss": 1.6681, "num_input_tokens_seen": 138072064, "step": 2085 }, { "epoch": 0.1951607619225909, "loss": 1.651878833770752, "loss_ce": 0.004906099289655685, "loss_iou": 0.69921875, "loss_num": 0.049560546875, "loss_xval": 1.6484375, "num_input_tokens_seen": 138072064, "step": 2085 }, { "epoch": 0.19525436420648665, "grad_norm": 26.99013328552246, "learning_rate": 5e-05, "loss": 1.3346, "num_input_tokens_seen": 138137952, "step": 2086 }, { "epoch": 0.19525436420648665, "loss": 1.3745825290679932, "loss_ce": 0.0025121495127677917, "loss_iou": 0.5390625, "loss_num": 0.05810546875, "loss_xval": 1.375, "num_input_tokens_seen": 138137952, "step": 2086 }, { "epoch": 0.19534796649038236, "grad_norm": 22.294784545898438, "learning_rate": 5e-05, "loss": 1.5853, "num_input_tokens_seen": 138202528, "step": 2087 }, { "epoch": 0.19534796649038236, "loss": 1.4166902303695679, "loss_ce": 0.006534058600664139, "loss_iou": 0.62109375, "loss_num": 0.033447265625, "loss_xval": 1.40625, "num_input_tokens_seen": 138202528, "step": 2087 }, { "epoch": 0.1954415687742781, "grad_norm": 13.048158645629883, "learning_rate": 5e-05, "loss": 1.3337, "num_input_tokens_seen": 138269120, "step": 2088 }, { "epoch": 0.1954415687742781, "loss": 1.5177942514419556, "loss_ce": 0.005098981782793999, "loss_iou": 0.625, "loss_num": 0.05224609375, "loss_xval": 1.515625, "num_input_tokens_seen": 138269120, "step": 2088 }, { "epoch": 0.1955351710581738, "grad_norm": 20.494586944580078, "learning_rate": 5e-05, "loss": 1.5094, "num_input_tokens_seen": 138335564, "step": 2089 }, { "epoch": 0.1955351710581738, "loss": 1.535733699798584, "loss_ce": 0.006436879746615887, "loss_iou": 0.6796875, "loss_num": 0.03466796875, "loss_xval": 1.53125, "num_input_tokens_seen": 138335564, "step": 2089 }, { "epoch": 0.19562877334206955, "grad_norm": 17.31364631652832, "learning_rate": 5e-05, "loss": 1.178, "num_input_tokens_seen": 138402628, "step": 2090 }, { "epoch": 0.19562877334206955, "loss": 1.0419156551361084, "loss_ce": 0.008224312216043472, "loss_iou": 0.44140625, "loss_num": 0.0299072265625, "loss_xval": 1.03125, "num_input_tokens_seen": 138402628, "step": 2090 }, { "epoch": 0.19572237562596528, "grad_norm": 12.1629056930542, "learning_rate": 5e-05, "loss": 1.513, "num_input_tokens_seen": 138468852, "step": 2091 }, { "epoch": 0.19572237562596528, "loss": 1.4543973207473755, "loss_ce": 0.004201980773359537, "loss_iou": 0.61328125, "loss_num": 0.044921875, "loss_xval": 1.453125, "num_input_tokens_seen": 138468852, "step": 2091 }, { "epoch": 0.195815977909861, "grad_norm": 64.06700134277344, "learning_rate": 5e-05, "loss": 1.358, "num_input_tokens_seen": 138534760, "step": 2092 }, { "epoch": 0.195815977909861, "loss": 1.3369941711425781, "loss_ce": 0.006427827291190624, "loss_iou": 0.51171875, "loss_num": 0.06201171875, "loss_xval": 1.328125, "num_input_tokens_seen": 138534760, "step": 2092 }, { "epoch": 0.19590958019375673, "grad_norm": 29.642932891845703, "learning_rate": 5e-05, "loss": 1.338, "num_input_tokens_seen": 138601456, "step": 2093 }, { "epoch": 0.19590958019375673, "loss": 1.1915972232818604, "loss_ce": 0.0080033540725708, "loss_iou": 0.546875, "loss_num": 0.0186767578125, "loss_xval": 1.1875, "num_input_tokens_seen": 138601456, "step": 2093 }, { "epoch": 0.19600318247765244, "grad_norm": 173.1945037841797, "learning_rate": 5e-05, "loss": 1.3226, "num_input_tokens_seen": 138668296, "step": 2094 }, { "epoch": 0.19600318247765244, "loss": 1.2930828332901, "loss_ce": 0.005057895090430975, "loss_iou": 0.5546875, "loss_num": 0.035400390625, "loss_xval": 1.2890625, "num_input_tokens_seen": 138668296, "step": 2094 }, { "epoch": 0.19609678476154818, "grad_norm": 23.310731887817383, "learning_rate": 5e-05, "loss": 1.2903, "num_input_tokens_seen": 138734044, "step": 2095 }, { "epoch": 0.19609678476154818, "loss": 1.418961763381958, "loss_ce": 0.001969480188563466, "loss_iou": 0.59375, "loss_num": 0.045654296875, "loss_xval": 1.4140625, "num_input_tokens_seen": 138734044, "step": 2095 }, { "epoch": 0.19619038704544392, "grad_norm": 32.93905258178711, "learning_rate": 5e-05, "loss": 1.122, "num_input_tokens_seen": 138801836, "step": 2096 }, { "epoch": 0.19619038704544392, "loss": 1.181718349456787, "loss_ce": 0.0059369634836912155, "loss_iou": 0.5, "loss_num": 0.03466796875, "loss_xval": 1.171875, "num_input_tokens_seen": 138801836, "step": 2096 }, { "epoch": 0.19628398932933963, "grad_norm": 25.704120635986328, "learning_rate": 5e-05, "loss": 1.3838, "num_input_tokens_seen": 138868040, "step": 2097 }, { "epoch": 0.19628398932933963, "loss": 1.3380424976348877, "loss_ce": 0.010405833832919598, "loss_iou": 0.58984375, "loss_num": 0.029541015625, "loss_xval": 1.328125, "num_input_tokens_seen": 138868040, "step": 2097 }, { "epoch": 0.19637759161323537, "grad_norm": 54.63113021850586, "learning_rate": 5e-05, "loss": 1.395, "num_input_tokens_seen": 138933456, "step": 2098 }, { "epoch": 0.19637759161323537, "loss": 1.4318453073501587, "loss_ce": 0.006552326492965221, "loss_iou": 0.6171875, "loss_num": 0.0380859375, "loss_xval": 1.421875, "num_input_tokens_seen": 138933456, "step": 2098 }, { "epoch": 0.19647119389713108, "grad_norm": 18.833669662475586, "learning_rate": 5e-05, "loss": 1.6439, "num_input_tokens_seen": 138999968, "step": 2099 }, { "epoch": 0.19647119389713108, "loss": 1.536208987236023, "loss_ce": 0.00398245407268405, "loss_iou": 0.65234375, "loss_num": 0.044677734375, "loss_xval": 1.53125, "num_input_tokens_seen": 138999968, "step": 2099 }, { "epoch": 0.19656479618102682, "grad_norm": 21.15911865234375, "learning_rate": 5e-05, "loss": 1.4839, "num_input_tokens_seen": 139066696, "step": 2100 }, { "epoch": 0.19656479618102682, "loss": 1.5681376457214355, "loss_ce": 0.004661109764128923, "loss_iou": 0.59375, "loss_num": 0.07470703125, "loss_xval": 1.5625, "num_input_tokens_seen": 139066696, "step": 2100 }, { "epoch": 0.19665839846492256, "grad_norm": 27.802852630615234, "learning_rate": 5e-05, "loss": 1.5475, "num_input_tokens_seen": 139133616, "step": 2101 }, { "epoch": 0.19665839846492256, "loss": 1.6107966899871826, "loss_ce": 0.01802327111363411, "loss_iou": 0.6484375, "loss_num": 0.059814453125, "loss_xval": 1.59375, "num_input_tokens_seen": 139133616, "step": 2101 }, { "epoch": 0.19675200074881827, "grad_norm": 24.559831619262695, "learning_rate": 5e-05, "loss": 1.2611, "num_input_tokens_seen": 139199640, "step": 2102 }, { "epoch": 0.19675200074881827, "loss": 1.2036356925964355, "loss_ce": 0.005027277860790491, "loss_iou": 0.490234375, "loss_num": 0.04345703125, "loss_xval": 1.1953125, "num_input_tokens_seen": 139199640, "step": 2102 }, { "epoch": 0.196845603032714, "grad_norm": 25.998132705688477, "learning_rate": 5e-05, "loss": 1.5986, "num_input_tokens_seen": 139265920, "step": 2103 }, { "epoch": 0.196845603032714, "loss": 1.5837655067443848, "loss_ce": 0.007593577262014151, "loss_iou": 0.609375, "loss_num": 0.0712890625, "loss_xval": 1.578125, "num_input_tokens_seen": 139265920, "step": 2103 }, { "epoch": 0.19693920531660972, "grad_norm": 26.764257431030273, "learning_rate": 5e-05, "loss": 1.3174, "num_input_tokens_seen": 139332048, "step": 2104 }, { "epoch": 0.19693920531660972, "loss": 1.2746940851211548, "loss_ce": 0.0066276658326387405, "loss_iou": 0.52734375, "loss_num": 0.04248046875, "loss_xval": 1.265625, "num_input_tokens_seen": 139332048, "step": 2104 }, { "epoch": 0.19703280760050546, "grad_norm": 13.437508583068848, "learning_rate": 5e-05, "loss": 1.2624, "num_input_tokens_seen": 139398548, "step": 2105 }, { "epoch": 0.19703280760050546, "loss": 1.1900551319122314, "loss_ce": 0.00304337777197361, "loss_iou": 0.515625, "loss_num": 0.031982421875, "loss_xval": 1.1875, "num_input_tokens_seen": 139398548, "step": 2105 }, { "epoch": 0.19712640988440117, "grad_norm": 16.992902755737305, "learning_rate": 5e-05, "loss": 1.2524, "num_input_tokens_seen": 139464796, "step": 2106 }, { "epoch": 0.19712640988440117, "loss": 1.3787537813186646, "loss_ce": 0.005218650214374065, "loss_iou": 0.57421875, "loss_num": 0.045654296875, "loss_xval": 1.375, "num_input_tokens_seen": 139464796, "step": 2106 }, { "epoch": 0.1972200121682969, "grad_norm": 67.82573699951172, "learning_rate": 5e-05, "loss": 1.3071, "num_input_tokens_seen": 139531248, "step": 2107 }, { "epoch": 0.1972200121682969, "loss": 1.1374847888946533, "loss_ce": 0.005770938470959663, "loss_iou": 0.458984375, "loss_num": 0.04248046875, "loss_xval": 1.1328125, "num_input_tokens_seen": 139531248, "step": 2107 }, { "epoch": 0.19731361445219264, "grad_norm": 26.257421493530273, "learning_rate": 5e-05, "loss": 1.7659, "num_input_tokens_seen": 139596544, "step": 2108 }, { "epoch": 0.19731361445219264, "loss": 1.7995635271072388, "loss_ce": 0.007571268826723099, "loss_iou": 0.7421875, "loss_num": 0.061767578125, "loss_xval": 1.7890625, "num_input_tokens_seen": 139596544, "step": 2108 }, { "epoch": 0.19740721673608835, "grad_norm": 30.2551212310791, "learning_rate": 5e-05, "loss": 1.193, "num_input_tokens_seen": 139663640, "step": 2109 }, { "epoch": 0.19740721673608835, "loss": 1.1639381647109985, "loss_ce": 0.0013405437348410487, "loss_iou": 0.515625, "loss_num": 0.0257568359375, "loss_xval": 1.1640625, "num_input_tokens_seen": 139663640, "step": 2109 }, { "epoch": 0.1975008190199841, "grad_norm": 11.374149322509766, "learning_rate": 5e-05, "loss": 1.2721, "num_input_tokens_seen": 139730368, "step": 2110 }, { "epoch": 0.1975008190199841, "loss": 1.266725778579712, "loss_ce": 0.007448405493050814, "loss_iou": 0.546875, "loss_num": 0.032470703125, "loss_xval": 1.2578125, "num_input_tokens_seen": 139730368, "step": 2110 }, { "epoch": 0.1975944213038798, "grad_norm": 109.66407775878906, "learning_rate": 5e-05, "loss": 1.2802, "num_input_tokens_seen": 139796708, "step": 2111 }, { "epoch": 0.1975944213038798, "loss": 1.364131212234497, "loss_ce": 0.0032914401963353157, "loss_iou": 0.5703125, "loss_num": 0.044677734375, "loss_xval": 1.359375, "num_input_tokens_seen": 139796708, "step": 2111 }, { "epoch": 0.19768802358777554, "grad_norm": 28.161298751831055, "learning_rate": 5e-05, "loss": 1.4388, "num_input_tokens_seen": 139863900, "step": 2112 }, { "epoch": 0.19768802358777554, "loss": 1.3507535457611084, "loss_ce": 0.003097320906817913, "loss_iou": 0.578125, "loss_num": 0.038330078125, "loss_xval": 1.34375, "num_input_tokens_seen": 139863900, "step": 2112 }, { "epoch": 0.19778162587167128, "grad_norm": 22.915809631347656, "learning_rate": 5e-05, "loss": 1.4894, "num_input_tokens_seen": 139931096, "step": 2113 }, { "epoch": 0.19778162587167128, "loss": 1.3303176164627075, "loss_ce": 0.0021926192566752434, "loss_iou": 0.58984375, "loss_num": 0.02880859375, "loss_xval": 1.328125, "num_input_tokens_seen": 139931096, "step": 2113 }, { "epoch": 0.197875228155567, "grad_norm": 25.763959884643555, "learning_rate": 5e-05, "loss": 1.3341, "num_input_tokens_seen": 139998124, "step": 2114 }, { "epoch": 0.197875228155567, "loss": 1.458052158355713, "loss_ce": 0.002485835924744606, "loss_iou": 0.57421875, "loss_num": 0.062255859375, "loss_xval": 1.453125, "num_input_tokens_seen": 139998124, "step": 2114 }, { "epoch": 0.19796883043946273, "grad_norm": 24.01043128967285, "learning_rate": 5e-05, "loss": 1.4405, "num_input_tokens_seen": 140065164, "step": 2115 }, { "epoch": 0.19796883043946273, "loss": 1.3948493003845215, "loss_ce": 0.006177469156682491, "loss_iou": 0.6015625, "loss_num": 0.03662109375, "loss_xval": 1.390625, "num_input_tokens_seen": 140065164, "step": 2115 }, { "epoch": 0.19806243272335844, "grad_norm": 24.73077392578125, "learning_rate": 5e-05, "loss": 1.1244, "num_input_tokens_seen": 140129844, "step": 2116 }, { "epoch": 0.19806243272335844, "loss": 1.1548796892166138, "loss_ce": 0.005953886546194553, "loss_iou": 0.474609375, "loss_num": 0.0400390625, "loss_xval": 1.1484375, "num_input_tokens_seen": 140129844, "step": 2116 }, { "epoch": 0.19815603500725418, "grad_norm": 25.4124813079834, "learning_rate": 5e-05, "loss": 1.4685, "num_input_tokens_seen": 140195892, "step": 2117 }, { "epoch": 0.19815603500725418, "loss": 1.6075721979141235, "loss_ce": 0.006986259017139673, "loss_iou": 0.65625, "loss_num": 0.05712890625, "loss_xval": 1.6015625, "num_input_tokens_seen": 140195892, "step": 2117 }, { "epoch": 0.19824963729114992, "grad_norm": 33.2074089050293, "learning_rate": 5e-05, "loss": 1.5424, "num_input_tokens_seen": 140261484, "step": 2118 }, { "epoch": 0.19824963729114992, "loss": 1.6192563772201538, "loss_ce": 0.005486827343702316, "loss_iou": 0.67578125, "loss_num": 0.052001953125, "loss_xval": 1.6171875, "num_input_tokens_seen": 140261484, "step": 2118 }, { "epoch": 0.19834323957504563, "grad_norm": 19.876880645751953, "learning_rate": 5e-05, "loss": 1.6574, "num_input_tokens_seen": 140327892, "step": 2119 }, { "epoch": 0.19834323957504563, "loss": 1.7455635070800781, "loss_ce": 0.005329243838787079, "loss_iou": 0.7265625, "loss_num": 0.0576171875, "loss_xval": 1.7421875, "num_input_tokens_seen": 140327892, "step": 2119 }, { "epoch": 0.19843684185894137, "grad_norm": 33.19362258911133, "learning_rate": 5e-05, "loss": 1.3776, "num_input_tokens_seen": 140394832, "step": 2120 }, { "epoch": 0.19843684185894137, "loss": 1.3313980102539062, "loss_ce": 0.005714379251003265, "loss_iou": 0.55078125, "loss_num": 0.044677734375, "loss_xval": 1.328125, "num_input_tokens_seen": 140394832, "step": 2120 }, { "epoch": 0.19853044414283708, "grad_norm": 20.490461349487305, "learning_rate": 5e-05, "loss": 1.3399, "num_input_tokens_seen": 140461516, "step": 2121 }, { "epoch": 0.19853044414283708, "loss": 1.456218957901001, "loss_ce": 0.005047088488936424, "loss_iou": 0.63671875, "loss_num": 0.03515625, "loss_xval": 1.453125, "num_input_tokens_seen": 140461516, "step": 2121 }, { "epoch": 0.19862404642673281, "grad_norm": 36.80075454711914, "learning_rate": 5e-05, "loss": 1.5404, "num_input_tokens_seen": 140527928, "step": 2122 }, { "epoch": 0.19862404642673281, "loss": 1.4016985893249512, "loss_ce": 0.00374927488155663, "loss_iou": 0.59765625, "loss_num": 0.041015625, "loss_xval": 1.3984375, "num_input_tokens_seen": 140527928, "step": 2122 }, { "epoch": 0.19871764871062855, "grad_norm": 21.78231430053711, "learning_rate": 5e-05, "loss": 1.594, "num_input_tokens_seen": 140593928, "step": 2123 }, { "epoch": 0.19871764871062855, "loss": 1.6748840808868408, "loss_ce": 0.004962306469678879, "loss_iou": 0.6953125, "loss_num": 0.054931640625, "loss_xval": 1.671875, "num_input_tokens_seen": 140593928, "step": 2123 }, { "epoch": 0.19881125099452426, "grad_norm": 23.20709991455078, "learning_rate": 5e-05, "loss": 1.3419, "num_input_tokens_seen": 140660548, "step": 2124 }, { "epoch": 0.19881125099452426, "loss": 1.2174222469329834, "loss_ce": 0.00453170295804739, "loss_iou": 0.53125, "loss_num": 0.030029296875, "loss_xval": 1.2109375, "num_input_tokens_seen": 140660548, "step": 2124 }, { "epoch": 0.19890485327842, "grad_norm": 24.04755210876465, "learning_rate": 5e-05, "loss": 1.5632, "num_input_tokens_seen": 140726312, "step": 2125 }, { "epoch": 0.19890485327842, "loss": 1.418169379234314, "loss_ce": 0.0038627460598945618, "loss_iou": 0.57421875, "loss_num": 0.053466796875, "loss_xval": 1.4140625, "num_input_tokens_seen": 140726312, "step": 2125 }, { "epoch": 0.1989984555623157, "grad_norm": 12.664782524108887, "learning_rate": 5e-05, "loss": 1.2301, "num_input_tokens_seen": 140792020, "step": 2126 }, { "epoch": 0.1989984555623157, "loss": 1.1144989728927612, "loss_ce": 0.008541987277567387, "loss_iou": 0.443359375, "loss_num": 0.043701171875, "loss_xval": 1.109375, "num_input_tokens_seen": 140792020, "step": 2126 }, { "epoch": 0.19909205784621145, "grad_norm": 14.952765464782715, "learning_rate": 5e-05, "loss": 1.4113, "num_input_tokens_seen": 140858640, "step": 2127 }, { "epoch": 0.19909205784621145, "loss": 1.442549228668213, "loss_ce": 0.007002345286309719, "loss_iou": 0.60546875, "loss_num": 0.045166015625, "loss_xval": 1.4375, "num_input_tokens_seen": 140858640, "step": 2127 }, { "epoch": 0.19918566013010716, "grad_norm": 34.664398193359375, "learning_rate": 5e-05, "loss": 1.3865, "num_input_tokens_seen": 140923380, "step": 2128 }, { "epoch": 0.19918566013010716, "loss": 1.5297850370407104, "loss_ce": 0.005859227851033211, "loss_iou": 0.609375, "loss_num": 0.061279296875, "loss_xval": 1.5234375, "num_input_tokens_seen": 140923380, "step": 2128 }, { "epoch": 0.1992792624140029, "grad_norm": 23.958240509033203, "learning_rate": 5e-05, "loss": 1.5756, "num_input_tokens_seen": 140989880, "step": 2129 }, { "epoch": 0.1992792624140029, "loss": 1.7080774307250977, "loss_ce": 0.005928914062678814, "loss_iou": 0.7734375, "loss_num": 0.0303955078125, "loss_xval": 1.703125, "num_input_tokens_seen": 140989880, "step": 2129 }, { "epoch": 0.19937286469789864, "grad_norm": 34.81669235229492, "learning_rate": 5e-05, "loss": 1.4449, "num_input_tokens_seen": 141057188, "step": 2130 }, { "epoch": 0.19937286469789864, "loss": 1.278939962387085, "loss_ce": 0.0064790514297783375, "loss_iou": 0.51953125, "loss_num": 0.046630859375, "loss_xval": 1.2734375, "num_input_tokens_seen": 141057188, "step": 2130 }, { "epoch": 0.19946646698179435, "grad_norm": 49.78134536743164, "learning_rate": 5e-05, "loss": 1.4739, "num_input_tokens_seen": 141123452, "step": 2131 }, { "epoch": 0.19946646698179435, "loss": 1.5672564506530762, "loss_ce": 0.009639300405979156, "loss_iou": 0.6875, "loss_num": 0.035888671875, "loss_xval": 1.5546875, "num_input_tokens_seen": 141123452, "step": 2131 }, { "epoch": 0.1995600692656901, "grad_norm": 15.674970626831055, "learning_rate": 5e-05, "loss": 1.5758, "num_input_tokens_seen": 141189372, "step": 2132 }, { "epoch": 0.1995600692656901, "loss": 1.4908785820007324, "loss_ce": 0.005649122409522533, "loss_iou": 0.63671875, "loss_num": 0.042724609375, "loss_xval": 1.484375, "num_input_tokens_seen": 141189372, "step": 2132 }, { "epoch": 0.1996536715495858, "grad_norm": 23.224313735961914, "learning_rate": 5e-05, "loss": 1.455, "num_input_tokens_seen": 141256076, "step": 2133 }, { "epoch": 0.1996536715495858, "loss": 1.4713666439056396, "loss_ce": 0.003593207336962223, "loss_iou": 0.640625, "loss_num": 0.03759765625, "loss_xval": 1.46875, "num_input_tokens_seen": 141256076, "step": 2133 }, { "epoch": 0.19974727383348154, "grad_norm": 62.98255920410156, "learning_rate": 5e-05, "loss": 1.3374, "num_input_tokens_seen": 141323612, "step": 2134 }, { "epoch": 0.19974727383348154, "loss": 1.3081867694854736, "loss_ce": 0.004475940950214863, "loss_iou": 0.578125, "loss_num": 0.02978515625, "loss_xval": 1.3046875, "num_input_tokens_seen": 141323612, "step": 2134 }, { "epoch": 0.19984087611737728, "grad_norm": 21.07288932800293, "learning_rate": 5e-05, "loss": 1.3495, "num_input_tokens_seen": 141390536, "step": 2135 }, { "epoch": 0.19984087611737728, "loss": 1.368180513381958, "loss_ce": 0.007828842848539352, "loss_iou": 0.609375, "loss_num": 0.0286865234375, "loss_xval": 1.359375, "num_input_tokens_seen": 141390536, "step": 2135 }, { "epoch": 0.19993447840127299, "grad_norm": 35.24394607543945, "learning_rate": 5e-05, "loss": 1.3854, "num_input_tokens_seen": 141456632, "step": 2136 }, { "epoch": 0.19993447840127299, "loss": 1.4562913179397583, "loss_ce": 0.00609607994556427, "loss_iou": 0.609375, "loss_num": 0.04638671875, "loss_xval": 1.453125, "num_input_tokens_seen": 141456632, "step": 2136 }, { "epoch": 0.20002808068516872, "grad_norm": 55.776893615722656, "learning_rate": 5e-05, "loss": 1.5803, "num_input_tokens_seen": 141522892, "step": 2137 }, { "epoch": 0.20002808068516872, "loss": 1.470313310623169, "loss_ce": 0.007910918444395065, "loss_iou": 0.60546875, "loss_num": 0.05029296875, "loss_xval": 1.4609375, "num_input_tokens_seen": 141522892, "step": 2137 }, { "epoch": 0.20012168296906443, "grad_norm": 43.92560577392578, "learning_rate": 5e-05, "loss": 1.3085, "num_input_tokens_seen": 141588712, "step": 2138 }, { "epoch": 0.20012168296906443, "loss": 1.2516255378723145, "loss_ce": 0.003090436104685068, "loss_iou": 0.5234375, "loss_num": 0.040771484375, "loss_xval": 1.25, "num_input_tokens_seen": 141588712, "step": 2138 }, { "epoch": 0.20021528525296017, "grad_norm": 225.448974609375, "learning_rate": 5e-05, "loss": 1.1565, "num_input_tokens_seen": 141655232, "step": 2139 }, { "epoch": 0.20021528525296017, "loss": 1.0503004789352417, "loss_ce": 0.0058669159188866615, "loss_iou": 0.416015625, "loss_num": 0.042236328125, "loss_xval": 1.046875, "num_input_tokens_seen": 141655232, "step": 2139 }, { "epoch": 0.2003088875368559, "grad_norm": 20.54629898071289, "learning_rate": 5e-05, "loss": 1.4583, "num_input_tokens_seen": 141721376, "step": 2140 }, { "epoch": 0.2003088875368559, "loss": 1.5018723011016846, "loss_ce": 0.0028488750103861094, "loss_iou": 0.6328125, "loss_num": 0.0478515625, "loss_xval": 1.5, "num_input_tokens_seen": 141721376, "step": 2140 }, { "epoch": 0.20040248982075162, "grad_norm": 32.34085464477539, "learning_rate": 5e-05, "loss": 1.3375, "num_input_tokens_seen": 141787640, "step": 2141 }, { "epoch": 0.20040248982075162, "loss": 1.2765312194824219, "loss_ce": 0.00455861771479249, "loss_iou": 0.515625, "loss_num": 0.048583984375, "loss_xval": 1.2734375, "num_input_tokens_seen": 141787640, "step": 2141 }, { "epoch": 0.20049609210464736, "grad_norm": 45.27389907836914, "learning_rate": 5e-05, "loss": 1.5283, "num_input_tokens_seen": 141853152, "step": 2142 }, { "epoch": 0.20049609210464736, "loss": 1.6496539115905762, "loss_ce": 0.010005595162510872, "loss_iou": 0.68359375, "loss_num": 0.054931640625, "loss_xval": 1.640625, "num_input_tokens_seen": 141853152, "step": 2142 }, { "epoch": 0.20058969438854307, "grad_norm": 14.894622802734375, "learning_rate": 5e-05, "loss": 1.2859, "num_input_tokens_seen": 141919916, "step": 2143 }, { "epoch": 0.20058969438854307, "loss": 1.3160537481307983, "loss_ce": 0.00453033484518528, "loss_iou": 0.5390625, "loss_num": 0.046630859375, "loss_xval": 1.3125, "num_input_tokens_seen": 141919916, "step": 2143 }, { "epoch": 0.2006832966724388, "grad_norm": 14.653695106506348, "learning_rate": 5e-05, "loss": 1.3589, "num_input_tokens_seen": 141985588, "step": 2144 }, { "epoch": 0.2006832966724388, "loss": 1.351457118988037, "loss_ce": 0.0033125763293355703, "loss_iou": 0.5625, "loss_num": 0.044189453125, "loss_xval": 1.3515625, "num_input_tokens_seen": 141985588, "step": 2144 }, { "epoch": 0.20077689895633452, "grad_norm": 17.91266632080078, "learning_rate": 5e-05, "loss": 1.6382, "num_input_tokens_seen": 142050996, "step": 2145 }, { "epoch": 0.20077689895633452, "loss": 1.3563157320022583, "loss_ce": 0.005241512320935726, "loss_iou": 0.60546875, "loss_num": 0.0277099609375, "loss_xval": 1.3515625, "num_input_tokens_seen": 142050996, "step": 2145 }, { "epoch": 0.20087050124023026, "grad_norm": 28.426193237304688, "learning_rate": 5e-05, "loss": 1.4209, "num_input_tokens_seen": 142116196, "step": 2146 }, { "epoch": 0.20087050124023026, "loss": 1.5161666870117188, "loss_ce": 0.00249484833329916, "loss_iou": 0.625, "loss_num": 0.052490234375, "loss_xval": 1.515625, "num_input_tokens_seen": 142116196, "step": 2146 }, { "epoch": 0.200964103524126, "grad_norm": 22.27088165283203, "learning_rate": 5e-05, "loss": 1.4221, "num_input_tokens_seen": 142181700, "step": 2147 }, { "epoch": 0.200964103524126, "loss": 1.6393663883209229, "loss_ce": 0.0041125984862446785, "loss_iou": 0.6875, "loss_num": 0.05224609375, "loss_xval": 1.6328125, "num_input_tokens_seen": 142181700, "step": 2147 }, { "epoch": 0.2010577058080217, "grad_norm": 19.2863712310791, "learning_rate": 5e-05, "loss": 1.546, "num_input_tokens_seen": 142249104, "step": 2148 }, { "epoch": 0.2010577058080217, "loss": 1.6800668239593506, "loss_ce": 0.0023324843496084213, "loss_iou": 0.7265625, "loss_num": 0.045166015625, "loss_xval": 1.6796875, "num_input_tokens_seen": 142249104, "step": 2148 }, { "epoch": 0.20115130809191745, "grad_norm": 18.831619262695312, "learning_rate": 5e-05, "loss": 1.0992, "num_input_tokens_seen": 142315916, "step": 2149 }, { "epoch": 0.20115130809191745, "loss": 1.0366709232330322, "loss_ce": 0.005420956294983625, "loss_iou": 0.45703125, "loss_num": 0.0235595703125, "loss_xval": 1.03125, "num_input_tokens_seen": 142315916, "step": 2149 }, { "epoch": 0.20124491037581316, "grad_norm": 19.836402893066406, "learning_rate": 5e-05, "loss": 1.2922, "num_input_tokens_seen": 142382940, "step": 2150 }, { "epoch": 0.20124491037581316, "loss": 1.3474340438842773, "loss_ce": 0.004172265063971281, "loss_iou": 0.5625, "loss_num": 0.043701171875, "loss_xval": 1.34375, "num_input_tokens_seen": 142382940, "step": 2150 }, { "epoch": 0.2013385126597089, "grad_norm": 16.515554428100586, "learning_rate": 5e-05, "loss": 1.3101, "num_input_tokens_seen": 142449124, "step": 2151 }, { "epoch": 0.2013385126597089, "loss": 1.3000141382217407, "loss_ce": 0.006557063199579716, "loss_iou": 0.51953125, "loss_num": 0.051025390625, "loss_xval": 1.296875, "num_input_tokens_seen": 142449124, "step": 2151 }, { "epoch": 0.20143211494360463, "grad_norm": 35.38172149658203, "learning_rate": 5e-05, "loss": 1.3323, "num_input_tokens_seen": 142514168, "step": 2152 }, { "epoch": 0.20143211494360463, "loss": 1.446332573890686, "loss_ce": 0.004560066852718592, "loss_iou": 0.58203125, "loss_num": 0.055908203125, "loss_xval": 1.4453125, "num_input_tokens_seen": 142514168, "step": 2152 }, { "epoch": 0.20152571722750034, "grad_norm": 34.99225997924805, "learning_rate": 5e-05, "loss": 1.5669, "num_input_tokens_seen": 142580064, "step": 2153 }, { "epoch": 0.20152571722750034, "loss": 1.490980863571167, "loss_ce": 0.006605847738683224, "loss_iou": 0.6015625, "loss_num": 0.056640625, "loss_xval": 1.484375, "num_input_tokens_seen": 142580064, "step": 2153 }, { "epoch": 0.20161931951139608, "grad_norm": 22.245737075805664, "learning_rate": 5e-05, "loss": 1.3902, "num_input_tokens_seen": 142645520, "step": 2154 }, { "epoch": 0.20161931951139608, "loss": 1.0189058780670166, "loss_ce": 0.007004044950008392, "loss_iou": 0.4453125, "loss_num": 0.0247802734375, "loss_xval": 1.015625, "num_input_tokens_seen": 142645520, "step": 2154 }, { "epoch": 0.2017129217952918, "grad_norm": 18.72137451171875, "learning_rate": 5e-05, "loss": 1.215, "num_input_tokens_seen": 142711508, "step": 2155 }, { "epoch": 0.2017129217952918, "loss": 1.3505170345306396, "loss_ce": 0.006278828717768192, "loss_iou": 0.53515625, "loss_num": 0.05419921875, "loss_xval": 1.34375, "num_input_tokens_seen": 142711508, "step": 2155 }, { "epoch": 0.20180652407918753, "grad_norm": 20.530357360839844, "learning_rate": 5e-05, "loss": 1.626, "num_input_tokens_seen": 142776984, "step": 2156 }, { "epoch": 0.20180652407918753, "loss": 1.7002286911010742, "loss_ce": 0.004427995067089796, "loss_iou": 0.70703125, "loss_num": 0.056640625, "loss_xval": 1.6953125, "num_input_tokens_seen": 142776984, "step": 2156 }, { "epoch": 0.20190012636308327, "grad_norm": 22.80133628845215, "learning_rate": 5e-05, "loss": 1.4426, "num_input_tokens_seen": 142843388, "step": 2157 }, { "epoch": 0.20190012636308327, "loss": 1.5896942615509033, "loss_ce": 0.0037568435072898865, "loss_iou": 0.6640625, "loss_num": 0.052734375, "loss_xval": 1.5859375, "num_input_tokens_seen": 142843388, "step": 2157 }, { "epoch": 0.20199372864697898, "grad_norm": 23.717363357543945, "learning_rate": 5e-05, "loss": 1.4548, "num_input_tokens_seen": 142910168, "step": 2158 }, { "epoch": 0.20199372864697898, "loss": 1.4617071151733398, "loss_ce": 0.004675748758018017, "loss_iou": 0.6171875, "loss_num": 0.044189453125, "loss_xval": 1.453125, "num_input_tokens_seen": 142910168, "step": 2158 }, { "epoch": 0.20208733093087472, "grad_norm": 19.471845626831055, "learning_rate": 5e-05, "loss": 1.4269, "num_input_tokens_seen": 142975132, "step": 2159 }, { "epoch": 0.20208733093087472, "loss": 1.4465241432189941, "loss_ce": 0.004141343291848898, "loss_iou": 0.61328125, "loss_num": 0.04296875, "loss_xval": 1.4453125, "num_input_tokens_seen": 142975132, "step": 2159 }, { "epoch": 0.20218093321477043, "grad_norm": 29.96786880493164, "learning_rate": 5e-05, "loss": 1.4185, "num_input_tokens_seen": 143042128, "step": 2160 }, { "epoch": 0.20218093321477043, "loss": 1.3426908254623413, "loss_ce": 0.0018704948015511036, "loss_iou": 0.5703125, "loss_num": 0.03955078125, "loss_xval": 1.34375, "num_input_tokens_seen": 143042128, "step": 2160 }, { "epoch": 0.20227453549866617, "grad_norm": 39.95287322998047, "learning_rate": 5e-05, "loss": 1.344, "num_input_tokens_seen": 143106544, "step": 2161 }, { "epoch": 0.20227453549866617, "loss": 1.298628807067871, "loss_ce": 0.00175384059548378, "loss_iou": 0.5625, "loss_num": 0.034423828125, "loss_xval": 1.296875, "num_input_tokens_seen": 143106544, "step": 2161 }, { "epoch": 0.2023681377825619, "grad_norm": 50.39260482788086, "learning_rate": 5e-05, "loss": 1.5972, "num_input_tokens_seen": 143171920, "step": 2162 }, { "epoch": 0.2023681377825619, "loss": 1.5182979106903076, "loss_ce": 0.0036495246458798647, "loss_iou": 0.66796875, "loss_num": 0.035400390625, "loss_xval": 1.515625, "num_input_tokens_seen": 143171920, "step": 2162 }, { "epoch": 0.20246174006645762, "grad_norm": 11.106011390686035, "learning_rate": 5e-05, "loss": 1.3537, "num_input_tokens_seen": 143238752, "step": 2163 }, { "epoch": 0.20246174006645762, "loss": 1.4705227613449097, "loss_ce": 0.004214137326925993, "loss_iou": 0.6015625, "loss_num": 0.05322265625, "loss_xval": 1.46875, "num_input_tokens_seen": 143238752, "step": 2163 }, { "epoch": 0.20255534235035336, "grad_norm": 15.916219711303711, "learning_rate": 5e-05, "loss": 1.5475, "num_input_tokens_seen": 143304276, "step": 2164 }, { "epoch": 0.20255534235035336, "loss": 1.6630849838256836, "loss_ce": 0.00634677754715085, "loss_iou": 0.66796875, "loss_num": 0.06396484375, "loss_xval": 1.65625, "num_input_tokens_seen": 143304276, "step": 2164 }, { "epoch": 0.20264894463424907, "grad_norm": 25.220035552978516, "learning_rate": 5e-05, "loss": 1.3673, "num_input_tokens_seen": 143370060, "step": 2165 }, { "epoch": 0.20264894463424907, "loss": 1.121541976928711, "loss_ce": 0.006185547914355993, "loss_iou": 0.47265625, "loss_num": 0.033935546875, "loss_xval": 1.1171875, "num_input_tokens_seen": 143370060, "step": 2165 }, { "epoch": 0.2027425469181448, "grad_norm": 28.483713150024414, "learning_rate": 5e-05, "loss": 1.3473, "num_input_tokens_seen": 143437080, "step": 2166 }, { "epoch": 0.2027425469181448, "loss": 1.2359519004821777, "loss_ce": 0.009877657517790794, "loss_iou": 0.47265625, "loss_num": 0.055908203125, "loss_xval": 1.2265625, "num_input_tokens_seen": 143437080, "step": 2166 }, { "epoch": 0.20283614920204052, "grad_norm": 25.22542381286621, "learning_rate": 5e-05, "loss": 1.4368, "num_input_tokens_seen": 143503224, "step": 2167 }, { "epoch": 0.20283614920204052, "loss": 1.3459038734436035, "loss_ce": 0.001177316065877676, "loss_iou": 0.578125, "loss_num": 0.03857421875, "loss_xval": 1.34375, "num_input_tokens_seen": 143503224, "step": 2167 }, { "epoch": 0.20292975148593626, "grad_norm": 24.992958068847656, "learning_rate": 5e-05, "loss": 1.3714, "num_input_tokens_seen": 143569348, "step": 2168 }, { "epoch": 0.20292975148593626, "loss": 1.4888062477111816, "loss_ce": 0.006384441163390875, "loss_iou": 0.546875, "loss_num": 0.0771484375, "loss_xval": 1.484375, "num_input_tokens_seen": 143569348, "step": 2168 }, { "epoch": 0.203023353769832, "grad_norm": 22.52411651611328, "learning_rate": 5e-05, "loss": 1.3275, "num_input_tokens_seen": 143635772, "step": 2169 }, { "epoch": 0.203023353769832, "loss": 1.3241088390350342, "loss_ce": 0.0030333655886352062, "loss_iou": 0.57421875, "loss_num": 0.03515625, "loss_xval": 1.3203125, "num_input_tokens_seen": 143635772, "step": 2169 }, { "epoch": 0.2031169560537277, "grad_norm": 42.44329833984375, "learning_rate": 5e-05, "loss": 1.4694, "num_input_tokens_seen": 143700864, "step": 2170 }, { "epoch": 0.2031169560537277, "loss": 1.4569463729858398, "loss_ce": 0.006751062348484993, "loss_iou": 0.61328125, "loss_num": 0.045166015625, "loss_xval": 1.453125, "num_input_tokens_seen": 143700864, "step": 2170 }, { "epoch": 0.20321055833762344, "grad_norm": 22.192626953125, "learning_rate": 5e-05, "loss": 1.2989, "num_input_tokens_seen": 143767608, "step": 2171 }, { "epoch": 0.20321055833762344, "loss": 1.1069200038909912, "loss_ce": 0.0029160603880882263, "loss_iou": 0.48046875, "loss_num": 0.0284423828125, "loss_xval": 1.1015625, "num_input_tokens_seen": 143767608, "step": 2171 }, { "epoch": 0.20330416062151915, "grad_norm": 35.91387939453125, "learning_rate": 5e-05, "loss": 1.4401, "num_input_tokens_seen": 143834088, "step": 2172 }, { "epoch": 0.20330416062151915, "loss": 1.5390979051589966, "loss_ce": 0.002965106163173914, "loss_iou": 0.671875, "loss_num": 0.038818359375, "loss_xval": 1.5390625, "num_input_tokens_seen": 143834088, "step": 2172 }, { "epoch": 0.2033977629054149, "grad_norm": 21.08890724182129, "learning_rate": 5e-05, "loss": 1.6687, "num_input_tokens_seen": 143899788, "step": 2173 }, { "epoch": 0.2033977629054149, "loss": 1.4316294193267822, "loss_ce": 0.002918568905442953, "loss_iou": 0.6328125, "loss_num": 0.032958984375, "loss_xval": 1.4296875, "num_input_tokens_seen": 143899788, "step": 2173 }, { "epoch": 0.20349136518931063, "grad_norm": 22.691059112548828, "learning_rate": 5e-05, "loss": 1.3941, "num_input_tokens_seen": 143966944, "step": 2174 }, { "epoch": 0.20349136518931063, "loss": 1.5247730016708374, "loss_ce": 0.004265276715159416, "loss_iou": 0.65625, "loss_num": 0.041748046875, "loss_xval": 1.5234375, "num_input_tokens_seen": 143966944, "step": 2174 }, { "epoch": 0.20358496747320634, "grad_norm": 42.85596466064453, "learning_rate": 5e-05, "loss": 1.2111, "num_input_tokens_seen": 144033116, "step": 2175 }, { "epoch": 0.20358496747320634, "loss": 1.2911250591278076, "loss_ce": 0.0025508119724690914, "loss_iou": 0.546875, "loss_num": 0.039306640625, "loss_xval": 1.2890625, "num_input_tokens_seen": 144033116, "step": 2175 }, { "epoch": 0.20367856975710208, "grad_norm": 30.698877334594727, "learning_rate": 5e-05, "loss": 1.5079, "num_input_tokens_seen": 144099212, "step": 2176 }, { "epoch": 0.20367856975710208, "loss": 1.4964704513549805, "loss_ce": 0.008189158514142036, "loss_iou": 0.61328125, "loss_num": 0.052734375, "loss_xval": 1.484375, "num_input_tokens_seen": 144099212, "step": 2176 }, { "epoch": 0.2037721720409978, "grad_norm": 16.635744094848633, "learning_rate": 5e-05, "loss": 1.249, "num_input_tokens_seen": 144165616, "step": 2177 }, { "epoch": 0.2037721720409978, "loss": 1.2867045402526855, "loss_ce": 0.005454451777040958, "loss_iou": 0.5546875, "loss_num": 0.03515625, "loss_xval": 1.28125, "num_input_tokens_seen": 144165616, "step": 2177 }, { "epoch": 0.20386577432489353, "grad_norm": 21.607072830200195, "learning_rate": 5e-05, "loss": 1.621, "num_input_tokens_seen": 144232660, "step": 2178 }, { "epoch": 0.20386577432489353, "loss": 1.3934295177459717, "loss_ce": 0.008663848042488098, "loss_iou": 0.609375, "loss_num": 0.03369140625, "loss_xval": 1.3828125, "num_input_tokens_seen": 144232660, "step": 2178 }, { "epoch": 0.20395937660878927, "grad_norm": 32.10531997680664, "learning_rate": 5e-05, "loss": 1.4021, "num_input_tokens_seen": 144299392, "step": 2179 }, { "epoch": 0.20395937660878927, "loss": 1.331843376159668, "loss_ce": 0.0056714159436523914, "loss_iou": 0.6015625, "loss_num": 0.0240478515625, "loss_xval": 1.328125, "num_input_tokens_seen": 144299392, "step": 2179 }, { "epoch": 0.20405297889268498, "grad_norm": 21.30291175842285, "learning_rate": 5e-05, "loss": 1.7013, "num_input_tokens_seen": 144365804, "step": 2180 }, { "epoch": 0.20405297889268498, "loss": 1.5601857900619507, "loss_ce": 0.004521731752902269, "loss_iou": 0.640625, "loss_num": 0.05517578125, "loss_xval": 1.5546875, "num_input_tokens_seen": 144365804, "step": 2180 }, { "epoch": 0.20414658117658072, "grad_norm": 22.230737686157227, "learning_rate": 5e-05, "loss": 1.3447, "num_input_tokens_seen": 144432600, "step": 2181 }, { "epoch": 0.20414658117658072, "loss": 1.4684849977493286, "loss_ce": 0.008035789243876934, "loss_iou": 0.5859375, "loss_num": 0.056884765625, "loss_xval": 1.4609375, "num_input_tokens_seen": 144432600, "step": 2181 }, { "epoch": 0.20424018346047643, "grad_norm": 20.775821685791016, "learning_rate": 5e-05, "loss": 1.235, "num_input_tokens_seen": 144498544, "step": 2182 }, { "epoch": 0.20424018346047643, "loss": 1.1361083984375, "loss_ce": 0.004028338938951492, "loss_iou": 0.455078125, "loss_num": 0.044677734375, "loss_xval": 1.1328125, "num_input_tokens_seen": 144498544, "step": 2182 }, { "epoch": 0.20433378574437217, "grad_norm": 18.58045196533203, "learning_rate": 5e-05, "loss": 1.6382, "num_input_tokens_seen": 144565964, "step": 2183 }, { "epoch": 0.20433378574437217, "loss": 1.5659629106521606, "loss_ce": 0.005904317833483219, "loss_iou": 0.69140625, "loss_num": 0.035888671875, "loss_xval": 1.5625, "num_input_tokens_seen": 144565964, "step": 2183 }, { "epoch": 0.2044273880282679, "grad_norm": 38.042015075683594, "learning_rate": 5e-05, "loss": 1.5261, "num_input_tokens_seen": 144631488, "step": 2184 }, { "epoch": 0.2044273880282679, "loss": 1.412172555923462, "loss_ce": 0.004945940803736448, "loss_iou": 0.59375, "loss_num": 0.043212890625, "loss_xval": 1.40625, "num_input_tokens_seen": 144631488, "step": 2184 }, { "epoch": 0.20452099031216361, "grad_norm": 17.624834060668945, "learning_rate": 5e-05, "loss": 1.4384, "num_input_tokens_seen": 144698352, "step": 2185 }, { "epoch": 0.20452099031216361, "loss": 1.404240369796753, "loss_ce": 0.004826275631785393, "loss_iou": 0.57421875, "loss_num": 0.05029296875, "loss_xval": 1.3984375, "num_input_tokens_seen": 144698352, "step": 2185 }, { "epoch": 0.20461459259605935, "grad_norm": 46.20594787597656, "learning_rate": 5e-05, "loss": 1.4092, "num_input_tokens_seen": 144765108, "step": 2186 }, { "epoch": 0.20461459259605935, "loss": 1.4681873321533203, "loss_ce": 0.005785033572465181, "loss_iou": 0.60546875, "loss_num": 0.0498046875, "loss_xval": 1.4609375, "num_input_tokens_seen": 144765108, "step": 2186 }, { "epoch": 0.20470819487995506, "grad_norm": 17.88568878173828, "learning_rate": 5e-05, "loss": 1.5187, "num_input_tokens_seen": 144830616, "step": 2187 }, { "epoch": 0.20470819487995506, "loss": 1.8436431884765625, "loss_ce": 0.010635368525981903, "loss_iou": 0.76171875, "loss_num": 0.0625, "loss_xval": 1.8359375, "num_input_tokens_seen": 144830616, "step": 2187 }, { "epoch": 0.2048017971638508, "grad_norm": 37.66646957397461, "learning_rate": 5e-05, "loss": 1.5437, "num_input_tokens_seen": 144897316, "step": 2188 }, { "epoch": 0.2048017971638508, "loss": 1.565699815750122, "loss_ce": 0.002711482811719179, "loss_iou": 0.62109375, "loss_num": 0.06396484375, "loss_xval": 1.5625, "num_input_tokens_seen": 144897316, "step": 2188 }, { "epoch": 0.2048953994477465, "grad_norm": 39.91148376464844, "learning_rate": 5e-05, "loss": 1.4788, "num_input_tokens_seen": 144962820, "step": 2189 }, { "epoch": 0.2048953994477465, "loss": 1.3915303945541382, "loss_ce": 0.0038351090624928474, "loss_iou": 0.56640625, "loss_num": 0.051025390625, "loss_xval": 1.390625, "num_input_tokens_seen": 144962820, "step": 2189 }, { "epoch": 0.20498900173164225, "grad_norm": 33.69110107421875, "learning_rate": 5e-05, "loss": 1.6458, "num_input_tokens_seen": 145030904, "step": 2190 }, { "epoch": 0.20498900173164225, "loss": 1.5528013706207275, "loss_ce": 0.002996806986629963, "loss_iou": 0.671875, "loss_num": 0.04052734375, "loss_xval": 1.546875, "num_input_tokens_seen": 145030904, "step": 2190 }, { "epoch": 0.205082604015538, "grad_norm": 16.49057388305664, "learning_rate": 5e-05, "loss": 1.3206, "num_input_tokens_seen": 145097788, "step": 2191 }, { "epoch": 0.205082604015538, "loss": 1.3586344718933105, "loss_ce": 0.0031656406354159117, "loss_iou": 0.5625, "loss_num": 0.047119140625, "loss_xval": 1.359375, "num_input_tokens_seen": 145097788, "step": 2191 }, { "epoch": 0.2051762062994337, "grad_norm": 27.333337783813477, "learning_rate": 5e-05, "loss": 1.415, "num_input_tokens_seen": 145163536, "step": 2192 }, { "epoch": 0.2051762062994337, "loss": 1.326690912246704, "loss_ce": 0.004913484677672386, "loss_iou": 0.5546875, "loss_num": 0.0419921875, "loss_xval": 1.3203125, "num_input_tokens_seen": 145163536, "step": 2192 }, { "epoch": 0.20526980858332944, "grad_norm": 20.88427734375, "learning_rate": 5e-05, "loss": 1.5599, "num_input_tokens_seen": 145228784, "step": 2193 }, { "epoch": 0.20526980858332944, "loss": 1.6858747005462646, "loss_ce": 0.009116966277360916, "loss_iou": 0.71875, "loss_num": 0.048583984375, "loss_xval": 1.6796875, "num_input_tokens_seen": 145228784, "step": 2193 }, { "epoch": 0.20536341086722515, "grad_norm": 19.816139221191406, "learning_rate": 5e-05, "loss": 1.1869, "num_input_tokens_seen": 145295832, "step": 2194 }, { "epoch": 0.20536341086722515, "loss": 1.0105786323547363, "loss_ce": 0.004841390997171402, "loss_iou": 0.42578125, "loss_num": 0.0301513671875, "loss_xval": 1.0078125, "num_input_tokens_seen": 145295832, "step": 2194 }, { "epoch": 0.2054570131511209, "grad_norm": 21.31490135192871, "learning_rate": 5e-05, "loss": 1.455, "num_input_tokens_seen": 145363284, "step": 2195 }, { "epoch": 0.2054570131511209, "loss": 1.525984287261963, "loss_ce": 0.004499888978898525, "loss_iou": 0.6328125, "loss_num": 0.051513671875, "loss_xval": 1.5234375, "num_input_tokens_seen": 145363284, "step": 2195 }, { "epoch": 0.20555061543501663, "grad_norm": 16.395980834960938, "learning_rate": 5e-05, "loss": 1.4608, "num_input_tokens_seen": 145428900, "step": 2196 }, { "epoch": 0.20555061543501663, "loss": 1.323909044265747, "loss_ce": 0.003962744493037462, "loss_iou": 0.53515625, "loss_num": 0.050048828125, "loss_xval": 1.3203125, "num_input_tokens_seen": 145428900, "step": 2196 }, { "epoch": 0.20564421771891234, "grad_norm": 33.9736442565918, "learning_rate": 5e-05, "loss": 1.3005, "num_input_tokens_seen": 145494208, "step": 2197 }, { "epoch": 0.20564421771891234, "loss": 1.2018710374832153, "loss_ce": 0.005581974517554045, "loss_iou": 0.48828125, "loss_num": 0.0439453125, "loss_xval": 1.1953125, "num_input_tokens_seen": 145494208, "step": 2197 }, { "epoch": 0.20573782000280808, "grad_norm": 32.992347717285156, "learning_rate": 5e-05, "loss": 1.6301, "num_input_tokens_seen": 145559440, "step": 2198 }, { "epoch": 0.20573782000280808, "loss": 1.7665610313415527, "loss_ce": 0.003865695558488369, "loss_iou": 0.74609375, "loss_num": 0.0537109375, "loss_xval": 1.765625, "num_input_tokens_seen": 145559440, "step": 2198 }, { "epoch": 0.20583142228670379, "grad_norm": 19.87616539001465, "learning_rate": 5e-05, "loss": 1.2179, "num_input_tokens_seen": 145625060, "step": 2199 }, { "epoch": 0.20583142228670379, "loss": 1.1124231815338135, "loss_ce": 0.003902629017829895, "loss_iou": 0.46875, "loss_num": 0.034423828125, "loss_xval": 1.109375, "num_input_tokens_seen": 145625060, "step": 2199 }, { "epoch": 0.20592502457059952, "grad_norm": 16.862716674804688, "learning_rate": 5e-05, "loss": 1.1606, "num_input_tokens_seen": 145691548, "step": 2200 }, { "epoch": 0.20592502457059952, "loss": 1.068557858467102, "loss_ce": 0.006057845428586006, "loss_iou": 0.412109375, "loss_num": 0.047607421875, "loss_xval": 1.0625, "num_input_tokens_seen": 145691548, "step": 2200 }, { "epoch": 0.20601862685449526, "grad_norm": 16.87476921081543, "learning_rate": 5e-05, "loss": 1.2487, "num_input_tokens_seen": 145758132, "step": 2201 }, { "epoch": 0.20601862685449526, "loss": 1.3122081756591797, "loss_ce": 0.004102656617760658, "loss_iou": 0.5546875, "loss_num": 0.039794921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 145758132, "step": 2201 }, { "epoch": 0.20611222913839097, "grad_norm": 35.36105728149414, "learning_rate": 5e-05, "loss": 1.2631, "num_input_tokens_seen": 145824952, "step": 2202 }, { "epoch": 0.20611222913839097, "loss": 1.2746992111206055, "loss_ce": 0.003703146940097213, "loss_iou": 0.51953125, "loss_num": 0.0458984375, "loss_xval": 1.2734375, "num_input_tokens_seen": 145824952, "step": 2202 }, { "epoch": 0.2062058314222867, "grad_norm": 20.086915969848633, "learning_rate": 5e-05, "loss": 1.5068, "num_input_tokens_seen": 145890460, "step": 2203 }, { "epoch": 0.2062058314222867, "loss": 1.3482638597488403, "loss_ce": 0.010617414489388466, "loss_iou": 0.5546875, "loss_num": 0.0458984375, "loss_xval": 1.3359375, "num_input_tokens_seen": 145890460, "step": 2203 }, { "epoch": 0.20629943370618242, "grad_norm": 18.681596755981445, "learning_rate": 5e-05, "loss": 1.4599, "num_input_tokens_seen": 145956848, "step": 2204 }, { "epoch": 0.20629943370618242, "loss": 1.3786115646362305, "loss_ce": 0.0060529257170856, "loss_iou": 0.578125, "loss_num": 0.04296875, "loss_xval": 1.375, "num_input_tokens_seen": 145956848, "step": 2204 }, { "epoch": 0.20639303599007816, "grad_norm": 16.152690887451172, "learning_rate": 5e-05, "loss": 1.4318, "num_input_tokens_seen": 146023292, "step": 2205 }, { "epoch": 0.20639303599007816, "loss": 1.682483434677124, "loss_ce": 0.0037725758738815784, "loss_iou": 0.671875, "loss_num": 0.0673828125, "loss_xval": 1.6796875, "num_input_tokens_seen": 146023292, "step": 2205 }, { "epoch": 0.20648663827397387, "grad_norm": 15.745396614074707, "learning_rate": 5e-05, "loss": 1.6109, "num_input_tokens_seen": 146089560, "step": 2206 }, { "epoch": 0.20648663827397387, "loss": 1.5467182397842407, "loss_ce": 0.006190854590386152, "loss_iou": 0.609375, "loss_num": 0.064453125, "loss_xval": 1.5390625, "num_input_tokens_seen": 146089560, "step": 2206 }, { "epoch": 0.2065802405578696, "grad_norm": 31.07707977294922, "learning_rate": 5e-05, "loss": 1.2601, "num_input_tokens_seen": 146155592, "step": 2207 }, { "epoch": 0.2065802405578696, "loss": 1.3511521816253662, "loss_ce": 0.007402233779430389, "loss_iou": 0.53515625, "loss_num": 0.0546875, "loss_xval": 1.34375, "num_input_tokens_seen": 146155592, "step": 2207 }, { "epoch": 0.20667384284176535, "grad_norm": 23.022127151489258, "learning_rate": 5e-05, "loss": 1.4658, "num_input_tokens_seen": 146222144, "step": 2208 }, { "epoch": 0.20667384284176535, "loss": 1.1982890367507935, "loss_ce": 0.007371045649051666, "loss_iou": 0.5234375, "loss_num": 0.0283203125, "loss_xval": 1.1875, "num_input_tokens_seen": 146222144, "step": 2208 }, { "epoch": 0.20676744512566106, "grad_norm": 33.60017013549805, "learning_rate": 5e-05, "loss": 1.457, "num_input_tokens_seen": 146289488, "step": 2209 }, { "epoch": 0.20676744512566106, "loss": 1.6218219995498657, "loss_ce": 0.004146199207752943, "loss_iou": 0.65234375, "loss_num": 0.0625, "loss_xval": 1.6171875, "num_input_tokens_seen": 146289488, "step": 2209 }, { "epoch": 0.2068610474095568, "grad_norm": 25.5781192779541, "learning_rate": 5e-05, "loss": 1.6717, "num_input_tokens_seen": 146355076, "step": 2210 }, { "epoch": 0.2068610474095568, "loss": 1.6904634237289429, "loss_ce": 0.0039400034584105015, "loss_iou": 0.71484375, "loss_num": 0.051025390625, "loss_xval": 1.6875, "num_input_tokens_seen": 146355076, "step": 2210 }, { "epoch": 0.2069546496934525, "grad_norm": 10.948907852172852, "learning_rate": 5e-05, "loss": 1.238, "num_input_tokens_seen": 146420240, "step": 2211 }, { "epoch": 0.2069546496934525, "loss": 1.414231300354004, "loss_ce": 0.007981323637068272, "loss_iou": 0.5234375, "loss_num": 0.07177734375, "loss_xval": 1.40625, "num_input_tokens_seen": 146420240, "step": 2211 }, { "epoch": 0.20704825197734825, "grad_norm": 20.03610610961914, "learning_rate": 5e-05, "loss": 1.4347, "num_input_tokens_seen": 146486432, "step": 2212 }, { "epoch": 0.20704825197734825, "loss": 1.4449725151062012, "loss_ce": 0.004542850889265537, "loss_iou": 0.57421875, "loss_num": 0.0576171875, "loss_xval": 1.4375, "num_input_tokens_seen": 146486432, "step": 2212 }, { "epoch": 0.20714185426124399, "grad_norm": 15.121575355529785, "learning_rate": 5e-05, "loss": 1.3037, "num_input_tokens_seen": 146551664, "step": 2213 }, { "epoch": 0.20714185426124399, "loss": 1.372945785522461, "loss_ce": 0.008047164417803288, "loss_iou": 0.6015625, "loss_num": 0.031494140625, "loss_xval": 1.3671875, "num_input_tokens_seen": 146551664, "step": 2213 }, { "epoch": 0.2072354565451397, "grad_norm": 21.025821685791016, "learning_rate": 5e-05, "loss": 1.2563, "num_input_tokens_seen": 146618556, "step": 2214 }, { "epoch": 0.2072354565451397, "loss": 1.1853673458099365, "loss_ce": 0.0037267024163156748, "loss_iou": 0.5078125, "loss_num": 0.032470703125, "loss_xval": 1.1796875, "num_input_tokens_seen": 146618556, "step": 2214 }, { "epoch": 0.20732905882903543, "grad_norm": 27.402463912963867, "learning_rate": 5e-05, "loss": 1.6821, "num_input_tokens_seen": 146684372, "step": 2215 }, { "epoch": 0.20732905882903543, "loss": 1.7625792026519775, "loss_ce": 0.006719916593283415, "loss_iou": 0.73046875, "loss_num": 0.05859375, "loss_xval": 1.7578125, "num_input_tokens_seen": 146684372, "step": 2215 }, { "epoch": 0.20742266111293114, "grad_norm": 20.352569580078125, "learning_rate": 5e-05, "loss": 1.227, "num_input_tokens_seen": 146750312, "step": 2216 }, { "epoch": 0.20742266111293114, "loss": 1.1586307287216187, "loss_ce": 0.004822149872779846, "loss_iou": 0.5234375, "loss_num": 0.02197265625, "loss_xval": 1.15625, "num_input_tokens_seen": 146750312, "step": 2216 }, { "epoch": 0.20751626339682688, "grad_norm": 20.267181396484375, "learning_rate": 5e-05, "loss": 1.4477, "num_input_tokens_seen": 146816936, "step": 2217 }, { "epoch": 0.20751626339682688, "loss": 1.2870581150054932, "loss_ce": 0.007761204615235329, "loss_iou": 0.5546875, "loss_num": 0.03466796875, "loss_xval": 1.28125, "num_input_tokens_seen": 146816936, "step": 2217 }, { "epoch": 0.20760986568072262, "grad_norm": 16.633872985839844, "learning_rate": 5e-05, "loss": 1.2765, "num_input_tokens_seen": 146883120, "step": 2218 }, { "epoch": 0.20760986568072262, "loss": 1.0969173908233643, "loss_ce": 0.0026791575364768505, "loss_iou": 0.4296875, "loss_num": 0.046875, "loss_xval": 1.09375, "num_input_tokens_seen": 146883120, "step": 2218 }, { "epoch": 0.20770346796461833, "grad_norm": 14.718212127685547, "learning_rate": 5e-05, "loss": 1.3418, "num_input_tokens_seen": 146949108, "step": 2219 }, { "epoch": 0.20770346796461833, "loss": 1.2713162899017334, "loss_ce": 0.005691338796168566, "loss_iou": 0.4921875, "loss_num": 0.055908203125, "loss_xval": 1.265625, "num_input_tokens_seen": 146949108, "step": 2219 }, { "epoch": 0.20779707024851407, "grad_norm": 16.255435943603516, "learning_rate": 5e-05, "loss": 1.2042, "num_input_tokens_seen": 147015480, "step": 2220 }, { "epoch": 0.20779707024851407, "loss": 1.3084746599197388, "loss_ce": 0.0037871471140533686, "loss_iou": 0.52734375, "loss_num": 0.049560546875, "loss_xval": 1.3046875, "num_input_tokens_seen": 147015480, "step": 2220 }, { "epoch": 0.20789067253240978, "grad_norm": 18.149707794189453, "learning_rate": 5e-05, "loss": 1.3916, "num_input_tokens_seen": 147081252, "step": 2221 }, { "epoch": 0.20789067253240978, "loss": 1.2870043516159058, "loss_ce": 0.010637231171131134, "loss_iou": 0.51171875, "loss_num": 0.05029296875, "loss_xval": 1.2734375, "num_input_tokens_seen": 147081252, "step": 2221 }, { "epoch": 0.20798427481630552, "grad_norm": 15.424684524536133, "learning_rate": 5e-05, "loss": 1.2039, "num_input_tokens_seen": 147148376, "step": 2222 }, { "epoch": 0.20798427481630552, "loss": 0.9300609827041626, "loss_ce": 0.002570751588791609, "loss_iou": 0.388671875, "loss_num": 0.0301513671875, "loss_xval": 0.92578125, "num_input_tokens_seen": 147148376, "step": 2222 }, { "epoch": 0.20807787710020126, "grad_norm": 23.329143524169922, "learning_rate": 5e-05, "loss": 1.2619, "num_input_tokens_seen": 147214676, "step": 2223 }, { "epoch": 0.20807787710020126, "loss": 1.3454594612121582, "loss_ce": 0.0046391854993999004, "loss_iou": 0.5859375, "loss_num": 0.0341796875, "loss_xval": 1.34375, "num_input_tokens_seen": 147214676, "step": 2223 }, { "epoch": 0.20817147938409697, "grad_norm": 22.675811767578125, "learning_rate": 5e-05, "loss": 1.541, "num_input_tokens_seen": 147281548, "step": 2224 }, { "epoch": 0.20817147938409697, "loss": 1.6554572582244873, "loss_ce": 0.004090028814971447, "loss_iou": 0.72265625, "loss_num": 0.040771484375, "loss_xval": 1.6484375, "num_input_tokens_seen": 147281548, "step": 2224 }, { "epoch": 0.2082650816679927, "grad_norm": 89.36438751220703, "learning_rate": 5e-05, "loss": 1.4125, "num_input_tokens_seen": 147348384, "step": 2225 }, { "epoch": 0.2082650816679927, "loss": 1.6650135517120361, "loss_ce": 0.009740199893712997, "loss_iou": 0.68359375, "loss_num": 0.05712890625, "loss_xval": 1.65625, "num_input_tokens_seen": 147348384, "step": 2225 }, { "epoch": 0.20835868395188842, "grad_norm": 84.10919952392578, "learning_rate": 5e-05, "loss": 1.5184, "num_input_tokens_seen": 147414920, "step": 2226 }, { "epoch": 0.20835868395188842, "loss": 1.56592857837677, "loss_ce": 0.0073348539881408215, "loss_iou": 0.6875, "loss_num": 0.036865234375, "loss_xval": 1.5625, "num_input_tokens_seen": 147414920, "step": 2226 }, { "epoch": 0.20845228623578416, "grad_norm": 14.97479248046875, "learning_rate": 5e-05, "loss": 1.441, "num_input_tokens_seen": 147480396, "step": 2227 }, { "epoch": 0.20845228623578416, "loss": 1.5814578533172607, "loss_ce": 0.007239131256937981, "loss_iou": 0.60546875, "loss_num": 0.07275390625, "loss_xval": 1.578125, "num_input_tokens_seen": 147480396, "step": 2227 }, { "epoch": 0.20854588851967987, "grad_norm": 40.541664123535156, "learning_rate": 5e-05, "loss": 1.4666, "num_input_tokens_seen": 147546120, "step": 2228 }, { "epoch": 0.20854588851967987, "loss": 1.5482265949249268, "loss_ce": 0.004281272180378437, "loss_iou": 0.62890625, "loss_num": 0.05712890625, "loss_xval": 1.546875, "num_input_tokens_seen": 147546120, "step": 2228 }, { "epoch": 0.2086394908035756, "grad_norm": 34.50336456298828, "learning_rate": 5e-05, "loss": 1.508, "num_input_tokens_seen": 147612456, "step": 2229 }, { "epoch": 0.2086394908035756, "loss": 1.5039300918579102, "loss_ce": 0.0049066124483942986, "loss_iou": 0.59765625, "loss_num": 0.061279296875, "loss_xval": 1.5, "num_input_tokens_seen": 147612456, "step": 2229 }, { "epoch": 0.20873309308747134, "grad_norm": 23.92495346069336, "learning_rate": 5e-05, "loss": 1.3672, "num_input_tokens_seen": 147678372, "step": 2230 }, { "epoch": 0.20873309308747134, "loss": 1.2686617374420166, "loss_ce": 0.008163605816662312, "loss_iou": 0.54296875, "loss_num": 0.035400390625, "loss_xval": 1.2578125, "num_input_tokens_seen": 147678372, "step": 2230 }, { "epoch": 0.20882669537136705, "grad_norm": 54.392547607421875, "learning_rate": 5e-05, "loss": 1.6349, "num_input_tokens_seen": 147744796, "step": 2231 }, { "epoch": 0.20882669537136705, "loss": 1.6422569751739502, "loss_ce": 0.00944453664124012, "loss_iou": 0.6875, "loss_num": 0.0517578125, "loss_xval": 1.6328125, "num_input_tokens_seen": 147744796, "step": 2231 }, { "epoch": 0.2089202976552628, "grad_norm": 20.766681671142578, "learning_rate": 5e-05, "loss": 1.8613, "num_input_tokens_seen": 147811408, "step": 2232 }, { "epoch": 0.2089202976552628, "loss": 1.9362826347351074, "loss_ce": 0.00464208796620369, "loss_iou": 0.8125, "loss_num": 0.06201171875, "loss_xval": 1.9296875, "num_input_tokens_seen": 147811408, "step": 2232 }, { "epoch": 0.2090138999391585, "grad_norm": 56.70877456665039, "learning_rate": 5e-05, "loss": 1.3871, "num_input_tokens_seen": 147877692, "step": 2233 }, { "epoch": 0.2090138999391585, "loss": 1.3354027271270752, "loss_ce": 0.004348025657236576, "loss_iou": 0.5703125, "loss_num": 0.0380859375, "loss_xval": 1.328125, "num_input_tokens_seen": 147877692, "step": 2233 }, { "epoch": 0.20910750222305424, "grad_norm": 34.47943115234375, "learning_rate": 5e-05, "loss": 1.5514, "num_input_tokens_seen": 147943848, "step": 2234 }, { "epoch": 0.20910750222305424, "loss": 1.5931035280227661, "loss_ce": 0.006677714176476002, "loss_iou": 0.63671875, "loss_num": 0.06298828125, "loss_xval": 1.5859375, "num_input_tokens_seen": 147943848, "step": 2234 }, { "epoch": 0.20920110450694998, "grad_norm": 18.903854370117188, "learning_rate": 5e-05, "loss": 1.8535, "num_input_tokens_seen": 148010672, "step": 2235 }, { "epoch": 0.20920110450694998, "loss": 1.7717020511627197, "loss_ce": 0.010959910228848457, "loss_iou": 0.75390625, "loss_num": 0.051513671875, "loss_xval": 1.7578125, "num_input_tokens_seen": 148010672, "step": 2235 }, { "epoch": 0.2092947067908457, "grad_norm": 52.18496322631836, "learning_rate": 5e-05, "loss": 1.3746, "num_input_tokens_seen": 148077216, "step": 2236 }, { "epoch": 0.2092947067908457, "loss": 1.3000686168670654, "loss_ce": 0.005634992383420467, "loss_iou": 0.5234375, "loss_num": 0.049072265625, "loss_xval": 1.296875, "num_input_tokens_seen": 148077216, "step": 2236 }, { "epoch": 0.20938830907474143, "grad_norm": 65.09969329833984, "learning_rate": 5e-05, "loss": 1.4046, "num_input_tokens_seen": 148143628, "step": 2237 }, { "epoch": 0.20938830907474143, "loss": 1.4511845111846924, "loss_ce": 0.007336852140724659, "loss_iou": 0.5625, "loss_num": 0.0634765625, "loss_xval": 1.4453125, "num_input_tokens_seen": 148143628, "step": 2237 }, { "epoch": 0.20948191135863714, "grad_norm": 41.23759078979492, "learning_rate": 5e-05, "loss": 1.6012, "num_input_tokens_seen": 148210352, "step": 2238 }, { "epoch": 0.20948191135863714, "loss": 1.45740807056427, "loss_ce": 0.0033065066672861576, "loss_iou": 0.61328125, "loss_num": 0.044677734375, "loss_xval": 1.453125, "num_input_tokens_seen": 148210352, "step": 2238 }, { "epoch": 0.20957551364253288, "grad_norm": 35.63227844238281, "learning_rate": 5e-05, "loss": 1.5818, "num_input_tokens_seen": 148277092, "step": 2239 }, { "epoch": 0.20957551364253288, "loss": 1.6763197183609009, "loss_ce": 0.008350997231900692, "loss_iou": 0.67578125, "loss_num": 0.06396484375, "loss_xval": 1.671875, "num_input_tokens_seen": 148277092, "step": 2239 }, { "epoch": 0.20966911592642862, "grad_norm": 24.028881072998047, "learning_rate": 5e-05, "loss": 1.6909, "num_input_tokens_seen": 148343912, "step": 2240 }, { "epoch": 0.20966911592642862, "loss": 1.7737795114517212, "loss_ce": 0.002295173704624176, "loss_iou": 0.76953125, "loss_num": 0.04638671875, "loss_xval": 1.7734375, "num_input_tokens_seen": 148343912, "step": 2240 }, { "epoch": 0.20976271821032433, "grad_norm": 38.34393310546875, "learning_rate": 5e-05, "loss": 1.5307, "num_input_tokens_seen": 148409256, "step": 2241 }, { "epoch": 0.20976271821032433, "loss": 1.6319129467010498, "loss_ce": 0.003983210772275925, "loss_iou": 0.671875, "loss_num": 0.056884765625, "loss_xval": 1.625, "num_input_tokens_seen": 148409256, "step": 2241 }, { "epoch": 0.20985632049422007, "grad_norm": 36.2247314453125, "learning_rate": 5e-05, "loss": 1.4455, "num_input_tokens_seen": 148474748, "step": 2242 }, { "epoch": 0.20985632049422007, "loss": 1.538226842880249, "loss_ce": 0.005512088071554899, "loss_iou": 0.6171875, "loss_num": 0.06005859375, "loss_xval": 1.53125, "num_input_tokens_seen": 148474748, "step": 2242 }, { "epoch": 0.20994992277811578, "grad_norm": 19.64275550842285, "learning_rate": 5e-05, "loss": 1.6078, "num_input_tokens_seen": 148541100, "step": 2243 }, { "epoch": 0.20994992277811578, "loss": 1.5026966333389282, "loss_ce": 0.007579442113637924, "loss_iou": 0.65234375, "loss_num": 0.03857421875, "loss_xval": 1.4921875, "num_input_tokens_seen": 148541100, "step": 2243 }, { "epoch": 0.21004352506201152, "grad_norm": 14.244067192077637, "learning_rate": 5e-05, "loss": 1.2634, "num_input_tokens_seen": 148607516, "step": 2244 }, { "epoch": 0.21004352506201152, "loss": 1.3142645359039307, "loss_ce": 0.0042059896513819695, "loss_iou": 0.5546875, "loss_num": 0.040771484375, "loss_xval": 1.3125, "num_input_tokens_seen": 148607516, "step": 2244 }, { "epoch": 0.21013712734590723, "grad_norm": 22.575939178466797, "learning_rate": 5e-05, "loss": 1.2309, "num_input_tokens_seen": 148673148, "step": 2245 }, { "epoch": 0.21013712734590723, "loss": 1.457908272743225, "loss_ce": 0.0028301426209509373, "loss_iou": 0.6015625, "loss_num": 0.050048828125, "loss_xval": 1.453125, "num_input_tokens_seen": 148673148, "step": 2245 }, { "epoch": 0.21023072962980296, "grad_norm": 23.176837921142578, "learning_rate": 5e-05, "loss": 1.3858, "num_input_tokens_seen": 148739024, "step": 2246 }, { "epoch": 0.21023072962980296, "loss": 1.5564404726028442, "loss_ce": 0.002241305308416486, "loss_iou": 0.66015625, "loss_num": 0.04638671875, "loss_xval": 1.5546875, "num_input_tokens_seen": 148739024, "step": 2246 }, { "epoch": 0.2103243319136987, "grad_norm": 78.06816864013672, "learning_rate": 5e-05, "loss": 1.4197, "num_input_tokens_seen": 148805784, "step": 2247 }, { "epoch": 0.2103243319136987, "loss": 1.2678524255752563, "loss_ce": 0.005645415745675564, "loss_iou": 0.54296875, "loss_num": 0.03515625, "loss_xval": 1.265625, "num_input_tokens_seen": 148805784, "step": 2247 }, { "epoch": 0.21041793419759441, "grad_norm": 18.7746639251709, "learning_rate": 5e-05, "loss": 1.7969, "num_input_tokens_seen": 148872216, "step": 2248 }, { "epoch": 0.21041793419759441, "loss": 1.6653659343719482, "loss_ce": 0.0017916755750775337, "loss_iou": 0.69921875, "loss_num": 0.053466796875, "loss_xval": 1.6640625, "num_input_tokens_seen": 148872216, "step": 2248 }, { "epoch": 0.21051153648149015, "grad_norm": 25.165340423583984, "learning_rate": 5e-05, "loss": 1.4906, "num_input_tokens_seen": 148938992, "step": 2249 }, { "epoch": 0.21051153648149015, "loss": 1.5340664386749268, "loss_ce": 0.004769505932927132, "loss_iou": 0.6171875, "loss_num": 0.05908203125, "loss_xval": 1.53125, "num_input_tokens_seen": 148938992, "step": 2249 }, { "epoch": 0.21060513876538586, "grad_norm": 25.190000534057617, "learning_rate": 5e-05, "loss": 1.5433, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.21060513876538586, "eval_seeclick_CIoU": 0.1746026836335659, "eval_seeclick_GIoU": 0.18487906455993652, "eval_seeclick_IoU": 0.27577562630176544, "eval_seeclick_MAE_all": 0.14121928066015244, "eval_seeclick_MAE_h": 0.0973590612411499, "eval_seeclick_MAE_w": 0.09571099653840065, "eval_seeclick_MAE_x_boxes": 0.21197088062763214, "eval_seeclick_MAE_y_boxes": 0.09960607439279556, "eval_seeclick_NUM_probability": 0.9994820952415466, "eval_seeclick_inside_bbox": 0.4541666805744171, "eval_seeclick_loss": 2.4197990894317627, "eval_seeclick_loss_ce": 0.013916734606027603, "eval_seeclick_loss_iou": 0.8665771484375, "eval_seeclick_loss_num": 0.14796829223632812, "eval_seeclick_loss_xval": 2.47119140625, "eval_seeclick_runtime": 62.9995, "eval_seeclick_samples_per_second": 0.746, "eval_seeclick_steps_per_second": 0.032, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.21060513876538586, "eval_icons_CIoU": -0.15117041021585464, "eval_icons_GIoU": -0.06868918985128403, "eval_icons_IoU": 0.052425259724259377, "eval_icons_MAE_all": 0.20415173470973969, "eval_icons_MAE_h": 0.22264737635850906, "eval_icons_MAE_w": 0.18965063244104385, "eval_icons_MAE_x_boxes": 0.12681209295988083, "eval_icons_MAE_y_boxes": 0.1221475638449192, "eval_icons_NUM_probability": 0.9999066889286041, "eval_icons_inside_bbox": 0.1145833358168602, "eval_icons_loss": 3.141892433166504, "eval_icons_loss_ce": 1.8032216758001596e-05, "eval_icons_loss_iou": 1.07275390625, "eval_icons_loss_num": 0.197662353515625, "eval_icons_loss_xval": 3.1328125, "eval_icons_runtime": 69.1361, "eval_icons_samples_per_second": 0.723, "eval_icons_steps_per_second": 0.029, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.21060513876538586, "eval_screenspot_CIoU": -0.016074684758981068, "eval_screenspot_GIoU": 0.01664178321758906, "eval_screenspot_IoU": 0.16272087146838507, "eval_screenspot_MAE_all": 0.20578849812348685, "eval_screenspot_MAE_h": 0.18250097831090292, "eval_screenspot_MAE_w": 0.19865205387274423, "eval_screenspot_MAE_x_boxes": 0.22381174067656198, "eval_screenspot_MAE_y_boxes": 0.1280993570884069, "eval_screenspot_NUM_probability": 0.9997875094413757, "eval_screenspot_inside_bbox": 0.3312500019868215, "eval_screenspot_loss": 3.0338799953460693, "eval_screenspot_loss_ce": 0.011708071455359459, "eval_screenspot_loss_iou": 0.994140625, "eval_screenspot_loss_num": 0.21026611328125, "eval_screenspot_loss_xval": 3.041015625, "eval_screenspot_runtime": 116.4531, "eval_screenspot_samples_per_second": 0.764, "eval_screenspot_steps_per_second": 0.026, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.21060513876538586, "eval_compot_CIoU": -0.08576418831944466, "eval_compot_GIoU": -0.027402309700846672, "eval_compot_IoU": 0.09865068644285202, "eval_compot_MAE_all": 0.22077596932649612, "eval_compot_MAE_h": 0.20852911472320557, "eval_compot_MAE_w": 0.24083954840898514, "eval_compot_MAE_x_boxes": 0.1733434721827507, "eval_compot_MAE_y_boxes": 0.10590603947639465, "eval_compot_NUM_probability": 0.9998809397220612, "eval_compot_inside_bbox": 0.1927083358168602, "eval_compot_loss": 3.1436450481414795, "eval_compot_loss_ce": 0.0049590670969337225, "eval_compot_loss_iou": 1.023681640625, "eval_compot_loss_num": 0.22637939453125, "eval_compot_loss_xval": 3.177734375, "eval_compot_runtime": 71.2026, "eval_compot_samples_per_second": 0.702, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.21060513876538586, "eval_custom_ui_MAE_all": 0.14865753799676895, "eval_custom_ui_MAE_x": 0.1338840276002884, "eval_custom_ui_MAE_y": 0.16343104094266891, "eval_custom_ui_NUM_probability": 0.9999413192272186, "eval_custom_ui_loss": 0.833726167678833, "eval_custom_ui_loss_ce": 0.16179928183555603, "eval_custom_ui_loss_num": 0.140472412109375, "eval_custom_ui_loss_xval": 0.702880859375, "eval_custom_ui_runtime": 51.0191, "eval_custom_ui_samples_per_second": 0.98, "eval_custom_ui_steps_per_second": 0.039, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.21060513876538586, "loss": 0.9273938536643982, "loss_ce": 0.18276497721672058, "loss_iou": 0.0, "loss_num": 0.1484375, "loss_xval": 0.74609375, "num_input_tokens_seen": 149006232, "step": 2250 }, { "epoch": 0.2106987410492816, "grad_norm": 20.41312599182129, "learning_rate": 5e-05, "loss": 1.6266, "num_input_tokens_seen": 149074428, "step": 2251 }, { "epoch": 0.2106987410492816, "loss": 1.5528807640075684, "loss_ce": 0.003564316313713789, "loss_iou": 0.640625, "loss_num": 0.053466796875, "loss_xval": 1.546875, "num_input_tokens_seen": 149074428, "step": 2251 }, { "epoch": 0.21079234333317734, "grad_norm": 21.525434494018555, "learning_rate": 5e-05, "loss": 1.3801, "num_input_tokens_seen": 149141204, "step": 2252 }, { "epoch": 0.21079234333317734, "loss": 1.443589687347412, "loss_ce": 0.004624864086508751, "loss_iou": 0.60546875, "loss_num": 0.045166015625, "loss_xval": 1.4375, "num_input_tokens_seen": 149141204, "step": 2252 }, { "epoch": 0.21088594561707305, "grad_norm": 20.107213973999023, "learning_rate": 5e-05, "loss": 1.5497, "num_input_tokens_seen": 149207932, "step": 2253 }, { "epoch": 0.21088594561707305, "loss": 1.4908183813095093, "loss_ce": 0.007908225059509277, "loss_iou": 0.625, "loss_num": 0.045654296875, "loss_xval": 1.484375, "num_input_tokens_seen": 149207932, "step": 2253 }, { "epoch": 0.2109795479009688, "grad_norm": 61.2882194519043, "learning_rate": 5e-05, "loss": 1.0448, "num_input_tokens_seen": 149272128, "step": 2254 }, { "epoch": 0.2109795479009688, "loss": 1.0997624397277832, "loss_ce": 0.002594493795186281, "loss_iou": 0.453125, "loss_num": 0.037841796875, "loss_xval": 1.09375, "num_input_tokens_seen": 149272128, "step": 2254 }, { "epoch": 0.2110731501848645, "grad_norm": 21.424177169799805, "learning_rate": 5e-05, "loss": 1.4858, "num_input_tokens_seen": 149338100, "step": 2255 }, { "epoch": 0.2110731501848645, "loss": 1.6285836696624756, "loss_ce": 0.0035836196038872004, "loss_iou": 0.69921875, "loss_num": 0.0458984375, "loss_xval": 1.625, "num_input_tokens_seen": 149338100, "step": 2255 }, { "epoch": 0.21116675246876024, "grad_norm": 19.440874099731445, "learning_rate": 5e-05, "loss": 1.4271, "num_input_tokens_seen": 149405908, "step": 2256 }, { "epoch": 0.21116675246876024, "loss": 1.5703045129776, "loss_ce": 0.0014568158658221364, "loss_iou": 0.6875, "loss_num": 0.0380859375, "loss_xval": 1.5703125, "num_input_tokens_seen": 149405908, "step": 2256 }, { "epoch": 0.21126035475265598, "grad_norm": 13.495523452758789, "learning_rate": 5e-05, "loss": 1.2129, "num_input_tokens_seen": 149471956, "step": 2257 }, { "epoch": 0.21126035475265598, "loss": 1.3002811670303345, "loss_ce": 0.0019413732225075364, "loss_iou": 0.546875, "loss_num": 0.041748046875, "loss_xval": 1.296875, "num_input_tokens_seen": 149471956, "step": 2257 }, { "epoch": 0.2113539570365517, "grad_norm": 20.041250228881836, "learning_rate": 5e-05, "loss": 1.1498, "num_input_tokens_seen": 149537104, "step": 2258 }, { "epoch": 0.2113539570365517, "loss": 1.2770624160766602, "loss_ce": 0.005089844577014446, "loss_iou": 0.57421875, "loss_num": 0.0245361328125, "loss_xval": 1.2734375, "num_input_tokens_seen": 149537104, "step": 2258 }, { "epoch": 0.21144755932044743, "grad_norm": 33.957183837890625, "learning_rate": 5e-05, "loss": 1.5538, "num_input_tokens_seen": 149602036, "step": 2259 }, { "epoch": 0.21144755932044743, "loss": 1.463841438293457, "loss_ce": 0.005833700764924288, "loss_iou": 0.64453125, "loss_num": 0.032958984375, "loss_xval": 1.4609375, "num_input_tokens_seen": 149602036, "step": 2259 }, { "epoch": 0.21154116160434314, "grad_norm": 189.0176239013672, "learning_rate": 5e-05, "loss": 1.7054, "num_input_tokens_seen": 149668420, "step": 2260 }, { "epoch": 0.21154116160434314, "loss": 1.7047916650772095, "loss_ce": 0.00654939329251647, "loss_iou": 0.734375, "loss_num": 0.046630859375, "loss_xval": 1.6953125, "num_input_tokens_seen": 149668420, "step": 2260 }, { "epoch": 0.21163476388823887, "grad_norm": 25.923681259155273, "learning_rate": 5e-05, "loss": 1.5028, "num_input_tokens_seen": 149735604, "step": 2261 }, { "epoch": 0.21163476388823887, "loss": 1.4564383029937744, "loss_ce": 0.006242984440177679, "loss_iou": 0.59765625, "loss_num": 0.0517578125, "loss_xval": 1.453125, "num_input_tokens_seen": 149735604, "step": 2261 }, { "epoch": 0.2117283661721346, "grad_norm": 30.270620346069336, "learning_rate": 5e-05, "loss": 1.2854, "num_input_tokens_seen": 149801416, "step": 2262 }, { "epoch": 0.2117283661721346, "loss": 1.1462156772613525, "loss_ce": 0.009740954264998436, "loss_iou": 0.4765625, "loss_num": 0.036376953125, "loss_xval": 1.1328125, "num_input_tokens_seen": 149801416, "step": 2262 }, { "epoch": 0.21182196845603032, "grad_norm": 22.751750946044922, "learning_rate": 5e-05, "loss": 1.3604, "num_input_tokens_seen": 149866496, "step": 2263 }, { "epoch": 0.21182196845603032, "loss": 1.426013708114624, "loss_ce": 0.00609190808609128, "loss_iou": 0.64453125, "loss_num": 0.0263671875, "loss_xval": 1.421875, "num_input_tokens_seen": 149866496, "step": 2263 }, { "epoch": 0.21191557073992606, "grad_norm": 17.920101165771484, "learning_rate": 5e-05, "loss": 1.4721, "num_input_tokens_seen": 149932904, "step": 2264 }, { "epoch": 0.21191557073992606, "loss": 1.6346955299377441, "loss_ce": 0.005300988908857107, "loss_iou": 0.61328125, "loss_num": 0.08056640625, "loss_xval": 1.6328125, "num_input_tokens_seen": 149932904, "step": 2264 }, { "epoch": 0.21200917302382177, "grad_norm": 17.05817222595215, "learning_rate": 5e-05, "loss": 1.3378, "num_input_tokens_seen": 149999576, "step": 2265 }, { "epoch": 0.21200917302382177, "loss": 1.4109833240509033, "loss_ce": 0.005221586674451828, "loss_iou": 0.6015625, "loss_num": 0.041015625, "loss_xval": 1.40625, "num_input_tokens_seen": 149999576, "step": 2265 }, { "epoch": 0.2121027753077175, "grad_norm": 28.452831268310547, "learning_rate": 5e-05, "loss": 1.3323, "num_input_tokens_seen": 150064768, "step": 2266 }, { "epoch": 0.2121027753077175, "loss": 1.3750596046447754, "loss_ce": 0.014219718053936958, "loss_iou": 0.5390625, "loss_num": 0.056884765625, "loss_xval": 1.359375, "num_input_tokens_seen": 150064768, "step": 2266 }, { "epoch": 0.21219637759161322, "grad_norm": 18.17842674255371, "learning_rate": 5e-05, "loss": 1.4363, "num_input_tokens_seen": 150129856, "step": 2267 }, { "epoch": 0.21219637759161322, "loss": 1.4692213535308838, "loss_ce": 0.013166775926947594, "loss_iou": 0.58984375, "loss_num": 0.0556640625, "loss_xval": 1.453125, "num_input_tokens_seen": 150129856, "step": 2267 }, { "epoch": 0.21228997987550896, "grad_norm": 14.280591011047363, "learning_rate": 5e-05, "loss": 1.3063, "num_input_tokens_seen": 150196056, "step": 2268 }, { "epoch": 0.21228997987550896, "loss": 1.3882437944412231, "loss_ce": 0.0025016251020133495, "loss_iou": 0.5703125, "loss_num": 0.0498046875, "loss_xval": 1.3828125, "num_input_tokens_seen": 150196056, "step": 2268 }, { "epoch": 0.2123835821594047, "grad_norm": 34.965476989746094, "learning_rate": 5e-05, "loss": 1.4558, "num_input_tokens_seen": 150262864, "step": 2269 }, { "epoch": 0.2123835821594047, "loss": 1.3500034809112549, "loss_ce": 0.006253512110561132, "loss_iou": 0.5625, "loss_num": 0.04443359375, "loss_xval": 1.34375, "num_input_tokens_seen": 150262864, "step": 2269 }, { "epoch": 0.2124771844433004, "grad_norm": 37.37397766113281, "learning_rate": 5e-05, "loss": 1.4802, "num_input_tokens_seen": 150329176, "step": 2270 }, { "epoch": 0.2124771844433004, "loss": 1.531097412109375, "loss_ce": 0.005706858821213245, "loss_iou": 0.60546875, "loss_num": 0.0625, "loss_xval": 1.5234375, "num_input_tokens_seen": 150329176, "step": 2270 }, { "epoch": 0.21257078672719615, "grad_norm": 32.53642272949219, "learning_rate": 5e-05, "loss": 1.785, "num_input_tokens_seen": 150397212, "step": 2271 }, { "epoch": 0.21257078672719615, "loss": 1.8849523067474365, "loss_ce": 0.007999066263437271, "loss_iou": 0.7890625, "loss_num": 0.0595703125, "loss_xval": 1.875, "num_input_tokens_seen": 150397212, "step": 2271 }, { "epoch": 0.21266438901109186, "grad_norm": 19.93190574645996, "learning_rate": 5e-05, "loss": 1.4283, "num_input_tokens_seen": 150463268, "step": 2272 }, { "epoch": 0.21266438901109186, "loss": 1.392273187637329, "loss_ce": 0.005554419942200184, "loss_iou": 0.59375, "loss_num": 0.04052734375, "loss_xval": 1.390625, "num_input_tokens_seen": 150463268, "step": 2272 }, { "epoch": 0.2127579912949876, "grad_norm": 28.814884185791016, "learning_rate": 5e-05, "loss": 1.3199, "num_input_tokens_seen": 150529540, "step": 2273 }, { "epoch": 0.2127579912949876, "loss": 1.3307688236236572, "loss_ce": 0.00410857331007719, "loss_iou": 0.56640625, "loss_num": 0.038818359375, "loss_xval": 1.328125, "num_input_tokens_seen": 150529540, "step": 2273 }, { "epoch": 0.21285159357888334, "grad_norm": 40.80498123168945, "learning_rate": 5e-05, "loss": 1.5853, "num_input_tokens_seen": 150595128, "step": 2274 }, { "epoch": 0.21285159357888334, "loss": 1.6934144496917725, "loss_ce": 0.002984791761264205, "loss_iou": 0.6953125, "loss_num": 0.060546875, "loss_xval": 1.6875, "num_input_tokens_seen": 150595128, "step": 2274 }, { "epoch": 0.21294519586277905, "grad_norm": 14.86881160736084, "learning_rate": 5e-05, "loss": 1.3172, "num_input_tokens_seen": 150661340, "step": 2275 }, { "epoch": 0.21294519586277905, "loss": 1.3164842128753662, "loss_ce": 0.010636991821229458, "loss_iou": 0.51953125, "loss_num": 0.052734375, "loss_xval": 1.3046875, "num_input_tokens_seen": 150661340, "step": 2275 }, { "epoch": 0.21303879814667478, "grad_norm": 24.285980224609375, "learning_rate": 5e-05, "loss": 1.319, "num_input_tokens_seen": 150727916, "step": 2276 }, { "epoch": 0.21303879814667478, "loss": 1.420497179031372, "loss_ce": 0.0074112494476139545, "loss_iou": 0.58984375, "loss_num": 0.046875, "loss_xval": 1.4140625, "num_input_tokens_seen": 150727916, "step": 2276 }, { "epoch": 0.2131324004305705, "grad_norm": 24.9462890625, "learning_rate": 5e-05, "loss": 1.5156, "num_input_tokens_seen": 150793392, "step": 2277 }, { "epoch": 0.2131324004305705, "loss": 1.4599671363830566, "loss_ce": 0.004888995084911585, "loss_iou": 0.58984375, "loss_num": 0.05419921875, "loss_xval": 1.453125, "num_input_tokens_seen": 150793392, "step": 2277 }, { "epoch": 0.21322600271446623, "grad_norm": 27.210254669189453, "learning_rate": 5e-05, "loss": 1.5673, "num_input_tokens_seen": 150859512, "step": 2278 }, { "epoch": 0.21322600271446623, "loss": 1.566602349281311, "loss_ce": 0.010450020432472229, "loss_iou": 0.62109375, "loss_num": 0.0625, "loss_xval": 1.5546875, "num_input_tokens_seen": 150859512, "step": 2278 }, { "epoch": 0.21331960499836197, "grad_norm": 34.29689025878906, "learning_rate": 5e-05, "loss": 1.4849, "num_input_tokens_seen": 150926076, "step": 2279 }, { "epoch": 0.21331960499836197, "loss": 1.4312529563903809, "loss_ce": 0.006448186933994293, "loss_iou": 0.5859375, "loss_num": 0.05029296875, "loss_xval": 1.421875, "num_input_tokens_seen": 150926076, "step": 2279 }, { "epoch": 0.21341320728225768, "grad_norm": 25.24986457824707, "learning_rate": 5e-05, "loss": 1.7384, "num_input_tokens_seen": 150992612, "step": 2280 }, { "epoch": 0.21341320728225768, "loss": 1.6506407260894775, "loss_ce": 0.005132874473929405, "loss_iou": 0.71875, "loss_num": 0.041015625, "loss_xval": 1.6484375, "num_input_tokens_seen": 150992612, "step": 2280 }, { "epoch": 0.21350680956615342, "grad_norm": 19.837234497070312, "learning_rate": 5e-05, "loss": 1.3911, "num_input_tokens_seen": 151059056, "step": 2281 }, { "epoch": 0.21350680956615342, "loss": 1.5271108150482178, "loss_ce": 0.0031849215738475323, "loss_iou": 0.63671875, "loss_num": 0.05078125, "loss_xval": 1.5234375, "num_input_tokens_seen": 151059056, "step": 2281 }, { "epoch": 0.21360041185004913, "grad_norm": 98.2978515625, "learning_rate": 5e-05, "loss": 1.4678, "num_input_tokens_seen": 151125564, "step": 2282 }, { "epoch": 0.21360041185004913, "loss": 1.698117971420288, "loss_ce": 0.009641429409384727, "loss_iou": 0.68359375, "loss_num": 0.0634765625, "loss_xval": 1.6875, "num_input_tokens_seen": 151125564, "step": 2282 }, { "epoch": 0.21369401413394487, "grad_norm": 22.10891342163086, "learning_rate": 5e-05, "loss": 1.5324, "num_input_tokens_seen": 151191800, "step": 2283 }, { "epoch": 0.21369401413394487, "loss": 1.639382243156433, "loss_ce": 0.00461659487336874, "loss_iou": 0.67578125, "loss_num": 0.056396484375, "loss_xval": 1.6328125, "num_input_tokens_seen": 151191800, "step": 2283 }, { "epoch": 0.2137876164178406, "grad_norm": 17.821348190307617, "learning_rate": 5e-05, "loss": 1.4788, "num_input_tokens_seen": 151257892, "step": 2284 }, { "epoch": 0.2137876164178406, "loss": 1.5812492370605469, "loss_ce": 0.003612576285377145, "loss_iou": 0.6484375, "loss_num": 0.056640625, "loss_xval": 1.578125, "num_input_tokens_seen": 151257892, "step": 2284 }, { "epoch": 0.21388121870173632, "grad_norm": 28.492366790771484, "learning_rate": 5e-05, "loss": 1.5016, "num_input_tokens_seen": 151324140, "step": 2285 }, { "epoch": 0.21388121870173632, "loss": 1.4028370380401611, "loss_ce": 0.009282313287258148, "loss_iou": 0.58203125, "loss_num": 0.046630859375, "loss_xval": 1.390625, "num_input_tokens_seen": 151324140, "step": 2285 }, { "epoch": 0.21397482098563206, "grad_norm": 36.702606201171875, "learning_rate": 5e-05, "loss": 1.3095, "num_input_tokens_seen": 151388180, "step": 2286 }, { "epoch": 0.21397482098563206, "loss": 1.497154712677002, "loss_ce": 0.004967216867953539, "loss_iou": 0.62890625, "loss_num": 0.047119140625, "loss_xval": 1.4921875, "num_input_tokens_seen": 151388180, "step": 2286 }, { "epoch": 0.21406842326952777, "grad_norm": 26.864652633666992, "learning_rate": 5e-05, "loss": 1.4334, "num_input_tokens_seen": 151455172, "step": 2287 }, { "epoch": 0.21406842326952777, "loss": 1.5351767539978027, "loss_ce": 0.0014852817403152585, "loss_iou": 0.67578125, "loss_num": 0.036376953125, "loss_xval": 1.53125, "num_input_tokens_seen": 151455172, "step": 2287 }, { "epoch": 0.2141620255534235, "grad_norm": 20.276927947998047, "learning_rate": 5e-05, "loss": 1.6213, "num_input_tokens_seen": 151521496, "step": 2288 }, { "epoch": 0.2141620255534235, "loss": 1.7019915580749512, "loss_ce": 0.005702382419258356, "loss_iou": 0.6953125, "loss_num": 0.06103515625, "loss_xval": 1.6953125, "num_input_tokens_seen": 151521496, "step": 2288 }, { "epoch": 0.21425562783731922, "grad_norm": 26.8983211517334, "learning_rate": 5e-05, "loss": 1.6012, "num_input_tokens_seen": 151587740, "step": 2289 }, { "epoch": 0.21425562783731922, "loss": 1.7984768152236938, "loss_ce": 0.008437741547822952, "loss_iou": 0.70703125, "loss_num": 0.07470703125, "loss_xval": 1.7890625, "num_input_tokens_seen": 151587740, "step": 2289 }, { "epoch": 0.21434923012121496, "grad_norm": 22.092914581298828, "learning_rate": 5e-05, "loss": 1.4659, "num_input_tokens_seen": 151653832, "step": 2290 }, { "epoch": 0.21434923012121496, "loss": 1.527360439300537, "loss_ce": 0.01100299321115017, "loss_iou": 0.58203125, "loss_num": 0.0703125, "loss_xval": 1.515625, "num_input_tokens_seen": 151653832, "step": 2290 }, { "epoch": 0.2144428324051107, "grad_norm": 23.715099334716797, "learning_rate": 5e-05, "loss": 1.4252, "num_input_tokens_seen": 151720092, "step": 2291 }, { "epoch": 0.2144428324051107, "loss": 1.4918944835662842, "loss_ce": 0.004589731805026531, "loss_iou": 0.6328125, "loss_num": 0.04541015625, "loss_xval": 1.484375, "num_input_tokens_seen": 151720092, "step": 2291 }, { "epoch": 0.2145364346890064, "grad_norm": 28.33808135986328, "learning_rate": 5e-05, "loss": 1.246, "num_input_tokens_seen": 151785860, "step": 2292 }, { "epoch": 0.2145364346890064, "loss": 1.1220732927322388, "loss_ce": 0.006350632291287184, "loss_iou": 0.46484375, "loss_num": 0.036865234375, "loss_xval": 1.1171875, "num_input_tokens_seen": 151785860, "step": 2292 }, { "epoch": 0.21463003697290214, "grad_norm": 31.031892776489258, "learning_rate": 5e-05, "loss": 1.5237, "num_input_tokens_seen": 151852356, "step": 2293 }, { "epoch": 0.21463003697290214, "loss": 1.4461427927017212, "loss_ce": 0.005713123362511396, "loss_iou": 0.62109375, "loss_num": 0.04052734375, "loss_xval": 1.4375, "num_input_tokens_seen": 151852356, "step": 2293 }, { "epoch": 0.21472363925679785, "grad_norm": 18.391902923583984, "learning_rate": 5e-05, "loss": 1.1777, "num_input_tokens_seen": 151919120, "step": 2294 }, { "epoch": 0.21472363925679785, "loss": 1.0668582916259766, "loss_ce": 0.005456896498799324, "loss_iou": 0.44140625, "loss_num": 0.035888671875, "loss_xval": 1.0625, "num_input_tokens_seen": 151919120, "step": 2294 }, { "epoch": 0.2148172415406936, "grad_norm": 28.7263126373291, "learning_rate": 5e-05, "loss": 1.3777, "num_input_tokens_seen": 151984628, "step": 2295 }, { "epoch": 0.2148172415406936, "loss": 1.324313998222351, "loss_ce": 0.004489860497415066, "loss_iou": 0.498046875, "loss_num": 0.064453125, "loss_xval": 1.3203125, "num_input_tokens_seen": 151984628, "step": 2295 }, { "epoch": 0.21491084382458933, "grad_norm": 22.25419044494629, "learning_rate": 5e-05, "loss": 1.5439, "num_input_tokens_seen": 152051292, "step": 2296 }, { "epoch": 0.21491084382458933, "loss": 1.7377827167510986, "loss_ce": 0.0014546426245942712, "loss_iou": 0.6875, "loss_num": 0.072265625, "loss_xval": 1.734375, "num_input_tokens_seen": 152051292, "step": 2296 }, { "epoch": 0.21500444610848504, "grad_norm": 37.100059509277344, "learning_rate": 5e-05, "loss": 1.3964, "num_input_tokens_seen": 152119152, "step": 2297 }, { "epoch": 0.21500444610848504, "loss": 1.3398528099060059, "loss_ce": 0.005868327338248491, "loss_iou": 0.58203125, "loss_num": 0.033447265625, "loss_xval": 1.3359375, "num_input_tokens_seen": 152119152, "step": 2297 }, { "epoch": 0.21509804839238078, "grad_norm": 20.610252380371094, "learning_rate": 5e-05, "loss": 1.5867, "num_input_tokens_seen": 152185324, "step": 2298 }, { "epoch": 0.21509804839238078, "loss": 1.6689636707305908, "loss_ce": 0.005389504134654999, "loss_iou": 0.66796875, "loss_num": 0.06494140625, "loss_xval": 1.6640625, "num_input_tokens_seen": 152185324, "step": 2298 }, { "epoch": 0.2151916506762765, "grad_norm": 33.635032653808594, "learning_rate": 5e-05, "loss": 1.312, "num_input_tokens_seen": 152250808, "step": 2299 }, { "epoch": 0.2151916506762765, "loss": 1.3759686946868896, "loss_ce": 0.004386695101857185, "loss_iou": 0.5625, "loss_num": 0.049072265625, "loss_xval": 1.375, "num_input_tokens_seen": 152250808, "step": 2299 }, { "epoch": 0.21528525296017223, "grad_norm": 22.86467170715332, "learning_rate": 5e-05, "loss": 1.2004, "num_input_tokens_seen": 152316712, "step": 2300 }, { "epoch": 0.21528525296017223, "loss": 1.2595347166061401, "loss_ce": 0.007581586949527264, "loss_iou": 0.494140625, "loss_num": 0.052978515625, "loss_xval": 1.25, "num_input_tokens_seen": 152316712, "step": 2300 }, { "epoch": 0.21537885524406797, "grad_norm": 25.605911254882812, "learning_rate": 5e-05, "loss": 1.4399, "num_input_tokens_seen": 152383300, "step": 2301 }, { "epoch": 0.21537885524406797, "loss": 1.4577968120574951, "loss_ce": 0.00857799407094717, "loss_iou": 0.57421875, "loss_num": 0.060546875, "loss_xval": 1.453125, "num_input_tokens_seen": 152383300, "step": 2301 }, { "epoch": 0.21547245752796368, "grad_norm": 48.16738510131836, "learning_rate": 5e-05, "loss": 1.4634, "num_input_tokens_seen": 152451216, "step": 2302 }, { "epoch": 0.21547245752796368, "loss": 1.5580638647079468, "loss_ce": 0.010212285444140434, "loss_iou": 0.66015625, "loss_num": 0.045166015625, "loss_xval": 1.546875, "num_input_tokens_seen": 152451216, "step": 2302 }, { "epoch": 0.21556605981185942, "grad_norm": 22.7972354888916, "learning_rate": 5e-05, "loss": 1.6227, "num_input_tokens_seen": 152517532, "step": 2303 }, { "epoch": 0.21556605981185942, "loss": 1.4903476238250732, "loss_ce": 0.008902355097234249, "loss_iou": 0.65625, "loss_num": 0.0341796875, "loss_xval": 1.484375, "num_input_tokens_seen": 152517532, "step": 2303 }, { "epoch": 0.21565966209575513, "grad_norm": 15.255064964294434, "learning_rate": 5e-05, "loss": 1.4808, "num_input_tokens_seen": 152583972, "step": 2304 }, { "epoch": 0.21565966209575513, "loss": 1.5429351329803467, "loss_ce": 0.003872634842991829, "loss_iou": 0.60546875, "loss_num": 0.0654296875, "loss_xval": 1.5390625, "num_input_tokens_seen": 152583972, "step": 2304 }, { "epoch": 0.21575326437965087, "grad_norm": 50.384613037109375, "learning_rate": 5e-05, "loss": 1.2977, "num_input_tokens_seen": 152649672, "step": 2305 }, { "epoch": 0.21575326437965087, "loss": 1.387707233428955, "loss_ce": 0.006847859360277653, "loss_iou": 0.58984375, "loss_num": 0.040771484375, "loss_xval": 1.3828125, "num_input_tokens_seen": 152649672, "step": 2305 }, { "epoch": 0.21584686666354658, "grad_norm": 33.084590911865234, "learning_rate": 5e-05, "loss": 1.45, "num_input_tokens_seen": 152716932, "step": 2306 }, { "epoch": 0.21584686666354658, "loss": 1.5436749458312988, "loss_ce": 0.0036359750665724277, "loss_iou": 0.6484375, "loss_num": 0.048583984375, "loss_xval": 1.5390625, "num_input_tokens_seen": 152716932, "step": 2306 }, { "epoch": 0.21594046894744232, "grad_norm": 20.642091751098633, "learning_rate": 5e-05, "loss": 1.6851, "num_input_tokens_seen": 152782976, "step": 2307 }, { "epoch": 0.21594046894744232, "loss": 1.8109982013702393, "loss_ce": 0.00435758288949728, "loss_iou": 0.7421875, "loss_num": 0.0634765625, "loss_xval": 1.8046875, "num_input_tokens_seen": 152782976, "step": 2307 }, { "epoch": 0.21603407123133805, "grad_norm": 13.964632987976074, "learning_rate": 5e-05, "loss": 1.3355, "num_input_tokens_seen": 152849072, "step": 2308 }, { "epoch": 0.21603407123133805, "loss": 1.3385752439498901, "loss_ce": 0.01093850377947092, "loss_iou": 0.53515625, "loss_num": 0.051025390625, "loss_xval": 1.328125, "num_input_tokens_seen": 152849072, "step": 2308 }, { "epoch": 0.21612767351523376, "grad_norm": 24.622068405151367, "learning_rate": 5e-05, "loss": 1.3879, "num_input_tokens_seen": 152915040, "step": 2309 }, { "epoch": 0.21612767351523376, "loss": 1.3095051050186157, "loss_ce": 0.007747296243906021, "loss_iou": 0.53515625, "loss_num": 0.04638671875, "loss_xval": 1.3046875, "num_input_tokens_seen": 152915040, "step": 2309 }, { "epoch": 0.2162212757991295, "grad_norm": 23.115848541259766, "learning_rate": 5e-05, "loss": 1.5362, "num_input_tokens_seen": 152981616, "step": 2310 }, { "epoch": 0.2162212757991295, "loss": 1.5281729698181152, "loss_ce": 0.00912993773818016, "loss_iou": 0.6328125, "loss_num": 0.05126953125, "loss_xval": 1.515625, "num_input_tokens_seen": 152981616, "step": 2310 }, { "epoch": 0.2163148780830252, "grad_norm": 104.16942596435547, "learning_rate": 5e-05, "loss": 1.4349, "num_input_tokens_seen": 153048352, "step": 2311 }, { "epoch": 0.2163148780830252, "loss": 1.5230183601379395, "loss_ce": 0.006905078887939453, "loss_iou": 0.59375, "loss_num": 0.06591796875, "loss_xval": 1.515625, "num_input_tokens_seen": 153048352, "step": 2311 }, { "epoch": 0.21640848036692095, "grad_norm": 25.01462173461914, "learning_rate": 5e-05, "loss": 1.4899, "num_input_tokens_seen": 153115308, "step": 2312 }, { "epoch": 0.21640848036692095, "loss": 1.5599207878112793, "loss_ce": 0.006209854036569595, "loss_iou": 0.6640625, "loss_num": 0.045654296875, "loss_xval": 1.5546875, "num_input_tokens_seen": 153115308, "step": 2312 }, { "epoch": 0.2165020826508167, "grad_norm": 22.376323699951172, "learning_rate": 5e-05, "loss": 1.4721, "num_input_tokens_seen": 153182640, "step": 2313 }, { "epoch": 0.2165020826508167, "loss": 1.455622911453247, "loss_ce": 0.00933381263166666, "loss_iou": 0.6171875, "loss_num": 0.04248046875, "loss_xval": 1.4453125, "num_input_tokens_seen": 153182640, "step": 2313 }, { "epoch": 0.2165956849347124, "grad_norm": 30.825626373291016, "learning_rate": 5e-05, "loss": 1.4976, "num_input_tokens_seen": 153249076, "step": 2314 }, { "epoch": 0.2165956849347124, "loss": 1.3127450942993164, "loss_ce": 0.002686548512428999, "loss_iou": 0.55078125, "loss_num": 0.042236328125, "loss_xval": 1.3125, "num_input_tokens_seen": 153249076, "step": 2314 }, { "epoch": 0.21668928721860814, "grad_norm": 33.20108413696289, "learning_rate": 5e-05, "loss": 1.7264, "num_input_tokens_seen": 153315072, "step": 2315 }, { "epoch": 0.21668928721860814, "loss": 1.7240190505981445, "loss_ce": 0.007222216576337814, "loss_iou": 0.734375, "loss_num": 0.04931640625, "loss_xval": 1.71875, "num_input_tokens_seen": 153315072, "step": 2315 }, { "epoch": 0.21678288950250385, "grad_norm": 10.900003433227539, "learning_rate": 5e-05, "loss": 1.2683, "num_input_tokens_seen": 153382136, "step": 2316 }, { "epoch": 0.21678288950250385, "loss": 1.3354460000991821, "loss_ce": 0.003903105389326811, "loss_iou": 0.5390625, "loss_num": 0.050537109375, "loss_xval": 1.328125, "num_input_tokens_seen": 153382136, "step": 2316 }, { "epoch": 0.2168764917863996, "grad_norm": 36.16751480102539, "learning_rate": 5e-05, "loss": 1.2627, "num_input_tokens_seen": 153448448, "step": 2317 }, { "epoch": 0.2168764917863996, "loss": 1.2977166175842285, "loss_ce": 0.0032829931005835533, "loss_iou": 0.56640625, "loss_num": 0.032470703125, "loss_xval": 1.296875, "num_input_tokens_seen": 153448448, "step": 2317 }, { "epoch": 0.21697009407029533, "grad_norm": 21.0792179107666, "learning_rate": 5e-05, "loss": 1.4286, "num_input_tokens_seen": 153513880, "step": 2318 }, { "epoch": 0.21697009407029533, "loss": 1.4616341590881348, "loss_ce": 0.005579415243119001, "loss_iou": 0.61328125, "loss_num": 0.046142578125, "loss_xval": 1.453125, "num_input_tokens_seen": 153513880, "step": 2318 }, { "epoch": 0.21706369635419104, "grad_norm": 25.42380714416504, "learning_rate": 5e-05, "loss": 1.3606, "num_input_tokens_seen": 153579424, "step": 2319 }, { "epoch": 0.21706369635419104, "loss": 1.374646544456482, "loss_ce": 0.004285210743546486, "loss_iou": 0.5859375, "loss_num": 0.03955078125, "loss_xval": 1.3671875, "num_input_tokens_seen": 153579424, "step": 2319 }, { "epoch": 0.21715729863808678, "grad_norm": 29.15047836303711, "learning_rate": 5e-05, "loss": 1.3324, "num_input_tokens_seen": 153646624, "step": 2320 }, { "epoch": 0.21715729863808678, "loss": 1.2218403816223145, "loss_ce": 0.011391129344701767, "loss_iou": 0.51171875, "loss_num": 0.037841796875, "loss_xval": 1.2109375, "num_input_tokens_seen": 153646624, "step": 2320 }, { "epoch": 0.2172509009219825, "grad_norm": 27.564165115356445, "learning_rate": 5e-05, "loss": 1.5839, "num_input_tokens_seen": 153713048, "step": 2321 }, { "epoch": 0.2172509009219825, "loss": 1.962526798248291, "loss_ce": 0.005495624616742134, "loss_iou": 0.7890625, "loss_num": 0.07568359375, "loss_xval": 1.953125, "num_input_tokens_seen": 153713048, "step": 2321 }, { "epoch": 0.21734450320587823, "grad_norm": 22.206457138061523, "learning_rate": 5e-05, "loss": 1.5211, "num_input_tokens_seen": 153778652, "step": 2322 }, { "epoch": 0.21734450320587823, "loss": 1.782984972000122, "loss_ce": 0.008571009151637554, "loss_iou": 0.765625, "loss_num": 0.04833984375, "loss_xval": 1.7734375, "num_input_tokens_seen": 153778652, "step": 2322 }, { "epoch": 0.21743810548977396, "grad_norm": 27.375165939331055, "learning_rate": 5e-05, "loss": 1.1316, "num_input_tokens_seen": 153844580, "step": 2323 }, { "epoch": 0.21743810548977396, "loss": 1.2421653270721436, "loss_ce": 0.00486073549836874, "loss_iou": 0.5625, "loss_num": 0.0228271484375, "loss_xval": 1.234375, "num_input_tokens_seen": 153844580, "step": 2323 }, { "epoch": 0.21753170777366967, "grad_norm": 21.734966278076172, "learning_rate": 5e-05, "loss": 1.7028, "num_input_tokens_seen": 153910768, "step": 2324 }, { "epoch": 0.21753170777366967, "loss": 1.7416293621063232, "loss_ce": 0.007254478521645069, "loss_iou": 0.734375, "loss_num": 0.05224609375, "loss_xval": 1.734375, "num_input_tokens_seen": 153910768, "step": 2324 }, { "epoch": 0.2176253100575654, "grad_norm": 17.44476890563965, "learning_rate": 5e-05, "loss": 1.1036, "num_input_tokens_seen": 153977908, "step": 2325 }, { "epoch": 0.2176253100575654, "loss": 1.0553233623504639, "loss_ce": 0.00405379943549633, "loss_iou": 0.466796875, "loss_num": 0.0235595703125, "loss_xval": 1.0546875, "num_input_tokens_seen": 153977908, "step": 2325 }, { "epoch": 0.21771891234146112, "grad_norm": 20.79197883605957, "learning_rate": 5e-05, "loss": 1.4434, "num_input_tokens_seen": 154044332, "step": 2326 }, { "epoch": 0.21771891234146112, "loss": 1.4662706851959229, "loss_ce": 0.010215958580374718, "loss_iou": 0.58203125, "loss_num": 0.05859375, "loss_xval": 1.453125, "num_input_tokens_seen": 154044332, "step": 2326 }, { "epoch": 0.21781251462535686, "grad_norm": 14.832798957824707, "learning_rate": 5e-05, "loss": 1.3066, "num_input_tokens_seen": 154111084, "step": 2327 }, { "epoch": 0.21781251462535686, "loss": 1.3189163208007812, "loss_ce": 0.003974883817136288, "loss_iou": 0.51953125, "loss_num": 0.05517578125, "loss_xval": 1.3125, "num_input_tokens_seen": 154111084, "step": 2327 }, { "epoch": 0.21790611690925257, "grad_norm": 18.780778884887695, "learning_rate": 5e-05, "loss": 1.3165, "num_input_tokens_seen": 154177292, "step": 2328 }, { "epoch": 0.21790611690925257, "loss": 1.3648505210876465, "loss_ce": 0.004499020054936409, "loss_iou": 0.58984375, "loss_num": 0.03662109375, "loss_xval": 1.359375, "num_input_tokens_seen": 154177292, "step": 2328 }, { "epoch": 0.2179997191931483, "grad_norm": 19.013347625732422, "learning_rate": 5e-05, "loss": 1.3023, "num_input_tokens_seen": 154243572, "step": 2329 }, { "epoch": 0.2179997191931483, "loss": 1.3702499866485596, "loss_ce": 0.005015599075704813, "loss_iou": 0.48828125, "loss_num": 0.0771484375, "loss_xval": 1.3671875, "num_input_tokens_seen": 154243572, "step": 2329 }, { "epoch": 0.21809332147704405, "grad_norm": 12.443313598632812, "learning_rate": 5e-05, "loss": 1.2873, "num_input_tokens_seen": 154309600, "step": 2330 }, { "epoch": 0.21809332147704405, "loss": 1.2727537155151367, "loss_ce": 0.004931387957185507, "loss_iou": 0.54296875, "loss_num": 0.035888671875, "loss_xval": 1.265625, "num_input_tokens_seen": 154309600, "step": 2330 }, { "epoch": 0.21818692376093976, "grad_norm": 15.309600830078125, "learning_rate": 5e-05, "loss": 1.2904, "num_input_tokens_seen": 154376540, "step": 2331 }, { "epoch": 0.21818692376093976, "loss": 1.1204233169555664, "loss_ce": 0.0027475638780742884, "loss_iou": 0.486328125, "loss_num": 0.02880859375, "loss_xval": 1.1171875, "num_input_tokens_seen": 154376540, "step": 2331 }, { "epoch": 0.2182805260448355, "grad_norm": 26.37431526184082, "learning_rate": 5e-05, "loss": 1.27, "num_input_tokens_seen": 154442212, "step": 2332 }, { "epoch": 0.2182805260448355, "loss": 1.0199689865112305, "loss_ce": 0.006541187409311533, "loss_iou": 0.37890625, "loss_num": 0.051025390625, "loss_xval": 1.015625, "num_input_tokens_seen": 154442212, "step": 2332 }, { "epoch": 0.2183741283287312, "grad_norm": 17.268339157104492, "learning_rate": 5e-05, "loss": 1.3789, "num_input_tokens_seen": 154508468, "step": 2333 }, { "epoch": 0.2183741283287312, "loss": 1.6162927150726318, "loss_ce": 0.004964606836438179, "loss_iou": 0.65625, "loss_num": 0.060302734375, "loss_xval": 1.609375, "num_input_tokens_seen": 154508468, "step": 2333 }, { "epoch": 0.21846773061262695, "grad_norm": 24.678123474121094, "learning_rate": 5e-05, "loss": 1.4215, "num_input_tokens_seen": 154576148, "step": 2334 }, { "epoch": 0.21846773061262695, "loss": 1.5552942752838135, "loss_ce": 0.00548955611884594, "loss_iou": 0.6484375, "loss_num": 0.05029296875, "loss_xval": 1.546875, "num_input_tokens_seen": 154576148, "step": 2334 }, { "epoch": 0.2185613328965227, "grad_norm": 20.073673248291016, "learning_rate": 5e-05, "loss": 1.5777, "num_input_tokens_seen": 154642524, "step": 2335 }, { "epoch": 0.2185613328965227, "loss": 1.4285829067230225, "loss_ce": 0.007196234539151192, "loss_iou": 0.57421875, "loss_num": 0.05517578125, "loss_xval": 1.421875, "num_input_tokens_seen": 154642524, "step": 2335 }, { "epoch": 0.2186549351804184, "grad_norm": 29.751808166503906, "learning_rate": 5e-05, "loss": 1.2831, "num_input_tokens_seen": 154709156, "step": 2336 }, { "epoch": 0.2186549351804184, "loss": 1.2040770053863525, "loss_ce": 0.006323082372546196, "loss_iou": 0.498046875, "loss_num": 0.0400390625, "loss_xval": 1.1953125, "num_input_tokens_seen": 154709156, "step": 2336 }, { "epoch": 0.21874853746431414, "grad_norm": 24.08979606628418, "learning_rate": 5e-05, "loss": 1.6359, "num_input_tokens_seen": 154775196, "step": 2337 }, { "epoch": 0.21874853746431414, "loss": 1.6917073726654053, "loss_ce": 0.006160551682114601, "loss_iou": 0.7109375, "loss_num": 0.052978515625, "loss_xval": 1.6875, "num_input_tokens_seen": 154775196, "step": 2337 }, { "epoch": 0.21884213974820985, "grad_norm": 26.720741271972656, "learning_rate": 5e-05, "loss": 1.2053, "num_input_tokens_seen": 154841964, "step": 2338 }, { "epoch": 0.21884213974820985, "loss": 1.1504793167114258, "loss_ce": 0.0064363982528448105, "loss_iou": 0.466796875, "loss_num": 0.042236328125, "loss_xval": 1.140625, "num_input_tokens_seen": 154841964, "step": 2338 }, { "epoch": 0.21893574203210558, "grad_norm": 18.2683162689209, "learning_rate": 5e-05, "loss": 1.2713, "num_input_tokens_seen": 154907748, "step": 2339 }, { "epoch": 0.21893574203210558, "loss": 1.1781668663024902, "loss_ce": 0.00238564470782876, "loss_iou": 0.484375, "loss_num": 0.04150390625, "loss_xval": 1.171875, "num_input_tokens_seen": 154907748, "step": 2339 }, { "epoch": 0.21902934431600132, "grad_norm": 26.95297622680664, "learning_rate": 5e-05, "loss": 1.4753, "num_input_tokens_seen": 154974288, "step": 2340 }, { "epoch": 0.21902934431600132, "loss": 1.4889464378356934, "loss_ce": 0.007012868300080299, "loss_iou": 0.62109375, "loss_num": 0.04833984375, "loss_xval": 1.484375, "num_input_tokens_seen": 154974288, "step": 2340 }, { "epoch": 0.21912294659989703, "grad_norm": 33.99496078491211, "learning_rate": 5e-05, "loss": 1.2593, "num_input_tokens_seen": 155039436, "step": 2341 }, { "epoch": 0.21912294659989703, "loss": 1.2838804721832275, "loss_ce": 0.0055601941421628, "loss_iou": 0.54296875, "loss_num": 0.03857421875, "loss_xval": 1.28125, "num_input_tokens_seen": 155039436, "step": 2341 }, { "epoch": 0.21921654888379277, "grad_norm": 22.024890899658203, "learning_rate": 5e-05, "loss": 1.5783, "num_input_tokens_seen": 155105084, "step": 2342 }, { "epoch": 0.21921654888379277, "loss": 1.4463050365447998, "loss_ce": 0.0070350803434848785, "loss_iou": 0.60546875, "loss_num": 0.04541015625, "loss_xval": 1.4375, "num_input_tokens_seen": 155105084, "step": 2342 }, { "epoch": 0.21931015116768848, "grad_norm": 43.48978042602539, "learning_rate": 5e-05, "loss": 1.3735, "num_input_tokens_seen": 155171816, "step": 2343 }, { "epoch": 0.21931015116768848, "loss": 1.605210781097412, "loss_ce": 0.0046248482540249825, "loss_iou": 0.6640625, "loss_num": 0.05517578125, "loss_xval": 1.6015625, "num_input_tokens_seen": 155171816, "step": 2343 }, { "epoch": 0.21940375345158422, "grad_norm": 12.529638290405273, "learning_rate": 5e-05, "loss": 1.3214, "num_input_tokens_seen": 155238608, "step": 2344 }, { "epoch": 0.21940375345158422, "loss": 1.2851300239562988, "loss_ce": 0.007786213420331478, "loss_iou": 0.54296875, "loss_num": 0.038818359375, "loss_xval": 1.28125, "num_input_tokens_seen": 155238608, "step": 2344 }, { "epoch": 0.21949735573547993, "grad_norm": 19.131290435791016, "learning_rate": 5e-05, "loss": 1.4721, "num_input_tokens_seen": 155304780, "step": 2345 }, { "epoch": 0.21949735573547993, "loss": 1.5659270286560059, "loss_ce": 0.005868379957973957, "loss_iou": 0.6171875, "loss_num": 0.064453125, "loss_xval": 1.5625, "num_input_tokens_seen": 155304780, "step": 2345 }, { "epoch": 0.21959095801937567, "grad_norm": 31.61728858947754, "learning_rate": 5e-05, "loss": 1.2846, "num_input_tokens_seen": 155370144, "step": 2346 }, { "epoch": 0.21959095801937567, "loss": 1.262368083000183, "loss_ce": 0.004555597901344299, "loss_iou": 0.5, "loss_num": 0.052001953125, "loss_xval": 1.2578125, "num_input_tokens_seen": 155370144, "step": 2346 }, { "epoch": 0.2196845603032714, "grad_norm": 21.765249252319336, "learning_rate": 5e-05, "loss": 1.6413, "num_input_tokens_seen": 155435816, "step": 2347 }, { "epoch": 0.2196845603032714, "loss": 1.5300925970077515, "loss_ce": 0.008608181029558182, "loss_iou": 0.66796875, "loss_num": 0.03759765625, "loss_xval": 1.5234375, "num_input_tokens_seen": 155435816, "step": 2347 }, { "epoch": 0.21977816258716712, "grad_norm": 14.472197532653809, "learning_rate": 5e-05, "loss": 1.2535, "num_input_tokens_seen": 155502040, "step": 2348 }, { "epoch": 0.21977816258716712, "loss": 1.121678352355957, "loss_ce": 0.0027817978989332914, "loss_iou": 0.470703125, "loss_num": 0.03515625, "loss_xval": 1.1171875, "num_input_tokens_seen": 155502040, "step": 2348 }, { "epoch": 0.21987176487106286, "grad_norm": 22.763681411743164, "learning_rate": 5e-05, "loss": 1.1664, "num_input_tokens_seen": 155568120, "step": 2349 }, { "epoch": 0.21987176487106286, "loss": 1.309107780456543, "loss_ce": 0.00734997633844614, "loss_iou": 0.51953125, "loss_num": 0.05224609375, "loss_xval": 1.3046875, "num_input_tokens_seen": 155568120, "step": 2349 }, { "epoch": 0.21996536715495857, "grad_norm": 19.524024963378906, "learning_rate": 5e-05, "loss": 1.3087, "num_input_tokens_seen": 155635224, "step": 2350 }, { "epoch": 0.21996536715495857, "loss": 1.471051812171936, "loss_ce": 0.009137764573097229, "loss_iou": 0.59375, "loss_num": 0.05419921875, "loss_xval": 1.4609375, "num_input_tokens_seen": 155635224, "step": 2350 }, { "epoch": 0.2200589694388543, "grad_norm": 34.68376159667969, "learning_rate": 5e-05, "loss": 1.3156, "num_input_tokens_seen": 155700372, "step": 2351 }, { "epoch": 0.2200589694388543, "loss": 1.2919411659240723, "loss_ce": 0.009470431134104729, "loss_iou": 0.515625, "loss_num": 0.04931640625, "loss_xval": 1.28125, "num_input_tokens_seen": 155700372, "step": 2351 }, { "epoch": 0.22015257172275005, "grad_norm": 84.88421630859375, "learning_rate": 5e-05, "loss": 1.6112, "num_input_tokens_seen": 155766948, "step": 2352 }, { "epoch": 0.22015257172275005, "loss": 1.6565346717834473, "loss_ce": 0.004190912935882807, "loss_iou": 0.7109375, "loss_num": 0.04638671875, "loss_xval": 1.65625, "num_input_tokens_seen": 155766948, "step": 2352 }, { "epoch": 0.22024617400664576, "grad_norm": 10.805121421813965, "learning_rate": 5e-05, "loss": 1.4896, "num_input_tokens_seen": 155832876, "step": 2353 }, { "epoch": 0.22024617400664576, "loss": 1.6248092651367188, "loss_ce": 0.00469213305041194, "loss_iou": 0.69921875, "loss_num": 0.044677734375, "loss_xval": 1.6171875, "num_input_tokens_seen": 155832876, "step": 2353 }, { "epoch": 0.2203397762905415, "grad_norm": 17.746360778808594, "learning_rate": 5e-05, "loss": 1.2904, "num_input_tokens_seen": 155899048, "step": 2354 }, { "epoch": 0.2203397762905415, "loss": 1.238121509552002, "loss_ce": 0.0032582785934209824, "loss_iou": 0.52734375, "loss_num": 0.035888671875, "loss_xval": 1.234375, "num_input_tokens_seen": 155899048, "step": 2354 }, { "epoch": 0.2204333785744372, "grad_norm": 25.77299690246582, "learning_rate": 5e-05, "loss": 1.4596, "num_input_tokens_seen": 155964556, "step": 2355 }, { "epoch": 0.2204333785744372, "loss": 1.2023621797561646, "loss_ce": 0.006317278370261192, "loss_iou": 0.4921875, "loss_num": 0.042236328125, "loss_xval": 1.1953125, "num_input_tokens_seen": 155964556, "step": 2355 }, { "epoch": 0.22052698085833294, "grad_norm": 43.80807113647461, "learning_rate": 5e-05, "loss": 1.5496, "num_input_tokens_seen": 156031108, "step": 2356 }, { "epoch": 0.22052698085833294, "loss": 1.5094894170761108, "loss_ce": 0.005094944499433041, "loss_iou": 0.66015625, "loss_num": 0.03759765625, "loss_xval": 1.5078125, "num_input_tokens_seen": 156031108, "step": 2356 }, { "epoch": 0.22062058314222868, "grad_norm": 17.146434783935547, "learning_rate": 5e-05, "loss": 1.6264, "num_input_tokens_seen": 156097468, "step": 2357 }, { "epoch": 0.22062058314222868, "loss": 1.7161318063735962, "loss_ce": 0.0022646307479590178, "loss_iou": 0.75390625, "loss_num": 0.041015625, "loss_xval": 1.7109375, "num_input_tokens_seen": 156097468, "step": 2357 }, { "epoch": 0.2207141854261244, "grad_norm": 29.371784210205078, "learning_rate": 5e-05, "loss": 1.5495, "num_input_tokens_seen": 156163380, "step": 2358 }, { "epoch": 0.2207141854261244, "loss": 1.7303476333618164, "loss_ce": 0.006714966148138046, "loss_iou": 0.734375, "loss_num": 0.051025390625, "loss_xval": 1.7265625, "num_input_tokens_seen": 156163380, "step": 2358 }, { "epoch": 0.22080778771002013, "grad_norm": 48.36513900756836, "learning_rate": 5e-05, "loss": 1.4079, "num_input_tokens_seen": 156229304, "step": 2359 }, { "epoch": 0.22080778771002013, "loss": 1.62276291847229, "loss_ce": 0.005575337912887335, "loss_iou": 0.67578125, "loss_num": 0.053955078125, "loss_xval": 1.6171875, "num_input_tokens_seen": 156229304, "step": 2359 }, { "epoch": 0.22090138999391584, "grad_norm": 26.75238037109375, "learning_rate": 5e-05, "loss": 1.5, "num_input_tokens_seen": 156296604, "step": 2360 }, { "epoch": 0.22090138999391584, "loss": 1.39876389503479, "loss_ce": 0.007162292487919331, "loss_iou": 0.6171875, "loss_num": 0.0322265625, "loss_xval": 1.390625, "num_input_tokens_seen": 156296604, "step": 2360 }, { "epoch": 0.22099499227781158, "grad_norm": 40.60019302368164, "learning_rate": 5e-05, "loss": 1.2413, "num_input_tokens_seen": 156363332, "step": 2361 }, { "epoch": 0.22099499227781158, "loss": 1.3198292255401611, "loss_ce": 0.003911266103386879, "loss_iou": 0.55859375, "loss_num": 0.0400390625, "loss_xval": 1.3125, "num_input_tokens_seen": 156363332, "step": 2361 }, { "epoch": 0.22108859456170732, "grad_norm": 23.686565399169922, "learning_rate": 5e-05, "loss": 1.3274, "num_input_tokens_seen": 156429808, "step": 2362 }, { "epoch": 0.22108859456170732, "loss": 1.3950692415237427, "loss_ce": 0.012256758287549019, "loss_iou": 0.58203125, "loss_num": 0.043212890625, "loss_xval": 1.3828125, "num_input_tokens_seen": 156429808, "step": 2362 }, { "epoch": 0.22118219684560303, "grad_norm": 47.21058654785156, "learning_rate": 5e-05, "loss": 1.559, "num_input_tokens_seen": 156495828, "step": 2363 }, { "epoch": 0.22118219684560303, "loss": 1.6289995908737183, "loss_ce": 0.005952713079750538, "loss_iou": 0.6484375, "loss_num": 0.064453125, "loss_xval": 1.625, "num_input_tokens_seen": 156495828, "step": 2363 }, { "epoch": 0.22127579912949877, "grad_norm": 23.084014892578125, "learning_rate": 5e-05, "loss": 1.5317, "num_input_tokens_seen": 156562224, "step": 2364 }, { "epoch": 0.22127579912949877, "loss": 1.513930082321167, "loss_ce": 0.004164400044828653, "loss_iou": 0.6015625, "loss_num": 0.06103515625, "loss_xval": 1.5078125, "num_input_tokens_seen": 156562224, "step": 2364 }, { "epoch": 0.22136940141339448, "grad_norm": 13.007682800292969, "learning_rate": 5e-05, "loss": 1.4322, "num_input_tokens_seen": 156628364, "step": 2365 }, { "epoch": 0.22136940141339448, "loss": 1.3901398181915283, "loss_ce": 0.008303900249302387, "loss_iou": 0.5625, "loss_num": 0.052001953125, "loss_xval": 1.3828125, "num_input_tokens_seen": 156628364, "step": 2365 }, { "epoch": 0.22146300369729022, "grad_norm": 15.463536262512207, "learning_rate": 5e-05, "loss": 1.3102, "num_input_tokens_seen": 156694452, "step": 2366 }, { "epoch": 0.22146300369729022, "loss": 1.3520538806915283, "loss_ce": 0.0034210835583508015, "loss_iou": 0.54296875, "loss_num": 0.052490234375, "loss_xval": 1.3515625, "num_input_tokens_seen": 156694452, "step": 2366 }, { "epoch": 0.22155660598118593, "grad_norm": 32.025978088378906, "learning_rate": 5e-05, "loss": 1.4148, "num_input_tokens_seen": 156761440, "step": 2367 }, { "epoch": 0.22155660598118593, "loss": 1.164376974105835, "loss_ce": 0.002939025405794382, "loss_iou": 0.5234375, "loss_num": 0.0223388671875, "loss_xval": 1.1640625, "num_input_tokens_seen": 156761440, "step": 2367 }, { "epoch": 0.22165020826508167, "grad_norm": 21.3950138092041, "learning_rate": 5e-05, "loss": 1.5863, "num_input_tokens_seen": 156827060, "step": 2368 }, { "epoch": 0.22165020826508167, "loss": 1.6351876258850098, "loss_ce": 0.0062814257107675076, "loss_iou": 0.640625, "loss_num": 0.06982421875, "loss_xval": 1.625, "num_input_tokens_seen": 156827060, "step": 2368 }, { "epoch": 0.2217438105489774, "grad_norm": 20.068511962890625, "learning_rate": 5e-05, "loss": 1.4375, "num_input_tokens_seen": 156891820, "step": 2369 }, { "epoch": 0.2217438105489774, "loss": 1.3463859558105469, "loss_ce": 0.005077444948256016, "loss_iou": 0.5390625, "loss_num": 0.052734375, "loss_xval": 1.34375, "num_input_tokens_seen": 156891820, "step": 2369 }, { "epoch": 0.22183741283287312, "grad_norm": 19.693435668945312, "learning_rate": 5e-05, "loss": 1.5323, "num_input_tokens_seen": 156958584, "step": 2370 }, { "epoch": 0.22183741283287312, "loss": 1.6072475910186768, "loss_ce": 0.008126460015773773, "loss_iou": 0.6484375, "loss_num": 0.061279296875, "loss_xval": 1.6015625, "num_input_tokens_seen": 156958584, "step": 2370 }, { "epoch": 0.22193101511676885, "grad_norm": 30.499183654785156, "learning_rate": 5e-05, "loss": 1.3854, "num_input_tokens_seen": 157023540, "step": 2371 }, { "epoch": 0.22193101511676885, "loss": 1.431348204612732, "loss_ce": 0.007764289155602455, "loss_iou": 0.5625, "loss_num": 0.059814453125, "loss_xval": 1.421875, "num_input_tokens_seen": 157023540, "step": 2371 }, { "epoch": 0.22202461740066456, "grad_norm": 24.115846633911133, "learning_rate": 5e-05, "loss": 1.6977, "num_input_tokens_seen": 157090452, "step": 2372 }, { "epoch": 0.22202461740066456, "loss": 1.7665398120880127, "loss_ce": 0.003844510531052947, "loss_iou": 0.72265625, "loss_num": 0.06396484375, "loss_xval": 1.765625, "num_input_tokens_seen": 157090452, "step": 2372 }, { "epoch": 0.2221182196845603, "grad_norm": 68.1358642578125, "learning_rate": 5e-05, "loss": 1.3712, "num_input_tokens_seen": 157156644, "step": 2373 }, { "epoch": 0.2221182196845603, "loss": 1.4100452661514282, "loss_ce": 0.002330407965928316, "loss_iou": 0.58984375, "loss_num": 0.04541015625, "loss_xval": 1.40625, "num_input_tokens_seen": 157156644, "step": 2373 }, { "epoch": 0.22221182196845604, "grad_norm": 19.001420974731445, "learning_rate": 5e-05, "loss": 1.7268, "num_input_tokens_seen": 157223328, "step": 2374 }, { "epoch": 0.22221182196845604, "loss": 1.8332513570785522, "loss_ce": 0.00463805440813303, "loss_iou": 0.70703125, "loss_num": 0.0830078125, "loss_xval": 1.828125, "num_input_tokens_seen": 157223328, "step": 2374 }, { "epoch": 0.22230542425235175, "grad_norm": 84.92061614990234, "learning_rate": 5e-05, "loss": 1.2984, "num_input_tokens_seen": 157290796, "step": 2375 }, { "epoch": 0.22230542425235175, "loss": 1.2620265483856201, "loss_ce": 0.0022609643638134003, "loss_iou": 0.5390625, "loss_num": 0.036376953125, "loss_xval": 1.2578125, "num_input_tokens_seen": 157290796, "step": 2375 }, { "epoch": 0.2223990265362475, "grad_norm": 19.052217483520508, "learning_rate": 5e-05, "loss": 1.2273, "num_input_tokens_seen": 157357600, "step": 2376 }, { "epoch": 0.2223990265362475, "loss": 1.0218197107315063, "loss_ce": 0.00521816685795784, "loss_iou": 0.421875, "loss_num": 0.03466796875, "loss_xval": 1.015625, "num_input_tokens_seen": 157357600, "step": 2376 }, { "epoch": 0.2224926288201432, "grad_norm": 31.591190338134766, "learning_rate": 5e-05, "loss": 1.5013, "num_input_tokens_seen": 157423792, "step": 2377 }, { "epoch": 0.2224926288201432, "loss": 1.574617862701416, "loss_ce": 0.004793614149093628, "loss_iou": 0.62109375, "loss_num": 0.06591796875, "loss_xval": 1.5703125, "num_input_tokens_seen": 157423792, "step": 2377 }, { "epoch": 0.22258623110403894, "grad_norm": 30.82547950744629, "learning_rate": 5e-05, "loss": 1.2971, "num_input_tokens_seen": 157489616, "step": 2378 }, { "epoch": 0.22258623110403894, "loss": 1.2583870887756348, "loss_ce": 0.003260166384279728, "loss_iou": 0.51953125, "loss_num": 0.04345703125, "loss_xval": 1.2578125, "num_input_tokens_seen": 157489616, "step": 2378 }, { "epoch": 0.22267983338793468, "grad_norm": 20.78563690185547, "learning_rate": 5e-05, "loss": 1.6914, "num_input_tokens_seen": 157556560, "step": 2379 }, { "epoch": 0.22267983338793468, "loss": 1.684895396232605, "loss_ce": 0.0071610379964113235, "loss_iou": 0.72265625, "loss_num": 0.046630859375, "loss_xval": 1.6796875, "num_input_tokens_seen": 157556560, "step": 2379 }, { "epoch": 0.2227734356718304, "grad_norm": 15.464996337890625, "learning_rate": 5e-05, "loss": 1.0427, "num_input_tokens_seen": 157621252, "step": 2380 }, { "epoch": 0.2227734356718304, "loss": 0.9868307113647461, "loss_ce": 0.009291645139455795, "loss_iou": 0.376953125, "loss_num": 0.044677734375, "loss_xval": 0.9765625, "num_input_tokens_seen": 157621252, "step": 2380 }, { "epoch": 0.22286703795572613, "grad_norm": 46.25885009765625, "learning_rate": 5e-05, "loss": 1.4793, "num_input_tokens_seen": 157686704, "step": 2381 }, { "epoch": 0.22286703795572613, "loss": 1.4747636318206787, "loss_ce": 0.0040604411624372005, "loss_iou": 0.6171875, "loss_num": 0.046875, "loss_xval": 1.46875, "num_input_tokens_seen": 157686704, "step": 2381 }, { "epoch": 0.22296064023962184, "grad_norm": 24.369220733642578, "learning_rate": 5e-05, "loss": 1.5605, "num_input_tokens_seen": 157753524, "step": 2382 }, { "epoch": 0.22296064023962184, "loss": 1.5344555377960205, "loss_ce": 0.005158551037311554, "loss_iou": 0.6796875, "loss_num": 0.03369140625, "loss_xval": 1.53125, "num_input_tokens_seen": 157753524, "step": 2382 }, { "epoch": 0.22305424252351758, "grad_norm": 24.736207962036133, "learning_rate": 5e-05, "loss": 1.4471, "num_input_tokens_seen": 157819664, "step": 2383 }, { "epoch": 0.22305424252351758, "loss": 1.467447280883789, "loss_ce": 0.0069979652762413025, "loss_iou": 0.6171875, "loss_num": 0.044921875, "loss_xval": 1.4609375, "num_input_tokens_seen": 157819664, "step": 2383 }, { "epoch": 0.22314784480741331, "grad_norm": 22.33594512939453, "learning_rate": 5e-05, "loss": 1.4575, "num_input_tokens_seen": 157885804, "step": 2384 }, { "epoch": 0.22314784480741331, "loss": 1.5102100372314453, "loss_ce": 0.006303833797574043, "loss_iou": 0.609375, "loss_num": 0.057373046875, "loss_xval": 1.5, "num_input_tokens_seen": 157885804, "step": 2384 }, { "epoch": 0.22324144709130903, "grad_norm": 32.80448913574219, "learning_rate": 5e-05, "loss": 1.416, "num_input_tokens_seen": 157951864, "step": 2385 }, { "epoch": 0.22324144709130903, "loss": 1.3747851848602295, "loss_ce": 0.0041798185557127, "loss_iou": 0.6015625, "loss_num": 0.033935546875, "loss_xval": 1.3671875, "num_input_tokens_seen": 157951864, "step": 2385 }, { "epoch": 0.22333504937520476, "grad_norm": 19.613697052001953, "learning_rate": 5e-05, "loss": 1.6081, "num_input_tokens_seen": 158019132, "step": 2386 }, { "epoch": 0.22333504937520476, "loss": 1.527367115020752, "loss_ce": 0.006370993331074715, "loss_iou": 0.58203125, "loss_num": 0.0712890625, "loss_xval": 1.5234375, "num_input_tokens_seen": 158019132, "step": 2386 }, { "epoch": 0.22342865165910047, "grad_norm": 17.705842971801758, "learning_rate": 5e-05, "loss": 1.5608, "num_input_tokens_seen": 158085192, "step": 2387 }, { "epoch": 0.22342865165910047, "loss": 1.56697678565979, "loss_ce": 0.004476805683225393, "loss_iou": 0.6015625, "loss_num": 0.07275390625, "loss_xval": 1.5625, "num_input_tokens_seen": 158085192, "step": 2387 }, { "epoch": 0.2235222539429962, "grad_norm": 22.97504234313965, "learning_rate": 5e-05, "loss": 1.2713, "num_input_tokens_seen": 158152236, "step": 2388 }, { "epoch": 0.2235222539429962, "loss": 1.4159389734268188, "loss_ce": 0.0028530380222946405, "loss_iou": 0.5625, "loss_num": 0.0576171875, "loss_xval": 1.4140625, "num_input_tokens_seen": 158152236, "step": 2388 }, { "epoch": 0.22361585622689192, "grad_norm": 39.97024154663086, "learning_rate": 5e-05, "loss": 1.3626, "num_input_tokens_seen": 158219804, "step": 2389 }, { "epoch": 0.22361585622689192, "loss": 1.429793357849121, "loss_ce": 0.0064534759148955345, "loss_iou": 0.59765625, "loss_num": 0.046630859375, "loss_xval": 1.421875, "num_input_tokens_seen": 158219804, "step": 2389 }, { "epoch": 0.22370945851078766, "grad_norm": 55.85667419433594, "learning_rate": 5e-05, "loss": 1.4422, "num_input_tokens_seen": 158285144, "step": 2390 }, { "epoch": 0.22370945851078766, "loss": 1.5298120975494385, "loss_ce": 0.00295661692507565, "loss_iou": 0.70703125, "loss_num": 0.022216796875, "loss_xval": 1.5234375, "num_input_tokens_seen": 158285144, "step": 2390 }, { "epoch": 0.2238030607946834, "grad_norm": 18.258556365966797, "learning_rate": 5e-05, "loss": 1.3819, "num_input_tokens_seen": 158351220, "step": 2391 }, { "epoch": 0.2238030607946834, "loss": 1.1339879035949707, "loss_ce": 0.003128534648567438, "loss_iou": 0.490234375, "loss_num": 0.0299072265625, "loss_xval": 1.1328125, "num_input_tokens_seen": 158351220, "step": 2391 }, { "epoch": 0.2238966630785791, "grad_norm": 32.55884552001953, "learning_rate": 5e-05, "loss": 1.4388, "num_input_tokens_seen": 158418340, "step": 2392 }, { "epoch": 0.2238966630785791, "loss": 1.4010716676712036, "loss_ce": 0.005563849583268166, "loss_iou": 0.5859375, "loss_num": 0.04443359375, "loss_xval": 1.3984375, "num_input_tokens_seen": 158418340, "step": 2392 }, { "epoch": 0.22399026536247485, "grad_norm": 20.28363609313965, "learning_rate": 5e-05, "loss": 1.734, "num_input_tokens_seen": 158484052, "step": 2393 }, { "epoch": 0.22399026536247485, "loss": 1.9104260206222534, "loss_ce": 0.014918213710188866, "loss_iou": 0.765625, "loss_num": 0.07275390625, "loss_xval": 1.8984375, "num_input_tokens_seen": 158484052, "step": 2393 }, { "epoch": 0.22408386764637056, "grad_norm": 38.84884262084961, "learning_rate": 5e-05, "loss": 1.6276, "num_input_tokens_seen": 158549924, "step": 2394 }, { "epoch": 0.22408386764637056, "loss": 1.6365025043487549, "loss_ce": 0.002713385969400406, "loss_iou": 0.6875, "loss_num": 0.0517578125, "loss_xval": 1.6328125, "num_input_tokens_seen": 158549924, "step": 2394 }, { "epoch": 0.2241774699302663, "grad_norm": 38.612953186035156, "learning_rate": 5e-05, "loss": 1.6393, "num_input_tokens_seen": 158616112, "step": 2395 }, { "epoch": 0.2241774699302663, "loss": 1.5509424209594727, "loss_ce": 0.0021142764016985893, "loss_iou": 0.68359375, "loss_num": 0.03515625, "loss_xval": 1.546875, "num_input_tokens_seen": 158616112, "step": 2395 }, { "epoch": 0.22427107221416204, "grad_norm": 20.79043197631836, "learning_rate": 5e-05, "loss": 1.6254, "num_input_tokens_seen": 158681624, "step": 2396 }, { "epoch": 0.22427107221416204, "loss": 1.4152926206588745, "loss_ce": 0.010507477447390556, "loss_iou": 0.6171875, "loss_num": 0.0341796875, "loss_xval": 1.40625, "num_input_tokens_seen": 158681624, "step": 2396 }, { "epoch": 0.22436467449805775, "grad_norm": 18.30155372619629, "learning_rate": 5e-05, "loss": 1.379, "num_input_tokens_seen": 158747932, "step": 2397 }, { "epoch": 0.22436467449805775, "loss": 1.2756130695343018, "loss_ce": 0.00461688544601202, "loss_iou": 0.5546875, "loss_num": 0.03271484375, "loss_xval": 1.2734375, "num_input_tokens_seen": 158747932, "step": 2397 }, { "epoch": 0.2244582767819535, "grad_norm": 26.021142959594727, "learning_rate": 5e-05, "loss": 1.6453, "num_input_tokens_seen": 158814508, "step": 2398 }, { "epoch": 0.2244582767819535, "loss": 1.6735025644302368, "loss_ce": 0.008463489823043346, "loss_iou": 0.61328125, "loss_num": 0.0869140625, "loss_xval": 1.6640625, "num_input_tokens_seen": 158814508, "step": 2398 }, { "epoch": 0.2245518790658492, "grad_norm": 23.390729904174805, "learning_rate": 5e-05, "loss": 1.5187, "num_input_tokens_seen": 158881304, "step": 2399 }, { "epoch": 0.2245518790658492, "loss": 1.4685648679733276, "loss_ce": 0.0037210932932794094, "loss_iou": 0.62890625, "loss_num": 0.042236328125, "loss_xval": 1.46875, "num_input_tokens_seen": 158881304, "step": 2399 }, { "epoch": 0.22464548134974494, "grad_norm": 124.52165222167969, "learning_rate": 5e-05, "loss": 1.3992, "num_input_tokens_seen": 158948252, "step": 2400 }, { "epoch": 0.22464548134974494, "loss": 1.2291803359985352, "loss_ce": 0.003106124699115753, "loss_iou": 0.55078125, "loss_num": 0.0245361328125, "loss_xval": 1.2265625, "num_input_tokens_seen": 158948252, "step": 2400 }, { "epoch": 0.22473908363364067, "grad_norm": 18.20048713684082, "learning_rate": 5e-05, "loss": 1.5878, "num_input_tokens_seen": 159013608, "step": 2401 }, { "epoch": 0.22473908363364067, "loss": 1.5494269132614136, "loss_ce": 0.011341002769768238, "loss_iou": 0.6328125, "loss_num": 0.0537109375, "loss_xval": 1.5390625, "num_input_tokens_seen": 159013608, "step": 2401 }, { "epoch": 0.22483268591753638, "grad_norm": 17.248010635375977, "learning_rate": 5e-05, "loss": 1.0412, "num_input_tokens_seen": 159078748, "step": 2402 }, { "epoch": 0.22483268591753638, "loss": 1.0647587776184082, "loss_ce": 0.005676723085343838, "loss_iou": 0.4296875, "loss_num": 0.0400390625, "loss_xval": 1.0625, "num_input_tokens_seen": 159078748, "step": 2402 }, { "epoch": 0.22492628820143212, "grad_norm": 21.244176864624023, "learning_rate": 5e-05, "loss": 1.3451, "num_input_tokens_seen": 159145504, "step": 2403 }, { "epoch": 0.22492628820143212, "loss": 1.3699020147323608, "loss_ce": 0.0041793640702962875, "loss_iou": 0.59375, "loss_num": 0.035400390625, "loss_xval": 1.3671875, "num_input_tokens_seen": 159145504, "step": 2403 }, { "epoch": 0.22501989048532783, "grad_norm": 19.881961822509766, "learning_rate": 5e-05, "loss": 1.5705, "num_input_tokens_seen": 159212348, "step": 2404 }, { "epoch": 0.22501989048532783, "loss": 1.5467236042022705, "loss_ce": 0.00570790795609355, "loss_iou": 0.67578125, "loss_num": 0.037353515625, "loss_xval": 1.5390625, "num_input_tokens_seen": 159212348, "step": 2404 }, { "epoch": 0.22511349276922357, "grad_norm": 38.16630172729492, "learning_rate": 5e-05, "loss": 1.4168, "num_input_tokens_seen": 159279100, "step": 2405 }, { "epoch": 0.22511349276922357, "loss": 1.619866132736206, "loss_ce": 0.00658475561067462, "loss_iou": 0.6015625, "loss_num": 0.0810546875, "loss_xval": 1.609375, "num_input_tokens_seen": 159279100, "step": 2405 }, { "epoch": 0.22520709505311928, "grad_norm": 19.63469123840332, "learning_rate": 5e-05, "loss": 1.7053, "num_input_tokens_seen": 159345336, "step": 2406 }, { "epoch": 0.22520709505311928, "loss": 1.769553780555725, "loss_ce": 0.004905343055725098, "loss_iou": 0.73828125, "loss_num": 0.05810546875, "loss_xval": 1.765625, "num_input_tokens_seen": 159345336, "step": 2406 }, { "epoch": 0.22530069733701502, "grad_norm": 16.83295249938965, "learning_rate": 5e-05, "loss": 1.4323, "num_input_tokens_seen": 159410568, "step": 2407 }, { "epoch": 0.22530069733701502, "loss": 1.3493835926055908, "loss_ce": 0.006121892482042313, "loss_iou": 0.59375, "loss_num": 0.031494140625, "loss_xval": 1.34375, "num_input_tokens_seen": 159410568, "step": 2407 }, { "epoch": 0.22539429962091076, "grad_norm": 23.202192306518555, "learning_rate": 5e-05, "loss": 1.4436, "num_input_tokens_seen": 159477396, "step": 2408 }, { "epoch": 0.22539429962091076, "loss": 1.3589022159576416, "loss_ce": 0.0014803714584559202, "loss_iou": 0.5703125, "loss_num": 0.04248046875, "loss_xval": 1.359375, "num_input_tokens_seen": 159477396, "step": 2408 }, { "epoch": 0.22548790190480647, "grad_norm": 33.11485290527344, "learning_rate": 5e-05, "loss": 1.2378, "num_input_tokens_seen": 159543900, "step": 2409 }, { "epoch": 0.22548790190480647, "loss": 1.1053540706634521, "loss_ce": 0.0037914826534688473, "loss_iou": 0.484375, "loss_num": 0.0262451171875, "loss_xval": 1.1015625, "num_input_tokens_seen": 159543900, "step": 2409 }, { "epoch": 0.2255815041887022, "grad_norm": 24.576881408691406, "learning_rate": 5e-05, "loss": 1.5728, "num_input_tokens_seen": 159609492, "step": 2410 }, { "epoch": 0.2255815041887022, "loss": 1.673002004623413, "loss_ce": 0.006009810138493776, "loss_iou": 0.68359375, "loss_num": 0.06005859375, "loss_xval": 1.6640625, "num_input_tokens_seen": 159609492, "step": 2410 }, { "epoch": 0.22567510647259792, "grad_norm": 10.936545372009277, "learning_rate": 5e-05, "loss": 1.3943, "num_input_tokens_seen": 159675948, "step": 2411 }, { "epoch": 0.22567510647259792, "loss": 1.3140408992767334, "loss_ce": 0.004958903882652521, "loss_iou": 0.546875, "loss_num": 0.043701171875, "loss_xval": 1.3125, "num_input_tokens_seen": 159675948, "step": 2411 }, { "epoch": 0.22576870875649366, "grad_norm": 25.12563133239746, "learning_rate": 5e-05, "loss": 1.3101, "num_input_tokens_seen": 159742976, "step": 2412 }, { "epoch": 0.22576870875649366, "loss": 1.296428918838501, "loss_ce": 0.0015070445369929075, "loss_iou": 0.53515625, "loss_num": 0.044189453125, "loss_xval": 1.296875, "num_input_tokens_seen": 159742976, "step": 2412 }, { "epoch": 0.2258623110403894, "grad_norm": 19.09864044189453, "learning_rate": 5e-05, "loss": 1.5128, "num_input_tokens_seen": 159809092, "step": 2413 }, { "epoch": 0.2258623110403894, "loss": 1.62244713306427, "loss_ce": 0.010142412967979908, "loss_iou": 0.5859375, "loss_num": 0.0888671875, "loss_xval": 1.609375, "num_input_tokens_seen": 159809092, "step": 2413 }, { "epoch": 0.2259559133242851, "grad_norm": 16.004119873046875, "learning_rate": 5e-05, "loss": 1.3332, "num_input_tokens_seen": 159874516, "step": 2414 }, { "epoch": 0.2259559133242851, "loss": 1.2717313766479492, "loss_ce": 0.003664960153400898, "loss_iou": 0.55078125, "loss_num": 0.03271484375, "loss_xval": 1.265625, "num_input_tokens_seen": 159874516, "step": 2414 }, { "epoch": 0.22604951560818085, "grad_norm": 31.018705368041992, "learning_rate": 5e-05, "loss": 1.3362, "num_input_tokens_seen": 159941204, "step": 2415 }, { "epoch": 0.22604951560818085, "loss": 1.4464552402496338, "loss_ce": 0.004072476644068956, "loss_iou": 0.61328125, "loss_num": 0.04296875, "loss_xval": 1.4453125, "num_input_tokens_seen": 159941204, "step": 2415 }, { "epoch": 0.22614311789207656, "grad_norm": 37.567779541015625, "learning_rate": 5e-05, "loss": 1.3376, "num_input_tokens_seen": 160007588, "step": 2416 }, { "epoch": 0.22614311789207656, "loss": 1.5280895233154297, "loss_ce": 0.003675463143736124, "loss_iou": 0.6171875, "loss_num": 0.058837890625, "loss_xval": 1.5234375, "num_input_tokens_seen": 160007588, "step": 2416 }, { "epoch": 0.2262367201759723, "grad_norm": 23.152658462524414, "learning_rate": 5e-05, "loss": 1.6915, "num_input_tokens_seen": 160073604, "step": 2417 }, { "epoch": 0.2262367201759723, "loss": 1.6573272943496704, "loss_ce": 0.004983613267540932, "loss_iou": 0.671875, "loss_num": 0.061279296875, "loss_xval": 1.65625, "num_input_tokens_seen": 160073604, "step": 2417 }, { "epoch": 0.22633032245986803, "grad_norm": 14.996236801147461, "learning_rate": 5e-05, "loss": 1.4899, "num_input_tokens_seen": 160139604, "step": 2418 }, { "epoch": 0.22633032245986803, "loss": 1.6897783279418945, "loss_ce": 0.00471983477473259, "loss_iou": 0.640625, "loss_num": 0.080078125, "loss_xval": 1.6875, "num_input_tokens_seen": 160139604, "step": 2418 }, { "epoch": 0.22642392474376374, "grad_norm": 25.997526168823242, "learning_rate": 5e-05, "loss": 1.3758, "num_input_tokens_seen": 160205468, "step": 2419 }, { "epoch": 0.22642392474376374, "loss": 1.330578327178955, "loss_ce": 0.004406413994729519, "loss_iou": 0.53515625, "loss_num": 0.051025390625, "loss_xval": 1.328125, "num_input_tokens_seen": 160205468, "step": 2419 }, { "epoch": 0.22651752702765948, "grad_norm": 118.78583526611328, "learning_rate": 5e-05, "loss": 1.8314, "num_input_tokens_seen": 160271812, "step": 2420 }, { "epoch": 0.22651752702765948, "loss": 1.6269346475601196, "loss_ce": 0.009502959437668324, "loss_iou": 0.6484375, "loss_num": 0.0634765625, "loss_xval": 1.6171875, "num_input_tokens_seen": 160271812, "step": 2420 }, { "epoch": 0.2266111293115552, "grad_norm": 22.77692413330078, "learning_rate": 5e-05, "loss": 1.3515, "num_input_tokens_seen": 160337740, "step": 2421 }, { "epoch": 0.2266111293115552, "loss": 1.341922402381897, "loss_ce": 0.0084262415766716, "loss_iou": 0.5234375, "loss_num": 0.057373046875, "loss_xval": 1.3359375, "num_input_tokens_seen": 160337740, "step": 2421 }, { "epoch": 0.22670473159545093, "grad_norm": 40.46633529663086, "learning_rate": 5e-05, "loss": 1.569, "num_input_tokens_seen": 160405040, "step": 2422 }, { "epoch": 0.22670473159545093, "loss": 1.482072353363037, "loss_ce": 0.006486350670456886, "loss_iou": 0.5859375, "loss_num": 0.06005859375, "loss_xval": 1.4765625, "num_input_tokens_seen": 160405040, "step": 2422 }, { "epoch": 0.22679833387934667, "grad_norm": 24.294353485107422, "learning_rate": 5e-05, "loss": 1.6327, "num_input_tokens_seen": 160469804, "step": 2423 }, { "epoch": 0.22679833387934667, "loss": 1.397963047027588, "loss_ce": 0.004896578378975391, "loss_iou": 0.54296875, "loss_num": 0.0615234375, "loss_xval": 1.390625, "num_input_tokens_seen": 160469804, "step": 2423 }, { "epoch": 0.22689193616324238, "grad_norm": 27.036087036132812, "learning_rate": 5e-05, "loss": 1.1462, "num_input_tokens_seen": 160535412, "step": 2424 }, { "epoch": 0.22689193616324238, "loss": 1.3932316303253174, "loss_ce": 0.006024546921253204, "loss_iou": 0.56640625, "loss_num": 0.05126953125, "loss_xval": 1.390625, "num_input_tokens_seen": 160535412, "step": 2424 }, { "epoch": 0.22698553844713812, "grad_norm": 41.639774322509766, "learning_rate": 5e-05, "loss": 1.4283, "num_input_tokens_seen": 160601904, "step": 2425 }, { "epoch": 0.22698553844713812, "loss": 1.5586919784545898, "loss_ce": 0.006934266537427902, "loss_iou": 0.63671875, "loss_num": 0.05615234375, "loss_xval": 1.5546875, "num_input_tokens_seen": 160601904, "step": 2425 }, { "epoch": 0.22707914073103383, "grad_norm": 263.73138427734375, "learning_rate": 5e-05, "loss": 1.5827, "num_input_tokens_seen": 160668892, "step": 2426 }, { "epoch": 0.22707914073103383, "loss": 1.6234040260314941, "loss_ce": 0.0023101852275431156, "loss_iou": 0.6953125, "loss_num": 0.046630859375, "loss_xval": 1.625, "num_input_tokens_seen": 160668892, "step": 2426 }, { "epoch": 0.22717274301492957, "grad_norm": 19.2490291595459, "learning_rate": 5e-05, "loss": 1.1537, "num_input_tokens_seen": 160736020, "step": 2427 }, { "epoch": 0.22717274301492957, "loss": 0.9505199790000916, "loss_ce": 0.0027661044150590897, "loss_iou": 0.421875, "loss_num": 0.0208740234375, "loss_xval": 0.94921875, "num_input_tokens_seen": 160736020, "step": 2427 }, { "epoch": 0.22726634529882528, "grad_norm": 22.74749755859375, "learning_rate": 5e-05, "loss": 1.396, "num_input_tokens_seen": 160802752, "step": 2428 }, { "epoch": 0.22726634529882528, "loss": 1.4164843559265137, "loss_ce": 0.005351502448320389, "loss_iou": 0.5859375, "loss_num": 0.04736328125, "loss_xval": 1.4140625, "num_input_tokens_seen": 160802752, "step": 2428 }, { "epoch": 0.22735994758272102, "grad_norm": 23.023794174194336, "learning_rate": 5e-05, "loss": 1.3875, "num_input_tokens_seen": 160869360, "step": 2429 }, { "epoch": 0.22735994758272102, "loss": 1.240903377532959, "loss_ce": 0.007260720245540142, "loss_iou": 0.4765625, "loss_num": 0.0556640625, "loss_xval": 1.234375, "num_input_tokens_seen": 160869360, "step": 2429 }, { "epoch": 0.22745354986661676, "grad_norm": 31.687755584716797, "learning_rate": 5e-05, "loss": 1.4588, "num_input_tokens_seen": 160934984, "step": 2430 }, { "epoch": 0.22745354986661676, "loss": 1.3200249671936035, "loss_ce": 0.00435119541361928, "loss_iou": 0.5390625, "loss_num": 0.0478515625, "loss_xval": 1.3125, "num_input_tokens_seen": 160934984, "step": 2430 }, { "epoch": 0.22754715215051247, "grad_norm": 19.517658233642578, "learning_rate": 5e-05, "loss": 1.5534, "num_input_tokens_seen": 160999604, "step": 2431 }, { "epoch": 0.22754715215051247, "loss": 1.630081295967102, "loss_ce": 0.002151642693206668, "loss_iou": 0.6953125, "loss_num": 0.0478515625, "loss_xval": 1.625, "num_input_tokens_seen": 160999604, "step": 2431 }, { "epoch": 0.2276407544344082, "grad_norm": 18.604236602783203, "learning_rate": 5e-05, "loss": 1.3738, "num_input_tokens_seen": 161065536, "step": 2432 }, { "epoch": 0.2276407544344082, "loss": 1.4849238395690918, "loss_ce": 0.005492770113050938, "loss_iou": 0.54296875, "loss_num": 0.07763671875, "loss_xval": 1.4765625, "num_input_tokens_seen": 161065536, "step": 2432 }, { "epoch": 0.22773435671830392, "grad_norm": 13.682656288146973, "learning_rate": 5e-05, "loss": 1.3782, "num_input_tokens_seen": 161132252, "step": 2433 }, { "epoch": 0.22773435671830392, "loss": 1.3550634384155273, "loss_ce": 0.010092698037624359, "loss_iou": 0.5625, "loss_num": 0.044189453125, "loss_xval": 1.34375, "num_input_tokens_seen": 161132252, "step": 2433 }, { "epoch": 0.22782795900219965, "grad_norm": 22.1313419342041, "learning_rate": 5e-05, "loss": 1.4224, "num_input_tokens_seen": 161199688, "step": 2434 }, { "epoch": 0.22782795900219965, "loss": 1.3534135818481445, "loss_ce": 0.0013627284206449986, "loss_iou": 0.515625, "loss_num": 0.064453125, "loss_xval": 1.3515625, "num_input_tokens_seen": 161199688, "step": 2434 }, { "epoch": 0.2279215612860954, "grad_norm": 17.397193908691406, "learning_rate": 5e-05, "loss": 1.4757, "num_input_tokens_seen": 161265896, "step": 2435 }, { "epoch": 0.2279215612860954, "loss": 1.8262516260147095, "loss_ce": 0.00984534528106451, "loss_iou": 0.71875, "loss_num": 0.07666015625, "loss_xval": 1.8125, "num_input_tokens_seen": 161265896, "step": 2435 }, { "epoch": 0.2280151635699911, "grad_norm": 90.07242584228516, "learning_rate": 5e-05, "loss": 1.3546, "num_input_tokens_seen": 161333660, "step": 2436 }, { "epoch": 0.2280151635699911, "loss": 1.1518394947052002, "loss_ce": 0.003402027767151594, "loss_iou": 0.494140625, "loss_num": 0.0322265625, "loss_xval": 1.1484375, "num_input_tokens_seen": 161333660, "step": 2436 }, { "epoch": 0.22810876585388684, "grad_norm": 21.776294708251953, "learning_rate": 5e-05, "loss": 1.3239, "num_input_tokens_seen": 161398960, "step": 2437 }, { "epoch": 0.22810876585388684, "loss": 1.1618947982788086, "loss_ce": 0.006865587085485458, "loss_iou": 0.48828125, "loss_num": 0.03564453125, "loss_xval": 1.15625, "num_input_tokens_seen": 161398960, "step": 2437 }, { "epoch": 0.22820236813778255, "grad_norm": 38.253658294677734, "learning_rate": 5e-05, "loss": 1.5948, "num_input_tokens_seen": 161465236, "step": 2438 }, { "epoch": 0.22820236813778255, "loss": 1.6474385261535645, "loss_ce": 0.0038838572800159454, "loss_iou": 0.6640625, "loss_num": 0.06298828125, "loss_xval": 1.640625, "num_input_tokens_seen": 161465236, "step": 2438 }, { "epoch": 0.2282959704216783, "grad_norm": 35.7276611328125, "learning_rate": 5e-05, "loss": 1.5618, "num_input_tokens_seen": 161531424, "step": 2439 }, { "epoch": 0.2282959704216783, "loss": 1.5822123289108276, "loss_ce": 0.0045755887404084206, "loss_iou": 0.625, "loss_num": 0.0654296875, "loss_xval": 1.578125, "num_input_tokens_seen": 161531424, "step": 2439 }, { "epoch": 0.22838957270557403, "grad_norm": 29.47515106201172, "learning_rate": 5e-05, "loss": 1.5989, "num_input_tokens_seen": 161597336, "step": 2440 }, { "epoch": 0.22838957270557403, "loss": 1.7137210369110107, "loss_ce": 0.006201460026204586, "loss_iou": 0.703125, "loss_num": 0.059326171875, "loss_xval": 1.7109375, "num_input_tokens_seen": 161597336, "step": 2440 }, { "epoch": 0.22848317498946974, "grad_norm": 31.88434600830078, "learning_rate": 5e-05, "loss": 1.2366, "num_input_tokens_seen": 161662992, "step": 2441 }, { "epoch": 0.22848317498946974, "loss": 1.174156904220581, "loss_ce": 0.007408848498016596, "loss_iou": 0.421875, "loss_num": 0.06494140625, "loss_xval": 1.1640625, "num_input_tokens_seen": 161662992, "step": 2441 }, { "epoch": 0.22857677727336548, "grad_norm": 17.222997665405273, "learning_rate": 5e-05, "loss": 1.5757, "num_input_tokens_seen": 161729752, "step": 2442 }, { "epoch": 0.22857677727336548, "loss": 1.7142722606658936, "loss_ce": 0.007241016253829002, "loss_iou": 0.75, "loss_num": 0.04150390625, "loss_xval": 1.703125, "num_input_tokens_seen": 161729752, "step": 2442 }, { "epoch": 0.2286703795572612, "grad_norm": 23.34244155883789, "learning_rate": 5e-05, "loss": 1.4665, "num_input_tokens_seen": 161796252, "step": 2443 }, { "epoch": 0.2286703795572612, "loss": 1.3133584260940552, "loss_ce": 0.006717763841152191, "loss_iou": 0.546875, "loss_num": 0.041748046875, "loss_xval": 1.3046875, "num_input_tokens_seen": 161796252, "step": 2443 }, { "epoch": 0.22876398184115693, "grad_norm": 37.789886474609375, "learning_rate": 5e-05, "loss": 1.416, "num_input_tokens_seen": 161863252, "step": 2444 }, { "epoch": 0.22876398184115693, "loss": 1.38625168800354, "loss_ce": 0.006857120431959629, "loss_iou": 0.56640625, "loss_num": 0.048583984375, "loss_xval": 1.3828125, "num_input_tokens_seen": 161863252, "step": 2444 }, { "epoch": 0.22885758412505264, "grad_norm": 19.85990333557129, "learning_rate": 5e-05, "loss": 1.5014, "num_input_tokens_seen": 161929072, "step": 2445 }, { "epoch": 0.22885758412505264, "loss": 1.527203917503357, "loss_ce": 0.0027899290435016155, "loss_iou": 0.62890625, "loss_num": 0.0537109375, "loss_xval": 1.5234375, "num_input_tokens_seen": 161929072, "step": 2445 }, { "epoch": 0.22895118640894838, "grad_norm": 21.449256896972656, "learning_rate": 5e-05, "loss": 1.4077, "num_input_tokens_seen": 161995272, "step": 2446 }, { "epoch": 0.22895118640894838, "loss": 1.3202686309814453, "loss_ce": 0.0019092089496552944, "loss_iou": 0.5703125, "loss_num": 0.0361328125, "loss_xval": 1.3203125, "num_input_tokens_seen": 161995272, "step": 2446 }, { "epoch": 0.22904478869284411, "grad_norm": 18.70505142211914, "learning_rate": 5e-05, "loss": 1.3138, "num_input_tokens_seen": 162061728, "step": 2447 }, { "epoch": 0.22904478869284411, "loss": 1.2926361560821533, "loss_ce": 0.01138614397495985, "loss_iou": 0.5390625, "loss_num": 0.0400390625, "loss_xval": 1.28125, "num_input_tokens_seen": 162061728, "step": 2447 }, { "epoch": 0.22913839097673983, "grad_norm": 25.62189292907715, "learning_rate": 5e-05, "loss": 1.5155, "num_input_tokens_seen": 162129428, "step": 2448 }, { "epoch": 0.22913839097673983, "loss": 1.5001521110534668, "loss_ce": 0.002593392040580511, "loss_iou": 0.62109375, "loss_num": 0.052001953125, "loss_xval": 1.5, "num_input_tokens_seen": 162129428, "step": 2448 }, { "epoch": 0.22923199326063556, "grad_norm": 33.32771301269531, "learning_rate": 5e-05, "loss": 1.5063, "num_input_tokens_seen": 162196064, "step": 2449 }, { "epoch": 0.22923199326063556, "loss": 1.4936645030975342, "loss_ce": 0.004895005375146866, "loss_iou": 0.609375, "loss_num": 0.0546875, "loss_xval": 1.4921875, "num_input_tokens_seen": 162196064, "step": 2449 }, { "epoch": 0.22932559554453127, "grad_norm": 20.925540924072266, "learning_rate": 5e-05, "loss": 1.6122, "num_input_tokens_seen": 162263060, "step": 2450 }, { "epoch": 0.22932559554453127, "loss": 1.573733925819397, "loss_ce": 0.007327648811042309, "loss_iou": 0.6796875, "loss_num": 0.04052734375, "loss_xval": 1.5625, "num_input_tokens_seen": 162263060, "step": 2450 }, { "epoch": 0.229419197828427, "grad_norm": 17.177642822265625, "learning_rate": 5e-05, "loss": 1.3064, "num_input_tokens_seen": 162329316, "step": 2451 }, { "epoch": 0.229419197828427, "loss": 1.2348629236221313, "loss_ce": 0.00732386251911521, "loss_iou": 0.443359375, "loss_num": 0.06787109375, "loss_xval": 1.2265625, "num_input_tokens_seen": 162329316, "step": 2451 }, { "epoch": 0.22951280011232275, "grad_norm": 28.06122398376465, "learning_rate": 5e-05, "loss": 1.3107, "num_input_tokens_seen": 162395300, "step": 2452 }, { "epoch": 0.22951280011232275, "loss": 1.3751388788223267, "loss_ce": 0.0040450915694236755, "loss_iou": 0.56640625, "loss_num": 0.04833984375, "loss_xval": 1.375, "num_input_tokens_seen": 162395300, "step": 2452 }, { "epoch": 0.22960640239621846, "grad_norm": 22.68967056274414, "learning_rate": 5e-05, "loss": 1.6941, "num_input_tokens_seen": 162461892, "step": 2453 }, { "epoch": 0.22960640239621846, "loss": 1.5317165851593018, "loss_ce": 0.0029080549720674753, "loss_iou": 0.63671875, "loss_num": 0.050537109375, "loss_xval": 1.53125, "num_input_tokens_seen": 162461892, "step": 2453 }, { "epoch": 0.2297000046801142, "grad_norm": 26.86604881286621, "learning_rate": 5e-05, "loss": 1.2431, "num_input_tokens_seen": 162527000, "step": 2454 }, { "epoch": 0.2297000046801142, "loss": 1.3855384588241577, "loss_ce": 0.0052894484251737595, "loss_iou": 0.53125, "loss_num": 0.06298828125, "loss_xval": 1.3828125, "num_input_tokens_seen": 162527000, "step": 2454 }, { "epoch": 0.2297936069640099, "grad_norm": 23.77687644958496, "learning_rate": 5e-05, "loss": 1.4307, "num_input_tokens_seen": 162593708, "step": 2455 }, { "epoch": 0.2297936069640099, "loss": 1.532151222229004, "loss_ce": 0.005784048233181238, "loss_iou": 0.6328125, "loss_num": 0.052001953125, "loss_xval": 1.5234375, "num_input_tokens_seen": 162593708, "step": 2455 }, { "epoch": 0.22988720924790565, "grad_norm": 27.570907592773438, "learning_rate": 5e-05, "loss": 1.1868, "num_input_tokens_seen": 162659100, "step": 2456 }, { "epoch": 0.22988720924790565, "loss": 1.067798376083374, "loss_ce": 0.005298304371535778, "loss_iou": 0.451171875, "loss_num": 0.031982421875, "loss_xval": 1.0625, "num_input_tokens_seen": 162659100, "step": 2456 }, { "epoch": 0.2299808115318014, "grad_norm": 25.42987632751465, "learning_rate": 5e-05, "loss": 1.4613, "num_input_tokens_seen": 162725040, "step": 2457 }, { "epoch": 0.2299808115318014, "loss": 1.4514778852462769, "loss_ce": 0.006165330298244953, "loss_iou": 0.59375, "loss_num": 0.05224609375, "loss_xval": 1.4453125, "num_input_tokens_seen": 162725040, "step": 2457 }, { "epoch": 0.2300744138156971, "grad_norm": 32.58747863769531, "learning_rate": 5e-05, "loss": 1.2337, "num_input_tokens_seen": 162790956, "step": 2458 }, { "epoch": 0.2300744138156971, "loss": 1.39103364944458, "loss_ce": 0.0067563047632575035, "loss_iou": 0.55859375, "loss_num": 0.052734375, "loss_xval": 1.3828125, "num_input_tokens_seen": 162790956, "step": 2458 }, { "epoch": 0.23016801609959284, "grad_norm": 68.71968841552734, "learning_rate": 5e-05, "loss": 1.4733, "num_input_tokens_seen": 162857136, "step": 2459 }, { "epoch": 0.23016801609959284, "loss": 1.5733240842819214, "loss_ce": 0.006429579574614763, "loss_iou": 0.6640625, "loss_num": 0.04833984375, "loss_xval": 1.5703125, "num_input_tokens_seen": 162857136, "step": 2459 }, { "epoch": 0.23026161838348855, "grad_norm": 43.434669494628906, "learning_rate": 5e-05, "loss": 1.3512, "num_input_tokens_seen": 162923532, "step": 2460 }, { "epoch": 0.23026161838348855, "loss": 1.428882122039795, "loss_ce": 0.006030574440956116, "loss_iou": 0.51953125, "loss_num": 0.0771484375, "loss_xval": 1.421875, "num_input_tokens_seen": 162923532, "step": 2460 }, { "epoch": 0.2303552206673843, "grad_norm": 12.635337829589844, "learning_rate": 5e-05, "loss": 1.3098, "num_input_tokens_seen": 162988940, "step": 2461 }, { "epoch": 0.2303552206673843, "loss": 1.4358150959014893, "loss_ce": 0.006982141640037298, "loss_iou": 0.5859375, "loss_num": 0.05224609375, "loss_xval": 1.4296875, "num_input_tokens_seen": 162988940, "step": 2461 }, { "epoch": 0.23044882295128002, "grad_norm": 20.529218673706055, "learning_rate": 5e-05, "loss": 1.1557, "num_input_tokens_seen": 163055324, "step": 2462 }, { "epoch": 0.23044882295128002, "loss": 1.2897112369537354, "loss_ce": 0.005043351091444492, "loss_iou": 0.5, "loss_num": 0.056640625, "loss_xval": 1.28125, "num_input_tokens_seen": 163055324, "step": 2462 }, { "epoch": 0.23054242523517574, "grad_norm": 138.20254516601562, "learning_rate": 5e-05, "loss": 1.2349, "num_input_tokens_seen": 163121024, "step": 2463 }, { "epoch": 0.23054242523517574, "loss": 1.0889167785644531, "loss_ce": 0.0020027703139930964, "loss_iou": 0.4453125, "loss_num": 0.03857421875, "loss_xval": 1.0859375, "num_input_tokens_seen": 163121024, "step": 2463 }, { "epoch": 0.23063602751907147, "grad_norm": 49.121238708496094, "learning_rate": 5e-05, "loss": 1.4, "num_input_tokens_seen": 163188208, "step": 2464 }, { "epoch": 0.23063602751907147, "loss": 1.3350986242294312, "loss_ce": 0.00453227711841464, "loss_iou": 0.57421875, "loss_num": 0.035888671875, "loss_xval": 1.328125, "num_input_tokens_seen": 163188208, "step": 2464 }, { "epoch": 0.23072962980296718, "grad_norm": 19.054418563842773, "learning_rate": 5e-05, "loss": 1.4856, "num_input_tokens_seen": 163254636, "step": 2465 }, { "epoch": 0.23072962980296718, "loss": 1.639803171157837, "loss_ce": 0.005037473049014807, "loss_iou": 0.71875, "loss_num": 0.0390625, "loss_xval": 1.6328125, "num_input_tokens_seen": 163254636, "step": 2465 }, { "epoch": 0.23082323208686292, "grad_norm": 18.13214683532715, "learning_rate": 5e-05, "loss": 1.3198, "num_input_tokens_seen": 163320572, "step": 2466 }, { "epoch": 0.23082323208686292, "loss": 1.2554048299789429, "loss_ce": 0.0063813598826527596, "loss_iou": 0.51171875, "loss_num": 0.045166015625, "loss_xval": 1.25, "num_input_tokens_seen": 163320572, "step": 2466 }, { "epoch": 0.23091683437075863, "grad_norm": 24.170122146606445, "learning_rate": 5e-05, "loss": 1.5222, "num_input_tokens_seen": 163387168, "step": 2467 }, { "epoch": 0.23091683437075863, "loss": 1.4282277822494507, "loss_ce": 0.0024465336464345455, "loss_iou": 0.5859375, "loss_num": 0.05078125, "loss_xval": 1.421875, "num_input_tokens_seen": 163387168, "step": 2467 }, { "epoch": 0.23101043665465437, "grad_norm": 21.741044998168945, "learning_rate": 5e-05, "loss": 1.7628, "num_input_tokens_seen": 163453236, "step": 2468 }, { "epoch": 0.23101043665465437, "loss": 1.7581052780151367, "loss_ce": 0.002245889976620674, "loss_iou": 0.6953125, "loss_num": 0.07373046875, "loss_xval": 1.7578125, "num_input_tokens_seen": 163453236, "step": 2468 }, { "epoch": 0.2311040389385501, "grad_norm": 17.663761138916016, "learning_rate": 5e-05, "loss": 1.4071, "num_input_tokens_seen": 163520240, "step": 2469 }, { "epoch": 0.2311040389385501, "loss": 1.6644530296325684, "loss_ce": 0.006249843165278435, "loss_iou": 0.64453125, "loss_num": 0.0732421875, "loss_xval": 1.65625, "num_input_tokens_seen": 163520240, "step": 2469 }, { "epoch": 0.23119764122244582, "grad_norm": 22.036527633666992, "learning_rate": 5e-05, "loss": 1.2826, "num_input_tokens_seen": 163586436, "step": 2470 }, { "epoch": 0.23119764122244582, "loss": 1.221808671951294, "loss_ce": 0.0015938271535560489, "loss_iou": 0.546875, "loss_num": 0.02490234375, "loss_xval": 1.21875, "num_input_tokens_seen": 163586436, "step": 2470 }, { "epoch": 0.23129124350634156, "grad_norm": 18.36457061767578, "learning_rate": 5e-05, "loss": 1.3659, "num_input_tokens_seen": 163652996, "step": 2471 }, { "epoch": 0.23129124350634156, "loss": 1.5381481647491455, "loss_ce": 0.005433372221887112, "loss_iou": 0.62109375, "loss_num": 0.057861328125, "loss_xval": 1.53125, "num_input_tokens_seen": 163652996, "step": 2471 }, { "epoch": 0.23138484579023727, "grad_norm": 28.031639099121094, "learning_rate": 5e-05, "loss": 1.3604, "num_input_tokens_seen": 163719344, "step": 2472 }, { "epoch": 0.23138484579023727, "loss": 1.3550891876220703, "loss_ce": 0.006944580003619194, "loss_iou": 0.55859375, "loss_num": 0.046630859375, "loss_xval": 1.3515625, "num_input_tokens_seen": 163719344, "step": 2472 }, { "epoch": 0.231478448074133, "grad_norm": 27.000850677490234, "learning_rate": 5e-05, "loss": 1.6887, "num_input_tokens_seen": 163784896, "step": 2473 }, { "epoch": 0.231478448074133, "loss": 2.0026726722717285, "loss_ce": 0.007555382791906595, "loss_iou": 0.75, "loss_num": 0.09814453125, "loss_xval": 1.9921875, "num_input_tokens_seen": 163784896, "step": 2473 }, { "epoch": 0.23157205035802875, "grad_norm": 31.808427810668945, "learning_rate": 5e-05, "loss": 1.4269, "num_input_tokens_seen": 163850464, "step": 2474 }, { "epoch": 0.23157205035802875, "loss": 1.3507449626922607, "loss_ce": 0.006994950119405985, "loss_iou": 0.50390625, "loss_num": 0.0673828125, "loss_xval": 1.34375, "num_input_tokens_seen": 163850464, "step": 2474 }, { "epoch": 0.23166565264192446, "grad_norm": 28.826396942138672, "learning_rate": 5e-05, "loss": 1.5188, "num_input_tokens_seen": 163917052, "step": 2475 }, { "epoch": 0.23166565264192446, "loss": 1.6371958255767822, "loss_ce": 0.0014536024536937475, "loss_iou": 0.6796875, "loss_num": 0.0546875, "loss_xval": 1.6328125, "num_input_tokens_seen": 163917052, "step": 2475 }, { "epoch": 0.2317592549258202, "grad_norm": 37.960697174072266, "learning_rate": 5e-05, "loss": 1.5083, "num_input_tokens_seen": 163984472, "step": 2476 }, { "epoch": 0.2317592549258202, "loss": 1.3793046474456787, "loss_ce": 0.0028398879803717136, "loss_iou": 0.56640625, "loss_num": 0.048095703125, "loss_xval": 1.375, "num_input_tokens_seen": 163984472, "step": 2476 }, { "epoch": 0.2318528572097159, "grad_norm": 20.171327590942383, "learning_rate": 5e-05, "loss": 1.6516, "num_input_tokens_seen": 164049132, "step": 2477 }, { "epoch": 0.2318528572097159, "loss": 1.6791355609893799, "loss_ce": 0.0023777266032993793, "loss_iou": 0.70703125, "loss_num": 0.052978515625, "loss_xval": 1.6796875, "num_input_tokens_seen": 164049132, "step": 2477 }, { "epoch": 0.23194645949361165, "grad_norm": 16.24278450012207, "learning_rate": 5e-05, "loss": 1.3162, "num_input_tokens_seen": 164115196, "step": 2478 }, { "epoch": 0.23194645949361165, "loss": 1.1775946617126465, "loss_ce": 0.0057196225970983505, "loss_iou": 0.498046875, "loss_num": 0.035400390625, "loss_xval": 1.171875, "num_input_tokens_seen": 164115196, "step": 2478 }, { "epoch": 0.23204006177750738, "grad_norm": 17.705053329467773, "learning_rate": 5e-05, "loss": 1.4433, "num_input_tokens_seen": 164181904, "step": 2479 }, { "epoch": 0.23204006177750738, "loss": 1.3429597616195679, "loss_ce": 0.003115981351584196, "loss_iou": 0.53515625, "loss_num": 0.0537109375, "loss_xval": 1.34375, "num_input_tokens_seen": 164181904, "step": 2479 }, { "epoch": 0.2321336640614031, "grad_norm": 35.55115509033203, "learning_rate": 5e-05, "loss": 1.4613, "num_input_tokens_seen": 164247788, "step": 2480 }, { "epoch": 0.2321336640614031, "loss": 1.6351828575134277, "loss_ce": 0.003347002901136875, "loss_iou": 0.6796875, "loss_num": 0.05517578125, "loss_xval": 1.6328125, "num_input_tokens_seen": 164247788, "step": 2480 }, { "epoch": 0.23222726634529883, "grad_norm": 24.525876998901367, "learning_rate": 5e-05, "loss": 1.6514, "num_input_tokens_seen": 164314020, "step": 2481 }, { "epoch": 0.23222726634529883, "loss": 1.6250824928283691, "loss_ce": 0.004476991947740316, "loss_iou": 0.68359375, "loss_num": 0.050537109375, "loss_xval": 1.6171875, "num_input_tokens_seen": 164314020, "step": 2481 }, { "epoch": 0.23232086862919454, "grad_norm": 23.42792320251465, "learning_rate": 5e-05, "loss": 1.5881, "num_input_tokens_seen": 164379820, "step": 2482 }, { "epoch": 0.23232086862919454, "loss": 1.5978208780288696, "loss_ce": 0.007000564597547054, "loss_iou": 0.58984375, "loss_num": 0.08203125, "loss_xval": 1.59375, "num_input_tokens_seen": 164379820, "step": 2482 }, { "epoch": 0.23241447091309028, "grad_norm": 24.587223052978516, "learning_rate": 5e-05, "loss": 1.5849, "num_input_tokens_seen": 164446628, "step": 2483 }, { "epoch": 0.23241447091309028, "loss": 1.4252188205718994, "loss_ce": 0.0033439500257372856, "loss_iou": 0.59765625, "loss_num": 0.045654296875, "loss_xval": 1.421875, "num_input_tokens_seen": 164446628, "step": 2483 }, { "epoch": 0.232508073196986, "grad_norm": 37.54349136352539, "learning_rate": 5e-05, "loss": 1.6122, "num_input_tokens_seen": 164512932, "step": 2484 }, { "epoch": 0.232508073196986, "loss": 1.735964298248291, "loss_ce": 0.00744863785803318, "loss_iou": 0.703125, "loss_num": 0.064453125, "loss_xval": 1.7265625, "num_input_tokens_seen": 164512932, "step": 2484 }, { "epoch": 0.23260167548088173, "grad_norm": 31.54380226135254, "learning_rate": 5e-05, "loss": 1.7512, "num_input_tokens_seen": 164579648, "step": 2485 }, { "epoch": 0.23260167548088173, "loss": 1.6679766178131104, "loss_ce": 0.004890634212642908, "loss_iou": 0.6875, "loss_num": 0.056884765625, "loss_xval": 1.6640625, "num_input_tokens_seen": 164579648, "step": 2485 }, { "epoch": 0.23269527776477747, "grad_norm": 30.32734489440918, "learning_rate": 5e-05, "loss": 1.6871, "num_input_tokens_seen": 164646276, "step": 2486 }, { "epoch": 0.23269527776477747, "loss": 1.61893892288208, "loss_ce": 0.006634257733821869, "loss_iou": 0.62890625, "loss_num": 0.0703125, "loss_xval": 1.609375, "num_input_tokens_seen": 164646276, "step": 2486 }, { "epoch": 0.23278888004867318, "grad_norm": 19.408811569213867, "learning_rate": 5e-05, "loss": 1.402, "num_input_tokens_seen": 164711348, "step": 2487 }, { "epoch": 0.23278888004867318, "loss": 1.409168004989624, "loss_ce": 0.00743465218693018, "loss_iou": 0.54296875, "loss_num": 0.06298828125, "loss_xval": 1.3984375, "num_input_tokens_seen": 164711348, "step": 2487 }, { "epoch": 0.23288248233256892, "grad_norm": 25.209152221679688, "learning_rate": 5e-05, "loss": 1.3289, "num_input_tokens_seen": 164777464, "step": 2488 }, { "epoch": 0.23288248233256892, "loss": 1.4044851064682007, "loss_ce": 0.006535861641168594, "loss_iou": 0.5859375, "loss_num": 0.044677734375, "loss_xval": 1.3984375, "num_input_tokens_seen": 164777464, "step": 2488 }, { "epoch": 0.23297608461646463, "grad_norm": 59.48554611206055, "learning_rate": 5e-05, "loss": 1.4737, "num_input_tokens_seen": 164845024, "step": 2489 }, { "epoch": 0.23297608461646463, "loss": 1.4949105978012085, "loss_ce": 0.00272312480956316, "loss_iou": 0.64453125, "loss_num": 0.040771484375, "loss_xval": 1.4921875, "num_input_tokens_seen": 164845024, "step": 2489 }, { "epoch": 0.23306968690036037, "grad_norm": 14.618805885314941, "learning_rate": 5e-05, "loss": 1.2474, "num_input_tokens_seen": 164911024, "step": 2490 }, { "epoch": 0.23306968690036037, "loss": 1.3296984434127808, "loss_ce": 0.00401481706649065, "loss_iou": 0.578125, "loss_num": 0.034423828125, "loss_xval": 1.328125, "num_input_tokens_seen": 164911024, "step": 2490 }, { "epoch": 0.2331632891842561, "grad_norm": 39.86266326904297, "learning_rate": 5e-05, "loss": 1.2735, "num_input_tokens_seen": 164978628, "step": 2491 }, { "epoch": 0.2331632891842561, "loss": 1.2402257919311523, "loss_ce": 0.0019445550860837102, "loss_iou": 0.5390625, "loss_num": 0.03271484375, "loss_xval": 1.234375, "num_input_tokens_seen": 164978628, "step": 2491 }, { "epoch": 0.23325689146815182, "grad_norm": 47.401302337646484, "learning_rate": 5e-05, "loss": 1.3271, "num_input_tokens_seen": 165046160, "step": 2492 }, { "epoch": 0.23325689146815182, "loss": 1.4952867031097412, "loss_ce": 0.0065172468312084675, "loss_iou": 0.61328125, "loss_num": 0.052978515625, "loss_xval": 1.4921875, "num_input_tokens_seen": 165046160, "step": 2492 }, { "epoch": 0.23335049375204756, "grad_norm": 39.57675552368164, "learning_rate": 5e-05, "loss": 1.4569, "num_input_tokens_seen": 165112804, "step": 2493 }, { "epoch": 0.23335049375204756, "loss": 1.4527122974395752, "loss_ce": 0.002516983076930046, "loss_iou": 0.65234375, "loss_num": 0.0286865234375, "loss_xval": 1.453125, "num_input_tokens_seen": 165112804, "step": 2493 }, { "epoch": 0.23344409603594327, "grad_norm": 28.581764221191406, "learning_rate": 5e-05, "loss": 1.2105, "num_input_tokens_seen": 165178208, "step": 2494 }, { "epoch": 0.23344409603594327, "loss": 1.2816894054412842, "loss_ce": 0.004833861254155636, "loss_iou": 0.490234375, "loss_num": 0.059814453125, "loss_xval": 1.2734375, "num_input_tokens_seen": 165178208, "step": 2494 }, { "epoch": 0.233537698319839, "grad_norm": 37.20583724975586, "learning_rate": 5e-05, "loss": 1.3242, "num_input_tokens_seen": 165244368, "step": 2495 }, { "epoch": 0.233537698319839, "loss": 1.6345446109771729, "loss_ce": 0.005638356786221266, "loss_iou": 0.6640625, "loss_num": 0.059814453125, "loss_xval": 1.625, "num_input_tokens_seen": 165244368, "step": 2495 }, { "epoch": 0.23363130060373474, "grad_norm": 36.57848358154297, "learning_rate": 5e-05, "loss": 1.2767, "num_input_tokens_seen": 165309484, "step": 2496 }, { "epoch": 0.23363130060373474, "loss": 1.1039924621582031, "loss_ce": 0.005237653851509094, "loss_iou": 0.43359375, "loss_num": 0.0458984375, "loss_xval": 1.1015625, "num_input_tokens_seen": 165309484, "step": 2496 }, { "epoch": 0.23372490288763045, "grad_norm": 21.84840965270996, "learning_rate": 5e-05, "loss": 1.4515, "num_input_tokens_seen": 165375508, "step": 2497 }, { "epoch": 0.23372490288763045, "loss": 1.5355206727981567, "loss_ce": 0.0062238117679953575, "loss_iou": 0.63671875, "loss_num": 0.051025390625, "loss_xval": 1.53125, "num_input_tokens_seen": 165375508, "step": 2497 }, { "epoch": 0.2338185051715262, "grad_norm": 25.96573829650879, "learning_rate": 5e-05, "loss": 1.0797, "num_input_tokens_seen": 165442652, "step": 2498 }, { "epoch": 0.2338185051715262, "loss": 0.9413748979568481, "loss_ce": 0.002898360835388303, "loss_iou": 0.3828125, "loss_num": 0.03466796875, "loss_xval": 0.9375, "num_input_tokens_seen": 165442652, "step": 2498 }, { "epoch": 0.2339121074554219, "grad_norm": 20.81844139099121, "learning_rate": 5e-05, "loss": 1.321, "num_input_tokens_seen": 165508484, "step": 2499 }, { "epoch": 0.2339121074554219, "loss": 1.4115042686462402, "loss_ce": 0.0052542174234986305, "loss_iou": 0.59375, "loss_num": 0.04443359375, "loss_xval": 1.40625, "num_input_tokens_seen": 165508484, "step": 2499 }, { "epoch": 0.23400570973931764, "grad_norm": 106.0998764038086, "learning_rate": 5e-05, "loss": 1.2157, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23400570973931764, "eval_seeclick_CIoU": 0.1758987456560135, "eval_seeclick_GIoU": 0.1970961093902588, "eval_seeclick_IoU": 0.285983145236969, "eval_seeclick_MAE_all": 0.15556222945451736, "eval_seeclick_MAE_h": 0.11707764118909836, "eval_seeclick_MAE_w": 0.12314826250076294, "eval_seeclick_MAE_x_boxes": 0.20792122185230255, "eval_seeclick_MAE_y_boxes": 0.12664693593978882, "eval_seeclick_NUM_probability": 0.9996010661125183, "eval_seeclick_inside_bbox": 0.48750001192092896, "eval_seeclick_loss": 2.4451475143432617, "eval_seeclick_loss_ce": 0.014546331018209457, "eval_seeclick_loss_iou": 0.839599609375, "eval_seeclick_loss_num": 0.1605987548828125, "eval_seeclick_loss_xval": 2.479736328125, "eval_seeclick_runtime": 70.2802, "eval_seeclick_samples_per_second": 0.669, "eval_seeclick_steps_per_second": 0.028, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23400570973931764, "eval_icons_CIoU": -0.07744761649519205, "eval_icons_GIoU": 0.010248284786939621, "eval_icons_IoU": 0.10521091893315315, "eval_icons_MAE_all": 0.1695949211716652, "eval_icons_MAE_h": 0.09764321148395538, "eval_icons_MAE_w": 0.20428355783224106, "eval_icons_MAE_x_boxes": 0.13130392134189606, "eval_icons_MAE_y_boxes": 0.10688390210270882, "eval_icons_NUM_probability": 0.9999231398105621, "eval_icons_inside_bbox": 0.1614583358168602, "eval_icons_loss": 2.9076406955718994, "eval_icons_loss_ce": 0.00013697430404135957, "eval_icons_loss_iou": 1.0185546875, "eval_icons_loss_num": 0.1790771484375, "eval_icons_loss_xval": 2.931640625, "eval_icons_runtime": 66.9052, "eval_icons_samples_per_second": 0.747, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23400570973931764, "eval_screenspot_CIoU": 0.021585943798224132, "eval_screenspot_GIoU": 0.03971382789313793, "eval_screenspot_IoU": 0.18650566041469574, "eval_screenspot_MAE_all": 0.20787508289019266, "eval_screenspot_MAE_h": 0.17082890371481577, "eval_screenspot_MAE_w": 0.17836354921261469, "eval_screenspot_MAE_x_boxes": 0.22417324284712473, "eval_screenspot_MAE_y_boxes": 0.16064871350924173, "eval_screenspot_NUM_probability": 0.9998685717582703, "eval_screenspot_inside_bbox": 0.38333333532015484, "eval_screenspot_loss": 3.0285556316375732, "eval_screenspot_loss_ce": 0.023904730876286823, "eval_screenspot_loss_iou": 0.974609375, "eval_screenspot_loss_num": 0.21490478515625, "eval_screenspot_loss_xval": 3.0231119791666665, "eval_screenspot_runtime": 124.9603, "eval_screenspot_samples_per_second": 0.712, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23400570973931764, "eval_compot_CIoU": 0.00973070040345192, "eval_compot_GIoU": 0.06087764725089073, "eval_compot_IoU": 0.16261964663863182, "eval_compot_MAE_all": 0.18344344943761826, "eval_compot_MAE_h": 0.08078011125326157, "eval_compot_MAE_w": 0.23833151906728745, "eval_compot_MAE_x_boxes": 0.15341763198375702, "eval_compot_MAE_y_boxes": 0.1260961815714836, "eval_compot_NUM_probability": 0.9998997449874878, "eval_compot_inside_bbox": 0.3038194477558136, "eval_compot_loss": 2.89353609085083, "eval_compot_loss_ce": 0.007296753115952015, "eval_compot_loss_iou": 0.966064453125, "eval_compot_loss_num": 0.190185546875, "eval_compot_loss_xval": 2.8828125, "eval_compot_runtime": 78.7073, "eval_compot_samples_per_second": 0.635, "eval_compot_steps_per_second": 0.025, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23400570973931764, "eval_custom_ui_MAE_all": 0.15342652052640915, "eval_custom_ui_MAE_x": 0.14219188317656517, "eval_custom_ui_MAE_y": 0.16466113924980164, "eval_custom_ui_NUM_probability": 0.9999070465564728, "eval_custom_ui_loss": 0.8835628032684326, "eval_custom_ui_loss_ce": 0.16172069311141968, "eval_custom_ui_loss_num": 0.148651123046875, "eval_custom_ui_loss_xval": 0.743408203125, "eval_custom_ui_runtime": 53.8258, "eval_custom_ui_samples_per_second": 0.929, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23400570973931764, "loss": 0.8828193545341492, "loss_ce": 0.17334669828414917, "loss_iou": 0.0, "loss_num": 0.1416015625, "loss_xval": 0.7109375, "num_input_tokens_seen": 165574880, "step": 2500 }, { "epoch": 0.23409931202321338, "grad_norm": 24.63797378540039, "learning_rate": 5e-05, "loss": 1.4581, "num_input_tokens_seen": 165640432, "step": 2501 }, { "epoch": 0.23409931202321338, "loss": 1.4565620422363281, "loss_ce": 0.003437077160924673, "loss_iou": 0.5703125, "loss_num": 0.0625, "loss_xval": 1.453125, "num_input_tokens_seen": 165640432, "step": 2501 }, { "epoch": 0.2341929143071091, "grad_norm": 39.45976638793945, "learning_rate": 5e-05, "loss": 1.3657, "num_input_tokens_seen": 165707380, "step": 2502 }, { "epoch": 0.2341929143071091, "loss": 1.3734135627746582, "loss_ce": 0.001831514062359929, "loss_iou": 0.5859375, "loss_num": 0.040771484375, "loss_xval": 1.375, "num_input_tokens_seen": 165707380, "step": 2502 }, { "epoch": 0.23428651659100483, "grad_norm": 18.85283088684082, "learning_rate": 5e-05, "loss": 1.1919, "num_input_tokens_seen": 165773616, "step": 2503 }, { "epoch": 0.23428651659100483, "loss": 1.2751984596252441, "loss_ce": 0.004690650384873152, "loss_iou": 0.54296875, "loss_num": 0.037353515625, "loss_xval": 1.2734375, "num_input_tokens_seen": 165773616, "step": 2503 }, { "epoch": 0.23438011887490054, "grad_norm": 15.156516075134277, "learning_rate": 5e-05, "loss": 1.2399, "num_input_tokens_seen": 165839672, "step": 2504 }, { "epoch": 0.23438011887490054, "loss": 1.2254211902618408, "loss_ce": 0.00435184221714735, "loss_iou": 0.48828125, "loss_num": 0.048583984375, "loss_xval": 1.21875, "num_input_tokens_seen": 165839672, "step": 2504 }, { "epoch": 0.23447372115879628, "grad_norm": 40.05813217163086, "learning_rate": 5e-05, "loss": 1.4874, "num_input_tokens_seen": 165905700, "step": 2505 }, { "epoch": 0.23447372115879628, "loss": 1.6928784847259521, "loss_ce": 0.005378500558435917, "loss_iou": 0.68359375, "loss_num": 0.064453125, "loss_xval": 1.6875, "num_input_tokens_seen": 165905700, "step": 2505 }, { "epoch": 0.234567323442692, "grad_norm": 82.37883758544922, "learning_rate": 5e-05, "loss": 1.8889, "num_input_tokens_seen": 165972368, "step": 2506 }, { "epoch": 0.234567323442692, "loss": 1.720952033996582, "loss_ce": 0.007084723096340895, "loss_iou": 0.71875, "loss_num": 0.0556640625, "loss_xval": 1.7109375, "num_input_tokens_seen": 165972368, "step": 2506 }, { "epoch": 0.23466092572658773, "grad_norm": 25.550870895385742, "learning_rate": 5e-05, "loss": 1.5326, "num_input_tokens_seen": 166037584, "step": 2507 }, { "epoch": 0.23466092572658773, "loss": 1.562875747680664, "loss_ce": 0.003793750423938036, "loss_iou": 0.61328125, "loss_num": 0.06689453125, "loss_xval": 1.5625, "num_input_tokens_seen": 166037584, "step": 2507 }, { "epoch": 0.23475452801048347, "grad_norm": 35.482093811035156, "learning_rate": 5e-05, "loss": 1.3808, "num_input_tokens_seen": 166103516, "step": 2508 }, { "epoch": 0.23475452801048347, "loss": 1.5787138938903809, "loss_ce": 0.005471743177622557, "loss_iou": 0.65625, "loss_num": 0.052734375, "loss_xval": 1.5703125, "num_input_tokens_seen": 166103516, "step": 2508 }, { "epoch": 0.23484813029437918, "grad_norm": 25.37134552001953, "learning_rate": 5e-05, "loss": 1.4643, "num_input_tokens_seen": 166168932, "step": 2509 }, { "epoch": 0.23484813029437918, "loss": 1.3653082847595215, "loss_ce": 0.005933395121246576, "loss_iou": 0.578125, "loss_num": 0.041259765625, "loss_xval": 1.359375, "num_input_tokens_seen": 166168932, "step": 2509 }, { "epoch": 0.23494173257827491, "grad_norm": 16.15148162841797, "learning_rate": 5e-05, "loss": 1.1893, "num_input_tokens_seen": 166235624, "step": 2510 }, { "epoch": 0.23494173257827491, "loss": 1.31050705909729, "loss_ce": 0.0014250393724069, "loss_iou": 0.54296875, "loss_num": 0.04443359375, "loss_xval": 1.3125, "num_input_tokens_seen": 166235624, "step": 2510 }, { "epoch": 0.23503533486217063, "grad_norm": 24.526351928710938, "learning_rate": 5e-05, "loss": 1.2833, "num_input_tokens_seen": 166301484, "step": 2511 }, { "epoch": 0.23503533486217063, "loss": 1.2813926935195923, "loss_ce": 0.0016074995510280132, "loss_iou": 0.5546875, "loss_num": 0.0341796875, "loss_xval": 1.28125, "num_input_tokens_seen": 166301484, "step": 2511 }, { "epoch": 0.23512893714606636, "grad_norm": 19.538311004638672, "learning_rate": 5e-05, "loss": 1.6607, "num_input_tokens_seen": 166368404, "step": 2512 }, { "epoch": 0.23512893714606636, "loss": 1.573952078819275, "loss_ce": 0.006569328717887402, "loss_iou": 0.6640625, "loss_num": 0.047119140625, "loss_xval": 1.5703125, "num_input_tokens_seen": 166368404, "step": 2512 }, { "epoch": 0.2352225394299621, "grad_norm": 53.490726470947266, "learning_rate": 5e-05, "loss": 1.3682, "num_input_tokens_seen": 166432760, "step": 2513 }, { "epoch": 0.2352225394299621, "loss": 1.5700562000274658, "loss_ce": 0.006091253831982613, "loss_iou": 0.64453125, "loss_num": 0.055419921875, "loss_xval": 1.5625, "num_input_tokens_seen": 166432760, "step": 2513 }, { "epoch": 0.2353161417138578, "grad_norm": 13.956862449645996, "learning_rate": 5e-05, "loss": 1.4508, "num_input_tokens_seen": 166498608, "step": 2514 }, { "epoch": 0.2353161417138578, "loss": 1.46309232711792, "loss_ce": 0.008014105260372162, "loss_iou": 0.609375, "loss_num": 0.04736328125, "loss_xval": 1.453125, "num_input_tokens_seen": 166498608, "step": 2514 }, { "epoch": 0.23540974399775355, "grad_norm": 18.782583236694336, "learning_rate": 5e-05, "loss": 1.3075, "num_input_tokens_seen": 166564932, "step": 2515 }, { "epoch": 0.23540974399775355, "loss": 1.373844027519226, "loss_ce": 0.0027503168676048517, "loss_iou": 0.609375, "loss_num": 0.0311279296875, "loss_xval": 1.375, "num_input_tokens_seen": 166564932, "step": 2515 }, { "epoch": 0.23550334628164926, "grad_norm": 16.891401290893555, "learning_rate": 5e-05, "loss": 1.3846, "num_input_tokens_seen": 166632072, "step": 2516 }, { "epoch": 0.23550334628164926, "loss": 1.401080846786499, "loss_ce": 0.0060612596571445465, "loss_iou": 0.5625, "loss_num": 0.05419921875, "loss_xval": 1.3984375, "num_input_tokens_seen": 166632072, "step": 2516 }, { "epoch": 0.235596948565545, "grad_norm": 31.363994598388672, "learning_rate": 5e-05, "loss": 1.3179, "num_input_tokens_seen": 166698428, "step": 2517 }, { "epoch": 0.235596948565545, "loss": 1.6091729402542114, "loss_ce": 0.008586999028921127, "loss_iou": 0.61328125, "loss_num": 0.0751953125, "loss_xval": 1.6015625, "num_input_tokens_seen": 166698428, "step": 2517 }, { "epoch": 0.23569055084944074, "grad_norm": 22.80681610107422, "learning_rate": 5e-05, "loss": 1.5486, "num_input_tokens_seen": 166764724, "step": 2518 }, { "epoch": 0.23569055084944074, "loss": 1.6586039066314697, "loss_ce": 0.00430699298158288, "loss_iou": 0.65625, "loss_num": 0.068359375, "loss_xval": 1.65625, "num_input_tokens_seen": 166764724, "step": 2518 }, { "epoch": 0.23578415313333645, "grad_norm": 23.773784637451172, "learning_rate": 5e-05, "loss": 1.41, "num_input_tokens_seen": 166831072, "step": 2519 }, { "epoch": 0.23578415313333645, "loss": 1.5527911186218262, "loss_ce": 0.007869203574955463, "loss_iou": 0.62890625, "loss_num": 0.05712890625, "loss_xval": 1.546875, "num_input_tokens_seen": 166831072, "step": 2519 }, { "epoch": 0.2358777554172322, "grad_norm": 26.95271110534668, "learning_rate": 5e-05, "loss": 1.6262, "num_input_tokens_seen": 166897312, "step": 2520 }, { "epoch": 0.2358777554172322, "loss": 1.5096721649169922, "loss_ce": 0.005765873938798904, "loss_iou": 0.6953125, "loss_num": 0.0234375, "loss_xval": 1.5, "num_input_tokens_seen": 166897312, "step": 2520 }, { "epoch": 0.2359713577011279, "grad_norm": 34.8698844909668, "learning_rate": 5e-05, "loss": 1.5628, "num_input_tokens_seen": 166964008, "step": 2521 }, { "epoch": 0.2359713577011279, "loss": 1.5502536296844482, "loss_ce": 0.00386689486913383, "loss_iou": 0.640625, "loss_num": 0.0537109375, "loss_xval": 1.546875, "num_input_tokens_seen": 166964008, "step": 2521 }, { "epoch": 0.23606495998502364, "grad_norm": 47.92771530151367, "learning_rate": 5e-05, "loss": 1.5755, "num_input_tokens_seen": 167029896, "step": 2522 }, { "epoch": 0.23606495998502364, "loss": 1.4616825580596924, "loss_ce": 0.004651276394724846, "loss_iou": 0.62109375, "loss_num": 0.042724609375, "loss_xval": 1.453125, "num_input_tokens_seen": 167029896, "step": 2522 }, { "epoch": 0.23615856226891938, "grad_norm": 13.946309089660645, "learning_rate": 5e-05, "loss": 1.438, "num_input_tokens_seen": 167096340, "step": 2523 }, { "epoch": 0.23615856226891938, "loss": 1.4711437225341797, "loss_ce": 0.0028819835279136896, "loss_iou": 0.59375, "loss_num": 0.056396484375, "loss_xval": 1.46875, "num_input_tokens_seen": 167096340, "step": 2523 }, { "epoch": 0.23625216455281509, "grad_norm": 22.62273406982422, "learning_rate": 5e-05, "loss": 1.2367, "num_input_tokens_seen": 167163156, "step": 2524 }, { "epoch": 0.23625216455281509, "loss": 1.2503703832626343, "loss_ce": 0.00622974056750536, "loss_iou": 0.55078125, "loss_num": 0.0291748046875, "loss_xval": 1.2421875, "num_input_tokens_seen": 167163156, "step": 2524 }, { "epoch": 0.23634576683671082, "grad_norm": 21.35040855407715, "learning_rate": 5e-05, "loss": 1.2765, "num_input_tokens_seen": 167229016, "step": 2525 }, { "epoch": 0.23634576683671082, "loss": 1.3842897415161133, "loss_ce": 0.0034303911961615086, "loss_iou": 0.5390625, "loss_num": 0.060791015625, "loss_xval": 1.3828125, "num_input_tokens_seen": 167229016, "step": 2525 }, { "epoch": 0.23643936912060654, "grad_norm": 29.040626525878906, "learning_rate": 5e-05, "loss": 1.3072, "num_input_tokens_seen": 167294660, "step": 2526 }, { "epoch": 0.23643936912060654, "loss": 1.3200488090515137, "loss_ce": 0.006572265177965164, "loss_iou": 0.53125, "loss_num": 0.049560546875, "loss_xval": 1.3125, "num_input_tokens_seen": 167294660, "step": 2526 }, { "epoch": 0.23653297140450227, "grad_norm": 36.906776428222656, "learning_rate": 5e-05, "loss": 1.3674, "num_input_tokens_seen": 167361328, "step": 2527 }, { "epoch": 0.23653297140450227, "loss": 1.4203541278839111, "loss_ce": 0.0028736297972500324, "loss_iou": 0.61328125, "loss_num": 0.0380859375, "loss_xval": 1.4140625, "num_input_tokens_seen": 167361328, "step": 2527 }, { "epoch": 0.23662657368839798, "grad_norm": 25.944272994995117, "learning_rate": 5e-05, "loss": 1.4463, "num_input_tokens_seen": 167427928, "step": 2528 }, { "epoch": 0.23662657368839798, "loss": 1.4592499732971191, "loss_ce": 0.0051484620198607445, "loss_iou": 0.6015625, "loss_num": 0.050048828125, "loss_xval": 1.453125, "num_input_tokens_seen": 167427928, "step": 2528 }, { "epoch": 0.23672017597229372, "grad_norm": 26.98100471496582, "learning_rate": 5e-05, "loss": 1.1391, "num_input_tokens_seen": 167493916, "step": 2529 }, { "epoch": 0.23672017597229372, "loss": 1.1518512964248657, "loss_ce": 0.008784936740994453, "loss_iou": 0.490234375, "loss_num": 0.032470703125, "loss_xval": 1.140625, "num_input_tokens_seen": 167493916, "step": 2529 }, { "epoch": 0.23681377825618946, "grad_norm": 19.34450912475586, "learning_rate": 5e-05, "loss": 1.4203, "num_input_tokens_seen": 167559904, "step": 2530 }, { "epoch": 0.23681377825618946, "loss": 1.5328006744384766, "loss_ce": 0.01131628081202507, "loss_iou": 0.66796875, "loss_num": 0.037841796875, "loss_xval": 1.5234375, "num_input_tokens_seen": 167559904, "step": 2530 }, { "epoch": 0.23690738054008517, "grad_norm": 15.683530807495117, "learning_rate": 5e-05, "loss": 1.1991, "num_input_tokens_seen": 167625640, "step": 2531 }, { "epoch": 0.23690738054008517, "loss": 1.133538007736206, "loss_ce": 0.006584820803254843, "loss_iou": 0.4765625, "loss_num": 0.03466796875, "loss_xval": 1.125, "num_input_tokens_seen": 167625640, "step": 2531 }, { "epoch": 0.2370009828239809, "grad_norm": 32.248294830322266, "learning_rate": 5e-05, "loss": 1.4016, "num_input_tokens_seen": 167692004, "step": 2532 }, { "epoch": 0.2370009828239809, "loss": 1.3093624114990234, "loss_ce": 0.005773622542619705, "loss_iou": 0.53515625, "loss_num": 0.046875, "loss_xval": 1.3046875, "num_input_tokens_seen": 167692004, "step": 2532 }, { "epoch": 0.23709458510787662, "grad_norm": 69.06314849853516, "learning_rate": 5e-05, "loss": 1.3207, "num_input_tokens_seen": 167757324, "step": 2533 }, { "epoch": 0.23709458510787662, "loss": 1.0936673879623413, "loss_ce": 0.006265057250857353, "loss_iou": 0.400390625, "loss_num": 0.056884765625, "loss_xval": 1.0859375, "num_input_tokens_seen": 167757324, "step": 2533 }, { "epoch": 0.23718818739177236, "grad_norm": 18.926410675048828, "learning_rate": 5e-05, "loss": 1.5909, "num_input_tokens_seen": 167825068, "step": 2534 }, { "epoch": 0.23718818739177236, "loss": 1.6822926998138428, "loss_ce": 0.0035817273892462254, "loss_iou": 0.65234375, "loss_num": 0.0751953125, "loss_xval": 1.6796875, "num_input_tokens_seen": 167825068, "step": 2534 }, { "epoch": 0.2372817896756681, "grad_norm": 24.251678466796875, "learning_rate": 5e-05, "loss": 1.3461, "num_input_tokens_seen": 167891032, "step": 2535 }, { "epoch": 0.2372817896756681, "loss": 1.2933571338653564, "loss_ce": 0.008689153008162975, "loss_iou": 0.546875, "loss_num": 0.037353515625, "loss_xval": 1.28125, "num_input_tokens_seen": 167891032, "step": 2535 }, { "epoch": 0.2373753919595638, "grad_norm": 21.610767364501953, "learning_rate": 5e-05, "loss": 1.4972, "num_input_tokens_seen": 167957360, "step": 2536 }, { "epoch": 0.2373753919595638, "loss": 1.4585134983062744, "loss_ce": 0.005510497838258743, "loss_iou": 0.578125, "loss_num": 0.06005859375, "loss_xval": 1.453125, "num_input_tokens_seen": 167957360, "step": 2536 }, { "epoch": 0.23746899424345955, "grad_norm": 25.770593643188477, "learning_rate": 5e-05, "loss": 1.4145, "num_input_tokens_seen": 168024016, "step": 2537 }, { "epoch": 0.23746899424345955, "loss": 1.599055290222168, "loss_ce": 0.0033521135337650776, "loss_iou": 0.6484375, "loss_num": 0.059326171875, "loss_xval": 1.59375, "num_input_tokens_seen": 168024016, "step": 2537 }, { "epoch": 0.23756259652735526, "grad_norm": 26.87066078186035, "learning_rate": 5e-05, "loss": 1.4639, "num_input_tokens_seen": 168090236, "step": 2538 }, { "epoch": 0.23756259652735526, "loss": 1.3521130084991455, "loss_ce": 0.004456783644855022, "loss_iou": 0.5859375, "loss_num": 0.035400390625, "loss_xval": 1.34375, "num_input_tokens_seen": 168090236, "step": 2538 }, { "epoch": 0.237656198811251, "grad_norm": 33.97237014770508, "learning_rate": 5e-05, "loss": 1.4855, "num_input_tokens_seen": 168156936, "step": 2539 }, { "epoch": 0.237656198811251, "loss": 1.5603784322738647, "loss_ce": 0.007644066587090492, "loss_iou": 0.6328125, "loss_num": 0.057861328125, "loss_xval": 1.5546875, "num_input_tokens_seen": 168156936, "step": 2539 }, { "epoch": 0.23774980109514673, "grad_norm": 42.24701690673828, "learning_rate": 5e-05, "loss": 1.2734, "num_input_tokens_seen": 168221500, "step": 2540 }, { "epoch": 0.23774980109514673, "loss": 1.040938138961792, "loss_ce": 0.007857018150389194, "loss_iou": 0.439453125, "loss_num": 0.030517578125, "loss_xval": 1.03125, "num_input_tokens_seen": 168221500, "step": 2540 }, { "epoch": 0.23784340337904245, "grad_norm": 31.573814392089844, "learning_rate": 5e-05, "loss": 1.4149, "num_input_tokens_seen": 168287724, "step": 2541 }, { "epoch": 0.23784340337904245, "loss": 1.4590835571289062, "loss_ce": 0.006935225334018469, "loss_iou": 0.609375, "loss_num": 0.0458984375, "loss_xval": 1.453125, "num_input_tokens_seen": 168287724, "step": 2541 }, { "epoch": 0.23793700566293818, "grad_norm": 21.765670776367188, "learning_rate": 5e-05, "loss": 1.7283, "num_input_tokens_seen": 168354320, "step": 2542 }, { "epoch": 0.23793700566293818, "loss": 1.7900198698043823, "loss_ce": 0.008769959211349487, "loss_iou": 0.70703125, "loss_num": 0.0732421875, "loss_xval": 1.78125, "num_input_tokens_seen": 168354320, "step": 2542 }, { "epoch": 0.2380306079468339, "grad_norm": 14.011946678161621, "learning_rate": 5e-05, "loss": 1.3828, "num_input_tokens_seen": 168420804, "step": 2543 }, { "epoch": 0.2380306079468339, "loss": 1.226111650466919, "loss_ce": 0.0019905937369912863, "loss_iou": 0.498046875, "loss_num": 0.045654296875, "loss_xval": 1.2265625, "num_input_tokens_seen": 168420804, "step": 2543 }, { "epoch": 0.23812421023072963, "grad_norm": 16.307533264160156, "learning_rate": 5e-05, "loss": 1.0762, "num_input_tokens_seen": 168486652, "step": 2544 }, { "epoch": 0.23812421023072963, "loss": 1.153876543045044, "loss_ce": 0.0034858197905123234, "loss_iou": 0.50390625, "loss_num": 0.0284423828125, "loss_xval": 1.1484375, "num_input_tokens_seen": 168486652, "step": 2544 }, { "epoch": 0.23821781251462534, "grad_norm": 42.976375579833984, "learning_rate": 5e-05, "loss": 1.509, "num_input_tokens_seen": 168553540, "step": 2545 }, { "epoch": 0.23821781251462534, "loss": 1.487226128578186, "loss_ce": 0.006269030272960663, "loss_iou": 0.58984375, "loss_num": 0.060302734375, "loss_xval": 1.484375, "num_input_tokens_seen": 168553540, "step": 2545 }, { "epoch": 0.23831141479852108, "grad_norm": 24.15453338623047, "learning_rate": 5e-05, "loss": 1.6722, "num_input_tokens_seen": 168620332, "step": 2546 }, { "epoch": 0.23831141479852108, "loss": 1.7856779098510742, "loss_ce": 0.007357646245509386, "loss_iou": 0.7578125, "loss_num": 0.052978515625, "loss_xval": 1.78125, "num_input_tokens_seen": 168620332, "step": 2546 }, { "epoch": 0.23840501708241682, "grad_norm": 27.00608253479004, "learning_rate": 5e-05, "loss": 1.299, "num_input_tokens_seen": 168686700, "step": 2547 }, { "epoch": 0.23840501708241682, "loss": 1.1916871070861816, "loss_ce": 0.0071167671121656895, "loss_iou": 0.4453125, "loss_num": 0.05859375, "loss_xval": 1.1875, "num_input_tokens_seen": 168686700, "step": 2547 }, { "epoch": 0.23849861936631253, "grad_norm": 33.46535873413086, "learning_rate": 5e-05, "loss": 1.5463, "num_input_tokens_seen": 168752716, "step": 2548 }, { "epoch": 0.23849861936631253, "loss": 1.7102900743484497, "loss_ce": 0.007165055721998215, "loss_iou": 0.75390625, "loss_num": 0.039794921875, "loss_xval": 1.703125, "num_input_tokens_seen": 168752716, "step": 2548 }, { "epoch": 0.23859222165020827, "grad_norm": 25.385046005249023, "learning_rate": 5e-05, "loss": 1.7107, "num_input_tokens_seen": 168819832, "step": 2549 }, { "epoch": 0.23859222165020827, "loss": 1.8767709732055664, "loss_ce": 0.004700660705566406, "loss_iou": 0.80859375, "loss_num": 0.051025390625, "loss_xval": 1.875, "num_input_tokens_seen": 168819832, "step": 2549 }, { "epoch": 0.23868582393410398, "grad_norm": 29.79375457763672, "learning_rate": 5e-05, "loss": 1.3082, "num_input_tokens_seen": 168885716, "step": 2550 }, { "epoch": 0.23868582393410398, "loss": 1.150732159614563, "loss_ce": 0.0037594810128211975, "loss_iou": 0.494140625, "loss_num": 0.0322265625, "loss_xval": 1.1484375, "num_input_tokens_seen": 168885716, "step": 2550 }, { "epoch": 0.23877942621799972, "grad_norm": 19.679378509521484, "learning_rate": 5e-05, "loss": 1.5818, "num_input_tokens_seen": 168950856, "step": 2551 }, { "epoch": 0.23877942621799972, "loss": 1.5313818454742432, "loss_ce": 0.009897556155920029, "loss_iou": 0.54296875, "loss_num": 0.0869140625, "loss_xval": 1.5234375, "num_input_tokens_seen": 168950856, "step": 2551 }, { "epoch": 0.23887302850189546, "grad_norm": 35.51392364501953, "learning_rate": 5e-05, "loss": 1.3755, "num_input_tokens_seen": 169017284, "step": 2552 }, { "epoch": 0.23887302850189546, "loss": 1.4114516973495483, "loss_ce": 0.004225119948387146, "loss_iou": 0.58203125, "loss_num": 0.04931640625, "loss_xval": 1.40625, "num_input_tokens_seen": 169017284, "step": 2552 }, { "epoch": 0.23896663078579117, "grad_norm": 17.957441329956055, "learning_rate": 5e-05, "loss": 1.6443, "num_input_tokens_seen": 169083700, "step": 2553 }, { "epoch": 0.23896663078579117, "loss": 1.655320167541504, "loss_ce": 0.010788802057504654, "loss_iou": 0.6640625, "loss_num": 0.0634765625, "loss_xval": 1.640625, "num_input_tokens_seen": 169083700, "step": 2553 }, { "epoch": 0.2390602330696869, "grad_norm": 25.54216194152832, "learning_rate": 5e-05, "loss": 1.5068, "num_input_tokens_seen": 169150616, "step": 2554 }, { "epoch": 0.2390602330696869, "loss": 1.3988134860992432, "loss_ce": 0.009165081195533276, "loss_iou": 0.5859375, "loss_num": 0.04345703125, "loss_xval": 1.390625, "num_input_tokens_seen": 169150616, "step": 2554 }, { "epoch": 0.23915383535358262, "grad_norm": 22.393592834472656, "learning_rate": 5e-05, "loss": 1.4269, "num_input_tokens_seen": 169217596, "step": 2555 }, { "epoch": 0.23915383535358262, "loss": 1.2467050552368164, "loss_ce": 0.0064705777913331985, "loss_iou": 0.50390625, "loss_num": 0.0458984375, "loss_xval": 1.2421875, "num_input_tokens_seen": 169217596, "step": 2555 }, { "epoch": 0.23924743763747836, "grad_norm": 22.189350128173828, "learning_rate": 5e-05, "loss": 1.6242, "num_input_tokens_seen": 169283752, "step": 2556 }, { "epoch": 0.23924743763747836, "loss": 1.204790711402893, "loss_ce": 0.003618875052779913, "loss_iou": 0.50390625, "loss_num": 0.0390625, "loss_xval": 1.203125, "num_input_tokens_seen": 169283752, "step": 2556 }, { "epoch": 0.2393410399213741, "grad_norm": 18.53838348388672, "learning_rate": 5e-05, "loss": 1.3443, "num_input_tokens_seen": 169351472, "step": 2557 }, { "epoch": 0.2393410399213741, "loss": 1.192333698272705, "loss_ce": 0.004833739250898361, "loss_iou": 0.5078125, "loss_num": 0.0341796875, "loss_xval": 1.1875, "num_input_tokens_seen": 169351472, "step": 2557 }, { "epoch": 0.2394346422052698, "grad_norm": 36.56483840942383, "learning_rate": 5e-05, "loss": 1.4114, "num_input_tokens_seen": 169417264, "step": 2558 }, { "epoch": 0.2394346422052698, "loss": 1.2931647300720215, "loss_ce": 0.0021491688676178455, "loss_iou": 0.5546875, "loss_num": 0.035888671875, "loss_xval": 1.2890625, "num_input_tokens_seen": 169417264, "step": 2558 }, { "epoch": 0.23952824448916554, "grad_norm": 28.568727493286133, "learning_rate": 5e-05, "loss": 1.7392, "num_input_tokens_seen": 169482812, "step": 2559 }, { "epoch": 0.23952824448916554, "loss": 1.7891249656677246, "loss_ce": 0.0068985046818852425, "loss_iou": 0.734375, "loss_num": 0.06298828125, "loss_xval": 1.78125, "num_input_tokens_seen": 169482812, "step": 2559 }, { "epoch": 0.23962184677306125, "grad_norm": 14.091453552246094, "learning_rate": 5e-05, "loss": 1.4067, "num_input_tokens_seen": 169548612, "step": 2560 }, { "epoch": 0.23962184677306125, "loss": 1.3772664070129395, "loss_ce": 0.007149163633584976, "loss_iou": 0.5390625, "loss_num": 0.05859375, "loss_xval": 1.3671875, "num_input_tokens_seen": 169548612, "step": 2560 }, { "epoch": 0.239715449056957, "grad_norm": 19.679611206054688, "learning_rate": 5e-05, "loss": 1.453, "num_input_tokens_seen": 169614736, "step": 2561 }, { "epoch": 0.239715449056957, "loss": 1.468279242515564, "loss_ce": 0.008318359032273293, "loss_iou": 0.578125, "loss_num": 0.060791015625, "loss_xval": 1.4609375, "num_input_tokens_seen": 169614736, "step": 2561 }, { "epoch": 0.23980905134085273, "grad_norm": 23.91083335876465, "learning_rate": 5e-05, "loss": 1.1501, "num_input_tokens_seen": 169680272, "step": 2562 }, { "epoch": 0.23980905134085273, "loss": 1.3164702653884888, "loss_ce": 0.006411626935005188, "loss_iou": 0.5546875, "loss_num": 0.040771484375, "loss_xval": 1.3125, "num_input_tokens_seen": 169680272, "step": 2562 }, { "epoch": 0.23990265362474844, "grad_norm": 27.89542007446289, "learning_rate": 5e-05, "loss": 1.3645, "num_input_tokens_seen": 169745792, "step": 2563 }, { "epoch": 0.23990265362474844, "loss": 1.1841665506362915, "loss_ce": 0.00862942449748516, "loss_iou": 0.45703125, "loss_num": 0.05224609375, "loss_xval": 1.171875, "num_input_tokens_seen": 169745792, "step": 2563 }, { "epoch": 0.23999625590864418, "grad_norm": 21.266613006591797, "learning_rate": 5e-05, "loss": 1.4262, "num_input_tokens_seen": 169812188, "step": 2564 }, { "epoch": 0.23999625590864418, "loss": 1.3133118152618408, "loss_ce": 0.010089192539453506, "loss_iou": 0.5546875, "loss_num": 0.03857421875, "loss_xval": 1.3046875, "num_input_tokens_seen": 169812188, "step": 2564 }, { "epoch": 0.2400898581925399, "grad_norm": 21.182580947875977, "learning_rate": 5e-05, "loss": 1.4142, "num_input_tokens_seen": 169877916, "step": 2565 }, { "epoch": 0.2400898581925399, "loss": 1.5413999557495117, "loss_ce": 0.0062437597662210464, "loss_iou": 0.62109375, "loss_num": 0.057861328125, "loss_xval": 1.53125, "num_input_tokens_seen": 169877916, "step": 2565 }, { "epoch": 0.24018346047643563, "grad_norm": 52.13093185424805, "learning_rate": 5e-05, "loss": 1.5151, "num_input_tokens_seen": 169944320, "step": 2566 }, { "epoch": 0.24018346047643563, "loss": 1.5705912113189697, "loss_ce": 0.010044336318969727, "loss_iou": 0.65625, "loss_num": 0.049072265625, "loss_xval": 1.5625, "num_input_tokens_seen": 169944320, "step": 2566 }, { "epoch": 0.24027706276033134, "grad_norm": 23.22975730895996, "learning_rate": 5e-05, "loss": 1.3332, "num_input_tokens_seen": 170011128, "step": 2567 }, { "epoch": 0.24027706276033134, "loss": 1.5615291595458984, "loss_ce": 0.007818352431058884, "loss_iou": 0.6484375, "loss_num": 0.05224609375, "loss_xval": 1.5546875, "num_input_tokens_seen": 170011128, "step": 2567 }, { "epoch": 0.24037066504422708, "grad_norm": 25.49972152709961, "learning_rate": 5e-05, "loss": 1.3914, "num_input_tokens_seen": 170077496, "step": 2568 }, { "epoch": 0.24037066504422708, "loss": 1.5926051139831543, "loss_ce": 0.007644252851605415, "loss_iou": 0.6171875, "loss_num": 0.06982421875, "loss_xval": 1.5859375, "num_input_tokens_seen": 170077496, "step": 2568 }, { "epoch": 0.24046426732812282, "grad_norm": 40.0810546875, "learning_rate": 5e-05, "loss": 1.5399, "num_input_tokens_seen": 170144536, "step": 2569 }, { "epoch": 0.24046426732812282, "loss": 1.6190154552459717, "loss_ce": 0.0047575682401657104, "loss_iou": 0.6953125, "loss_num": 0.045166015625, "loss_xval": 1.6171875, "num_input_tokens_seen": 170144536, "step": 2569 }, { "epoch": 0.24055786961201853, "grad_norm": 88.32617950439453, "learning_rate": 5e-05, "loss": 1.5308, "num_input_tokens_seen": 170210088, "step": 2570 }, { "epoch": 0.24055786961201853, "loss": 1.5420403480529785, "loss_ce": 0.006395814009010792, "loss_iou": 0.58203125, "loss_num": 0.07373046875, "loss_xval": 1.5390625, "num_input_tokens_seen": 170210088, "step": 2570 }, { "epoch": 0.24065147189591427, "grad_norm": 21.787992477416992, "learning_rate": 5e-05, "loss": 1.495, "num_input_tokens_seen": 170276552, "step": 2571 }, { "epoch": 0.24065147189591427, "loss": 1.5120552778244019, "loss_ce": 0.004242782015353441, "loss_iou": 0.65625, "loss_num": 0.03857421875, "loss_xval": 1.5078125, "num_input_tokens_seen": 170276552, "step": 2571 }, { "epoch": 0.24074507417980998, "grad_norm": 11.673762321472168, "learning_rate": 5e-05, "loss": 1.3003, "num_input_tokens_seen": 170342504, "step": 2572 }, { "epoch": 0.24074507417980998, "loss": 1.2681864500045776, "loss_ce": 0.0025614858604967594, "loss_iou": 0.4921875, "loss_num": 0.056396484375, "loss_xval": 1.265625, "num_input_tokens_seen": 170342504, "step": 2572 }, { "epoch": 0.24083867646370571, "grad_norm": 25.950706481933594, "learning_rate": 5e-05, "loss": 1.3317, "num_input_tokens_seen": 170408564, "step": 2573 }, { "epoch": 0.24083867646370571, "loss": 1.2869504690170288, "loss_ce": 0.006188774481415749, "loss_iou": 0.53515625, "loss_num": 0.041015625, "loss_xval": 1.28125, "num_input_tokens_seen": 170408564, "step": 2573 }, { "epoch": 0.24093227874760145, "grad_norm": 27.499576568603516, "learning_rate": 5e-05, "loss": 1.2999, "num_input_tokens_seen": 170473744, "step": 2574 }, { "epoch": 0.24093227874760145, "loss": 1.6399996280670166, "loss_ce": 0.005234008654952049, "loss_iou": 0.66015625, "loss_num": 0.062255859375, "loss_xval": 1.6328125, "num_input_tokens_seen": 170473744, "step": 2574 }, { "epoch": 0.24102588103149716, "grad_norm": 20.279264450073242, "learning_rate": 5e-05, "loss": 1.614, "num_input_tokens_seen": 170540660, "step": 2575 }, { "epoch": 0.24102588103149716, "loss": 1.6746065616607666, "loss_ce": 0.0037080540787428617, "loss_iou": 0.66796875, "loss_num": 0.06689453125, "loss_xval": 1.671875, "num_input_tokens_seen": 170540660, "step": 2575 }, { "epoch": 0.2411194833153929, "grad_norm": 12.744171142578125, "learning_rate": 5e-05, "loss": 1.1389, "num_input_tokens_seen": 170607608, "step": 2576 }, { "epoch": 0.2411194833153929, "loss": 1.1031405925750732, "loss_ce": 0.004996098577976227, "loss_iou": 0.46484375, "loss_num": 0.0341796875, "loss_xval": 1.1015625, "num_input_tokens_seen": 170607608, "step": 2576 }, { "epoch": 0.2412130855992886, "grad_norm": 19.449844360351562, "learning_rate": 5e-05, "loss": 1.3246, "num_input_tokens_seen": 170674336, "step": 2577 }, { "epoch": 0.2412130855992886, "loss": 1.2694692611694336, "loss_ce": 0.008727147243916988, "loss_iou": 0.51953125, "loss_num": 0.0439453125, "loss_xval": 1.2578125, "num_input_tokens_seen": 170674336, "step": 2577 }, { "epoch": 0.24130668788318435, "grad_norm": 19.487255096435547, "learning_rate": 5e-05, "loss": 1.522, "num_input_tokens_seen": 170741276, "step": 2578 }, { "epoch": 0.24130668788318435, "loss": 1.6049238443374634, "loss_ce": 0.006291025318205357, "loss_iou": 0.66796875, "loss_num": 0.05322265625, "loss_xval": 1.6015625, "num_input_tokens_seen": 170741276, "step": 2578 }, { "epoch": 0.2414002901670801, "grad_norm": 30.820302963256836, "learning_rate": 5e-05, "loss": 1.2787, "num_input_tokens_seen": 170807096, "step": 2579 }, { "epoch": 0.2414002901670801, "loss": 1.2158865928649902, "loss_ce": 0.007878745906054974, "loss_iou": 0.53515625, "loss_num": 0.0269775390625, "loss_xval": 1.2109375, "num_input_tokens_seen": 170807096, "step": 2579 }, { "epoch": 0.2414938924509758, "grad_norm": 28.080768585205078, "learning_rate": 5e-05, "loss": 1.1996, "num_input_tokens_seen": 170872096, "step": 2580 }, { "epoch": 0.2414938924509758, "loss": 1.2678003311157227, "loss_ce": 0.003395965788513422, "loss_iou": 0.4921875, "loss_num": 0.055908203125, "loss_xval": 1.265625, "num_input_tokens_seen": 170872096, "step": 2580 }, { "epoch": 0.24158749473487154, "grad_norm": 35.67443084716797, "learning_rate": 5e-05, "loss": 1.4635, "num_input_tokens_seen": 170937528, "step": 2581 }, { "epoch": 0.24158749473487154, "loss": 1.3514882326126099, "loss_ce": 0.006136074662208557, "loss_iou": 0.5390625, "loss_num": 0.053955078125, "loss_xval": 1.34375, "num_input_tokens_seen": 170937528, "step": 2581 }, { "epoch": 0.24168109701876725, "grad_norm": 23.175838470458984, "learning_rate": 5e-05, "loss": 1.2044, "num_input_tokens_seen": 171003588, "step": 2582 }, { "epoch": 0.24168109701876725, "loss": 1.240459680557251, "loss_ce": 0.0031549385748803616, "loss_iou": 0.54296875, "loss_num": 0.030029296875, "loss_xval": 1.234375, "num_input_tokens_seen": 171003588, "step": 2582 }, { "epoch": 0.241774699302663, "grad_norm": 36.16810989379883, "learning_rate": 5e-05, "loss": 1.6477, "num_input_tokens_seen": 171069960, "step": 2583 }, { "epoch": 0.241774699302663, "loss": 1.805147409439087, "loss_ce": 0.003389598336070776, "loss_iou": 0.75, "loss_num": 0.06103515625, "loss_xval": 1.8046875, "num_input_tokens_seen": 171069960, "step": 2583 }, { "epoch": 0.2418683015865587, "grad_norm": 20.256290435791016, "learning_rate": 5e-05, "loss": 1.6536, "num_input_tokens_seen": 171136008, "step": 2584 }, { "epoch": 0.2418683015865587, "loss": 1.4925332069396973, "loss_ce": 0.0032754617277532816, "loss_iou": 0.625, "loss_num": 0.0478515625, "loss_xval": 1.4921875, "num_input_tokens_seen": 171136008, "step": 2584 }, { "epoch": 0.24196190387045444, "grad_norm": 26.217763900756836, "learning_rate": 5e-05, "loss": 1.3569, "num_input_tokens_seen": 171202648, "step": 2585 }, { "epoch": 0.24196190387045444, "loss": 1.381807565689087, "loss_ce": 0.0033896476961672306, "loss_iou": 0.546875, "loss_num": 0.056884765625, "loss_xval": 1.375, "num_input_tokens_seen": 171202648, "step": 2585 }, { "epoch": 0.24205550615435018, "grad_norm": 36.30076599121094, "learning_rate": 5e-05, "loss": 1.4802, "num_input_tokens_seen": 171268772, "step": 2586 }, { "epoch": 0.24205550615435018, "loss": 1.5037238597869873, "loss_ce": 0.003235521959140897, "loss_iou": 0.63671875, "loss_num": 0.04541015625, "loss_xval": 1.5, "num_input_tokens_seen": 171268772, "step": 2586 }, { "epoch": 0.24214910843824589, "grad_norm": 23.128686904907227, "learning_rate": 5e-05, "loss": 1.7105, "num_input_tokens_seen": 171335572, "step": 2587 }, { "epoch": 0.24214910843824589, "loss": 1.7145795822143555, "loss_ce": 0.006571783684194088, "loss_iou": 0.68359375, "loss_num": 0.06884765625, "loss_xval": 1.7109375, "num_input_tokens_seen": 171335572, "step": 2587 }, { "epoch": 0.24224271072214162, "grad_norm": 18.27375030517578, "learning_rate": 5e-05, "loss": 1.1838, "num_input_tokens_seen": 171402616, "step": 2588 }, { "epoch": 0.24224271072214162, "loss": 1.2350122928619385, "loss_ce": 0.003078640438616276, "loss_iou": 0.546875, "loss_num": 0.02783203125, "loss_xval": 1.234375, "num_input_tokens_seen": 171402616, "step": 2588 }, { "epoch": 0.24233631300603733, "grad_norm": 22.334333419799805, "learning_rate": 5e-05, "loss": 1.5672, "num_input_tokens_seen": 171468692, "step": 2589 }, { "epoch": 0.24233631300603733, "loss": 1.8665173053741455, "loss_ce": 0.006165698170661926, "loss_iou": 0.6953125, "loss_num": 0.09326171875, "loss_xval": 1.859375, "num_input_tokens_seen": 171468692, "step": 2589 }, { "epoch": 0.24242991528993307, "grad_norm": 27.002775192260742, "learning_rate": 5e-05, "loss": 1.409, "num_input_tokens_seen": 171534336, "step": 2590 }, { "epoch": 0.24242991528993307, "loss": 1.4501802921295166, "loss_ce": 0.009262454695999622, "loss_iou": 0.5625, "loss_num": 0.0634765625, "loss_xval": 1.4375, "num_input_tokens_seen": 171534336, "step": 2590 }, { "epoch": 0.2425235175738288, "grad_norm": 33.853355407714844, "learning_rate": 5e-05, "loss": 1.4523, "num_input_tokens_seen": 171600636, "step": 2591 }, { "epoch": 0.2425235175738288, "loss": 1.5113219022750854, "loss_ce": 0.006439114920794964, "loss_iou": 0.6328125, "loss_num": 0.04833984375, "loss_xval": 1.5078125, "num_input_tokens_seen": 171600636, "step": 2591 }, { "epoch": 0.24261711985772452, "grad_norm": 22.62224769592285, "learning_rate": 5e-05, "loss": 1.4184, "num_input_tokens_seen": 171666148, "step": 2592 }, { "epoch": 0.24261711985772452, "loss": 1.409839391708374, "loss_ce": 0.004566041752696037, "loss_iou": 0.60546875, "loss_num": 0.0390625, "loss_xval": 1.40625, "num_input_tokens_seen": 171666148, "step": 2592 }, { "epoch": 0.24271072214162026, "grad_norm": 20.081279754638672, "learning_rate": 5e-05, "loss": 1.0565, "num_input_tokens_seen": 171731468, "step": 2593 }, { "epoch": 0.24271072214162026, "loss": 1.211484670639038, "loss_ce": 0.005918317008763552, "loss_iou": 0.51171875, "loss_num": 0.036865234375, "loss_xval": 1.203125, "num_input_tokens_seen": 171731468, "step": 2593 }, { "epoch": 0.24280432442551597, "grad_norm": 24.427959442138672, "learning_rate": 5e-05, "loss": 1.3446, "num_input_tokens_seen": 171797680, "step": 2594 }, { "epoch": 0.24280432442551597, "loss": 1.4312868118286133, "loss_ce": 0.00550546171143651, "loss_iou": 0.578125, "loss_num": 0.05419921875, "loss_xval": 1.421875, "num_input_tokens_seen": 171797680, "step": 2594 }, { "epoch": 0.2428979267094117, "grad_norm": 24.9825439453125, "learning_rate": 5e-05, "loss": 1.4902, "num_input_tokens_seen": 171865792, "step": 2595 }, { "epoch": 0.2428979267094117, "loss": 1.6268292665481567, "loss_ce": 0.0018292388413101435, "loss_iou": 0.65625, "loss_num": 0.062255859375, "loss_xval": 1.625, "num_input_tokens_seen": 171865792, "step": 2595 }, { "epoch": 0.24299152899330745, "grad_norm": 65.03515625, "learning_rate": 5e-05, "loss": 1.6393, "num_input_tokens_seen": 171932384, "step": 2596 }, { "epoch": 0.24299152899330745, "loss": 1.5383191108703613, "loss_ce": 0.005604333709925413, "loss_iou": 0.62890625, "loss_num": 0.05419921875, "loss_xval": 1.53125, "num_input_tokens_seen": 171932384, "step": 2596 }, { "epoch": 0.24308513127720316, "grad_norm": 31.7029972076416, "learning_rate": 5e-05, "loss": 1.1096, "num_input_tokens_seen": 171998412, "step": 2597 }, { "epoch": 0.24308513127720316, "loss": 1.1933958530426025, "loss_ce": 0.004919266793876886, "loss_iou": 0.49609375, "loss_num": 0.0390625, "loss_xval": 1.1875, "num_input_tokens_seen": 171998412, "step": 2597 }, { "epoch": 0.2431787335610989, "grad_norm": 21.96271324157715, "learning_rate": 5e-05, "loss": 1.4555, "num_input_tokens_seen": 172064900, "step": 2598 }, { "epoch": 0.2431787335610989, "loss": 1.7012999057769775, "loss_ce": 0.0030576358549296856, "loss_iou": 0.7265625, "loss_num": 0.049072265625, "loss_xval": 1.6953125, "num_input_tokens_seen": 172064900, "step": 2598 }, { "epoch": 0.2432723358449946, "grad_norm": 35.74529266357422, "learning_rate": 5e-05, "loss": 1.3992, "num_input_tokens_seen": 172131392, "step": 2599 }, { "epoch": 0.2432723358449946, "loss": 1.285733938217163, "loss_ce": 0.009122655726969242, "loss_iou": 0.490234375, "loss_num": 0.05908203125, "loss_xval": 1.2734375, "num_input_tokens_seen": 172131392, "step": 2599 }, { "epoch": 0.24336593812889035, "grad_norm": 23.304189682006836, "learning_rate": 5e-05, "loss": 1.8166, "num_input_tokens_seen": 172198288, "step": 2600 }, { "epoch": 0.24336593812889035, "loss": 2.132443904876709, "loss_ce": 0.0064674364402890205, "loss_iou": 0.80859375, "loss_num": 0.1015625, "loss_xval": 2.125, "num_input_tokens_seen": 172198288, "step": 2600 }, { "epoch": 0.24345954041278609, "grad_norm": 10.42893123626709, "learning_rate": 5e-05, "loss": 1.513, "num_input_tokens_seen": 172264740, "step": 2601 }, { "epoch": 0.24345954041278609, "loss": 1.3585100173950195, "loss_ce": 0.004017863888293505, "loss_iou": 0.53125, "loss_num": 0.05859375, "loss_xval": 1.3515625, "num_input_tokens_seen": 172264740, "step": 2601 }, { "epoch": 0.2435531426966818, "grad_norm": 21.951444625854492, "learning_rate": 5e-05, "loss": 1.2679, "num_input_tokens_seen": 172330220, "step": 2602 }, { "epoch": 0.2435531426966818, "loss": 1.268410086631775, "loss_ce": 0.002296827267855406, "loss_iou": 0.51171875, "loss_num": 0.048828125, "loss_xval": 1.265625, "num_input_tokens_seen": 172330220, "step": 2602 }, { "epoch": 0.24364674498057753, "grad_norm": 27.78486442565918, "learning_rate": 5e-05, "loss": 1.6186, "num_input_tokens_seen": 172397216, "step": 2603 }, { "epoch": 0.24364674498057753, "loss": 1.6075851917266846, "loss_ce": 0.0011398645583540201, "loss_iou": 0.6484375, "loss_num": 0.061767578125, "loss_xval": 1.609375, "num_input_tokens_seen": 172397216, "step": 2603 }, { "epoch": 0.24374034726447324, "grad_norm": 23.510292053222656, "learning_rate": 5e-05, "loss": 1.1218, "num_input_tokens_seen": 172464464, "step": 2604 }, { "epoch": 0.24374034726447324, "loss": 1.1526579856872559, "loss_ce": 0.006173687055706978, "loss_iou": 0.484375, "loss_num": 0.03564453125, "loss_xval": 1.1484375, "num_input_tokens_seen": 172464464, "step": 2604 }, { "epoch": 0.24383394954836898, "grad_norm": 29.873241424560547, "learning_rate": 5e-05, "loss": 1.663, "num_input_tokens_seen": 172530968, "step": 2605 }, { "epoch": 0.24383394954836898, "loss": 1.8581818342208862, "loss_ce": 0.0080841314047575, "loss_iou": 0.7265625, "loss_num": 0.08056640625, "loss_xval": 1.8515625, "num_input_tokens_seen": 172530968, "step": 2605 }, { "epoch": 0.2439275518322647, "grad_norm": 27.9725399017334, "learning_rate": 5e-05, "loss": 1.344, "num_input_tokens_seen": 172597692, "step": 2606 }, { "epoch": 0.2439275518322647, "loss": 1.5610685348510742, "loss_ce": 0.006869280710816383, "loss_iou": 0.6328125, "loss_num": 0.0576171875, "loss_xval": 1.5546875, "num_input_tokens_seen": 172597692, "step": 2606 }, { "epoch": 0.24402115411616043, "grad_norm": 20.911762237548828, "learning_rate": 5e-05, "loss": 1.3184, "num_input_tokens_seen": 172663132, "step": 2607 }, { "epoch": 0.24402115411616043, "loss": 1.3391375541687012, "loss_ce": 0.002467667916789651, "loss_iou": 0.57421875, "loss_num": 0.037841796875, "loss_xval": 1.3359375, "num_input_tokens_seen": 172663132, "step": 2607 }, { "epoch": 0.24411475640005617, "grad_norm": 17.5421085357666, "learning_rate": 5e-05, "loss": 1.5946, "num_input_tokens_seen": 172730444, "step": 2608 }, { "epoch": 0.24411475640005617, "loss": 1.7361726760864258, "loss_ce": 0.006192308850586414, "loss_iou": 0.6796875, "loss_num": 0.07421875, "loss_xval": 1.7265625, "num_input_tokens_seen": 172730444, "step": 2608 }, { "epoch": 0.24420835868395188, "grad_norm": 19.712034225463867, "learning_rate": 5e-05, "loss": 1.4634, "num_input_tokens_seen": 172796284, "step": 2609 }, { "epoch": 0.24420835868395188, "loss": 1.4511772394180298, "loss_ce": 0.008306168019771576, "loss_iou": 0.58203125, "loss_num": 0.05517578125, "loss_xval": 1.4453125, "num_input_tokens_seen": 172796284, "step": 2609 }, { "epoch": 0.24430196096784762, "grad_norm": 31.43054962158203, "learning_rate": 5e-05, "loss": 1.6605, "num_input_tokens_seen": 172862912, "step": 2610 }, { "epoch": 0.24430196096784762, "loss": 1.5833003520965576, "loss_ce": 0.007128553930670023, "loss_iou": 0.6484375, "loss_num": 0.05615234375, "loss_xval": 1.578125, "num_input_tokens_seen": 172862912, "step": 2610 }, { "epoch": 0.24439556325174333, "grad_norm": 52.54152297973633, "learning_rate": 5e-05, "loss": 1.7262, "num_input_tokens_seen": 172930156, "step": 2611 }, { "epoch": 0.24439556325174333, "loss": 1.826629638671875, "loss_ce": 0.0024108190555125475, "loss_iou": 0.796875, "loss_num": 0.046142578125, "loss_xval": 1.828125, "num_input_tokens_seen": 172930156, "step": 2611 }, { "epoch": 0.24448916553563907, "grad_norm": 15.385767936706543, "learning_rate": 5e-05, "loss": 1.3084, "num_input_tokens_seen": 172996928, "step": 2612 }, { "epoch": 0.24448916553563907, "loss": 1.4285422563552856, "loss_ce": 0.0027609597891569138, "loss_iou": 0.59765625, "loss_num": 0.04638671875, "loss_xval": 1.421875, "num_input_tokens_seen": 172996928, "step": 2612 }, { "epoch": 0.2445827678195348, "grad_norm": 17.045854568481445, "learning_rate": 5e-05, "loss": 1.0687, "num_input_tokens_seen": 173062400, "step": 2613 }, { "epoch": 0.2445827678195348, "loss": 0.8779564499855042, "loss_ce": 0.0014916412765160203, "loss_iou": 0.376953125, "loss_num": 0.0247802734375, "loss_xval": 0.875, "num_input_tokens_seen": 173062400, "step": 2613 }, { "epoch": 0.24467637010343052, "grad_norm": 24.084728240966797, "learning_rate": 5e-05, "loss": 1.4739, "num_input_tokens_seen": 173128656, "step": 2614 }, { "epoch": 0.24467637010343052, "loss": 1.4581732749938965, "loss_ce": 0.007978045381605625, "loss_iou": 0.58203125, "loss_num": 0.05810546875, "loss_xval": 1.453125, "num_input_tokens_seen": 173128656, "step": 2614 }, { "epoch": 0.24476997238732626, "grad_norm": 19.442224502563477, "learning_rate": 5e-05, "loss": 1.5273, "num_input_tokens_seen": 173195136, "step": 2615 }, { "epoch": 0.24476997238732626, "loss": 1.490162968635559, "loss_ce": 0.004811372607946396, "loss_iou": 0.578125, "loss_num": 0.06689453125, "loss_xval": 1.484375, "num_input_tokens_seen": 173195136, "step": 2615 }, { "epoch": 0.24486357467122197, "grad_norm": 17.347576141357422, "learning_rate": 5e-05, "loss": 1.2705, "num_input_tokens_seen": 173262156, "step": 2616 }, { "epoch": 0.24486357467122197, "loss": 1.2691221237182617, "loss_ce": 0.006426775828003883, "loss_iou": 0.55859375, "loss_num": 0.029296875, "loss_xval": 1.265625, "num_input_tokens_seen": 173262156, "step": 2616 }, { "epoch": 0.2449571769551177, "grad_norm": 25.86452293395996, "learning_rate": 5e-05, "loss": 1.3638, "num_input_tokens_seen": 173329596, "step": 2617 }, { "epoch": 0.2449571769551177, "loss": 1.4407048225402832, "loss_ce": 0.008087588474154472, "loss_iou": 0.62109375, "loss_num": 0.0380859375, "loss_xval": 1.4296875, "num_input_tokens_seen": 173329596, "step": 2617 }, { "epoch": 0.24505077923901344, "grad_norm": 22.31635856628418, "learning_rate": 5e-05, "loss": 1.4014, "num_input_tokens_seen": 173394924, "step": 2618 }, { "epoch": 0.24505077923901344, "loss": 1.4378025531768799, "loss_ce": 0.00616195984184742, "loss_iou": 0.58984375, "loss_num": 0.05029296875, "loss_xval": 1.4296875, "num_input_tokens_seen": 173394924, "step": 2618 }, { "epoch": 0.24514438152290915, "grad_norm": 18.50616455078125, "learning_rate": 5e-05, "loss": 1.5329, "num_input_tokens_seen": 173461368, "step": 2619 }, { "epoch": 0.24514438152290915, "loss": 1.4879770278930664, "loss_ce": 0.0036019599065184593, "loss_iou": 0.63671875, "loss_num": 0.042236328125, "loss_xval": 1.484375, "num_input_tokens_seen": 173461368, "step": 2619 }, { "epoch": 0.2452379838068049, "grad_norm": 54.79182815551758, "learning_rate": 5e-05, "loss": 1.1385, "num_input_tokens_seen": 173528312, "step": 2620 }, { "epoch": 0.2452379838068049, "loss": 1.1173272132873535, "loss_ce": 0.006487318314611912, "loss_iou": 0.458984375, "loss_num": 0.0380859375, "loss_xval": 1.109375, "num_input_tokens_seen": 173528312, "step": 2620 }, { "epoch": 0.2453315860907006, "grad_norm": 27.31485366821289, "learning_rate": 5e-05, "loss": 1.3793, "num_input_tokens_seen": 173594048, "step": 2621 }, { "epoch": 0.2453315860907006, "loss": 1.5278196334838867, "loss_ce": 0.009264917112886906, "loss_iou": 0.6484375, "loss_num": 0.044677734375, "loss_xval": 1.515625, "num_input_tokens_seen": 173594048, "step": 2621 }, { "epoch": 0.24542518837459634, "grad_norm": 35.106300354003906, "learning_rate": 5e-05, "loss": 1.4417, "num_input_tokens_seen": 173660156, "step": 2622 }, { "epoch": 0.24542518837459634, "loss": 1.4897924661636353, "loss_ce": 0.009323794394731522, "loss_iou": 0.62890625, "loss_num": 0.04541015625, "loss_xval": 1.484375, "num_input_tokens_seen": 173660156, "step": 2622 }, { "epoch": 0.24551879065849208, "grad_norm": 18.99059295654297, "learning_rate": 5e-05, "loss": 1.864, "num_input_tokens_seen": 173725528, "step": 2623 }, { "epoch": 0.24551879065849208, "loss": 1.9642072916030884, "loss_ce": 0.006199460010975599, "loss_iou": 0.75390625, "loss_num": 0.09033203125, "loss_xval": 1.9609375, "num_input_tokens_seen": 173725528, "step": 2623 }, { "epoch": 0.2456123929423878, "grad_norm": 27.83020782470703, "learning_rate": 5e-05, "loss": 1.4457, "num_input_tokens_seen": 173790980, "step": 2624 }, { "epoch": 0.2456123929423878, "loss": 1.4178355932235718, "loss_ce": 0.005726196337491274, "loss_iou": 0.56640625, "loss_num": 0.055908203125, "loss_xval": 1.4140625, "num_input_tokens_seen": 173790980, "step": 2624 }, { "epoch": 0.24570599522628353, "grad_norm": 17.386829376220703, "learning_rate": 5e-05, "loss": 1.1944, "num_input_tokens_seen": 173857956, "step": 2625 }, { "epoch": 0.24570599522628353, "loss": 1.383741855621338, "loss_ce": 0.010206678882241249, "loss_iou": 0.56640625, "loss_num": 0.049072265625, "loss_xval": 1.375, "num_input_tokens_seen": 173857956, "step": 2625 }, { "epoch": 0.24579959751017924, "grad_norm": 22.78047752380371, "learning_rate": 5e-05, "loss": 1.1274, "num_input_tokens_seen": 173923328, "step": 2626 }, { "epoch": 0.24579959751017924, "loss": 1.2062945365905762, "loss_ce": 0.002406663727015257, "loss_iou": 0.50390625, "loss_num": 0.039794921875, "loss_xval": 1.203125, "num_input_tokens_seen": 173923328, "step": 2626 }, { "epoch": 0.24589319979407498, "grad_norm": 19.326377868652344, "learning_rate": 5e-05, "loss": 1.1684, "num_input_tokens_seen": 173990208, "step": 2627 }, { "epoch": 0.24589319979407498, "loss": 1.2504380941390991, "loss_ce": 0.0028795571997761726, "loss_iou": 0.53515625, "loss_num": 0.035888671875, "loss_xval": 1.25, "num_input_tokens_seen": 173990208, "step": 2627 }, { "epoch": 0.2459868020779707, "grad_norm": 63.571311950683594, "learning_rate": 5e-05, "loss": 1.5023, "num_input_tokens_seen": 174055968, "step": 2628 }, { "epoch": 0.2459868020779707, "loss": 1.4654862880706787, "loss_ce": 0.004548734985291958, "loss_iou": 0.62109375, "loss_num": 0.044189453125, "loss_xval": 1.4609375, "num_input_tokens_seen": 174055968, "step": 2628 }, { "epoch": 0.24608040436186643, "grad_norm": 19.17978858947754, "learning_rate": 5e-05, "loss": 1.2832, "num_input_tokens_seen": 174122268, "step": 2629 }, { "epoch": 0.24608040436186643, "loss": 1.0923244953155518, "loss_ce": 0.004433851223438978, "loss_iou": 0.484375, "loss_num": 0.023681640625, "loss_xval": 1.0859375, "num_input_tokens_seen": 174122268, "step": 2629 }, { "epoch": 0.24617400664576217, "grad_norm": 23.298887252807617, "learning_rate": 5e-05, "loss": 1.3279, "num_input_tokens_seen": 174189404, "step": 2630 }, { "epoch": 0.24617400664576217, "loss": 1.3415188789367676, "loss_ce": 0.006069748662412167, "loss_iou": 0.55859375, "loss_num": 0.04443359375, "loss_xval": 1.3359375, "num_input_tokens_seen": 174189404, "step": 2630 }, { "epoch": 0.24626760892965788, "grad_norm": 40.40921401977539, "learning_rate": 5e-05, "loss": 1.6967, "num_input_tokens_seen": 174255792, "step": 2631 }, { "epoch": 0.24626760892965788, "loss": 1.7132081985473633, "loss_ce": 0.009594895876944065, "loss_iou": 0.67578125, "loss_num": 0.0703125, "loss_xval": 1.703125, "num_input_tokens_seen": 174255792, "step": 2631 }, { "epoch": 0.24636121121355362, "grad_norm": 22.798112869262695, "learning_rate": 5e-05, "loss": 1.2834, "num_input_tokens_seen": 174321896, "step": 2632 }, { "epoch": 0.24636121121355362, "loss": 1.1797900199890137, "loss_ce": 0.00478690629824996, "loss_iou": 0.494140625, "loss_num": 0.03759765625, "loss_xval": 1.171875, "num_input_tokens_seen": 174321896, "step": 2632 }, { "epoch": 0.24645481349744933, "grad_norm": 26.252437591552734, "learning_rate": 5e-05, "loss": 1.6086, "num_input_tokens_seen": 174386764, "step": 2633 }, { "epoch": 0.24645481349744933, "loss": 1.3368690013885498, "loss_ce": 0.0036171525716781616, "loss_iou": 0.5625, "loss_num": 0.042236328125, "loss_xval": 1.3359375, "num_input_tokens_seen": 174386764, "step": 2633 }, { "epoch": 0.24654841578134506, "grad_norm": 31.601247787475586, "learning_rate": 5e-05, "loss": 1.4251, "num_input_tokens_seen": 174452944, "step": 2634 }, { "epoch": 0.24654841578134506, "loss": 1.297585129737854, "loss_ce": 0.004616389982402325, "loss_iou": 0.58203125, "loss_num": 0.0255126953125, "loss_xval": 1.296875, "num_input_tokens_seen": 174452944, "step": 2634 }, { "epoch": 0.2466420180652408, "grad_norm": 59.829978942871094, "learning_rate": 5e-05, "loss": 1.5846, "num_input_tokens_seen": 174518836, "step": 2635 }, { "epoch": 0.2466420180652408, "loss": 1.516141653060913, "loss_ce": 0.005399535410106182, "loss_iou": 0.61328125, "loss_num": 0.056396484375, "loss_xval": 1.5078125, "num_input_tokens_seen": 174518836, "step": 2635 }, { "epoch": 0.24673562034913651, "grad_norm": 18.393312454223633, "learning_rate": 5e-05, "loss": 1.2716, "num_input_tokens_seen": 174584940, "step": 2636 }, { "epoch": 0.24673562034913651, "loss": 1.2012829780578613, "loss_ce": 0.007435383275151253, "loss_iou": 0.5234375, "loss_num": 0.0296630859375, "loss_xval": 1.1953125, "num_input_tokens_seen": 174584940, "step": 2636 }, { "epoch": 0.24682922263303225, "grad_norm": 21.422407150268555, "learning_rate": 5e-05, "loss": 1.6451, "num_input_tokens_seen": 174650984, "step": 2637 }, { "epoch": 0.24682922263303225, "loss": 1.4750261306762695, "loss_ce": 0.005299532786011696, "loss_iou": 0.625, "loss_num": 0.04443359375, "loss_xval": 1.46875, "num_input_tokens_seen": 174650984, "step": 2637 }, { "epoch": 0.24692282491692796, "grad_norm": 18.727937698364258, "learning_rate": 5e-05, "loss": 1.3546, "num_input_tokens_seen": 174717668, "step": 2638 }, { "epoch": 0.24692282491692796, "loss": 1.2518789768218994, "loss_ce": 0.007738376036286354, "loss_iou": 0.515625, "loss_num": 0.042236328125, "loss_xval": 1.2421875, "num_input_tokens_seen": 174717668, "step": 2638 }, { "epoch": 0.2470164272008237, "grad_norm": 29.191953659057617, "learning_rate": 5e-05, "loss": 1.5017, "num_input_tokens_seen": 174783284, "step": 2639 }, { "epoch": 0.2470164272008237, "loss": 1.5968594551086426, "loss_ce": 0.007992290891706944, "loss_iou": 0.671875, "loss_num": 0.048828125, "loss_xval": 1.5859375, "num_input_tokens_seen": 174783284, "step": 2639 }, { "epoch": 0.24711002948471944, "grad_norm": 22.436279296875, "learning_rate": 5e-05, "loss": 1.6543, "num_input_tokens_seen": 174849024, "step": 2640 }, { "epoch": 0.24711002948471944, "loss": 1.5690126419067383, "loss_ce": 0.006512564606964588, "loss_iou": 0.61328125, "loss_num": 0.06640625, "loss_xval": 1.5625, "num_input_tokens_seen": 174849024, "step": 2640 }, { "epoch": 0.24720363176861515, "grad_norm": 39.852760314941406, "learning_rate": 5e-05, "loss": 1.4085, "num_input_tokens_seen": 174915840, "step": 2641 }, { "epoch": 0.24720363176861515, "loss": 1.4357869625091553, "loss_ce": 0.004146297927945852, "loss_iou": 0.6171875, "loss_num": 0.0390625, "loss_xval": 1.4296875, "num_input_tokens_seen": 174915840, "step": 2641 }, { "epoch": 0.2472972340525109, "grad_norm": 17.24985122680664, "learning_rate": 5e-05, "loss": 1.623, "num_input_tokens_seen": 174982032, "step": 2642 }, { "epoch": 0.2472972340525109, "loss": 1.6391594409942627, "loss_ce": 0.0063468292355537415, "loss_iou": 0.71484375, "loss_num": 0.04052734375, "loss_xval": 1.6328125, "num_input_tokens_seen": 174982032, "step": 2642 }, { "epoch": 0.2473908363364066, "grad_norm": 22.94183349609375, "learning_rate": 5e-05, "loss": 1.3532, "num_input_tokens_seen": 175048324, "step": 2643 }, { "epoch": 0.2473908363364066, "loss": 1.2797331809997559, "loss_ce": 0.005807357374578714, "loss_iou": 0.5625, "loss_num": 0.030517578125, "loss_xval": 1.2734375, "num_input_tokens_seen": 175048324, "step": 2643 }, { "epoch": 0.24748443862030234, "grad_norm": 33.577816009521484, "learning_rate": 5e-05, "loss": 1.4464, "num_input_tokens_seen": 175114584, "step": 2644 }, { "epoch": 0.24748443862030234, "loss": 1.29689359664917, "loss_ce": 0.004901424515992403, "loss_iou": 0.57421875, "loss_num": 0.028076171875, "loss_xval": 1.2890625, "num_input_tokens_seen": 175114584, "step": 2644 }, { "epoch": 0.24757804090419805, "grad_norm": 20.85687255859375, "learning_rate": 5e-05, "loss": 1.6549, "num_input_tokens_seen": 175180888, "step": 2645 }, { "epoch": 0.24757804090419805, "loss": 1.7518808841705322, "loss_ce": 0.005787082947790623, "loss_iou": 0.71875, "loss_num": 0.060791015625, "loss_xval": 1.75, "num_input_tokens_seen": 175180888, "step": 2645 }, { "epoch": 0.2476716431880938, "grad_norm": 17.017568588256836, "learning_rate": 5e-05, "loss": 1.1824, "num_input_tokens_seen": 175246964, "step": 2646 }, { "epoch": 0.2476716431880938, "loss": 1.1612648963928223, "loss_ce": 0.006479742005467415, "loss_iou": 0.5078125, "loss_num": 0.02734375, "loss_xval": 1.15625, "num_input_tokens_seen": 175246964, "step": 2646 }, { "epoch": 0.24776524547198953, "grad_norm": 27.16044044494629, "learning_rate": 5e-05, "loss": 1.4088, "num_input_tokens_seen": 175313044, "step": 2647 }, { "epoch": 0.24776524547198953, "loss": 1.6399792432785034, "loss_ce": 0.006190221756696701, "loss_iou": 0.66796875, "loss_num": 0.059326171875, "loss_xval": 1.6328125, "num_input_tokens_seen": 175313044, "step": 2647 }, { "epoch": 0.24785884775588524, "grad_norm": 32.581966400146484, "learning_rate": 5e-05, "loss": 1.3722, "num_input_tokens_seen": 175379092, "step": 2648 }, { "epoch": 0.24785884775588524, "loss": 1.215409517288208, "loss_ce": 0.0037396925035864115, "loss_iou": 0.515625, "loss_num": 0.035888671875, "loss_xval": 1.2109375, "num_input_tokens_seen": 175379092, "step": 2648 }, { "epoch": 0.24795245003978097, "grad_norm": 39.34456253051758, "learning_rate": 5e-05, "loss": 1.4922, "num_input_tokens_seen": 175444548, "step": 2649 }, { "epoch": 0.24795245003978097, "loss": 1.4172290563583374, "loss_ce": 0.006126766093075275, "loss_iou": 0.5859375, "loss_num": 0.0478515625, "loss_xval": 1.4140625, "num_input_tokens_seen": 175444548, "step": 2649 }, { "epoch": 0.24804605232367669, "grad_norm": 23.662206649780273, "learning_rate": 5e-05, "loss": 1.2165, "num_input_tokens_seen": 175511196, "step": 2650 }, { "epoch": 0.24804605232367669, "loss": 1.199338436126709, "loss_ce": 0.011350231245160103, "loss_iou": 0.49609375, "loss_num": 0.0390625, "loss_xval": 1.1875, "num_input_tokens_seen": 175511196, "step": 2650 }, { "epoch": 0.24813965460757242, "grad_norm": 25.820735931396484, "learning_rate": 5e-05, "loss": 1.6155, "num_input_tokens_seen": 175577820, "step": 2651 }, { "epoch": 0.24813965460757242, "loss": 1.6761157512664795, "loss_ce": 0.010588502511382103, "loss_iou": 0.6484375, "loss_num": 0.07373046875, "loss_xval": 1.6640625, "num_input_tokens_seen": 175577820, "step": 2651 }, { "epoch": 0.24823325689146816, "grad_norm": 31.59154510498047, "learning_rate": 5e-05, "loss": 1.2588, "num_input_tokens_seen": 175644212, "step": 2652 }, { "epoch": 0.24823325689146816, "loss": 1.0714844465255737, "loss_ce": 0.0028809530194848776, "loss_iou": 0.45703125, "loss_num": 0.03076171875, "loss_xval": 1.0703125, "num_input_tokens_seen": 175644212, "step": 2652 }, { "epoch": 0.24832685917536387, "grad_norm": 20.59835433959961, "learning_rate": 5e-05, "loss": 1.7055, "num_input_tokens_seen": 175710016, "step": 2653 }, { "epoch": 0.24832685917536387, "loss": 1.7338181734085083, "loss_ce": 0.0053025586530566216, "loss_iou": 0.70703125, "loss_num": 0.06298828125, "loss_xval": 1.7265625, "num_input_tokens_seen": 175710016, "step": 2653 }, { "epoch": 0.2484204614592596, "grad_norm": 62.92511749267578, "learning_rate": 5e-05, "loss": 1.279, "num_input_tokens_seen": 175775460, "step": 2654 }, { "epoch": 0.2484204614592596, "loss": 1.4731266498565674, "loss_ce": 0.0038884193636476994, "loss_iou": 0.61328125, "loss_num": 0.04833984375, "loss_xval": 1.46875, "num_input_tokens_seen": 175775460, "step": 2654 }, { "epoch": 0.24851406374315532, "grad_norm": 27.96456527709961, "learning_rate": 5e-05, "loss": 1.3247, "num_input_tokens_seen": 175841836, "step": 2655 }, { "epoch": 0.24851406374315532, "loss": 1.4556419849395752, "loss_ce": 0.004470153711736202, "loss_iou": 0.6171875, "loss_num": 0.042724609375, "loss_xval": 1.453125, "num_input_tokens_seen": 175841836, "step": 2655 }, { "epoch": 0.24860766602705106, "grad_norm": 20.63963508605957, "learning_rate": 5e-05, "loss": 1.2664, "num_input_tokens_seen": 175907648, "step": 2656 }, { "epoch": 0.24860766602705106, "loss": 1.1753497123718262, "loss_ce": 0.0049395374953746796, "loss_iou": 0.4609375, "loss_num": 0.050048828125, "loss_xval": 1.171875, "num_input_tokens_seen": 175907648, "step": 2656 }, { "epoch": 0.2487012683109468, "grad_norm": 28.03647804260254, "learning_rate": 5e-05, "loss": 1.4001, "num_input_tokens_seen": 175974800, "step": 2657 }, { "epoch": 0.2487012683109468, "loss": 1.455979824066162, "loss_ce": 0.006761068478226662, "loss_iou": 0.58984375, "loss_num": 0.0537109375, "loss_xval": 1.453125, "num_input_tokens_seen": 175974800, "step": 2657 }, { "epoch": 0.2487948705948425, "grad_norm": 20.680822372436523, "learning_rate": 5e-05, "loss": 1.5617, "num_input_tokens_seen": 176040236, "step": 2658 }, { "epoch": 0.2487948705948425, "loss": 1.7301642894744873, "loss_ce": 0.0045783137902617455, "loss_iou": 0.6953125, "loss_num": 0.06689453125, "loss_xval": 1.7265625, "num_input_tokens_seen": 176040236, "step": 2658 }, { "epoch": 0.24888847287873825, "grad_norm": 14.060856819152832, "learning_rate": 5e-05, "loss": 1.2418, "num_input_tokens_seen": 176106048, "step": 2659 }, { "epoch": 0.24888847287873825, "loss": 1.3035095930099487, "loss_ce": 0.00468143867328763, "loss_iou": 0.5390625, "loss_num": 0.043701171875, "loss_xval": 1.296875, "num_input_tokens_seen": 176106048, "step": 2659 }, { "epoch": 0.24898207516263396, "grad_norm": 24.060583114624023, "learning_rate": 5e-05, "loss": 1.6085, "num_input_tokens_seen": 176172460, "step": 2660 }, { "epoch": 0.24898207516263396, "loss": 1.6445289850234985, "loss_ce": 0.007321933750063181, "loss_iou": 0.6796875, "loss_num": 0.05615234375, "loss_xval": 1.640625, "num_input_tokens_seen": 176172460, "step": 2660 }, { "epoch": 0.2490756774465297, "grad_norm": 17.083534240722656, "learning_rate": 5e-05, "loss": 1.5867, "num_input_tokens_seen": 176237648, "step": 2661 }, { "epoch": 0.2490756774465297, "loss": 1.48751962184906, "loss_ce": 0.008515719324350357, "loss_iou": 0.5625, "loss_num": 0.0703125, "loss_xval": 1.4765625, "num_input_tokens_seen": 176237648, "step": 2661 }, { "epoch": 0.24916927973042544, "grad_norm": 18.185260772705078, "learning_rate": 5e-05, "loss": 1.3118, "num_input_tokens_seen": 176303192, "step": 2662 }, { "epoch": 0.24916927973042544, "loss": 1.2686445713043213, "loss_ce": 0.002531296107918024, "loss_iou": 0.546875, "loss_num": 0.03466796875, "loss_xval": 1.265625, "num_input_tokens_seen": 176303192, "step": 2662 }, { "epoch": 0.24926288201432115, "grad_norm": 37.6169319152832, "learning_rate": 5e-05, "loss": 1.2758, "num_input_tokens_seen": 176368208, "step": 2663 }, { "epoch": 0.24926288201432115, "loss": 1.2598035335540771, "loss_ce": 0.003944148309528828, "loss_iou": 0.51953125, "loss_num": 0.04296875, "loss_xval": 1.2578125, "num_input_tokens_seen": 176368208, "step": 2663 }, { "epoch": 0.24935648429821688, "grad_norm": 21.37662696838379, "learning_rate": 5e-05, "loss": 1.4104, "num_input_tokens_seen": 176434056, "step": 2664 }, { "epoch": 0.24935648429821688, "loss": 1.366624116897583, "loss_ce": 0.0018779993988573551, "loss_iou": 0.5625, "loss_num": 0.04736328125, "loss_xval": 1.3671875, "num_input_tokens_seen": 176434056, "step": 2664 }, { "epoch": 0.2494500865821126, "grad_norm": 29.62290382385254, "learning_rate": 5e-05, "loss": 1.2874, "num_input_tokens_seen": 176500160, "step": 2665 }, { "epoch": 0.2494500865821126, "loss": 1.2492676973342896, "loss_ce": 0.00415048748254776, "loss_iou": 0.50390625, "loss_num": 0.047119140625, "loss_xval": 1.2421875, "num_input_tokens_seen": 176500160, "step": 2665 }, { "epoch": 0.24954368886600833, "grad_norm": 37.912166595458984, "learning_rate": 5e-05, "loss": 1.4483, "num_input_tokens_seen": 176567612, "step": 2666 }, { "epoch": 0.24954368886600833, "loss": 1.4608955383300781, "loss_ce": 0.00386425806209445, "loss_iou": 0.61328125, "loss_num": 0.04541015625, "loss_xval": 1.453125, "num_input_tokens_seen": 176567612, "step": 2666 }, { "epoch": 0.24963729114990404, "grad_norm": 27.29871368408203, "learning_rate": 5e-05, "loss": 1.7305, "num_input_tokens_seen": 176633248, "step": 2667 }, { "epoch": 0.24963729114990404, "loss": 1.5257866382598877, "loss_ce": 0.006255402695387602, "loss_iou": 0.66796875, "loss_num": 0.03662109375, "loss_xval": 1.515625, "num_input_tokens_seen": 176633248, "step": 2667 }, { "epoch": 0.24973089343379978, "grad_norm": 26.80462646484375, "learning_rate": 5e-05, "loss": 1.5406, "num_input_tokens_seen": 176699628, "step": 2668 }, { "epoch": 0.24973089343379978, "loss": 1.4624890089035034, "loss_ce": 0.003504673484712839, "loss_iou": 0.609375, "loss_num": 0.048095703125, "loss_xval": 1.4609375, "num_input_tokens_seen": 176699628, "step": 2668 }, { "epoch": 0.24982449571769552, "grad_norm": 25.51641845703125, "learning_rate": 5e-05, "loss": 1.2969, "num_input_tokens_seen": 176765148, "step": 2669 }, { "epoch": 0.24982449571769552, "loss": 1.2877111434936523, "loss_ce": 0.0035315491259098053, "loss_iou": 0.52734375, "loss_num": 0.046142578125, "loss_xval": 1.28125, "num_input_tokens_seen": 176765148, "step": 2669 }, { "epoch": 0.24991809800159123, "grad_norm": 24.73008918762207, "learning_rate": 5e-05, "loss": 1.4034, "num_input_tokens_seen": 176832276, "step": 2670 }, { "epoch": 0.24991809800159123, "loss": 1.4069923162460327, "loss_ce": 0.00367200979962945, "loss_iou": 0.578125, "loss_num": 0.049560546875, "loss_xval": 1.40625, "num_input_tokens_seen": 176832276, "step": 2670 }, { "epoch": 0.25001170028548697, "grad_norm": 19.667558670043945, "learning_rate": 5e-05, "loss": 1.3253, "num_input_tokens_seen": 176899092, "step": 2671 }, { "epoch": 0.25001170028548697, "loss": 1.4717576503753662, "loss_ce": 0.0098434928804636, "loss_iou": 0.5859375, "loss_num": 0.0576171875, "loss_xval": 1.4609375, "num_input_tokens_seen": 176899092, "step": 2671 }, { "epoch": 0.2501053025693827, "grad_norm": 38.891414642333984, "learning_rate": 5e-05, "loss": 1.25, "num_input_tokens_seen": 176964424, "step": 2672 }, { "epoch": 0.2501053025693827, "loss": 1.4178887605667114, "loss_ce": 0.0038262358866631985, "loss_iou": 0.6171875, "loss_num": 0.0361328125, "loss_xval": 1.4140625, "num_input_tokens_seen": 176964424, "step": 2672 }, { "epoch": 0.25019890485327845, "grad_norm": 28.1275577545166, "learning_rate": 5e-05, "loss": 1.35, "num_input_tokens_seen": 177029964, "step": 2673 }, { "epoch": 0.25019890485327845, "loss": 1.209371566772461, "loss_ce": 0.003805126529186964, "loss_iou": 0.5078125, "loss_num": 0.03759765625, "loss_xval": 1.203125, "num_input_tokens_seen": 177029964, "step": 2673 }, { "epoch": 0.25029250713717416, "grad_norm": 21.706510543823242, "learning_rate": 5e-05, "loss": 1.477, "num_input_tokens_seen": 177096660, "step": 2674 }, { "epoch": 0.25029250713717416, "loss": 1.4117348194122314, "loss_ce": 0.0035316282883286476, "loss_iou": 0.609375, "loss_num": 0.03857421875, "loss_xval": 1.40625, "num_input_tokens_seen": 177096660, "step": 2674 }, { "epoch": 0.25038610942106987, "grad_norm": 28.70165252685547, "learning_rate": 5e-05, "loss": 1.3448, "num_input_tokens_seen": 177163348, "step": 2675 }, { "epoch": 0.25038610942106987, "loss": 1.3969063758850098, "loss_ce": 0.007257949095219374, "loss_iou": 0.578125, "loss_num": 0.046142578125, "loss_xval": 1.390625, "num_input_tokens_seen": 177163348, "step": 2675 }, { "epoch": 0.2504797117049656, "grad_norm": 16.7537841796875, "learning_rate": 5e-05, "loss": 1.32, "num_input_tokens_seen": 177230340, "step": 2676 }, { "epoch": 0.2504797117049656, "loss": 1.1728780269622803, "loss_ce": 0.0053975507616996765, "loss_iou": 0.51171875, "loss_num": 0.029052734375, "loss_xval": 1.1640625, "num_input_tokens_seen": 177230340, "step": 2676 }, { "epoch": 0.25057331398886135, "grad_norm": 16.73251724243164, "learning_rate": 5e-05, "loss": 1.2454, "num_input_tokens_seen": 177296248, "step": 2677 }, { "epoch": 0.25057331398886135, "loss": 0.9803268909454346, "loss_ce": 0.0032761115580797195, "loss_iou": 0.388671875, "loss_num": 0.0400390625, "loss_xval": 0.9765625, "num_input_tokens_seen": 177296248, "step": 2677 }, { "epoch": 0.25066691627275706, "grad_norm": 36.54281234741211, "learning_rate": 5e-05, "loss": 1.6695, "num_input_tokens_seen": 177362724, "step": 2678 }, { "epoch": 0.25066691627275706, "loss": 1.7774443626403809, "loss_ce": 0.004983500111848116, "loss_iou": 0.72265625, "loss_num": 0.0654296875, "loss_xval": 1.7734375, "num_input_tokens_seen": 177362724, "step": 2678 }, { "epoch": 0.25076051855665277, "grad_norm": 20.147615432739258, "learning_rate": 5e-05, "loss": 1.416, "num_input_tokens_seen": 177427576, "step": 2679 }, { "epoch": 0.25076051855665277, "loss": 1.381352424621582, "loss_ce": 0.009282177314162254, "loss_iou": 0.5703125, "loss_num": 0.046630859375, "loss_xval": 1.375, "num_input_tokens_seen": 177427576, "step": 2679 }, { "epoch": 0.25085412084054853, "grad_norm": 52.44255065917969, "learning_rate": 5e-05, "loss": 1.335, "num_input_tokens_seen": 177494076, "step": 2680 }, { "epoch": 0.25085412084054853, "loss": 1.235190510749817, "loss_ce": 0.007163163274526596, "loss_iou": 0.4921875, "loss_num": 0.04833984375, "loss_xval": 1.2265625, "num_input_tokens_seen": 177494076, "step": 2680 }, { "epoch": 0.25094772312444424, "grad_norm": 23.108659744262695, "learning_rate": 5e-05, "loss": 1.303, "num_input_tokens_seen": 177559392, "step": 2681 }, { "epoch": 0.25094772312444424, "loss": 1.3295401334762573, "loss_ce": 0.007030394859611988, "loss_iou": 0.490234375, "loss_num": 0.068359375, "loss_xval": 1.3203125, "num_input_tokens_seen": 177559392, "step": 2681 }, { "epoch": 0.25104132540833995, "grad_norm": 46.50721740722656, "learning_rate": 5e-05, "loss": 1.5887, "num_input_tokens_seen": 177625180, "step": 2682 }, { "epoch": 0.25104132540833995, "loss": 1.9244065284729004, "loss_ce": 0.007414279039949179, "loss_iou": 0.7890625, "loss_num": 0.06689453125, "loss_xval": 1.9140625, "num_input_tokens_seen": 177625180, "step": 2682 }, { "epoch": 0.25113492769223567, "grad_norm": 20.515562057495117, "learning_rate": 5e-05, "loss": 1.9332, "num_input_tokens_seen": 177691284, "step": 2683 }, { "epoch": 0.25113492769223567, "loss": 2.0065956115722656, "loss_ce": 0.004642449785023928, "loss_iou": 0.82421875, "loss_num": 0.06982421875, "loss_xval": 2.0, "num_input_tokens_seen": 177691284, "step": 2683 }, { "epoch": 0.25122852997613143, "grad_norm": 29.184473037719727, "learning_rate": 5e-05, "loss": 1.2766, "num_input_tokens_seen": 177757404, "step": 2684 }, { "epoch": 0.25122852997613143, "loss": 1.3202893733978271, "loss_ce": 0.0024182756897062063, "loss_iou": 0.546875, "loss_num": 0.045654296875, "loss_xval": 1.3203125, "num_input_tokens_seen": 177757404, "step": 2684 }, { "epoch": 0.25132213226002714, "grad_norm": 29.344213485717773, "learning_rate": 5e-05, "loss": 1.4858, "num_input_tokens_seen": 177822436, "step": 2685 }, { "epoch": 0.25132213226002714, "loss": 1.5259044170379639, "loss_ce": 0.006861462257802486, "loss_iou": 0.62109375, "loss_num": 0.05517578125, "loss_xval": 1.515625, "num_input_tokens_seen": 177822436, "step": 2685 }, { "epoch": 0.25141573454392285, "grad_norm": 23.936859130859375, "learning_rate": 5e-05, "loss": 1.55, "num_input_tokens_seen": 177887576, "step": 2686 }, { "epoch": 0.25141573454392285, "loss": 1.5547282695770264, "loss_ce": 0.006876666098833084, "loss_iou": 0.59375, "loss_num": 0.0712890625, "loss_xval": 1.546875, "num_input_tokens_seen": 177887576, "step": 2686 }, { "epoch": 0.2515093368278186, "grad_norm": 23.946048736572266, "learning_rate": 5e-05, "loss": 1.2561, "num_input_tokens_seen": 177953140, "step": 2687 }, { "epoch": 0.2515093368278186, "loss": 1.2796629667282104, "loss_ce": 0.0067137982696294785, "loss_iou": 0.51171875, "loss_num": 0.049560546875, "loss_xval": 1.2734375, "num_input_tokens_seen": 177953140, "step": 2687 }, { "epoch": 0.25160293911171433, "grad_norm": 21.224424362182617, "learning_rate": 5e-05, "loss": 1.1258, "num_input_tokens_seen": 178019340, "step": 2688 }, { "epoch": 0.25160293911171433, "loss": 1.2026559114456177, "loss_ce": 0.006366831250488758, "loss_iou": 0.427734375, "loss_num": 0.06787109375, "loss_xval": 1.1953125, "num_input_tokens_seen": 178019340, "step": 2688 }, { "epoch": 0.25169654139561004, "grad_norm": 26.06874656677246, "learning_rate": 5e-05, "loss": 1.326, "num_input_tokens_seen": 178085288, "step": 2689 }, { "epoch": 0.25169654139561004, "loss": 1.1321706771850586, "loss_ce": 0.006194021087139845, "loss_iou": 0.46875, "loss_num": 0.0380859375, "loss_xval": 1.125, "num_input_tokens_seen": 178085288, "step": 2689 }, { "epoch": 0.2517901436795058, "grad_norm": 38.97039031982422, "learning_rate": 5e-05, "loss": 1.5214, "num_input_tokens_seen": 178151204, "step": 2690 }, { "epoch": 0.2517901436795058, "loss": 1.8669145107269287, "loss_ce": 0.004609842784702778, "loss_iou": 0.74609375, "loss_num": 0.07373046875, "loss_xval": 1.859375, "num_input_tokens_seen": 178151204, "step": 2690 }, { "epoch": 0.2518837459634015, "grad_norm": 22.12947654724121, "learning_rate": 5e-05, "loss": 1.4491, "num_input_tokens_seen": 178218592, "step": 2691 }, { "epoch": 0.2518837459634015, "loss": 1.5539872646331787, "loss_ce": 0.007600470911711454, "loss_iou": 0.65234375, "loss_num": 0.048095703125, "loss_xval": 1.546875, "num_input_tokens_seen": 178218592, "step": 2691 }, { "epoch": 0.25197734824729723, "grad_norm": 17.946393966674805, "learning_rate": 5e-05, "loss": 1.4946, "num_input_tokens_seen": 178284356, "step": 2692 }, { "epoch": 0.25197734824729723, "loss": 1.4267549514770508, "loss_ce": 0.009762648493051529, "loss_iou": 0.609375, "loss_num": 0.0400390625, "loss_xval": 1.4140625, "num_input_tokens_seen": 178284356, "step": 2692 }, { "epoch": 0.25207095053119294, "grad_norm": 29.452123641967773, "learning_rate": 5e-05, "loss": 1.4284, "num_input_tokens_seen": 178351192, "step": 2693 }, { "epoch": 0.25207095053119294, "loss": 1.3941285610198975, "loss_ce": 0.005944958887994289, "loss_iou": 0.5390625, "loss_num": 0.0615234375, "loss_xval": 1.390625, "num_input_tokens_seen": 178351192, "step": 2693 }, { "epoch": 0.2521645528150887, "grad_norm": 22.85008430480957, "learning_rate": 5e-05, "loss": 1.5017, "num_input_tokens_seen": 178417352, "step": 2694 }, { "epoch": 0.2521645528150887, "loss": 1.587036371231079, "loss_ce": 0.007446564733982086, "loss_iou": 0.6328125, "loss_num": 0.0625, "loss_xval": 1.578125, "num_input_tokens_seen": 178417352, "step": 2694 }, { "epoch": 0.2522581550989844, "grad_norm": 16.949966430664062, "learning_rate": 5e-05, "loss": 1.555, "num_input_tokens_seen": 178483176, "step": 2695 }, { "epoch": 0.2522581550989844, "loss": 1.4507124423980713, "loss_ce": 0.010282731615006924, "loss_iou": 0.60546875, "loss_num": 0.04638671875, "loss_xval": 1.4375, "num_input_tokens_seen": 178483176, "step": 2695 }, { "epoch": 0.2523517573828801, "grad_norm": 25.912893295288086, "learning_rate": 5e-05, "loss": 1.3964, "num_input_tokens_seen": 178549464, "step": 2696 }, { "epoch": 0.2523517573828801, "loss": 1.68153977394104, "loss_ce": 0.009664775803685188, "loss_iou": 0.68359375, "loss_num": 0.060791015625, "loss_xval": 1.671875, "num_input_tokens_seen": 178549464, "step": 2696 }, { "epoch": 0.2524453596667759, "grad_norm": 27.0345516204834, "learning_rate": 5e-05, "loss": 1.45, "num_input_tokens_seen": 178615592, "step": 2697 }, { "epoch": 0.2524453596667759, "loss": 1.2778229713439941, "loss_ce": 0.00438541267067194, "loss_iou": 0.5078125, "loss_num": 0.0517578125, "loss_xval": 1.2734375, "num_input_tokens_seen": 178615592, "step": 2697 }, { "epoch": 0.2525389619506716, "grad_norm": 18.903898239135742, "learning_rate": 5e-05, "loss": 1.3468, "num_input_tokens_seen": 178682416, "step": 2698 }, { "epoch": 0.2525389619506716, "loss": 1.453385591506958, "loss_ce": 0.0051433793269097805, "loss_iou": 0.62890625, "loss_num": 0.03857421875, "loss_xval": 1.4453125, "num_input_tokens_seen": 178682416, "step": 2698 }, { "epoch": 0.2526325642345673, "grad_norm": 82.03213500976562, "learning_rate": 5e-05, "loss": 1.2916, "num_input_tokens_seen": 178748840, "step": 2699 }, { "epoch": 0.2526325642345673, "loss": 1.226028561592102, "loss_ce": 0.003860627766698599, "loss_iou": 0.4765625, "loss_num": 0.053466796875, "loss_xval": 1.21875, "num_input_tokens_seen": 178748840, "step": 2699 }, { "epoch": 0.252726166518463, "grad_norm": 21.426063537597656, "learning_rate": 5e-05, "loss": 1.2913, "num_input_tokens_seen": 178815888, "step": 2700 }, { "epoch": 0.252726166518463, "loss": 1.0827887058258057, "loss_ce": 0.002222394570708275, "loss_iou": 0.458984375, "loss_num": 0.032470703125, "loss_xval": 1.078125, "num_input_tokens_seen": 178815888, "step": 2700 }, { "epoch": 0.2528197688023588, "grad_norm": 35.58570861816406, "learning_rate": 5e-05, "loss": 1.3755, "num_input_tokens_seen": 178882084, "step": 2701 }, { "epoch": 0.2528197688023588, "loss": 1.2529616355895996, "loss_ce": 0.001496872864663601, "loss_iou": 0.54296875, "loss_num": 0.03271484375, "loss_xval": 1.25, "num_input_tokens_seen": 178882084, "step": 2701 }, { "epoch": 0.2529133710862545, "grad_norm": 17.67931365966797, "learning_rate": 5e-05, "loss": 1.7459, "num_input_tokens_seen": 178948252, "step": 2702 }, { "epoch": 0.2529133710862545, "loss": 1.6037399768829346, "loss_ce": 0.004130593966692686, "loss_iou": 0.68359375, "loss_num": 0.046142578125, "loss_xval": 1.6015625, "num_input_tokens_seen": 178948252, "step": 2702 }, { "epoch": 0.2530069733701502, "grad_norm": 17.60394859313965, "learning_rate": 5e-05, "loss": 1.4507, "num_input_tokens_seen": 179014804, "step": 2703 }, { "epoch": 0.2530069733701502, "loss": 1.5327136516571045, "loss_ce": 0.002928511705249548, "loss_iou": 0.59375, "loss_num": 0.06884765625, "loss_xval": 1.53125, "num_input_tokens_seen": 179014804, "step": 2703 }, { "epoch": 0.253100575654046, "grad_norm": 17.0250244140625, "learning_rate": 5e-05, "loss": 1.4212, "num_input_tokens_seen": 179081268, "step": 2704 }, { "epoch": 0.253100575654046, "loss": 1.2025288343429565, "loss_ce": 0.00331004592590034, "loss_iou": 0.48046875, "loss_num": 0.04736328125, "loss_xval": 1.203125, "num_input_tokens_seen": 179081268, "step": 2704 }, { "epoch": 0.2531941779379417, "grad_norm": 33.405540466308594, "learning_rate": 5e-05, "loss": 1.4586, "num_input_tokens_seen": 179147568, "step": 2705 }, { "epoch": 0.2531941779379417, "loss": 1.2397079467773438, "loss_ce": 0.0077743493020534515, "loss_iou": 0.427734375, "loss_num": 0.0751953125, "loss_xval": 1.234375, "num_input_tokens_seen": 179147568, "step": 2705 }, { "epoch": 0.2532877802218374, "grad_norm": 21.995677947998047, "learning_rate": 5e-05, "loss": 1.4927, "num_input_tokens_seen": 179212900, "step": 2706 }, { "epoch": 0.2532877802218374, "loss": 1.6095725297927856, "loss_ce": 0.008498257957398891, "loss_iou": 0.65234375, "loss_num": 0.0595703125, "loss_xval": 1.6015625, "num_input_tokens_seen": 179212900, "step": 2706 }, { "epoch": 0.25338138250573317, "grad_norm": 35.696563720703125, "learning_rate": 5e-05, "loss": 1.3146, "num_input_tokens_seen": 179279756, "step": 2707 }, { "epoch": 0.25338138250573317, "loss": 1.4066588878631592, "loss_ce": 0.0043151117861270905, "loss_iou": 0.6171875, "loss_num": 0.033447265625, "loss_xval": 1.40625, "num_input_tokens_seen": 179279756, "step": 2707 }, { "epoch": 0.2534749847896289, "grad_norm": 18.243179321289062, "learning_rate": 5e-05, "loss": 1.611, "num_input_tokens_seen": 179346584, "step": 2708 }, { "epoch": 0.2534749847896289, "loss": 1.4987397193908691, "loss_ce": 0.003622512798756361, "loss_iou": 0.6484375, "loss_num": 0.03955078125, "loss_xval": 1.4921875, "num_input_tokens_seen": 179346584, "step": 2708 }, { "epoch": 0.2535685870735246, "grad_norm": 19.568614959716797, "learning_rate": 5e-05, "loss": 1.3211, "num_input_tokens_seen": 179412188, "step": 2709 }, { "epoch": 0.2535685870735246, "loss": 1.2492930889129639, "loss_ce": 0.004664166830480099, "loss_iou": 0.515625, "loss_num": 0.043212890625, "loss_xval": 1.2421875, "num_input_tokens_seen": 179412188, "step": 2709 }, { "epoch": 0.2536621893574203, "grad_norm": 37.28907775878906, "learning_rate": 5e-05, "loss": 1.1901, "num_input_tokens_seen": 179477196, "step": 2710 }, { "epoch": 0.2536621893574203, "loss": 1.176241159439087, "loss_ce": 0.0024130609817802906, "loss_iou": 0.486328125, "loss_num": 0.0400390625, "loss_xval": 1.171875, "num_input_tokens_seen": 179477196, "step": 2710 }, { "epoch": 0.25375579164131606, "grad_norm": 21.42214012145996, "learning_rate": 5e-05, "loss": 1.6679, "num_input_tokens_seen": 179542548, "step": 2711 }, { "epoch": 0.25375579164131606, "loss": 1.4606550931930542, "loss_ce": 0.0041120825335383415, "loss_iou": 0.5703125, "loss_num": 0.06298828125, "loss_xval": 1.453125, "num_input_tokens_seen": 179542548, "step": 2711 }, { "epoch": 0.2538493939252118, "grad_norm": 31.344505310058594, "learning_rate": 5e-05, "loss": 1.3836, "num_input_tokens_seen": 179609724, "step": 2712 }, { "epoch": 0.2538493939252118, "loss": 1.4715662002563477, "loss_ce": 0.007699033711105585, "loss_iou": 0.59765625, "loss_num": 0.0537109375, "loss_xval": 1.4609375, "num_input_tokens_seen": 179609724, "step": 2712 }, { "epoch": 0.2539429962091075, "grad_norm": 36.948455810546875, "learning_rate": 5e-05, "loss": 1.5696, "num_input_tokens_seen": 179676504, "step": 2713 }, { "epoch": 0.2539429962091075, "loss": 1.5433683395385742, "loss_ce": 0.003329339437186718, "loss_iou": 0.66796875, "loss_num": 0.04052734375, "loss_xval": 1.5390625, "num_input_tokens_seen": 179676504, "step": 2713 }, { "epoch": 0.25403659849300325, "grad_norm": 18.40770721435547, "learning_rate": 5e-05, "loss": 1.4374, "num_input_tokens_seen": 179742200, "step": 2714 }, { "epoch": 0.25403659849300325, "loss": 1.5147056579589844, "loss_ce": 0.0020102495327591896, "loss_iou": 0.6328125, "loss_num": 0.04931640625, "loss_xval": 1.515625, "num_input_tokens_seen": 179742200, "step": 2714 }, { "epoch": 0.25413020077689896, "grad_norm": 17.650564193725586, "learning_rate": 5e-05, "loss": 1.2574, "num_input_tokens_seen": 179807896, "step": 2715 }, { "epoch": 0.25413020077689896, "loss": 1.2243345975875854, "loss_ce": 0.0050962925888597965, "loss_iou": 0.52734375, "loss_num": 0.0322265625, "loss_xval": 1.21875, "num_input_tokens_seen": 179807896, "step": 2715 }, { "epoch": 0.2542238030607947, "grad_norm": 22.725345611572266, "learning_rate": 5e-05, "loss": 1.1883, "num_input_tokens_seen": 179873904, "step": 2716 }, { "epoch": 0.2542238030607947, "loss": 1.20273756980896, "loss_ce": 0.008310049772262573, "loss_iou": 0.458984375, "loss_num": 0.054931640625, "loss_xval": 1.1953125, "num_input_tokens_seen": 179873904, "step": 2716 }, { "epoch": 0.2543174053446904, "grad_norm": 36.27304458618164, "learning_rate": 5e-05, "loss": 1.4144, "num_input_tokens_seen": 179941544, "step": 2717 }, { "epoch": 0.2543174053446904, "loss": 1.3572330474853516, "loss_ce": 0.004693983122706413, "loss_iou": 0.59765625, "loss_num": 0.0311279296875, "loss_xval": 1.3515625, "num_input_tokens_seen": 179941544, "step": 2717 }, { "epoch": 0.25441100762858615, "grad_norm": 23.77774429321289, "learning_rate": 5e-05, "loss": 1.4283, "num_input_tokens_seen": 180007776, "step": 2718 }, { "epoch": 0.25441100762858615, "loss": 1.396384596824646, "loss_ce": 0.009177593514323235, "loss_iou": 0.58984375, "loss_num": 0.041015625, "loss_xval": 1.390625, "num_input_tokens_seen": 180007776, "step": 2718 }, { "epoch": 0.25450460991248186, "grad_norm": 16.692039489746094, "learning_rate": 5e-05, "loss": 1.4522, "num_input_tokens_seen": 180074020, "step": 2719 }, { "epoch": 0.25450460991248186, "loss": 1.4519904851913452, "loss_ce": 0.0047248369082808495, "loss_iou": 0.5859375, "loss_num": 0.05517578125, "loss_xval": 1.4453125, "num_input_tokens_seen": 180074020, "step": 2719 }, { "epoch": 0.25459821219637757, "grad_norm": 19.383583068847656, "learning_rate": 5e-05, "loss": 1.2771, "num_input_tokens_seen": 180139932, "step": 2720 }, { "epoch": 0.25459821219637757, "loss": 1.0887398719787598, "loss_ce": 0.005243788007646799, "loss_iou": 0.443359375, "loss_num": 0.0390625, "loss_xval": 1.0859375, "num_input_tokens_seen": 180139932, "step": 2720 }, { "epoch": 0.25469181448027334, "grad_norm": 18.619901657104492, "learning_rate": 5e-05, "loss": 1.3926, "num_input_tokens_seen": 180205940, "step": 2721 }, { "epoch": 0.25469181448027334, "loss": 1.3840503692626953, "loss_ce": 0.004167487379163504, "loss_iou": 0.52734375, "loss_num": 0.0654296875, "loss_xval": 1.3828125, "num_input_tokens_seen": 180205940, "step": 2721 }, { "epoch": 0.25478541676416905, "grad_norm": 31.04913330078125, "learning_rate": 5e-05, "loss": 1.5844, "num_input_tokens_seen": 180272048, "step": 2722 }, { "epoch": 0.25478541676416905, "loss": 1.5141956806182861, "loss_ce": 0.006871445570141077, "loss_iou": 0.62890625, "loss_num": 0.05029296875, "loss_xval": 1.5078125, "num_input_tokens_seen": 180272048, "step": 2722 }, { "epoch": 0.25487901904806476, "grad_norm": 43.517356872558594, "learning_rate": 5e-05, "loss": 1.3605, "num_input_tokens_seen": 180338248, "step": 2723 }, { "epoch": 0.25487901904806476, "loss": 1.1855039596557617, "loss_ce": 0.0045347800478339195, "loss_iou": 0.484375, "loss_num": 0.04296875, "loss_xval": 1.1796875, "num_input_tokens_seen": 180338248, "step": 2723 }, { "epoch": 0.2549726213319605, "grad_norm": 25.927207946777344, "learning_rate": 5e-05, "loss": 1.4294, "num_input_tokens_seen": 180403364, "step": 2724 }, { "epoch": 0.2549726213319605, "loss": 1.2973136901855469, "loss_ce": 0.00727462861686945, "loss_iou": 0.54296875, "loss_num": 0.040283203125, "loss_xval": 1.2890625, "num_input_tokens_seen": 180403364, "step": 2724 }, { "epoch": 0.25506622361585624, "grad_norm": 40.147369384765625, "learning_rate": 5e-05, "loss": 1.2546, "num_input_tokens_seen": 180470304, "step": 2725 }, { "epoch": 0.25506622361585624, "loss": 0.9937068223953247, "loss_ce": 0.0034723724238574505, "loss_iou": 0.392578125, "loss_num": 0.04052734375, "loss_xval": 0.9921875, "num_input_tokens_seen": 180470304, "step": 2725 }, { "epoch": 0.25515982589975195, "grad_norm": 12.200100898742676, "learning_rate": 5e-05, "loss": 1.5515, "num_input_tokens_seen": 180536584, "step": 2726 }, { "epoch": 0.25515982589975195, "loss": 1.5923805236816406, "loss_ce": 0.002536846324801445, "loss_iou": 0.65625, "loss_num": 0.0556640625, "loss_xval": 1.59375, "num_input_tokens_seen": 180536584, "step": 2726 }, { "epoch": 0.25525342818364766, "grad_norm": 35.998226165771484, "learning_rate": 5e-05, "loss": 1.3385, "num_input_tokens_seen": 180603080, "step": 2727 }, { "epoch": 0.25525342818364766, "loss": 1.5598630905151367, "loss_ce": 0.005175570957362652, "loss_iou": 0.609375, "loss_num": 0.06689453125, "loss_xval": 1.5546875, "num_input_tokens_seen": 180603080, "step": 2727 }, { "epoch": 0.2553470304675434, "grad_norm": 42.222957611083984, "learning_rate": 5e-05, "loss": 1.4882, "num_input_tokens_seen": 180669728, "step": 2728 }, { "epoch": 0.2553470304675434, "loss": 1.555783748626709, "loss_ce": 0.0069555919617414474, "loss_iou": 0.6484375, "loss_num": 0.050537109375, "loss_xval": 1.546875, "num_input_tokens_seen": 180669728, "step": 2728 }, { "epoch": 0.25544063275143913, "grad_norm": 24.986804962158203, "learning_rate": 5e-05, "loss": 1.6722, "num_input_tokens_seen": 180736492, "step": 2729 }, { "epoch": 0.25544063275143913, "loss": 1.943702220916748, "loss_ce": 0.002295971615239978, "loss_iou": 0.8203125, "loss_num": 0.06103515625, "loss_xval": 1.9375, "num_input_tokens_seen": 180736492, "step": 2729 }, { "epoch": 0.25553423503533484, "grad_norm": 26.335451126098633, "learning_rate": 5e-05, "loss": 1.2464, "num_input_tokens_seen": 180802144, "step": 2730 }, { "epoch": 0.25553423503533484, "loss": 1.263014554977417, "loss_ce": 0.004652736242860556, "loss_iou": 0.49609375, "loss_num": 0.052978515625, "loss_xval": 1.2578125, "num_input_tokens_seen": 180802144, "step": 2730 }, { "epoch": 0.2556278373192306, "grad_norm": 20.515073776245117, "learning_rate": 5e-05, "loss": 1.4534, "num_input_tokens_seen": 180868048, "step": 2731 }, { "epoch": 0.2556278373192306, "loss": 1.412936806678772, "loss_ce": 0.002780516864731908, "loss_iou": 0.59375, "loss_num": 0.044189453125, "loss_xval": 1.40625, "num_input_tokens_seen": 180868048, "step": 2731 }, { "epoch": 0.2557214396031263, "grad_norm": 31.924388885498047, "learning_rate": 5e-05, "loss": 1.4156, "num_input_tokens_seen": 180934952, "step": 2732 }, { "epoch": 0.2557214396031263, "loss": 1.2068747282028198, "loss_ce": 0.005702856928110123, "loss_iou": 0.515625, "loss_num": 0.034423828125, "loss_xval": 1.203125, "num_input_tokens_seen": 180934952, "step": 2732 }, { "epoch": 0.25581504188702203, "grad_norm": 22.622175216674805, "learning_rate": 5e-05, "loss": 1.5098, "num_input_tokens_seen": 181001384, "step": 2733 }, { "epoch": 0.25581504188702203, "loss": 1.5945289134979248, "loss_ce": 0.0027319621294736862, "loss_iou": 0.6640625, "loss_num": 0.053466796875, "loss_xval": 1.59375, "num_input_tokens_seen": 181001384, "step": 2733 }, { "epoch": 0.25590864417091774, "grad_norm": 81.16958618164062, "learning_rate": 5e-05, "loss": 1.3243, "num_input_tokens_seen": 181067248, "step": 2734 }, { "epoch": 0.25590864417091774, "loss": 1.202168345451355, "loss_ce": 0.003926113247871399, "loss_iou": 0.51171875, "loss_num": 0.03466796875, "loss_xval": 1.1953125, "num_input_tokens_seen": 181067248, "step": 2734 }, { "epoch": 0.2560022464548135, "grad_norm": 43.052371978759766, "learning_rate": 5e-05, "loss": 1.3143, "num_input_tokens_seen": 181133096, "step": 2735 }, { "epoch": 0.2560022464548135, "loss": 1.3479100465774536, "loss_ce": 0.005136595107614994, "loss_iou": 0.578125, "loss_num": 0.03759765625, "loss_xval": 1.34375, "num_input_tokens_seen": 181133096, "step": 2735 }, { "epoch": 0.2560958487387092, "grad_norm": 23.68447494506836, "learning_rate": 5e-05, "loss": 1.2908, "num_input_tokens_seen": 181199216, "step": 2736 }, { "epoch": 0.2560958487387092, "loss": 1.4186744689941406, "loss_ce": 0.002170453779399395, "loss_iou": 0.609375, "loss_num": 0.0390625, "loss_xval": 1.4140625, "num_input_tokens_seen": 181199216, "step": 2736 }, { "epoch": 0.25618945102260493, "grad_norm": 24.601858139038086, "learning_rate": 5e-05, "loss": 1.3755, "num_input_tokens_seen": 181266216, "step": 2737 }, { "epoch": 0.25618945102260493, "loss": 1.2716262340545654, "loss_ce": 0.0030715835746377707, "loss_iou": 0.54296875, "loss_num": 0.03662109375, "loss_xval": 1.265625, "num_input_tokens_seen": 181266216, "step": 2737 }, { "epoch": 0.2562830533065007, "grad_norm": 35.577396392822266, "learning_rate": 5e-05, "loss": 1.2007, "num_input_tokens_seen": 181331656, "step": 2738 }, { "epoch": 0.2562830533065007, "loss": 1.2889866828918457, "loss_ce": 0.004318690858781338, "loss_iou": 0.53125, "loss_num": 0.04541015625, "loss_xval": 1.28125, "num_input_tokens_seen": 181331656, "step": 2738 }, { "epoch": 0.2563766555903964, "grad_norm": 22.694122314453125, "learning_rate": 5e-05, "loss": 1.6981, "num_input_tokens_seen": 181396848, "step": 2739 }, { "epoch": 0.2563766555903964, "loss": 1.7223371267318726, "loss_ce": 0.008469952270388603, "loss_iou": 0.7265625, "loss_num": 0.051513671875, "loss_xval": 1.7109375, "num_input_tokens_seen": 181396848, "step": 2739 }, { "epoch": 0.2564702578742921, "grad_norm": 18.788219451904297, "learning_rate": 5e-05, "loss": 1.2121, "num_input_tokens_seen": 181464172, "step": 2740 }, { "epoch": 0.2564702578742921, "loss": 1.1174161434173584, "loss_ce": 0.004623170010745525, "loss_iou": 0.482421875, "loss_num": 0.029541015625, "loss_xval": 1.109375, "num_input_tokens_seen": 181464172, "step": 2740 }, { "epoch": 0.2565638601581879, "grad_norm": 21.85161590576172, "learning_rate": 5e-05, "loss": 1.3487, "num_input_tokens_seen": 181530412, "step": 2741 }, { "epoch": 0.2565638601581879, "loss": 1.582802653312683, "loss_ce": 0.005166033748537302, "loss_iou": 0.63671875, "loss_num": 0.060546875, "loss_xval": 1.578125, "num_input_tokens_seen": 181530412, "step": 2741 }, { "epoch": 0.2566574624420836, "grad_norm": 21.856889724731445, "learning_rate": 5e-05, "loss": 1.261, "num_input_tokens_seen": 181595716, "step": 2742 }, { "epoch": 0.2566574624420836, "loss": 1.238128662109375, "loss_ce": 0.005218507722020149, "loss_iou": 0.462890625, "loss_num": 0.0615234375, "loss_xval": 1.234375, "num_input_tokens_seen": 181595716, "step": 2742 }, { "epoch": 0.2567510647259793, "grad_norm": 28.9006290435791, "learning_rate": 5e-05, "loss": 1.3331, "num_input_tokens_seen": 181661020, "step": 2743 }, { "epoch": 0.2567510647259793, "loss": 1.3242883682250977, "loss_ce": 0.0044641438871622086, "loss_iou": 0.5234375, "loss_num": 0.054443359375, "loss_xval": 1.3203125, "num_input_tokens_seen": 181661020, "step": 2743 }, { "epoch": 0.256844667009875, "grad_norm": 36.75323486328125, "learning_rate": 5e-05, "loss": 1.388, "num_input_tokens_seen": 181724828, "step": 2744 }, { "epoch": 0.256844667009875, "loss": 1.3701030015945435, "loss_ce": 0.003892068285495043, "loss_iou": 0.57421875, "loss_num": 0.04443359375, "loss_xval": 1.3671875, "num_input_tokens_seen": 181724828, "step": 2744 }, { "epoch": 0.2569382692937708, "grad_norm": 17.429616928100586, "learning_rate": 5e-05, "loss": 1.7901, "num_input_tokens_seen": 181791348, "step": 2745 }, { "epoch": 0.2569382692937708, "loss": 1.7535079717636108, "loss_ce": 0.003385887946933508, "loss_iou": 0.7265625, "loss_num": 0.05859375, "loss_xval": 1.75, "num_input_tokens_seen": 181791348, "step": 2745 }, { "epoch": 0.2570318715776665, "grad_norm": 17.206226348876953, "learning_rate": 5e-05, "loss": 1.6154, "num_input_tokens_seen": 181859036, "step": 2746 }, { "epoch": 0.2570318715776665, "loss": 1.644707202911377, "loss_ce": 0.007011887151747942, "loss_iou": 0.65625, "loss_num": 0.0654296875, "loss_xval": 1.640625, "num_input_tokens_seen": 181859036, "step": 2746 }, { "epoch": 0.2571254738615622, "grad_norm": 28.9249324798584, "learning_rate": 5e-05, "loss": 1.316, "num_input_tokens_seen": 181925472, "step": 2747 }, { "epoch": 0.2571254738615622, "loss": 1.2439528703689575, "loss_ce": 0.002009489107877016, "loss_iou": 0.49609375, "loss_num": 0.05029296875, "loss_xval": 1.2421875, "num_input_tokens_seen": 181925472, "step": 2747 }, { "epoch": 0.25721907614545797, "grad_norm": 28.013105392456055, "learning_rate": 5e-05, "loss": 1.3691, "num_input_tokens_seen": 181992116, "step": 2748 }, { "epoch": 0.25721907614545797, "loss": 1.388258695602417, "loss_ce": 0.005446320399641991, "loss_iou": 0.546875, "loss_num": 0.0576171875, "loss_xval": 1.3828125, "num_input_tokens_seen": 181992116, "step": 2748 }, { "epoch": 0.2573126784293537, "grad_norm": 35.12394714355469, "learning_rate": 5e-05, "loss": 1.4747, "num_input_tokens_seen": 182057936, "step": 2749 }, { "epoch": 0.2573126784293537, "loss": 1.5037202835083008, "loss_ce": 0.0037203137762844563, "loss_iou": 0.63671875, "loss_num": 0.045654296875, "loss_xval": 1.5, "num_input_tokens_seen": 182057936, "step": 2749 }, { "epoch": 0.2574062807132494, "grad_norm": 21.23040008544922, "learning_rate": 5e-05, "loss": 1.5964, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.2574062807132494, "eval_seeclick_CIoU": 0.1042308509349823, "eval_seeclick_GIoU": 0.11520424857735634, "eval_seeclick_IoU": 0.24981697648763657, "eval_seeclick_MAE_all": 0.17666172981262207, "eval_seeclick_MAE_h": 0.08429580554366112, "eval_seeclick_MAE_w": 0.12163790687918663, "eval_seeclick_MAE_x_boxes": 0.2853519022464752, "eval_seeclick_MAE_y_boxes": 0.17233942449092865, "eval_seeclick_NUM_probability": 0.9995305836200714, "eval_seeclick_inside_bbox": 0.3895833343267441, "eval_seeclick_loss": 2.7157585620880127, "eval_seeclick_loss_ce": 0.014326652977615595, "eval_seeclick_loss_iou": 0.934326171875, "eval_seeclick_loss_num": 0.17431640625, "eval_seeclick_loss_xval": 2.73876953125, "eval_seeclick_runtime": 68.5211, "eval_seeclick_samples_per_second": 0.686, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.2574062807132494, "eval_icons_CIoU": -0.09721673280000687, "eval_icons_GIoU": -0.002693554386496544, "eval_icons_IoU": 0.0957326740026474, "eval_icons_MAE_all": 0.18497255444526672, "eval_icons_MAE_h": 0.139219768345356, "eval_icons_MAE_w": 0.16029571741819382, "eval_icons_MAE_x_boxes": 0.1328095756471157, "eval_icons_MAE_y_boxes": 0.14838873222470284, "eval_icons_NUM_probability": 0.9932429194450378, "eval_icons_inside_bbox": 0.13715277798473835, "eval_icons_loss": 2.913668155670166, "eval_icons_loss_ce": 0.0013117790513206273, "eval_icons_loss_iou": 1.00439453125, "eval_icons_loss_num": 0.1877899169921875, "eval_icons_loss_xval": 2.947265625, "eval_icons_runtime": 92.2466, "eval_icons_samples_per_second": 0.542, "eval_icons_steps_per_second": 0.022, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.2574062807132494, "eval_screenspot_CIoU": -0.0036787251010537148, "eval_screenspot_GIoU": 0.03668480211248001, "eval_screenspot_IoU": 0.16975745558738708, "eval_screenspot_MAE_all": 0.19455142815907797, "eval_screenspot_MAE_h": 0.1320437416434288, "eval_screenspot_MAE_w": 0.16170996675888696, "eval_screenspot_MAE_x_boxes": 0.26663068930308026, "eval_screenspot_MAE_y_boxes": 0.13205540676911673, "eval_screenspot_NUM_probability": 0.99985804160436, "eval_screenspot_inside_bbox": 0.3283333381017049, "eval_screenspot_loss": 2.9288907051086426, "eval_screenspot_loss_ce": 0.008995980645219484, "eval_screenspot_loss_iou": 0.9759114583333334, "eval_screenspot_loss_num": 0.19524129231770834, "eval_screenspot_loss_xval": 2.9270833333333335, "eval_screenspot_runtime": 132.5325, "eval_screenspot_samples_per_second": 0.672, "eval_screenspot_steps_per_second": 0.023, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.2574062807132494, "eval_compot_CIoU": -0.029431015253067017, "eval_compot_GIoU": 0.039341045543551445, "eval_compot_IoU": 0.14003757759928703, "eval_compot_MAE_all": 0.19349998235702515, "eval_compot_MAE_h": 0.07970436103641987, "eval_compot_MAE_w": 0.21002302691340446, "eval_compot_MAE_x_boxes": 0.19608338177204132, "eval_compot_MAE_y_boxes": 0.1365150660276413, "eval_compot_NUM_probability": 0.9998738169670105, "eval_compot_inside_bbox": 0.2447916716337204, "eval_compot_loss": 2.974236488342285, "eval_compot_loss_ce": 0.0023781840573064983, "eval_compot_loss_iou": 0.98388671875, "eval_compot_loss_num": 0.204254150390625, "eval_compot_loss_xval": 2.98681640625, "eval_compot_runtime": 74.0009, "eval_compot_samples_per_second": 0.676, "eval_compot_steps_per_second": 0.027, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.2574062807132494, "eval_custom_ui_MAE_all": 0.15309859812259674, "eval_custom_ui_MAE_x": 0.1523282527923584, "eval_custom_ui_MAE_y": 0.15386895462870598, "eval_custom_ui_NUM_probability": 0.9999546706676483, "eval_custom_ui_loss": 0.9221082925796509, "eval_custom_ui_loss_ce": 0.21552801877260208, "eval_custom_ui_loss_num": 0.152130126953125, "eval_custom_ui_loss_xval": 0.7607421875, "eval_custom_ui_runtime": 56.3642, "eval_custom_ui_samples_per_second": 0.887, "eval_custom_ui_steps_per_second": 0.035, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.2574062807132494, "loss": 0.9935225248336792, "loss_ce": 0.24449913203716278, "loss_iou": 0.0, "loss_num": 0.1494140625, "loss_xval": 0.75, "num_input_tokens_seen": 182123980, "step": 2750 }, { "epoch": 0.25749988299714516, "grad_norm": 11.523423194885254, "learning_rate": 5e-05, "loss": 0.9672, "num_input_tokens_seen": 182188460, "step": 2751 }, { "epoch": 0.25749988299714516, "loss": 0.9552120566368103, "loss_ce": 0.0072140078991651535, "loss_iou": 0.38671875, "loss_num": 0.034912109375, "loss_xval": 0.94921875, "num_input_tokens_seen": 182188460, "step": 2751 }, { "epoch": 0.25759348528104087, "grad_norm": 19.382591247558594, "learning_rate": 5e-05, "loss": 1.3101, "num_input_tokens_seen": 182254364, "step": 2752 }, { "epoch": 0.25759348528104087, "loss": 1.4123663902282715, "loss_ce": 0.004651571623980999, "loss_iou": 0.53515625, "loss_num": 0.06787109375, "loss_xval": 1.40625, "num_input_tokens_seen": 182254364, "step": 2752 }, { "epoch": 0.2576870875649366, "grad_norm": 24.27301788330078, "learning_rate": 5e-05, "loss": 1.3711, "num_input_tokens_seen": 182320480, "step": 2753 }, { "epoch": 0.2576870875649366, "loss": 1.2736258506774902, "loss_ce": 0.0031180973164737225, "loss_iou": 0.55078125, "loss_num": 0.034423828125, "loss_xval": 1.2734375, "num_input_tokens_seen": 182320480, "step": 2753 }, { "epoch": 0.2577806898488323, "grad_norm": 23.738521575927734, "learning_rate": 5e-05, "loss": 1.4014, "num_input_tokens_seen": 182386000, "step": 2754 }, { "epoch": 0.2577806898488323, "loss": 1.3582301139831543, "loss_ce": 0.005202867090702057, "loss_iou": 0.59375, "loss_num": 0.0322265625, "loss_xval": 1.3515625, "num_input_tokens_seen": 182386000, "step": 2754 }, { "epoch": 0.25787429213272806, "grad_norm": 21.385990142822266, "learning_rate": 5e-05, "loss": 1.5267, "num_input_tokens_seen": 182451800, "step": 2755 }, { "epoch": 0.25787429213272806, "loss": 1.4563599824905396, "loss_ce": 0.0061646560207009315, "loss_iou": 0.63671875, "loss_num": 0.034912109375, "loss_xval": 1.453125, "num_input_tokens_seen": 182451800, "step": 2755 }, { "epoch": 0.25796789441662377, "grad_norm": 39.32674026489258, "learning_rate": 5e-05, "loss": 1.3266, "num_input_tokens_seen": 182518708, "step": 2756 }, { "epoch": 0.25796789441662377, "loss": 1.2763360738754272, "loss_ce": 0.005828249268233776, "loss_iou": 0.52734375, "loss_num": 0.04248046875, "loss_xval": 1.2734375, "num_input_tokens_seen": 182518708, "step": 2756 }, { "epoch": 0.2580614967005195, "grad_norm": 22.322223663330078, "learning_rate": 5e-05, "loss": 1.6627, "num_input_tokens_seen": 182585652, "step": 2757 }, { "epoch": 0.2580614967005195, "loss": 1.9377355575561523, "loss_ce": 0.005118353758007288, "loss_iou": 0.7421875, "loss_num": 0.08984375, "loss_xval": 1.9296875, "num_input_tokens_seen": 182585652, "step": 2757 }, { "epoch": 0.25815509898441524, "grad_norm": 15.85788631439209, "learning_rate": 5e-05, "loss": 1.1743, "num_input_tokens_seen": 182653176, "step": 2758 }, { "epoch": 0.25815509898441524, "loss": 1.165595293045044, "loss_ce": 0.005927319638431072, "loss_iou": 0.466796875, "loss_num": 0.04443359375, "loss_xval": 1.15625, "num_input_tokens_seen": 182653176, "step": 2758 }, { "epoch": 0.25824870126831095, "grad_norm": 28.82282066345215, "learning_rate": 5e-05, "loss": 1.3383, "num_input_tokens_seen": 182719236, "step": 2759 }, { "epoch": 0.25824870126831095, "loss": 1.2076382637023926, "loss_ce": 0.003536621108651161, "loss_iou": 0.4921875, "loss_num": 0.04345703125, "loss_xval": 1.203125, "num_input_tokens_seen": 182719236, "step": 2759 }, { "epoch": 0.25834230355220666, "grad_norm": 24.307004928588867, "learning_rate": 5e-05, "loss": 1.4537, "num_input_tokens_seen": 182784992, "step": 2760 }, { "epoch": 0.25834230355220666, "loss": 1.4684849977493286, "loss_ce": 0.004373685456812382, "loss_iou": 0.625, "loss_num": 0.04296875, "loss_xval": 1.4609375, "num_input_tokens_seen": 182784992, "step": 2760 }, { "epoch": 0.2584359058361024, "grad_norm": 20.741451263427734, "learning_rate": 5e-05, "loss": 1.3699, "num_input_tokens_seen": 182851400, "step": 2761 }, { "epoch": 0.2584359058361024, "loss": 1.5804247856140137, "loss_ce": 0.003276292234659195, "loss_iou": 0.60546875, "loss_num": 0.0732421875, "loss_xval": 1.578125, "num_input_tokens_seen": 182851400, "step": 2761 }, { "epoch": 0.25852950811999814, "grad_norm": 25.782926559448242, "learning_rate": 5e-05, "loss": 1.4695, "num_input_tokens_seen": 182917160, "step": 2762 }, { "epoch": 0.25852950811999814, "loss": 1.7858836650848389, "loss_ce": 0.003657030873000622, "loss_iou": 0.6640625, "loss_num": 0.0908203125, "loss_xval": 1.78125, "num_input_tokens_seen": 182917160, "step": 2762 }, { "epoch": 0.25862311040389385, "grad_norm": 17.015609741210938, "learning_rate": 5e-05, "loss": 1.2913, "num_input_tokens_seen": 182984212, "step": 2763 }, { "epoch": 0.25862311040389385, "loss": 1.214355707168579, "loss_ce": 0.003906491212546825, "loss_iou": 0.5, "loss_num": 0.04150390625, "loss_xval": 1.2109375, "num_input_tokens_seen": 182984212, "step": 2763 }, { "epoch": 0.25871671268778956, "grad_norm": 42.560546875, "learning_rate": 5e-05, "loss": 1.4358, "num_input_tokens_seen": 183050128, "step": 2764 }, { "epoch": 0.25871671268778956, "loss": 1.6791510581970215, "loss_ce": 0.009229252114892006, "loss_iou": 0.6875, "loss_num": 0.059326171875, "loss_xval": 1.671875, "num_input_tokens_seen": 183050128, "step": 2764 }, { "epoch": 0.25881031497168533, "grad_norm": 31.24652862548828, "learning_rate": 5e-05, "loss": 1.3654, "num_input_tokens_seen": 183116300, "step": 2765 }, { "epoch": 0.25881031497168533, "loss": 1.324753761291504, "loss_ce": 0.0024880152195692062, "loss_iou": 0.58203125, "loss_num": 0.0322265625, "loss_xval": 1.3203125, "num_input_tokens_seen": 183116300, "step": 2765 }, { "epoch": 0.25890391725558104, "grad_norm": 39.36247634887695, "learning_rate": 5e-05, "loss": 1.5224, "num_input_tokens_seen": 183182028, "step": 2766 }, { "epoch": 0.25890391725558104, "loss": 1.3314727544784546, "loss_ce": 0.0013946664985269308, "loss_iou": 0.55859375, "loss_num": 0.041748046875, "loss_xval": 1.328125, "num_input_tokens_seen": 183182028, "step": 2766 }, { "epoch": 0.25899751953947675, "grad_norm": 56.544677734375, "learning_rate": 5e-05, "loss": 1.494, "num_input_tokens_seen": 183247356, "step": 2767 }, { "epoch": 0.25899751953947675, "loss": 1.3784685134887695, "loss_ce": 0.006093028001487255, "loss_iou": 0.54296875, "loss_num": 0.056884765625, "loss_xval": 1.375, "num_input_tokens_seen": 183247356, "step": 2767 }, { "epoch": 0.2590911218233725, "grad_norm": 27.066333770751953, "learning_rate": 5e-05, "loss": 1.5124, "num_input_tokens_seen": 183312848, "step": 2768 }, { "epoch": 0.2590911218233725, "loss": 1.449023962020874, "loss_ce": 0.003711439436301589, "loss_iou": 0.6015625, "loss_num": 0.04736328125, "loss_xval": 1.4453125, "num_input_tokens_seen": 183312848, "step": 2768 }, { "epoch": 0.2591847241072682, "grad_norm": 26.6560115814209, "learning_rate": 5e-05, "loss": 1.336, "num_input_tokens_seen": 183378696, "step": 2769 }, { "epoch": 0.2591847241072682, "loss": 1.3047513961791992, "loss_ce": 0.003665025345981121, "loss_iou": 0.5234375, "loss_num": 0.051025390625, "loss_xval": 1.3046875, "num_input_tokens_seen": 183378696, "step": 2769 }, { "epoch": 0.25927832639116394, "grad_norm": 30.571502685546875, "learning_rate": 5e-05, "loss": 1.053, "num_input_tokens_seen": 183444588, "step": 2770 }, { "epoch": 0.25927832639116394, "loss": 1.1597281694412231, "loss_ce": 0.0010368122020736337, "loss_iou": 0.515625, "loss_num": 0.025634765625, "loss_xval": 1.15625, "num_input_tokens_seen": 183444588, "step": 2770 }, { "epoch": 0.25937192867505965, "grad_norm": 30.87347984313965, "learning_rate": 5e-05, "loss": 1.4284, "num_input_tokens_seen": 183511516, "step": 2771 }, { "epoch": 0.25937192867505965, "loss": 1.3620257377624512, "loss_ce": 0.011439764872193336, "loss_iou": 0.59765625, "loss_num": 0.030517578125, "loss_xval": 1.3515625, "num_input_tokens_seen": 183511516, "step": 2771 }, { "epoch": 0.2594655309589554, "grad_norm": 25.916946411132812, "learning_rate": 5e-05, "loss": 1.5079, "num_input_tokens_seen": 183578912, "step": 2772 }, { "epoch": 0.2594655309589554, "loss": 1.4635601043701172, "loss_ce": 0.0035991547629237175, "loss_iou": 0.62890625, "loss_num": 0.039794921875, "loss_xval": 1.4609375, "num_input_tokens_seen": 183578912, "step": 2772 }, { "epoch": 0.2595591332428511, "grad_norm": 19.248422622680664, "learning_rate": 5e-05, "loss": 1.3325, "num_input_tokens_seen": 183645392, "step": 2773 }, { "epoch": 0.2595591332428511, "loss": 1.1251640319824219, "loss_ce": 0.005046752281486988, "loss_iou": 0.498046875, "loss_num": 0.025390625, "loss_xval": 1.1171875, "num_input_tokens_seen": 183645392, "step": 2773 }, { "epoch": 0.25965273552674684, "grad_norm": 32.65757369995117, "learning_rate": 5e-05, "loss": 1.1309, "num_input_tokens_seen": 183712312, "step": 2774 }, { "epoch": 0.25965273552674684, "loss": 1.0271244049072266, "loss_ce": 0.0031376397237181664, "loss_iou": 0.416015625, "loss_num": 0.03857421875, "loss_xval": 1.0234375, "num_input_tokens_seen": 183712312, "step": 2774 }, { "epoch": 0.2597463378106426, "grad_norm": 25.546913146972656, "learning_rate": 5e-05, "loss": 1.5667, "num_input_tokens_seen": 183778704, "step": 2775 }, { "epoch": 0.2597463378106426, "loss": 1.499668002128601, "loss_ce": 0.0030860367696732283, "loss_iou": 0.6484375, "loss_num": 0.040283203125, "loss_xval": 1.5, "num_input_tokens_seen": 183778704, "step": 2775 }, { "epoch": 0.2598399400945383, "grad_norm": 22.19900894165039, "learning_rate": 5e-05, "loss": 1.2377, "num_input_tokens_seen": 183844476, "step": 2776 }, { "epoch": 0.2598399400945383, "loss": 1.196016550064087, "loss_ce": 0.010469729080796242, "loss_iou": 0.50390625, "loss_num": 0.0361328125, "loss_xval": 1.1875, "num_input_tokens_seen": 183844476, "step": 2776 }, { "epoch": 0.259933542378434, "grad_norm": 26.540695190429688, "learning_rate": 5e-05, "loss": 1.2683, "num_input_tokens_seen": 183911088, "step": 2777 }, { "epoch": 0.259933542378434, "loss": 1.228979468345642, "loss_ce": 0.004858322907239199, "loss_iou": 0.51953125, "loss_num": 0.036376953125, "loss_xval": 1.2265625, "num_input_tokens_seen": 183911088, "step": 2777 }, { "epoch": 0.26002714466232973, "grad_norm": 19.824424743652344, "learning_rate": 5e-05, "loss": 1.6691, "num_input_tokens_seen": 183974276, "step": 2778 }, { "epoch": 0.26002714466232973, "loss": 1.5102863311767578, "loss_ce": 0.005037275142967701, "loss_iou": 0.64453125, "loss_num": 0.0439453125, "loss_xval": 1.5078125, "num_input_tokens_seen": 183974276, "step": 2778 }, { "epoch": 0.2601207469462255, "grad_norm": 17.756515502929688, "learning_rate": 5e-05, "loss": 1.3528, "num_input_tokens_seen": 184040060, "step": 2779 }, { "epoch": 0.2601207469462255, "loss": 1.3978538513183594, "loss_ce": 0.009182073175907135, "loss_iou": 0.5234375, "loss_num": 0.06884765625, "loss_xval": 1.390625, "num_input_tokens_seen": 184040060, "step": 2779 }, { "epoch": 0.2602143492301212, "grad_norm": 26.4755802154541, "learning_rate": 5e-05, "loss": 1.2616, "num_input_tokens_seen": 184106404, "step": 2780 }, { "epoch": 0.2602143492301212, "loss": 1.2944135665893555, "loss_ce": 0.008280863054096699, "loss_iou": 0.5390625, "loss_num": 0.041259765625, "loss_xval": 1.2890625, "num_input_tokens_seen": 184106404, "step": 2780 }, { "epoch": 0.2603079515140169, "grad_norm": 22.432933807373047, "learning_rate": 5e-05, "loss": 1.4327, "num_input_tokens_seen": 184172288, "step": 2781 }, { "epoch": 0.2603079515140169, "loss": 1.4781384468078613, "loss_ce": 0.004505617544054985, "loss_iou": 0.609375, "loss_num": 0.05078125, "loss_xval": 1.4765625, "num_input_tokens_seen": 184172288, "step": 2781 }, { "epoch": 0.2604015537979127, "grad_norm": 19.73869514465332, "learning_rate": 5e-05, "loss": 1.1615, "num_input_tokens_seen": 184238320, "step": 2782 }, { "epoch": 0.2604015537979127, "loss": 1.0642637014389038, "loss_ce": 0.004571388475596905, "loss_iou": 0.419921875, "loss_num": 0.044189453125, "loss_xval": 1.0625, "num_input_tokens_seen": 184238320, "step": 2782 }, { "epoch": 0.2604951560818084, "grad_norm": 31.140819549560547, "learning_rate": 5e-05, "loss": 1.4513, "num_input_tokens_seen": 184305112, "step": 2783 }, { "epoch": 0.2604951560818084, "loss": 1.5943620204925537, "loss_ce": 0.0054949019104242325, "loss_iou": 0.65625, "loss_num": 0.05517578125, "loss_xval": 1.5859375, "num_input_tokens_seen": 184305112, "step": 2783 }, { "epoch": 0.2605887583657041, "grad_norm": 80.94667053222656, "learning_rate": 5e-05, "loss": 1.1728, "num_input_tokens_seen": 184371380, "step": 2784 }, { "epoch": 0.2605887583657041, "loss": 1.103699803352356, "loss_ce": 0.005067044869065285, "loss_iou": 0.455078125, "loss_num": 0.037353515625, "loss_xval": 1.1015625, "num_input_tokens_seen": 184371380, "step": 2784 }, { "epoch": 0.2606823606495999, "grad_norm": 25.863245010375977, "learning_rate": 5e-05, "loss": 1.5235, "num_input_tokens_seen": 184437636, "step": 2785 }, { "epoch": 0.2606823606495999, "loss": 1.53908109664917, "loss_ce": 0.01027250662446022, "loss_iou": 0.6328125, "loss_num": 0.052734375, "loss_xval": 1.53125, "num_input_tokens_seen": 184437636, "step": 2785 }, { "epoch": 0.2607759629334956, "grad_norm": 10.690145492553711, "learning_rate": 5e-05, "loss": 1.1675, "num_input_tokens_seen": 184502828, "step": 2786 }, { "epoch": 0.2607759629334956, "loss": 1.0218708515167236, "loss_ce": 0.0071919020265340805, "loss_iou": 0.4375, "loss_num": 0.0277099609375, "loss_xval": 1.015625, "num_input_tokens_seen": 184502828, "step": 2786 }, { "epoch": 0.2608695652173913, "grad_norm": 13.819395065307617, "learning_rate": 5e-05, "loss": 1.263, "num_input_tokens_seen": 184569736, "step": 2787 }, { "epoch": 0.2608695652173913, "loss": 1.0895116329193115, "loss_ce": 0.007480449974536896, "loss_iou": 0.4609375, "loss_num": 0.031982421875, "loss_xval": 1.078125, "num_input_tokens_seen": 184569736, "step": 2787 }, { "epoch": 0.260963167501287, "grad_norm": 22.60026741027832, "learning_rate": 5e-05, "loss": 1.3177, "num_input_tokens_seen": 184635668, "step": 2788 }, { "epoch": 0.260963167501287, "loss": 1.3702020645141602, "loss_ce": 0.0054559423588216305, "loss_iou": 0.55078125, "loss_num": 0.052001953125, "loss_xval": 1.3671875, "num_input_tokens_seen": 184635668, "step": 2788 }, { "epoch": 0.2610567697851828, "grad_norm": 30.772146224975586, "learning_rate": 5e-05, "loss": 1.313, "num_input_tokens_seen": 184702996, "step": 2789 }, { "epoch": 0.2610567697851828, "loss": 1.282770037651062, "loss_ce": 0.004937991499900818, "loss_iou": 0.5546875, "loss_num": 0.0341796875, "loss_xval": 1.28125, "num_input_tokens_seen": 184702996, "step": 2789 }, { "epoch": 0.2611503720690785, "grad_norm": 21.066852569580078, "learning_rate": 5e-05, "loss": 1.708, "num_input_tokens_seen": 184769136, "step": 2790 }, { "epoch": 0.2611503720690785, "loss": 1.485233187675476, "loss_ce": 0.0057409899309277534, "loss_iou": 0.6328125, "loss_num": 0.042236328125, "loss_xval": 1.4765625, "num_input_tokens_seen": 184769136, "step": 2790 }, { "epoch": 0.2612439743529742, "grad_norm": 23.4888973236084, "learning_rate": 5e-05, "loss": 1.2869, "num_input_tokens_seen": 184836248, "step": 2791 }, { "epoch": 0.2612439743529742, "loss": 1.3349745273590088, "loss_ce": 0.002943230327218771, "loss_iou": 0.546875, "loss_num": 0.047607421875, "loss_xval": 1.328125, "num_input_tokens_seen": 184836248, "step": 2791 }, { "epoch": 0.26133757663686996, "grad_norm": 23.718120574951172, "learning_rate": 5e-05, "loss": 1.6147, "num_input_tokens_seen": 184903464, "step": 2792 }, { "epoch": 0.26133757663686996, "loss": 1.5826460123062134, "loss_ce": 0.003544393926858902, "loss_iou": 0.66796875, "loss_num": 0.048828125, "loss_xval": 1.578125, "num_input_tokens_seen": 184903464, "step": 2792 }, { "epoch": 0.26143117892076567, "grad_norm": 20.823057174682617, "learning_rate": 5e-05, "loss": 1.4117, "num_input_tokens_seen": 184970044, "step": 2793 }, { "epoch": 0.26143117892076567, "loss": 1.2039194107055664, "loss_ce": 0.004212336614727974, "loss_iou": 0.486328125, "loss_num": 0.045166015625, "loss_xval": 1.203125, "num_input_tokens_seen": 184970044, "step": 2793 }, { "epoch": 0.2615247812046614, "grad_norm": 36.8116340637207, "learning_rate": 5e-05, "loss": 1.4354, "num_input_tokens_seen": 185035952, "step": 2794 }, { "epoch": 0.2615247812046614, "loss": 1.3722023963928223, "loss_ce": 0.0069679622538387775, "loss_iou": 0.55859375, "loss_num": 0.0498046875, "loss_xval": 1.3671875, "num_input_tokens_seen": 185035952, "step": 2794 }, { "epoch": 0.2616183834885571, "grad_norm": 24.805438995361328, "learning_rate": 5e-05, "loss": 1.5389, "num_input_tokens_seen": 185103444, "step": 2795 }, { "epoch": 0.2616183834885571, "loss": 1.482750654220581, "loss_ce": 0.004235108382999897, "loss_iou": 0.61328125, "loss_num": 0.049560546875, "loss_xval": 1.4765625, "num_input_tokens_seen": 185103444, "step": 2795 }, { "epoch": 0.26171198577245286, "grad_norm": 32.0396614074707, "learning_rate": 5e-05, "loss": 1.4879, "num_input_tokens_seen": 185169992, "step": 2796 }, { "epoch": 0.26171198577245286, "loss": 1.4500646591186523, "loss_ce": 0.005728684365749359, "loss_iou": 0.640625, "loss_num": 0.0322265625, "loss_xval": 1.4453125, "num_input_tokens_seen": 185169992, "step": 2796 }, { "epoch": 0.26180558805634857, "grad_norm": 30.2569637298584, "learning_rate": 5e-05, "loss": 1.5313, "num_input_tokens_seen": 185236976, "step": 2797 }, { "epoch": 0.26180558805634857, "loss": 1.5174522399902344, "loss_ce": 0.00719832070171833, "loss_iou": 0.6484375, "loss_num": 0.042236328125, "loss_xval": 1.5078125, "num_input_tokens_seen": 185236976, "step": 2797 }, { "epoch": 0.2618991903402443, "grad_norm": 21.909534454345703, "learning_rate": 5e-05, "loss": 1.3901, "num_input_tokens_seen": 185302904, "step": 2798 }, { "epoch": 0.2618991903402443, "loss": 1.467483639717102, "loss_ce": 0.005081304349005222, "loss_iou": 0.55859375, "loss_num": 0.068359375, "loss_xval": 1.4609375, "num_input_tokens_seen": 185302904, "step": 2798 }, { "epoch": 0.26199279262414005, "grad_norm": 21.550695419311523, "learning_rate": 5e-05, "loss": 1.1799, "num_input_tokens_seen": 185369352, "step": 2799 }, { "epoch": 0.26199279262414005, "loss": 1.1303417682647705, "loss_ce": 0.0038770241662859917, "loss_iou": 0.486328125, "loss_num": 0.0308837890625, "loss_xval": 1.125, "num_input_tokens_seen": 185369352, "step": 2799 }, { "epoch": 0.26208639490803576, "grad_norm": 18.965072631835938, "learning_rate": 5e-05, "loss": 1.3469, "num_input_tokens_seen": 185433792, "step": 2800 }, { "epoch": 0.26208639490803576, "loss": 1.271073341369629, "loss_ce": 0.003495211945846677, "loss_iou": 0.53515625, "loss_num": 0.039794921875, "loss_xval": 1.265625, "num_input_tokens_seen": 185433792, "step": 2800 }, { "epoch": 0.26217999719193147, "grad_norm": 14.17694091796875, "learning_rate": 5e-05, "loss": 1.3766, "num_input_tokens_seen": 185500448, "step": 2801 }, { "epoch": 0.26217999719193147, "loss": 1.3449500799179077, "loss_ce": 0.0065711867064237595, "loss_iou": 0.5625, "loss_num": 0.0419921875, "loss_xval": 1.3359375, "num_input_tokens_seen": 185500448, "step": 2801 }, { "epoch": 0.26227359947582723, "grad_norm": 21.094636917114258, "learning_rate": 5e-05, "loss": 1.1819, "num_input_tokens_seen": 185566104, "step": 2802 }, { "epoch": 0.26227359947582723, "loss": 1.209665060043335, "loss_ce": 0.0053192367777228355, "loss_iou": 0.5, "loss_num": 0.041259765625, "loss_xval": 1.203125, "num_input_tokens_seen": 185566104, "step": 2802 }, { "epoch": 0.26236720175972295, "grad_norm": 35.77684020996094, "learning_rate": 5e-05, "loss": 1.611, "num_input_tokens_seen": 185631140, "step": 2803 }, { "epoch": 0.26236720175972295, "loss": 1.7878296375274658, "loss_ce": 0.006579712964594364, "loss_iou": 0.74609375, "loss_num": 0.0576171875, "loss_xval": 1.78125, "num_input_tokens_seen": 185631140, "step": 2803 }, { "epoch": 0.26246080404361866, "grad_norm": 22.20318031311035, "learning_rate": 5e-05, "loss": 1.5298, "num_input_tokens_seen": 185697824, "step": 2804 }, { "epoch": 0.26246080404361866, "loss": 1.627734661102295, "loss_ce": 0.00859402772039175, "loss_iou": 0.66796875, "loss_num": 0.05712890625, "loss_xval": 1.6171875, "num_input_tokens_seen": 185697824, "step": 2804 }, { "epoch": 0.26255440632751437, "grad_norm": 31.52370834350586, "learning_rate": 5e-05, "loss": 1.2775, "num_input_tokens_seen": 185763704, "step": 2805 }, { "epoch": 0.26255440632751437, "loss": 1.156463384628296, "loss_ce": 0.003936439752578735, "loss_iou": 0.482421875, "loss_num": 0.037353515625, "loss_xval": 1.15625, "num_input_tokens_seen": 185763704, "step": 2805 }, { "epoch": 0.26264800861141013, "grad_norm": 34.81428527832031, "learning_rate": 5e-05, "loss": 1.4791, "num_input_tokens_seen": 185830772, "step": 2806 }, { "epoch": 0.26264800861141013, "loss": 1.4220342636108398, "loss_ce": 0.004553778562694788, "loss_iou": 0.5859375, "loss_num": 0.0498046875, "loss_xval": 1.4140625, "num_input_tokens_seen": 185830772, "step": 2806 }, { "epoch": 0.26274161089530584, "grad_norm": 23.893314361572266, "learning_rate": 5e-05, "loss": 1.1632, "num_input_tokens_seen": 185897716, "step": 2807 }, { "epoch": 0.26274161089530584, "loss": 1.2179107666015625, "loss_ce": 0.003066919045522809, "loss_iou": 0.5546875, "loss_num": 0.020751953125, "loss_xval": 1.21875, "num_input_tokens_seen": 185897716, "step": 2807 }, { "epoch": 0.26283521317920155, "grad_norm": 13.81638240814209, "learning_rate": 5e-05, "loss": 1.3039, "num_input_tokens_seen": 185962712, "step": 2808 }, { "epoch": 0.26283521317920155, "loss": 1.4558379650115967, "loss_ce": 0.011990239843726158, "loss_iou": 0.56640625, "loss_num": 0.06201171875, "loss_xval": 1.4453125, "num_input_tokens_seen": 185962712, "step": 2808 }, { "epoch": 0.2629288154630973, "grad_norm": 65.97924041748047, "learning_rate": 5e-05, "loss": 1.4595, "num_input_tokens_seen": 186029268, "step": 2809 }, { "epoch": 0.2629288154630973, "loss": 1.5803302526474, "loss_ce": 0.005623304285109043, "loss_iou": 0.62890625, "loss_num": 0.06396484375, "loss_xval": 1.578125, "num_input_tokens_seen": 186029268, "step": 2809 }, { "epoch": 0.26302241774699303, "grad_norm": 26.90452766418457, "learning_rate": 5e-05, "loss": 1.2901, "num_input_tokens_seen": 186095640, "step": 2810 }, { "epoch": 0.26302241774699303, "loss": 1.1605780124664307, "loss_ce": 0.004038172774016857, "loss_iou": 0.498046875, "loss_num": 0.0322265625, "loss_xval": 1.15625, "num_input_tokens_seen": 186095640, "step": 2810 }, { "epoch": 0.26311602003088874, "grad_norm": 42.861717224121094, "learning_rate": 5e-05, "loss": 1.3228, "num_input_tokens_seen": 186161412, "step": 2811 }, { "epoch": 0.26311602003088874, "loss": 1.1959176063537598, "loss_ce": 0.0030464723240584135, "loss_iou": 0.474609375, "loss_num": 0.048583984375, "loss_xval": 1.1953125, "num_input_tokens_seen": 186161412, "step": 2811 }, { "epoch": 0.2632096223147845, "grad_norm": 26.436466217041016, "learning_rate": 5e-05, "loss": 1.3872, "num_input_tokens_seen": 186227896, "step": 2812 }, { "epoch": 0.2632096223147845, "loss": 1.3306233882904053, "loss_ce": 0.00835779495537281, "loss_iou": 0.53125, "loss_num": 0.05224609375, "loss_xval": 1.3203125, "num_input_tokens_seen": 186227896, "step": 2812 }, { "epoch": 0.2633032245986802, "grad_norm": 26.288841247558594, "learning_rate": 5e-05, "loss": 1.6034, "num_input_tokens_seen": 186293408, "step": 2813 }, { "epoch": 0.2633032245986802, "loss": 1.5791151523590088, "loss_ce": 0.003919846843928099, "loss_iou": 0.625, "loss_num": 0.0654296875, "loss_xval": 1.578125, "num_input_tokens_seen": 186293408, "step": 2813 }, { "epoch": 0.26339682688257593, "grad_norm": 18.455432891845703, "learning_rate": 5e-05, "loss": 1.313, "num_input_tokens_seen": 186359436, "step": 2814 }, { "epoch": 0.26339682688257593, "loss": 1.1406774520874023, "loss_ce": 0.002493802923709154, "loss_iou": 0.49609375, "loss_num": 0.029052734375, "loss_xval": 1.140625, "num_input_tokens_seen": 186359436, "step": 2814 }, { "epoch": 0.26349042916647164, "grad_norm": 22.164289474487305, "learning_rate": 5e-05, "loss": 1.6511, "num_input_tokens_seen": 186426008, "step": 2815 }, { "epoch": 0.26349042916647164, "loss": 1.6130539178848267, "loss_ce": 0.006364461965858936, "loss_iou": 0.6171875, "loss_num": 0.0751953125, "loss_xval": 1.609375, "num_input_tokens_seen": 186426008, "step": 2815 }, { "epoch": 0.2635840314503674, "grad_norm": 23.62710189819336, "learning_rate": 5e-05, "loss": 1.2416, "num_input_tokens_seen": 186492532, "step": 2816 }, { "epoch": 0.2635840314503674, "loss": 1.4303010702133179, "loss_ce": 0.0054963938891887665, "loss_iou": 0.5703125, "loss_num": 0.0576171875, "loss_xval": 1.421875, "num_input_tokens_seen": 186492532, "step": 2816 }, { "epoch": 0.2636776337342631, "grad_norm": 28.17098617553711, "learning_rate": 5e-05, "loss": 1.4781, "num_input_tokens_seen": 186559828, "step": 2817 }, { "epoch": 0.2636776337342631, "loss": 1.3736687898635864, "loss_ce": 0.004039898049086332, "loss_iou": 0.5859375, "loss_num": 0.03955078125, "loss_xval": 1.3671875, "num_input_tokens_seen": 186559828, "step": 2817 }, { "epoch": 0.26377123601815883, "grad_norm": 38.10930252075195, "learning_rate": 5e-05, "loss": 1.3573, "num_input_tokens_seen": 186625564, "step": 2818 }, { "epoch": 0.26377123601815883, "loss": 1.46852445602417, "loss_ce": 0.003680661553516984, "loss_iou": 0.5859375, "loss_num": 0.058837890625, "loss_xval": 1.46875, "num_input_tokens_seen": 186625564, "step": 2818 }, { "epoch": 0.2638648383020546, "grad_norm": 21.143159866333008, "learning_rate": 5e-05, "loss": 1.6812, "num_input_tokens_seen": 186691476, "step": 2819 }, { "epoch": 0.2638648383020546, "loss": 1.597029447555542, "loss_ce": 0.004744234029203653, "loss_iou": 0.6953125, "loss_num": 0.039794921875, "loss_xval": 1.59375, "num_input_tokens_seen": 186691476, "step": 2819 }, { "epoch": 0.2639584405859503, "grad_norm": 20.390413284301758, "learning_rate": 5e-05, "loss": 1.4927, "num_input_tokens_seen": 186757432, "step": 2820 }, { "epoch": 0.2639584405859503, "loss": 1.5646171569824219, "loss_ce": 0.004558563232421875, "loss_iou": 0.6328125, "loss_num": 0.057861328125, "loss_xval": 1.5625, "num_input_tokens_seen": 186757432, "step": 2820 }, { "epoch": 0.264052042869846, "grad_norm": 30.135969161987305, "learning_rate": 5e-05, "loss": 1.3072, "num_input_tokens_seen": 186823556, "step": 2821 }, { "epoch": 0.264052042869846, "loss": 1.3670326471328735, "loss_ce": 0.009122512303292751, "loss_iou": 0.54296875, "loss_num": 0.054443359375, "loss_xval": 1.359375, "num_input_tokens_seen": 186823556, "step": 2821 }, { "epoch": 0.2641456451537417, "grad_norm": 23.44801139831543, "learning_rate": 5e-05, "loss": 1.393, "num_input_tokens_seen": 186889228, "step": 2822 }, { "epoch": 0.2641456451537417, "loss": 1.400672197341919, "loss_ce": 0.0063850851729512215, "loss_iou": 0.5703125, "loss_num": 0.05029296875, "loss_xval": 1.390625, "num_input_tokens_seen": 186889228, "step": 2822 }, { "epoch": 0.2642392474376375, "grad_norm": 24.048946380615234, "learning_rate": 5e-05, "loss": 1.4017, "num_input_tokens_seen": 186956736, "step": 2823 }, { "epoch": 0.2642392474376375, "loss": 1.4926737546920776, "loss_ce": 0.004392510745674372, "loss_iou": 0.625, "loss_num": 0.047607421875, "loss_xval": 1.484375, "num_input_tokens_seen": 186956736, "step": 2823 }, { "epoch": 0.2643328497215332, "grad_norm": 20.472166061401367, "learning_rate": 5e-05, "loss": 1.5982, "num_input_tokens_seen": 187022132, "step": 2824 }, { "epoch": 0.2643328497215332, "loss": 1.6638191938400269, "loss_ce": 0.00561615452170372, "loss_iou": 0.69140625, "loss_num": 0.054931640625, "loss_xval": 1.65625, "num_input_tokens_seen": 187022132, "step": 2824 }, { "epoch": 0.2644264520054289, "grad_norm": 17.666561126708984, "learning_rate": 5e-05, "loss": 1.0993, "num_input_tokens_seen": 187089604, "step": 2825 }, { "epoch": 0.2644264520054289, "loss": 1.113770604133606, "loss_ce": 0.005860441364347935, "loss_iou": 0.46484375, "loss_num": 0.03564453125, "loss_xval": 1.109375, "num_input_tokens_seen": 187089604, "step": 2825 }, { "epoch": 0.2645200542893247, "grad_norm": 20.389936447143555, "learning_rate": 5e-05, "loss": 1.4501, "num_input_tokens_seen": 187155168, "step": 2826 }, { "epoch": 0.2645200542893247, "loss": 1.4092261791229248, "loss_ce": 0.004929243586957455, "loss_iou": 0.578125, "loss_num": 0.05029296875, "loss_xval": 1.40625, "num_input_tokens_seen": 187155168, "step": 2826 }, { "epoch": 0.2646136565732204, "grad_norm": 27.508337020874023, "learning_rate": 5e-05, "loss": 1.3513, "num_input_tokens_seen": 187222568, "step": 2827 }, { "epoch": 0.2646136565732204, "loss": 1.421954870223999, "loss_ce": 0.0069158636033535, "loss_iou": 0.578125, "loss_num": 0.0517578125, "loss_xval": 1.4140625, "num_input_tokens_seen": 187222568, "step": 2827 }, { "epoch": 0.2647072588571161, "grad_norm": 25.480224609375, "learning_rate": 5e-05, "loss": 1.5858, "num_input_tokens_seen": 187288552, "step": 2828 }, { "epoch": 0.2647072588571161, "loss": 1.385055422782898, "loss_ce": 0.0032194985542446375, "loss_iou": 0.59375, "loss_num": 0.039306640625, "loss_xval": 1.3828125, "num_input_tokens_seen": 187288552, "step": 2828 }, { "epoch": 0.26480086114101187, "grad_norm": 18.64525604248047, "learning_rate": 5e-05, "loss": 1.0948, "num_input_tokens_seen": 187354668, "step": 2829 }, { "epoch": 0.26480086114101187, "loss": 1.1075193881988525, "loss_ce": 0.004980382043868303, "loss_iou": 0.458984375, "loss_num": 0.03759765625, "loss_xval": 1.1015625, "num_input_tokens_seen": 187354668, "step": 2829 }, { "epoch": 0.2648944634249076, "grad_norm": 19.179067611694336, "learning_rate": 5e-05, "loss": 1.3061, "num_input_tokens_seen": 187420132, "step": 2830 }, { "epoch": 0.2648944634249076, "loss": 1.288672685623169, "loss_ce": 0.0030281427316367626, "loss_iou": 0.53515625, "loss_num": 0.04345703125, "loss_xval": 1.2890625, "num_input_tokens_seen": 187420132, "step": 2830 }, { "epoch": 0.2649880657088033, "grad_norm": 32.32212829589844, "learning_rate": 5e-05, "loss": 1.2404, "num_input_tokens_seen": 187485640, "step": 2831 }, { "epoch": 0.2649880657088033, "loss": 1.1736130714416504, "loss_ce": 0.003447064198553562, "loss_iou": 0.50390625, "loss_num": 0.033203125, "loss_xval": 1.171875, "num_input_tokens_seen": 187485640, "step": 2831 }, { "epoch": 0.265081667992699, "grad_norm": 20.428314208984375, "learning_rate": 5e-05, "loss": 1.5183, "num_input_tokens_seen": 187552516, "step": 2832 }, { "epoch": 0.265081667992699, "loss": 1.547531247138977, "loss_ce": 0.00749216740950942, "loss_iou": 0.6484375, "loss_num": 0.048583984375, "loss_xval": 1.5390625, "num_input_tokens_seen": 187552516, "step": 2832 }, { "epoch": 0.26517527027659477, "grad_norm": 18.128150939941406, "learning_rate": 5e-05, "loss": 1.4426, "num_input_tokens_seen": 187618952, "step": 2833 }, { "epoch": 0.26517527027659477, "loss": 1.3896745443344116, "loss_ce": 0.0039323908276855946, "loss_iou": 0.5625, "loss_num": 0.051025390625, "loss_xval": 1.3828125, "num_input_tokens_seen": 187618952, "step": 2833 }, { "epoch": 0.2652688725604905, "grad_norm": 21.625513076782227, "learning_rate": 5e-05, "loss": 1.3989, "num_input_tokens_seen": 187684672, "step": 2834 }, { "epoch": 0.2652688725604905, "loss": 1.372078537940979, "loss_ce": 0.003426195355132222, "loss_iou": 0.55078125, "loss_num": 0.052734375, "loss_xval": 1.3671875, "num_input_tokens_seen": 187684672, "step": 2834 }, { "epoch": 0.2653624748443862, "grad_norm": 66.1303482055664, "learning_rate": 5e-05, "loss": 1.4198, "num_input_tokens_seen": 187752044, "step": 2835 }, { "epoch": 0.2653624748443862, "loss": 1.0787060260772705, "loss_ce": 0.006684429943561554, "loss_iou": 0.470703125, "loss_num": 0.02587890625, "loss_xval": 1.0703125, "num_input_tokens_seen": 187752044, "step": 2835 }, { "epoch": 0.26545607712828195, "grad_norm": 24.83431625366211, "learning_rate": 5e-05, "loss": 1.4151, "num_input_tokens_seen": 187818192, "step": 2836 }, { "epoch": 0.26545607712828195, "loss": 1.3280391693115234, "loss_ce": 0.004796979017555714, "loss_iou": 0.55078125, "loss_num": 0.044677734375, "loss_xval": 1.3203125, "num_input_tokens_seen": 187818192, "step": 2836 }, { "epoch": 0.26554967941217766, "grad_norm": 29.779151916503906, "learning_rate": 5e-05, "loss": 1.5633, "num_input_tokens_seen": 187884536, "step": 2837 }, { "epoch": 0.26554967941217766, "loss": 1.3751624822616577, "loss_ce": 0.0026038698852062225, "loss_iou": 0.58984375, "loss_num": 0.03857421875, "loss_xval": 1.375, "num_input_tokens_seen": 187884536, "step": 2837 }, { "epoch": 0.2656432816960734, "grad_norm": 19.289663314819336, "learning_rate": 5e-05, "loss": 1.2095, "num_input_tokens_seen": 187951608, "step": 2838 }, { "epoch": 0.2656432816960734, "loss": 1.2762291431427002, "loss_ce": 0.004256377462297678, "loss_iou": 0.50390625, "loss_num": 0.052978515625, "loss_xval": 1.2734375, "num_input_tokens_seen": 187951608, "step": 2838 }, { "epoch": 0.2657368839799691, "grad_norm": 20.678348541259766, "learning_rate": 5e-05, "loss": 1.4225, "num_input_tokens_seen": 188016236, "step": 2839 }, { "epoch": 0.2657368839799691, "loss": 1.25014066696167, "loss_ce": 0.0045352717861533165, "loss_iou": 0.5078125, "loss_num": 0.046142578125, "loss_xval": 1.2421875, "num_input_tokens_seen": 188016236, "step": 2839 }, { "epoch": 0.26583048626386485, "grad_norm": 36.310726165771484, "learning_rate": 5e-05, "loss": 1.3749, "num_input_tokens_seen": 188082668, "step": 2840 }, { "epoch": 0.26583048626386485, "loss": 1.3680238723754883, "loss_ce": 0.005230925511568785, "loss_iou": 0.56640625, "loss_num": 0.046142578125, "loss_xval": 1.359375, "num_input_tokens_seen": 188082668, "step": 2840 }, { "epoch": 0.26592408854776056, "grad_norm": 99.97769927978516, "learning_rate": 5e-05, "loss": 1.4957, "num_input_tokens_seen": 188148920, "step": 2841 }, { "epoch": 0.26592408854776056, "loss": 1.4386107921600342, "loss_ce": 0.006481905467808247, "loss_iou": 0.59375, "loss_num": 0.048095703125, "loss_xval": 1.4296875, "num_input_tokens_seen": 188148920, "step": 2841 }, { "epoch": 0.2660176908316563, "grad_norm": 34.94300079345703, "learning_rate": 5e-05, "loss": 1.1268, "num_input_tokens_seen": 188215264, "step": 2842 }, { "epoch": 0.2660176908316563, "loss": 1.1796425580978394, "loss_ce": 0.009232343174517155, "loss_iou": 0.41015625, "loss_num": 0.06982421875, "loss_xval": 1.171875, "num_input_tokens_seen": 188215264, "step": 2842 }, { "epoch": 0.26611129311555204, "grad_norm": 18.755163192749023, "learning_rate": 5e-05, "loss": 1.088, "num_input_tokens_seen": 188281200, "step": 2843 }, { "epoch": 0.26611129311555204, "loss": 0.9051686525344849, "loss_ce": 0.003557295072823763, "loss_iou": 0.412109375, "loss_num": 0.015625, "loss_xval": 0.90234375, "num_input_tokens_seen": 188281200, "step": 2843 }, { "epoch": 0.26620489539944775, "grad_norm": 40.20686721801758, "learning_rate": 5e-05, "loss": 1.3989, "num_input_tokens_seen": 188347904, "step": 2844 }, { "epoch": 0.26620489539944775, "loss": 1.5748178958892822, "loss_ce": 0.008899862878024578, "loss_iou": 0.625, "loss_num": 0.0625, "loss_xval": 1.5625, "num_input_tokens_seen": 188347904, "step": 2844 }, { "epoch": 0.26629849768334346, "grad_norm": 28.67582130432129, "learning_rate": 5e-05, "loss": 1.2557, "num_input_tokens_seen": 188413588, "step": 2845 }, { "epoch": 0.26629849768334346, "loss": 1.5271004438400269, "loss_ce": 0.013428614474833012, "loss_iou": 0.61328125, "loss_num": 0.0576171875, "loss_xval": 1.515625, "num_input_tokens_seen": 188413588, "step": 2845 }, { "epoch": 0.2663920999672392, "grad_norm": 24.36979103088379, "learning_rate": 5e-05, "loss": 1.0983, "num_input_tokens_seen": 188479768, "step": 2846 }, { "epoch": 0.2663920999672392, "loss": 1.0076212882995605, "loss_ce": 0.005423974245786667, "loss_iou": 0.435546875, "loss_num": 0.0260009765625, "loss_xval": 1.0, "num_input_tokens_seen": 188479768, "step": 2846 }, { "epoch": 0.26648570225113494, "grad_norm": 20.857608795166016, "learning_rate": 5e-05, "loss": 1.4772, "num_input_tokens_seen": 188546268, "step": 2847 }, { "epoch": 0.26648570225113494, "loss": 1.3241288661956787, "loss_ce": 0.004792914725840092, "loss_iou": 0.53515625, "loss_num": 0.04931640625, "loss_xval": 1.3203125, "num_input_tokens_seen": 188546268, "step": 2847 }, { "epoch": 0.26657930453503065, "grad_norm": 21.56261444091797, "learning_rate": 5e-05, "loss": 1.242, "num_input_tokens_seen": 188611944, "step": 2848 }, { "epoch": 0.26657930453503065, "loss": 1.2294247150421143, "loss_ce": 0.007012532092630863, "loss_iou": 0.51953125, "loss_num": 0.035888671875, "loss_xval": 1.21875, "num_input_tokens_seen": 188611944, "step": 2848 }, { "epoch": 0.26667290681892636, "grad_norm": 18.722837448120117, "learning_rate": 5e-05, "loss": 1.3164, "num_input_tokens_seen": 188679540, "step": 2849 }, { "epoch": 0.26667290681892636, "loss": 1.3445260524749756, "loss_ce": 0.0032175127416849136, "loss_iou": 0.5703125, "loss_num": 0.040771484375, "loss_xval": 1.34375, "num_input_tokens_seen": 188679540, "step": 2849 }, { "epoch": 0.2667665091028221, "grad_norm": 25.41478729248047, "learning_rate": 5e-05, "loss": 1.7732, "num_input_tokens_seen": 188746728, "step": 2850 }, { "epoch": 0.2667665091028221, "loss": 1.7352827787399292, "loss_ce": 0.010673293843865395, "loss_iou": 0.6953125, "loss_num": 0.06689453125, "loss_xval": 1.7265625, "num_input_tokens_seen": 188746728, "step": 2850 }, { "epoch": 0.26686011138671784, "grad_norm": 47.63404083251953, "learning_rate": 5e-05, "loss": 1.7756, "num_input_tokens_seen": 188813028, "step": 2851 }, { "epoch": 0.26686011138671784, "loss": 1.6079998016357422, "loss_ce": 0.004484123550355434, "loss_iou": 0.69140625, "loss_num": 0.043701171875, "loss_xval": 1.6015625, "num_input_tokens_seen": 188813028, "step": 2851 }, { "epoch": 0.26695371367061355, "grad_norm": 27.597810745239258, "learning_rate": 5e-05, "loss": 1.6244, "num_input_tokens_seen": 188879340, "step": 2852 }, { "epoch": 0.26695371367061355, "loss": 1.4877656698226929, "loss_ce": 0.013156229630112648, "loss_iou": 0.59375, "loss_num": 0.05810546875, "loss_xval": 1.4765625, "num_input_tokens_seen": 188879340, "step": 2852 }, { "epoch": 0.2670473159545093, "grad_norm": 20.776538848876953, "learning_rate": 5e-05, "loss": 1.1581, "num_input_tokens_seen": 188945600, "step": 2853 }, { "epoch": 0.2670473159545093, "loss": 1.2930817604064941, "loss_ce": 0.0030426857993006706, "loss_iou": 0.57421875, "loss_num": 0.0286865234375, "loss_xval": 1.2890625, "num_input_tokens_seen": 188945600, "step": 2853 }, { "epoch": 0.267140918238405, "grad_norm": 32.74734115600586, "learning_rate": 5e-05, "loss": 1.3094, "num_input_tokens_seen": 189011012, "step": 2854 }, { "epoch": 0.267140918238405, "loss": 1.3553078174591064, "loss_ce": 0.003257071366533637, "loss_iou": 0.58203125, "loss_num": 0.037841796875, "loss_xval": 1.3515625, "num_input_tokens_seen": 189011012, "step": 2854 }, { "epoch": 0.26723452052230073, "grad_norm": 24.080137252807617, "learning_rate": 5e-05, "loss": 1.308, "num_input_tokens_seen": 189077788, "step": 2855 }, { "epoch": 0.26723452052230073, "loss": 1.4663535356521606, "loss_ce": 0.005416055675595999, "loss_iou": 0.59765625, "loss_num": 0.05322265625, "loss_xval": 1.4609375, "num_input_tokens_seen": 189077788, "step": 2855 }, { "epoch": 0.26732812280619644, "grad_norm": 24.878042221069336, "learning_rate": 5e-05, "loss": 1.3854, "num_input_tokens_seen": 189145404, "step": 2856 }, { "epoch": 0.26732812280619644, "loss": 1.4070184230804443, "loss_ce": 0.004674690775573254, "loss_iou": 0.6015625, "loss_num": 0.039306640625, "loss_xval": 1.40625, "num_input_tokens_seen": 189145404, "step": 2856 }, { "epoch": 0.2674217250900922, "grad_norm": 43.244544982910156, "learning_rate": 5e-05, "loss": 1.3105, "num_input_tokens_seen": 189211512, "step": 2857 }, { "epoch": 0.2674217250900922, "loss": 1.3713688850402832, "loss_ce": 0.002716538030654192, "loss_iou": 0.59765625, "loss_num": 0.03466796875, "loss_xval": 1.3671875, "num_input_tokens_seen": 189211512, "step": 2857 }, { "epoch": 0.2675153273739879, "grad_norm": 33.19589614868164, "learning_rate": 5e-05, "loss": 1.2343, "num_input_tokens_seen": 189277824, "step": 2858 }, { "epoch": 0.2675153273739879, "loss": 1.409435749053955, "loss_ce": 0.00709204887971282, "loss_iou": 0.6015625, "loss_num": 0.040283203125, "loss_xval": 1.40625, "num_input_tokens_seen": 189277824, "step": 2858 }, { "epoch": 0.26760892965788363, "grad_norm": 25.327096939086914, "learning_rate": 5e-05, "loss": 1.6612, "num_input_tokens_seen": 189345460, "step": 2859 }, { "epoch": 0.26760892965788363, "loss": 1.7268409729003906, "loss_ce": 0.011997177265584469, "loss_iou": 0.6875, "loss_num": 0.0673828125, "loss_xval": 1.71875, "num_input_tokens_seen": 189345460, "step": 2859 }, { "epoch": 0.2677025319417794, "grad_norm": 35.089046478271484, "learning_rate": 5e-05, "loss": 1.2811, "num_input_tokens_seen": 189411616, "step": 2860 }, { "epoch": 0.2677025319417794, "loss": 1.1830344200134277, "loss_ce": 0.006429228000342846, "loss_iou": 0.490234375, "loss_num": 0.039306640625, "loss_xval": 1.1796875, "num_input_tokens_seen": 189411616, "step": 2860 }, { "epoch": 0.2677961342256751, "grad_norm": 22.38773536682129, "learning_rate": 5e-05, "loss": 1.3018, "num_input_tokens_seen": 189478252, "step": 2861 }, { "epoch": 0.2677961342256751, "loss": 1.30426824092865, "loss_ce": 0.005928391590714455, "loss_iou": 0.54296875, "loss_num": 0.04150390625, "loss_xval": 1.296875, "num_input_tokens_seen": 189478252, "step": 2861 }, { "epoch": 0.2678897365095708, "grad_norm": 13.499034881591797, "learning_rate": 5e-05, "loss": 1.381, "num_input_tokens_seen": 189544556, "step": 2862 }, { "epoch": 0.2678897365095708, "loss": 1.378824234008789, "loss_ce": 0.004068333189934492, "loss_iou": 0.53515625, "loss_num": 0.0615234375, "loss_xval": 1.375, "num_input_tokens_seen": 189544556, "step": 2862 }, { "epoch": 0.2679833387934666, "grad_norm": 28.108396530151367, "learning_rate": 5e-05, "loss": 1.1849, "num_input_tokens_seen": 189610940, "step": 2863 }, { "epoch": 0.2679833387934666, "loss": 1.0894317626953125, "loss_ce": 0.0059356591664254665, "loss_iou": 0.453125, "loss_num": 0.035400390625, "loss_xval": 1.0859375, "num_input_tokens_seen": 189610940, "step": 2863 }, { "epoch": 0.2680769410773623, "grad_norm": 30.4018611907959, "learning_rate": 5e-05, "loss": 1.3455, "num_input_tokens_seen": 189677316, "step": 2864 }, { "epoch": 0.2680769410773623, "loss": 1.3459986448287964, "loss_ce": 0.003713519312441349, "loss_iou": 0.58203125, "loss_num": 0.035888671875, "loss_xval": 1.34375, "num_input_tokens_seen": 189677316, "step": 2864 }, { "epoch": 0.268170543361258, "grad_norm": 21.577816009521484, "learning_rate": 5e-05, "loss": 1.735, "num_input_tokens_seen": 189743008, "step": 2865 }, { "epoch": 0.268170543361258, "loss": 1.7270687818527222, "loss_ce": 0.00978353712707758, "loss_iou": 0.71875, "loss_num": 0.056640625, "loss_xval": 1.71875, "num_input_tokens_seen": 189743008, "step": 2865 }, { "epoch": 0.2682641456451537, "grad_norm": 15.325296401977539, "learning_rate": 5e-05, "loss": 1.1921, "num_input_tokens_seen": 189809328, "step": 2866 }, { "epoch": 0.2682641456451537, "loss": 1.2022290229797363, "loss_ce": 0.005939934402704239, "loss_iou": 0.48046875, "loss_num": 0.046630859375, "loss_xval": 1.1953125, "num_input_tokens_seen": 189809328, "step": 2866 }, { "epoch": 0.2683577479290495, "grad_norm": 26.48148536682129, "learning_rate": 5e-05, "loss": 1.4575, "num_input_tokens_seen": 189876472, "step": 2867 }, { "epoch": 0.2683577479290495, "loss": 1.2837471961975098, "loss_ce": 0.002985516097396612, "loss_iou": 0.53515625, "loss_num": 0.0419921875, "loss_xval": 1.28125, "num_input_tokens_seen": 189876472, "step": 2867 }, { "epoch": 0.2684513502129452, "grad_norm": 23.65822410583496, "learning_rate": 5e-05, "loss": 1.5448, "num_input_tokens_seen": 189942504, "step": 2868 }, { "epoch": 0.2684513502129452, "loss": 1.5133476257324219, "loss_ce": 0.005535096861422062, "loss_iou": 0.6328125, "loss_num": 0.048095703125, "loss_xval": 1.5078125, "num_input_tokens_seen": 189942504, "step": 2868 }, { "epoch": 0.2685449524968409, "grad_norm": 35.416561126708984, "learning_rate": 5e-05, "loss": 1.5336, "num_input_tokens_seen": 190008468, "step": 2869 }, { "epoch": 0.2685449524968409, "loss": 1.5356783866882324, "loss_ce": 0.0034517257008701563, "loss_iou": 0.64453125, "loss_num": 0.048095703125, "loss_xval": 1.53125, "num_input_tokens_seen": 190008468, "step": 2869 }, { "epoch": 0.26863855478073667, "grad_norm": 25.929424285888672, "learning_rate": 5e-05, "loss": 1.6353, "num_input_tokens_seen": 190074796, "step": 2870 }, { "epoch": 0.26863855478073667, "loss": 1.4969170093536377, "loss_ce": 0.006682596169412136, "loss_iou": 0.625, "loss_num": 0.04736328125, "loss_xval": 1.4921875, "num_input_tokens_seen": 190074796, "step": 2870 }, { "epoch": 0.2687321570646324, "grad_norm": 16.10113525390625, "learning_rate": 5e-05, "loss": 1.2961, "num_input_tokens_seen": 190141280, "step": 2871 }, { "epoch": 0.2687321570646324, "loss": 1.2921521663665771, "loss_ce": 0.006507670972496271, "loss_iou": 0.53515625, "loss_num": 0.04296875, "loss_xval": 1.2890625, "num_input_tokens_seen": 190141280, "step": 2871 }, { "epoch": 0.2688257593485281, "grad_norm": 19.313873291015625, "learning_rate": 5e-05, "loss": 1.3619, "num_input_tokens_seen": 190207592, "step": 2872 }, { "epoch": 0.2688257593485281, "loss": 1.3472394943237305, "loss_ce": 0.007395853754132986, "loss_iou": 0.59375, "loss_num": 0.0302734375, "loss_xval": 1.34375, "num_input_tokens_seen": 190207592, "step": 2872 }, { "epoch": 0.2689193616324238, "grad_norm": 28.079877853393555, "learning_rate": 5e-05, "loss": 1.1623, "num_input_tokens_seen": 190272680, "step": 2873 }, { "epoch": 0.2689193616324238, "loss": 1.1926419734954834, "loss_ce": 0.006240631453692913, "loss_iou": 0.4765625, "loss_num": 0.046875, "loss_xval": 1.1875, "num_input_tokens_seen": 190272680, "step": 2873 }, { "epoch": 0.26901296391631957, "grad_norm": 44.42527389526367, "learning_rate": 5e-05, "loss": 1.3441, "num_input_tokens_seen": 190338988, "step": 2874 }, { "epoch": 0.26901296391631957, "loss": 1.4027814865112305, "loss_ce": 0.004832353442907333, "loss_iou": 0.6171875, "loss_num": 0.03369140625, "loss_xval": 1.3984375, "num_input_tokens_seen": 190338988, "step": 2874 }, { "epoch": 0.2691065662002153, "grad_norm": 32.68692398071289, "learning_rate": 5e-05, "loss": 1.2964, "num_input_tokens_seen": 190405276, "step": 2875 }, { "epoch": 0.2691065662002153, "loss": 1.1693854331970215, "loss_ce": 0.0037360701244324446, "loss_iou": 0.498046875, "loss_num": 0.033935546875, "loss_xval": 1.1640625, "num_input_tokens_seen": 190405276, "step": 2875 }, { "epoch": 0.269200168484111, "grad_norm": 24.40705680847168, "learning_rate": 5e-05, "loss": 1.3722, "num_input_tokens_seen": 190469664, "step": 2876 }, { "epoch": 0.269200168484111, "loss": 1.316448450088501, "loss_ce": 0.0034601371735334396, "loss_iou": 0.55859375, "loss_num": 0.0390625, "loss_xval": 1.3125, "num_input_tokens_seen": 190469664, "step": 2876 }, { "epoch": 0.26929377076800676, "grad_norm": 34.18718338012695, "learning_rate": 5e-05, "loss": 1.3552, "num_input_tokens_seen": 190536500, "step": 2877 }, { "epoch": 0.26929377076800676, "loss": 1.2667336463928223, "loss_ce": 0.01185077615082264, "loss_iou": 0.5234375, "loss_num": 0.041015625, "loss_xval": 1.2578125, "num_input_tokens_seen": 190536500, "step": 2877 }, { "epoch": 0.26938737305190247, "grad_norm": 35.88064193725586, "learning_rate": 5e-05, "loss": 1.2269, "num_input_tokens_seen": 190603388, "step": 2878 }, { "epoch": 0.26938737305190247, "loss": 1.1076096296310425, "loss_ce": 0.007511980831623077, "loss_iou": 0.494140625, "loss_num": 0.0220947265625, "loss_xval": 1.1015625, "num_input_tokens_seen": 190603388, "step": 2878 }, { "epoch": 0.2694809753357982, "grad_norm": 19.359575271606445, "learning_rate": 5e-05, "loss": 1.5661, "num_input_tokens_seen": 190668976, "step": 2879 }, { "epoch": 0.2694809753357982, "loss": 1.6734386682510376, "loss_ce": 0.00351678766310215, "loss_iou": 0.72265625, "loss_num": 0.04541015625, "loss_xval": 1.671875, "num_input_tokens_seen": 190668976, "step": 2879 }, { "epoch": 0.26957457761969394, "grad_norm": 28.439434051513672, "learning_rate": 5e-05, "loss": 1.3632, "num_input_tokens_seen": 190734648, "step": 2880 }, { "epoch": 0.26957457761969394, "loss": 1.3256981372833252, "loss_ce": 0.004409067332744598, "loss_iou": 0.55859375, "loss_num": 0.041259765625, "loss_xval": 1.3203125, "num_input_tokens_seen": 190734648, "step": 2880 }, { "epoch": 0.26966817990358966, "grad_norm": 37.80298614501953, "learning_rate": 5e-05, "loss": 1.6045, "num_input_tokens_seen": 190802596, "step": 2881 }, { "epoch": 0.26966817990358966, "loss": 1.8723331689834595, "loss_ce": 0.011493292637169361, "loss_iou": 0.73046875, "loss_num": 0.080078125, "loss_xval": 1.859375, "num_input_tokens_seen": 190802596, "step": 2881 }, { "epoch": 0.26976178218748537, "grad_norm": 18.29846954345703, "learning_rate": 5e-05, "loss": 1.6557, "num_input_tokens_seen": 190867928, "step": 2882 }, { "epoch": 0.26976178218748537, "loss": 1.7859479188919067, "loss_ce": 0.005552499555051327, "loss_iou": 0.671875, "loss_num": 0.087890625, "loss_xval": 1.78125, "num_input_tokens_seen": 190867928, "step": 2882 }, { "epoch": 0.2698553844713811, "grad_norm": 32.939884185791016, "learning_rate": 5e-05, "loss": 1.3791, "num_input_tokens_seen": 190933964, "step": 2883 }, { "epoch": 0.2698553844713811, "loss": 1.2570348978042603, "loss_ce": 0.007523206993937492, "loss_iou": 0.5234375, "loss_num": 0.040283203125, "loss_xval": 1.25, "num_input_tokens_seen": 190933964, "step": 2883 }, { "epoch": 0.26994898675527684, "grad_norm": 25.31010627746582, "learning_rate": 5e-05, "loss": 1.2881, "num_input_tokens_seen": 191000160, "step": 2884 }, { "epoch": 0.26994898675527684, "loss": 1.1675686836242676, "loss_ce": 0.0059475041925907135, "loss_iou": 0.5234375, "loss_num": 0.0234375, "loss_xval": 1.1640625, "num_input_tokens_seen": 191000160, "step": 2884 }, { "epoch": 0.27004258903917255, "grad_norm": 24.51418113708496, "learning_rate": 5e-05, "loss": 1.5254, "num_input_tokens_seen": 191067692, "step": 2885 }, { "epoch": 0.27004258903917255, "loss": 1.6295093297958374, "loss_ce": 0.0035327691584825516, "loss_iou": 0.6796875, "loss_num": 0.053466796875, "loss_xval": 1.625, "num_input_tokens_seen": 191067692, "step": 2885 }, { "epoch": 0.27013619132306826, "grad_norm": 54.28732681274414, "learning_rate": 5e-05, "loss": 1.5643, "num_input_tokens_seen": 191133808, "step": 2886 }, { "epoch": 0.27013619132306826, "loss": 1.6064716577529907, "loss_ce": 0.006618119310587645, "loss_iou": 0.6796875, "loss_num": 0.0478515625, "loss_xval": 1.6015625, "num_input_tokens_seen": 191133808, "step": 2886 }, { "epoch": 0.27022979360696403, "grad_norm": 36.00764465332031, "learning_rate": 5e-05, "loss": 1.6871, "num_input_tokens_seen": 191200632, "step": 2887 }, { "epoch": 0.27022979360696403, "loss": 1.5753734111785889, "loss_ce": 0.005549072287976742, "loss_iou": 0.65625, "loss_num": 0.052001953125, "loss_xval": 1.5703125, "num_input_tokens_seen": 191200632, "step": 2887 }, { "epoch": 0.27032339589085974, "grad_norm": 17.422168731689453, "learning_rate": 5e-05, "loss": 1.3207, "num_input_tokens_seen": 191266844, "step": 2888 }, { "epoch": 0.27032339589085974, "loss": 1.2167034149169922, "loss_ce": 0.005277687218040228, "loss_iou": 0.53515625, "loss_num": 0.0279541015625, "loss_xval": 1.2109375, "num_input_tokens_seen": 191266844, "step": 2888 }, { "epoch": 0.27041699817475545, "grad_norm": 17.126590728759766, "learning_rate": 5e-05, "loss": 1.2939, "num_input_tokens_seen": 191333324, "step": 2889 }, { "epoch": 0.27041699817475545, "loss": 1.2734203338623047, "loss_ce": 0.002912552561610937, "loss_iou": 0.5546875, "loss_num": 0.03271484375, "loss_xval": 1.2734375, "num_input_tokens_seen": 191333324, "step": 2889 }, { "epoch": 0.2705106004586512, "grad_norm": 18.5518798828125, "learning_rate": 5e-05, "loss": 1.3767, "num_input_tokens_seen": 191400596, "step": 2890 }, { "epoch": 0.2705106004586512, "loss": 1.410339117050171, "loss_ce": 0.004577350337058306, "loss_iou": 0.58203125, "loss_num": 0.048828125, "loss_xval": 1.40625, "num_input_tokens_seen": 191400596, "step": 2890 }, { "epoch": 0.27060420274254693, "grad_norm": 21.558935165405273, "learning_rate": 5e-05, "loss": 1.539, "num_input_tokens_seen": 191466084, "step": 2891 }, { "epoch": 0.27060420274254693, "loss": 1.5051676034927368, "loss_ce": 0.006144177168607712, "loss_iou": 0.66015625, "loss_num": 0.03564453125, "loss_xval": 1.5, "num_input_tokens_seen": 191466084, "step": 2891 }, { "epoch": 0.27069780502644264, "grad_norm": 33.934967041015625, "learning_rate": 5e-05, "loss": 1.3565, "num_input_tokens_seen": 191533188, "step": 2892 }, { "epoch": 0.27069780502644264, "loss": 1.3920419216156006, "loss_ce": 0.004346621688455343, "loss_iou": 0.61328125, "loss_num": 0.03173828125, "loss_xval": 1.390625, "num_input_tokens_seen": 191533188, "step": 2892 }, { "epoch": 0.27079140731033835, "grad_norm": 26.771821975708008, "learning_rate": 5e-05, "loss": 1.4158, "num_input_tokens_seen": 191599268, "step": 2893 }, { "epoch": 0.27079140731033835, "loss": 1.4195261001586914, "loss_ce": 0.0064400564879179, "loss_iou": 0.62890625, "loss_num": 0.0301513671875, "loss_xval": 1.4140625, "num_input_tokens_seen": 191599268, "step": 2893 }, { "epoch": 0.2708850095942341, "grad_norm": 34.096168518066406, "learning_rate": 5e-05, "loss": 1.4672, "num_input_tokens_seen": 191666396, "step": 2894 }, { "epoch": 0.2708850095942341, "loss": 1.256096363067627, "loss_ce": 0.0021900050342082977, "loss_iou": 0.546875, "loss_num": 0.031494140625, "loss_xval": 1.25, "num_input_tokens_seen": 191666396, "step": 2894 }, { "epoch": 0.2709786118781298, "grad_norm": 22.507471084594727, "learning_rate": 5e-05, "loss": 1.5978, "num_input_tokens_seen": 191732840, "step": 2895 }, { "epoch": 0.2709786118781298, "loss": 1.6347594261169434, "loss_ce": 0.008294529281556606, "loss_iou": 0.6484375, "loss_num": 0.06591796875, "loss_xval": 1.625, "num_input_tokens_seen": 191732840, "step": 2895 }, { "epoch": 0.27107221416202554, "grad_norm": 18.68686866760254, "learning_rate": 5e-05, "loss": 1.2171, "num_input_tokens_seen": 191798476, "step": 2896 }, { "epoch": 0.27107221416202554, "loss": 1.451460838317871, "loss_ce": 0.0056601292453706264, "loss_iou": 0.578125, "loss_num": 0.057861328125, "loss_xval": 1.4453125, "num_input_tokens_seen": 191798476, "step": 2896 }, { "epoch": 0.2711658164459213, "grad_norm": 16.7393856048584, "learning_rate": 5e-05, "loss": 1.3787, "num_input_tokens_seen": 191865140, "step": 2897 }, { "epoch": 0.2711658164459213, "loss": 1.4760732650756836, "loss_ce": 0.0029288004152476788, "loss_iou": 0.58984375, "loss_num": 0.0595703125, "loss_xval": 1.4765625, "num_input_tokens_seen": 191865140, "step": 2897 }, { "epoch": 0.271259418729817, "grad_norm": 34.328731536865234, "learning_rate": 5e-05, "loss": 1.1515, "num_input_tokens_seen": 191930724, "step": 2898 }, { "epoch": 0.271259418729817, "loss": 1.3820092678070068, "loss_ce": 0.005056167021393776, "loss_iou": 0.546875, "loss_num": 0.056396484375, "loss_xval": 1.375, "num_input_tokens_seen": 191930724, "step": 2898 }, { "epoch": 0.2713530210137127, "grad_norm": 27.414579391479492, "learning_rate": 5e-05, "loss": 1.3494, "num_input_tokens_seen": 191997548, "step": 2899 }, { "epoch": 0.2713530210137127, "loss": 1.383734941482544, "loss_ce": 0.006781915668398142, "loss_iou": 0.5859375, "loss_num": 0.041015625, "loss_xval": 1.375, "num_input_tokens_seen": 191997548, "step": 2899 }, { "epoch": 0.27144662329760844, "grad_norm": 32.842403411865234, "learning_rate": 5e-05, "loss": 1.5143, "num_input_tokens_seen": 192063176, "step": 2900 }, { "epoch": 0.27144662329760844, "loss": 1.651855230331421, "loss_ce": 0.0039059417322278023, "loss_iou": 0.6796875, "loss_num": 0.056884765625, "loss_xval": 1.6484375, "num_input_tokens_seen": 192063176, "step": 2900 }, { "epoch": 0.2715402255815042, "grad_norm": 31.889387130737305, "learning_rate": 5e-05, "loss": 1.4789, "num_input_tokens_seen": 192130028, "step": 2901 }, { "epoch": 0.2715402255815042, "loss": 1.3662561178207397, "loss_ce": 0.0039514051750302315, "loss_iou": 0.58203125, "loss_num": 0.039306640625, "loss_xval": 1.359375, "num_input_tokens_seen": 192130028, "step": 2901 }, { "epoch": 0.2716338278653999, "grad_norm": 18.446582794189453, "learning_rate": 5e-05, "loss": 1.3101, "num_input_tokens_seen": 192197000, "step": 2902 }, { "epoch": 0.2716338278653999, "loss": 1.2791718244552612, "loss_ce": 0.00866398960351944, "loss_iou": 0.546875, "loss_num": 0.035400390625, "loss_xval": 1.2734375, "num_input_tokens_seen": 192197000, "step": 2902 }, { "epoch": 0.2717274301492956, "grad_norm": 29.704530715942383, "learning_rate": 5e-05, "loss": 1.3292, "num_input_tokens_seen": 192263336, "step": 2903 }, { "epoch": 0.2717274301492956, "loss": 1.4902559518814087, "loss_ce": 0.00832241028547287, "loss_iou": 0.6484375, "loss_num": 0.037353515625, "loss_xval": 1.484375, "num_input_tokens_seen": 192263336, "step": 2903 }, { "epoch": 0.2718210324331914, "grad_norm": 51.86347198486328, "learning_rate": 5e-05, "loss": 1.365, "num_input_tokens_seen": 192329180, "step": 2904 }, { "epoch": 0.2718210324331914, "loss": 1.4872848987579346, "loss_ce": 0.004863027948886156, "loss_iou": 0.6328125, "loss_num": 0.04296875, "loss_xval": 1.484375, "num_input_tokens_seen": 192329180, "step": 2904 }, { "epoch": 0.2719146347170871, "grad_norm": 38.360008239746094, "learning_rate": 5e-05, "loss": 1.6924, "num_input_tokens_seen": 192396212, "step": 2905 }, { "epoch": 0.2719146347170871, "loss": 1.7409523725509644, "loss_ce": 0.00657739769667387, "loss_iou": 0.72265625, "loss_num": 0.0576171875, "loss_xval": 1.734375, "num_input_tokens_seen": 192396212, "step": 2905 }, { "epoch": 0.2720082370009828, "grad_norm": 56.90835952758789, "learning_rate": 5e-05, "loss": 1.2948, "num_input_tokens_seen": 192462316, "step": 2906 }, { "epoch": 0.2720082370009828, "loss": 1.3784031867980957, "loss_ce": 0.0034032172989100218, "loss_iou": 0.55859375, "loss_num": 0.05224609375, "loss_xval": 1.375, "num_input_tokens_seen": 192462316, "step": 2906 }, { "epoch": 0.2721018392848786, "grad_norm": 26.823774337768555, "learning_rate": 5e-05, "loss": 1.2908, "num_input_tokens_seen": 192528460, "step": 2907 }, { "epoch": 0.2721018392848786, "loss": 1.345680594444275, "loss_ce": 0.006813411600887775, "loss_iou": 0.59375, "loss_num": 0.0299072265625, "loss_xval": 1.3359375, "num_input_tokens_seen": 192528460, "step": 2907 }, { "epoch": 0.2721954415687743, "grad_norm": 28.281461715698242, "learning_rate": 5e-05, "loss": 1.5285, "num_input_tokens_seen": 192594860, "step": 2908 }, { "epoch": 0.2721954415687743, "loss": 1.528496265411377, "loss_ce": 0.005058795213699341, "loss_iou": 0.64453125, "loss_num": 0.047119140625, "loss_xval": 1.5234375, "num_input_tokens_seen": 192594860, "step": 2908 }, { "epoch": 0.27228904385267, "grad_norm": 28.82747459411621, "learning_rate": 5e-05, "loss": 1.3875, "num_input_tokens_seen": 192660764, "step": 2909 }, { "epoch": 0.27228904385267, "loss": 1.5593502521514893, "loss_ce": 0.004662739112973213, "loss_iou": 0.67578125, "loss_num": 0.04052734375, "loss_xval": 1.5546875, "num_input_tokens_seen": 192660764, "step": 2909 }, { "epoch": 0.2723826461365657, "grad_norm": 20.43279266357422, "learning_rate": 5e-05, "loss": 1.3634, "num_input_tokens_seen": 192727436, "step": 2910 }, { "epoch": 0.2723826461365657, "loss": 1.2430782318115234, "loss_ce": 0.005773566663265228, "loss_iou": 0.515625, "loss_num": 0.040283203125, "loss_xval": 1.234375, "num_input_tokens_seen": 192727436, "step": 2910 }, { "epoch": 0.2724762484204615, "grad_norm": 32.585853576660156, "learning_rate": 5e-05, "loss": 1.3047, "num_input_tokens_seen": 192793132, "step": 2911 }, { "epoch": 0.2724762484204615, "loss": 1.2785714864730835, "loss_ce": 0.003180821891874075, "loss_iou": 0.5390625, "loss_num": 0.03857421875, "loss_xval": 1.2734375, "num_input_tokens_seen": 192793132, "step": 2911 }, { "epoch": 0.2725698507043572, "grad_norm": 26.237361907958984, "learning_rate": 5e-05, "loss": 1.5109, "num_input_tokens_seen": 192859248, "step": 2912 }, { "epoch": 0.2725698507043572, "loss": 1.487302303314209, "loss_ce": 0.005368643440306187, "loss_iou": 0.578125, "loss_num": 0.0654296875, "loss_xval": 1.484375, "num_input_tokens_seen": 192859248, "step": 2912 }, { "epoch": 0.2726634529882529, "grad_norm": 21.532819747924805, "learning_rate": 5e-05, "loss": 1.5673, "num_input_tokens_seen": 192924620, "step": 2913 }, { "epoch": 0.2726634529882529, "loss": 1.6626427173614502, "loss_ce": 0.0024864422157406807, "loss_iou": 0.66796875, "loss_num": 0.0654296875, "loss_xval": 1.65625, "num_input_tokens_seen": 192924620, "step": 2913 }, { "epoch": 0.27275705527214866, "grad_norm": 169.73837280273438, "learning_rate": 5e-05, "loss": 1.3346, "num_input_tokens_seen": 192990120, "step": 2914 }, { "epoch": 0.27275705527214866, "loss": 1.394890308380127, "loss_ce": 0.002800529822707176, "loss_iou": 0.578125, "loss_num": 0.04736328125, "loss_xval": 1.390625, "num_input_tokens_seen": 192990120, "step": 2914 }, { "epoch": 0.2728506575560444, "grad_norm": 20.661970138549805, "learning_rate": 5e-05, "loss": 1.4836, "num_input_tokens_seen": 193056160, "step": 2915 }, { "epoch": 0.2728506575560444, "loss": 1.3897316455841064, "loss_ce": 0.0035012071020901203, "loss_iou": 0.58203125, "loss_num": 0.045166015625, "loss_xval": 1.3828125, "num_input_tokens_seen": 193056160, "step": 2915 }, { "epoch": 0.2729442598399401, "grad_norm": 36.34321594238281, "learning_rate": 5e-05, "loss": 1.2865, "num_input_tokens_seen": 193122320, "step": 2916 }, { "epoch": 0.2729442598399401, "loss": 1.4770772457122803, "loss_ce": 0.004421040415763855, "loss_iou": 0.609375, "loss_num": 0.051025390625, "loss_xval": 1.46875, "num_input_tokens_seen": 193122320, "step": 2916 }, { "epoch": 0.2730378621238358, "grad_norm": 23.29226303100586, "learning_rate": 5e-05, "loss": 1.7117, "num_input_tokens_seen": 193188984, "step": 2917 }, { "epoch": 0.2730378621238358, "loss": 1.7196775674819946, "loss_ce": 0.004833762533962727, "loss_iou": 0.74609375, "loss_num": 0.045166015625, "loss_xval": 1.71875, "num_input_tokens_seen": 193188984, "step": 2917 }, { "epoch": 0.27313146440773156, "grad_norm": 17.75802230834961, "learning_rate": 5e-05, "loss": 1.3121, "num_input_tokens_seen": 193255256, "step": 2918 }, { "epoch": 0.27313146440773156, "loss": 1.1056413650512695, "loss_ce": 0.0035905414260923862, "loss_iou": 0.4375, "loss_num": 0.0458984375, "loss_xval": 1.1015625, "num_input_tokens_seen": 193255256, "step": 2918 }, { "epoch": 0.27322506669162727, "grad_norm": 15.197121620178223, "learning_rate": 5e-05, "loss": 1.2931, "num_input_tokens_seen": 193321308, "step": 2919 }, { "epoch": 0.27322506669162727, "loss": 1.4740924835205078, "loss_ce": 0.004854124039411545, "loss_iou": 0.5859375, "loss_num": 0.059814453125, "loss_xval": 1.46875, "num_input_tokens_seen": 193321308, "step": 2919 }, { "epoch": 0.273318668975523, "grad_norm": 14.801645278930664, "learning_rate": 5e-05, "loss": 1.266, "num_input_tokens_seen": 193386928, "step": 2920 }, { "epoch": 0.273318668975523, "loss": 1.546339511871338, "loss_ce": 0.00630049267783761, "loss_iou": 0.625, "loss_num": 0.05859375, "loss_xval": 1.5390625, "num_input_tokens_seen": 193386928, "step": 2920 }, { "epoch": 0.27341227125941875, "grad_norm": 35.99678039550781, "learning_rate": 5e-05, "loss": 1.5811, "num_input_tokens_seen": 193454828, "step": 2921 }, { "epoch": 0.27341227125941875, "loss": 1.6381123065948486, "loss_ce": 0.00627634534612298, "loss_iou": 0.62890625, "loss_num": 0.07470703125, "loss_xval": 1.6328125, "num_input_tokens_seen": 193454828, "step": 2921 }, { "epoch": 0.27350587354331446, "grad_norm": 24.44113540649414, "learning_rate": 5e-05, "loss": 1.4893, "num_input_tokens_seen": 193520452, "step": 2922 }, { "epoch": 0.27350587354331446, "loss": 1.6143724918365479, "loss_ce": 0.003044459968805313, "loss_iou": 0.64453125, "loss_num": 0.06494140625, "loss_xval": 1.609375, "num_input_tokens_seen": 193520452, "step": 2922 }, { "epoch": 0.27359947582721017, "grad_norm": 20.205230712890625, "learning_rate": 5e-05, "loss": 1.3415, "num_input_tokens_seen": 193586104, "step": 2923 }, { "epoch": 0.27359947582721017, "loss": 1.3367654085159302, "loss_ce": 0.014988003298640251, "loss_iou": 0.50390625, "loss_num": 0.0634765625, "loss_xval": 1.3203125, "num_input_tokens_seen": 193586104, "step": 2923 }, { "epoch": 0.27369307811110594, "grad_norm": 22.40543556213379, "learning_rate": 5e-05, "loss": 1.63, "num_input_tokens_seen": 193654220, "step": 2924 }, { "epoch": 0.27369307811110594, "loss": 1.5842056274414062, "loss_ce": 0.008033794350922108, "loss_iou": 0.65625, "loss_num": 0.052734375, "loss_xval": 1.578125, "num_input_tokens_seen": 193654220, "step": 2924 }, { "epoch": 0.27378668039500165, "grad_norm": 31.34164047241211, "learning_rate": 5e-05, "loss": 1.3402, "num_input_tokens_seen": 193720852, "step": 2925 }, { "epoch": 0.27378668039500165, "loss": 1.2183868885040283, "loss_ce": 0.0030548027716577053, "loss_iou": 0.50390625, "loss_num": 0.042236328125, "loss_xval": 1.21875, "num_input_tokens_seen": 193720852, "step": 2925 }, { "epoch": 0.27388028267889736, "grad_norm": 21.536550521850586, "learning_rate": 5e-05, "loss": 1.5679, "num_input_tokens_seen": 193787988, "step": 2926 }, { "epoch": 0.27388028267889736, "loss": 1.6892585754394531, "loss_ce": 0.009571101516485214, "loss_iou": 0.66796875, "loss_num": 0.068359375, "loss_xval": 1.6796875, "num_input_tokens_seen": 193787988, "step": 2926 }, { "epoch": 0.27397388496279307, "grad_norm": 26.01751136779785, "learning_rate": 5e-05, "loss": 1.4598, "num_input_tokens_seen": 193853776, "step": 2927 }, { "epoch": 0.27397388496279307, "loss": 1.3240104913711548, "loss_ce": 0.00955736543983221, "loss_iou": 0.5078125, "loss_num": 0.059326171875, "loss_xval": 1.3125, "num_input_tokens_seen": 193853776, "step": 2927 }, { "epoch": 0.27406748724668883, "grad_norm": 19.382383346557617, "learning_rate": 5e-05, "loss": 1.4183, "num_input_tokens_seen": 193920368, "step": 2928 }, { "epoch": 0.27406748724668883, "loss": 1.4621467590332031, "loss_ce": 0.013904567807912827, "loss_iou": 0.59375, "loss_num": 0.052734375, "loss_xval": 1.4453125, "num_input_tokens_seen": 193920368, "step": 2928 }, { "epoch": 0.27416108953058455, "grad_norm": 28.4448299407959, "learning_rate": 5e-05, "loss": 1.2862, "num_input_tokens_seen": 193985652, "step": 2929 }, { "epoch": 0.27416108953058455, "loss": 1.1947191953659058, "loss_ce": 0.008592411875724792, "loss_iou": 0.515625, "loss_num": 0.0303955078125, "loss_xval": 1.1875, "num_input_tokens_seen": 193985652, "step": 2929 }, { "epoch": 0.27425469181448026, "grad_norm": 157.48585510253906, "learning_rate": 5e-05, "loss": 1.303, "num_input_tokens_seen": 194050880, "step": 2930 }, { "epoch": 0.27425469181448026, "loss": 1.106088638305664, "loss_ce": 0.0033055199310183525, "loss_iou": 0.412109375, "loss_num": 0.055419921875, "loss_xval": 1.1015625, "num_input_tokens_seen": 194050880, "step": 2930 }, { "epoch": 0.274348294098376, "grad_norm": 27.36667823791504, "learning_rate": 5e-05, "loss": 1.2582, "num_input_tokens_seen": 194118500, "step": 2931 }, { "epoch": 0.274348294098376, "loss": 1.366217851638794, "loss_ce": 0.00977259874343872, "loss_iou": 0.53125, "loss_num": 0.05908203125, "loss_xval": 1.359375, "num_input_tokens_seen": 194118500, "step": 2931 }, { "epoch": 0.27444189638227173, "grad_norm": 45.056976318359375, "learning_rate": 5e-05, "loss": 1.1968, "num_input_tokens_seen": 194184068, "step": 2932 }, { "epoch": 0.27444189638227173, "loss": 1.1848256587982178, "loss_ce": 0.003184966742992401, "loss_iou": 0.51953125, "loss_num": 0.0289306640625, "loss_xval": 1.1796875, "num_input_tokens_seen": 194184068, "step": 2932 }, { "epoch": 0.27453549866616744, "grad_norm": 37.12651062011719, "learning_rate": 5e-05, "loss": 1.2082, "num_input_tokens_seen": 194251356, "step": 2933 }, { "epoch": 0.27453549866616744, "loss": 1.125702142715454, "loss_ce": 0.0055850474163889885, "loss_iou": 0.48046875, "loss_num": 0.03173828125, "loss_xval": 1.1171875, "num_input_tokens_seen": 194251356, "step": 2933 }, { "epoch": 0.27462910095006315, "grad_norm": 32.226539611816406, "learning_rate": 5e-05, "loss": 1.6397, "num_input_tokens_seen": 194317776, "step": 2934 }, { "epoch": 0.27462910095006315, "loss": 1.6473486423492432, "loss_ce": 0.003793943440541625, "loss_iou": 0.6953125, "loss_num": 0.05029296875, "loss_xval": 1.640625, "num_input_tokens_seen": 194317776, "step": 2934 }, { "epoch": 0.2747227032339589, "grad_norm": 36.53327178955078, "learning_rate": 5e-05, "loss": 1.2686, "num_input_tokens_seen": 194384300, "step": 2935 }, { "epoch": 0.2747227032339589, "loss": 1.4336204528808594, "loss_ce": 0.002956465817987919, "loss_iou": 0.59375, "loss_num": 0.048095703125, "loss_xval": 1.4296875, "num_input_tokens_seen": 194384300, "step": 2935 }, { "epoch": 0.27481630551785463, "grad_norm": 38.007835388183594, "learning_rate": 5e-05, "loss": 1.6442, "num_input_tokens_seen": 194450960, "step": 2936 }, { "epoch": 0.27481630551785463, "loss": 1.7354243993759155, "loss_ce": 0.0020259853918105364, "loss_iou": 0.72265625, "loss_num": 0.057373046875, "loss_xval": 1.734375, "num_input_tokens_seen": 194450960, "step": 2936 }, { "epoch": 0.27490990780175034, "grad_norm": 23.463176727294922, "learning_rate": 5e-05, "loss": 1.6341, "num_input_tokens_seen": 194517312, "step": 2937 }, { "epoch": 0.27490990780175034, "loss": 1.3556169271469116, "loss_ce": 0.006495887879282236, "loss_iou": 0.578125, "loss_num": 0.03955078125, "loss_xval": 1.3515625, "num_input_tokens_seen": 194517312, "step": 2937 }, { "epoch": 0.2750035100856461, "grad_norm": 54.84428024291992, "learning_rate": 5e-05, "loss": 1.2504, "num_input_tokens_seen": 194583340, "step": 2938 }, { "epoch": 0.2750035100856461, "loss": 1.0731542110443115, "loss_ce": 0.0057714711874723434, "loss_iou": 0.46875, "loss_num": 0.02587890625, "loss_xval": 1.0703125, "num_input_tokens_seen": 194583340, "step": 2938 }, { "epoch": 0.2750971123695418, "grad_norm": 17.140056610107422, "learning_rate": 5e-05, "loss": 1.3787, "num_input_tokens_seen": 194649472, "step": 2939 }, { "epoch": 0.2750971123695418, "loss": 1.184022068977356, "loss_ce": 0.003113870043307543, "loss_iou": 0.49609375, "loss_num": 0.03759765625, "loss_xval": 1.1796875, "num_input_tokens_seen": 194649472, "step": 2939 }, { "epoch": 0.27519071465343753, "grad_norm": 34.75260925292969, "learning_rate": 5e-05, "loss": 1.6385, "num_input_tokens_seen": 194714968, "step": 2940 }, { "epoch": 0.27519071465343753, "loss": 1.5107307434082031, "loss_ce": 0.003406504401937127, "loss_iou": 0.61328125, "loss_num": 0.056884765625, "loss_xval": 1.5078125, "num_input_tokens_seen": 194714968, "step": 2940 }, { "epoch": 0.2752843169373333, "grad_norm": 20.1380558013916, "learning_rate": 5e-05, "loss": 1.5287, "num_input_tokens_seen": 194781976, "step": 2941 }, { "epoch": 0.2752843169373333, "loss": 1.6637755632400513, "loss_ce": 0.005572447087615728, "loss_iou": 0.70703125, "loss_num": 0.04833984375, "loss_xval": 1.65625, "num_input_tokens_seen": 194781976, "step": 2941 }, { "epoch": 0.275377919221229, "grad_norm": 19.91967010498047, "learning_rate": 5e-05, "loss": 1.4581, "num_input_tokens_seen": 194847748, "step": 2942 }, { "epoch": 0.275377919221229, "loss": 1.4977524280548096, "loss_ce": 0.0031234631314873695, "loss_iou": 0.58984375, "loss_num": 0.0625, "loss_xval": 1.4921875, "num_input_tokens_seen": 194847748, "step": 2942 }, { "epoch": 0.2754715215051247, "grad_norm": 27.349401473999023, "learning_rate": 5e-05, "loss": 1.4075, "num_input_tokens_seen": 194913456, "step": 2943 }, { "epoch": 0.2754715215051247, "loss": 1.5793519020080566, "loss_ce": 0.0065980092622339725, "loss_iou": 0.63671875, "loss_num": 0.0595703125, "loss_xval": 1.5703125, "num_input_tokens_seen": 194913456, "step": 2943 }, { "epoch": 0.2755651237890204, "grad_norm": 32.3651123046875, "learning_rate": 5e-05, "loss": 1.3806, "num_input_tokens_seen": 194980020, "step": 2944 }, { "epoch": 0.2755651237890204, "loss": 1.4813790321350098, "loss_ce": 0.0023751629050821066, "loss_iou": 0.609375, "loss_num": 0.05224609375, "loss_xval": 1.4765625, "num_input_tokens_seen": 194980020, "step": 2944 }, { "epoch": 0.2756587260729162, "grad_norm": 53.90221405029297, "learning_rate": 5e-05, "loss": 1.6783, "num_input_tokens_seen": 195045816, "step": 2945 }, { "epoch": 0.2756587260729162, "loss": 1.4300849437713623, "loss_ce": 0.0033271731808781624, "loss_iou": 0.60546875, "loss_num": 0.04345703125, "loss_xval": 1.4296875, "num_input_tokens_seen": 195045816, "step": 2945 }, { "epoch": 0.2757523283568119, "grad_norm": 44.97574996948242, "learning_rate": 5e-05, "loss": 1.4494, "num_input_tokens_seen": 195111356, "step": 2946 }, { "epoch": 0.2757523283568119, "loss": 1.2630583047866821, "loss_ce": 0.004269261844456196, "loss_iou": 0.55078125, "loss_num": 0.031494140625, "loss_xval": 1.2578125, "num_input_tokens_seen": 195111356, "step": 2946 }, { "epoch": 0.2758459306407076, "grad_norm": 27.603666305541992, "learning_rate": 5e-05, "loss": 1.4105, "num_input_tokens_seen": 195178164, "step": 2947 }, { "epoch": 0.2758459306407076, "loss": 1.4767403602600098, "loss_ce": 0.008966884575784206, "loss_iou": 0.62109375, "loss_num": 0.04541015625, "loss_xval": 1.46875, "num_input_tokens_seen": 195178164, "step": 2947 }, { "epoch": 0.2759395329246034, "grad_norm": 52.44571304321289, "learning_rate": 5e-05, "loss": 1.6437, "num_input_tokens_seen": 195243740, "step": 2948 }, { "epoch": 0.2759395329246034, "loss": 1.5359150171279907, "loss_ce": 0.004665090702474117, "loss_iou": 0.671875, "loss_num": 0.03759765625, "loss_xval": 1.53125, "num_input_tokens_seen": 195243740, "step": 2948 }, { "epoch": 0.2760331352084991, "grad_norm": 15.909627914428711, "learning_rate": 5e-05, "loss": 1.1997, "num_input_tokens_seen": 195310312, "step": 2949 }, { "epoch": 0.2760331352084991, "loss": 1.267456293106079, "loss_ce": 0.006958204787224531, "loss_iou": 0.54296875, "loss_num": 0.03515625, "loss_xval": 1.2578125, "num_input_tokens_seen": 195310312, "step": 2949 }, { "epoch": 0.2761267374923948, "grad_norm": 22.354604721069336, "learning_rate": 5e-05, "loss": 1.1309, "num_input_tokens_seen": 195376212, "step": 2950 }, { "epoch": 0.2761267374923948, "loss": 1.3071309328079224, "loss_ce": 0.007814496755599976, "loss_iou": 0.5390625, "loss_num": 0.044677734375, "loss_xval": 1.296875, "num_input_tokens_seen": 195376212, "step": 2950 }, { "epoch": 0.27622033977629057, "grad_norm": 37.252540588378906, "learning_rate": 5e-05, "loss": 1.4142, "num_input_tokens_seen": 195441904, "step": 2951 }, { "epoch": 0.27622033977629057, "loss": 1.4334460496902466, "loss_ce": 0.005223315209150314, "loss_iou": 0.59375, "loss_num": 0.047607421875, "loss_xval": 1.4296875, "num_input_tokens_seen": 195441904, "step": 2951 }, { "epoch": 0.2763139420601863, "grad_norm": 18.50592613220215, "learning_rate": 5e-05, "loss": 1.1673, "num_input_tokens_seen": 195506856, "step": 2952 }, { "epoch": 0.2763139420601863, "loss": 1.4549554586410522, "loss_ce": 0.003783546620979905, "loss_iou": 0.6171875, "loss_num": 0.043701171875, "loss_xval": 1.453125, "num_input_tokens_seen": 195506856, "step": 2952 }, { "epoch": 0.276407544344082, "grad_norm": 14.848483085632324, "learning_rate": 5e-05, "loss": 1.2464, "num_input_tokens_seen": 195573092, "step": 2953 }, { "epoch": 0.276407544344082, "loss": 1.1065553426742554, "loss_ce": 0.0073121171444654465, "loss_iou": 0.4609375, "loss_num": 0.03515625, "loss_xval": 1.1015625, "num_input_tokens_seen": 195573092, "step": 2953 }, { "epoch": 0.2765011466279777, "grad_norm": 15.150407791137695, "learning_rate": 5e-05, "loss": 1.0623, "num_input_tokens_seen": 195639176, "step": 2954 }, { "epoch": 0.2765011466279777, "loss": 1.060774564743042, "loss_ce": 0.006087028421461582, "loss_iou": 0.416015625, "loss_num": 0.044677734375, "loss_xval": 1.0546875, "num_input_tokens_seen": 195639176, "step": 2954 }, { "epoch": 0.27659474891187347, "grad_norm": 17.979511260986328, "learning_rate": 5e-05, "loss": 1.2765, "num_input_tokens_seen": 195705584, "step": 2955 }, { "epoch": 0.27659474891187347, "loss": 1.4038522243499756, "loss_ce": 0.005414669401943684, "loss_iou": 0.61328125, "loss_num": 0.033935546875, "loss_xval": 1.3984375, "num_input_tokens_seen": 195705584, "step": 2955 }, { "epoch": 0.2766883511957692, "grad_norm": 69.79885864257812, "learning_rate": 5e-05, "loss": 1.2342, "num_input_tokens_seen": 195772380, "step": 2956 }, { "epoch": 0.2766883511957692, "loss": 1.0038360357284546, "loss_ce": 0.003836047602817416, "loss_iou": 0.439453125, "loss_num": 0.024169921875, "loss_xval": 1.0, "num_input_tokens_seen": 195772380, "step": 2956 }, { "epoch": 0.2767819534796649, "grad_norm": 21.76862144470215, "learning_rate": 5e-05, "loss": 1.3137, "num_input_tokens_seen": 195838060, "step": 2957 }, { "epoch": 0.2767819534796649, "loss": 1.3768516778945923, "loss_ce": 0.006734518799930811, "loss_iou": 0.52734375, "loss_num": 0.0625, "loss_xval": 1.3671875, "num_input_tokens_seen": 195838060, "step": 2957 }, { "epoch": 0.27687555576356065, "grad_norm": 37.61552429199219, "learning_rate": 5e-05, "loss": 1.4519, "num_input_tokens_seen": 195903216, "step": 2958 }, { "epoch": 0.27687555576356065, "loss": 1.625257968902588, "loss_ce": 0.007093844935297966, "loss_iou": 0.6640625, "loss_num": 0.058349609375, "loss_xval": 1.6171875, "num_input_tokens_seen": 195903216, "step": 2958 }, { "epoch": 0.27696915804745637, "grad_norm": 22.148937225341797, "learning_rate": 5e-05, "loss": 1.5944, "num_input_tokens_seen": 195970892, "step": 2959 }, { "epoch": 0.27696915804745637, "loss": 1.5753856897354126, "loss_ce": 0.008979421108961105, "loss_iou": 0.59765625, "loss_num": 0.07373046875, "loss_xval": 1.5625, "num_input_tokens_seen": 195970892, "step": 2959 }, { "epoch": 0.2770627603313521, "grad_norm": 188.84820556640625, "learning_rate": 5e-05, "loss": 1.2096, "num_input_tokens_seen": 196036800, "step": 2960 }, { "epoch": 0.2770627603313521, "loss": 1.1954622268676758, "loss_ce": 0.006009131204336882, "loss_iou": 0.51171875, "loss_num": 0.03369140625, "loss_xval": 1.1875, "num_input_tokens_seen": 196036800, "step": 2960 }, { "epoch": 0.2771563626152478, "grad_norm": 40.225379943847656, "learning_rate": 5e-05, "loss": 1.4014, "num_input_tokens_seen": 196102516, "step": 2961 }, { "epoch": 0.2771563626152478, "loss": 1.2238408327102661, "loss_ce": 0.0036260043270885944, "loss_iou": 0.5234375, "loss_num": 0.0341796875, "loss_xval": 1.21875, "num_input_tokens_seen": 196102516, "step": 2961 }, { "epoch": 0.27724996489914355, "grad_norm": 30.62041473388672, "learning_rate": 5e-05, "loss": 1.5254, "num_input_tokens_seen": 196169528, "step": 2962 }, { "epoch": 0.27724996489914355, "loss": 1.6488304138183594, "loss_ce": 0.008205480873584747, "loss_iou": 0.67578125, "loss_num": 0.05859375, "loss_xval": 1.640625, "num_input_tokens_seen": 196169528, "step": 2962 }, { "epoch": 0.27734356718303926, "grad_norm": 19.16793441772461, "learning_rate": 5e-05, "loss": 1.203, "num_input_tokens_seen": 196235160, "step": 2963 }, { "epoch": 0.27734356718303926, "loss": 1.2300286293029785, "loss_ce": 0.005968559067696333, "loss_iou": 0.546875, "loss_num": 0.0262451171875, "loss_xval": 1.2265625, "num_input_tokens_seen": 196235160, "step": 2963 }, { "epoch": 0.277437169466935, "grad_norm": 13.29127311706543, "learning_rate": 5e-05, "loss": 1.3865, "num_input_tokens_seen": 196300332, "step": 2964 }, { "epoch": 0.277437169466935, "loss": 1.5598807334899902, "loss_ce": 0.0042167287319898605, "loss_iou": 0.64453125, "loss_num": 0.0537109375, "loss_xval": 1.5546875, "num_input_tokens_seen": 196300332, "step": 2964 }, { "epoch": 0.27753077175083074, "grad_norm": 21.9099178314209, "learning_rate": 5e-05, "loss": 1.1395, "num_input_tokens_seen": 196366608, "step": 2965 }, { "epoch": 0.27753077175083074, "loss": 1.1302073001861572, "loss_ce": 0.004719014745205641, "loss_iou": 0.51953125, "loss_num": 0.0179443359375, "loss_xval": 1.125, "num_input_tokens_seen": 196366608, "step": 2965 }, { "epoch": 0.27762437403472645, "grad_norm": 34.35336685180664, "learning_rate": 5e-05, "loss": 1.1855, "num_input_tokens_seen": 196433176, "step": 2966 }, { "epoch": 0.27762437403472645, "loss": 1.1923853158950806, "loss_ce": 0.0024439168628305197, "loss_iou": 0.53125, "loss_num": 0.0257568359375, "loss_xval": 1.1875, "num_input_tokens_seen": 196433176, "step": 2966 }, { "epoch": 0.27771797631862216, "grad_norm": 32.035465240478516, "learning_rate": 5e-05, "loss": 1.5564, "num_input_tokens_seen": 196499096, "step": 2967 }, { "epoch": 0.27771797631862216, "loss": 1.439605474472046, "loss_ce": 0.00796491652727127, "loss_iou": 0.5390625, "loss_num": 0.07080078125, "loss_xval": 1.4296875, "num_input_tokens_seen": 196499096, "step": 2967 }, { "epoch": 0.27781157860251793, "grad_norm": 41.921775817871094, "learning_rate": 5e-05, "loss": 1.1717, "num_input_tokens_seen": 196564628, "step": 2968 }, { "epoch": 0.27781157860251793, "loss": 1.1814939975738525, "loss_ce": 0.004736181348562241, "loss_iou": 0.515625, "loss_num": 0.02880859375, "loss_xval": 1.1796875, "num_input_tokens_seen": 196564628, "step": 2968 }, { "epoch": 0.27790518088641364, "grad_norm": 20.5039005279541, "learning_rate": 5e-05, "loss": 1.0963, "num_input_tokens_seen": 196631252, "step": 2969 }, { "epoch": 0.27790518088641364, "loss": 1.1294777393341064, "loss_ce": 0.004477834329009056, "loss_iou": 0.51171875, "loss_num": 0.019775390625, "loss_xval": 1.125, "num_input_tokens_seen": 196631252, "step": 2969 }, { "epoch": 0.27799878317030935, "grad_norm": 30.32698631286621, "learning_rate": 5e-05, "loss": 1.4396, "num_input_tokens_seen": 196697688, "step": 2970 }, { "epoch": 0.27799878317030935, "loss": 1.4818978309631348, "loss_ce": 0.004846978932619095, "loss_iou": 0.60546875, "loss_num": 0.0537109375, "loss_xval": 1.4765625, "num_input_tokens_seen": 196697688, "step": 2970 }, { "epoch": 0.27809238545420506, "grad_norm": 38.12827682495117, "learning_rate": 5e-05, "loss": 1.4142, "num_input_tokens_seen": 196765388, "step": 2971 }, { "epoch": 0.27809238545420506, "loss": 1.5492026805877686, "loss_ce": 0.003304253565147519, "loss_iou": 0.671875, "loss_num": 0.040771484375, "loss_xval": 1.546875, "num_input_tokens_seen": 196765388, "step": 2971 }, { "epoch": 0.2781859877381008, "grad_norm": 21.766862869262695, "learning_rate": 5e-05, "loss": 1.3314, "num_input_tokens_seen": 196831972, "step": 2972 }, { "epoch": 0.2781859877381008, "loss": 1.363213300704956, "loss_ce": 0.005791435018181801, "loss_iou": 0.578125, "loss_num": 0.03955078125, "loss_xval": 1.359375, "num_input_tokens_seen": 196831972, "step": 2972 }, { "epoch": 0.27827959002199654, "grad_norm": 32.41834259033203, "learning_rate": 5e-05, "loss": 1.4518, "num_input_tokens_seen": 196898752, "step": 2973 }, { "epoch": 0.27827959002199654, "loss": 1.669748067855835, "loss_ce": 0.004709017463028431, "loss_iou": 0.68359375, "loss_num": 0.059814453125, "loss_xval": 1.6640625, "num_input_tokens_seen": 196898752, "step": 2973 }, { "epoch": 0.27837319230589225, "grad_norm": 19.054615020751953, "learning_rate": 5e-05, "loss": 1.4841, "num_input_tokens_seen": 196964800, "step": 2974 }, { "epoch": 0.27837319230589225, "loss": 1.5721250772476196, "loss_ce": 0.0047422437928617, "loss_iou": 0.70703125, "loss_num": 0.03076171875, "loss_xval": 1.5703125, "num_input_tokens_seen": 196964800, "step": 2974 }, { "epoch": 0.278466794589788, "grad_norm": 20.208951950073242, "learning_rate": 5e-05, "loss": 1.3882, "num_input_tokens_seen": 197031488, "step": 2975 }, { "epoch": 0.278466794589788, "loss": 1.6346020698547363, "loss_ce": 0.007160606794059277, "loss_iou": 0.65625, "loss_num": 0.0634765625, "loss_xval": 1.625, "num_input_tokens_seen": 197031488, "step": 2975 }, { "epoch": 0.2785603968736837, "grad_norm": 38.382999420166016, "learning_rate": 5e-05, "loss": 1.2024, "num_input_tokens_seen": 197097028, "step": 2976 }, { "epoch": 0.2785603968736837, "loss": 1.2237555980682373, "loss_ce": 0.010376797057688236, "loss_iou": 0.52734375, "loss_num": 0.032470703125, "loss_xval": 1.2109375, "num_input_tokens_seen": 197097028, "step": 2976 }, { "epoch": 0.27865399915757944, "grad_norm": 22.0007266998291, "learning_rate": 5e-05, "loss": 1.533, "num_input_tokens_seen": 197162968, "step": 2977 }, { "epoch": 0.27865399915757944, "loss": 1.6800990104675293, "loss_ce": 0.006270837038755417, "loss_iou": 0.65625, "loss_num": 0.07177734375, "loss_xval": 1.671875, "num_input_tokens_seen": 197162968, "step": 2977 }, { "epoch": 0.27874760144147515, "grad_norm": 27.104589462280273, "learning_rate": 5e-05, "loss": 1.2597, "num_input_tokens_seen": 197229888, "step": 2978 }, { "epoch": 0.27874760144147515, "loss": 1.3282208442687988, "loss_ce": 0.00302558159455657, "loss_iou": 0.55078125, "loss_num": 0.0439453125, "loss_xval": 1.328125, "num_input_tokens_seen": 197229888, "step": 2978 }, { "epoch": 0.2788412037253709, "grad_norm": 18.77463150024414, "learning_rate": 5e-05, "loss": 1.2428, "num_input_tokens_seen": 197295440, "step": 2979 }, { "epoch": 0.2788412037253709, "loss": 1.28849458694458, "loss_ce": 0.006878322921693325, "loss_iou": 0.5625, "loss_num": 0.03125, "loss_xval": 1.28125, "num_input_tokens_seen": 197295440, "step": 2979 }, { "epoch": 0.2789348060092666, "grad_norm": 19.471467971801758, "learning_rate": 5e-05, "loss": 1.4861, "num_input_tokens_seen": 197361704, "step": 2980 }, { "epoch": 0.2789348060092666, "loss": 1.2765750885009766, "loss_ce": 0.005090806633234024, "loss_iou": 0.55078125, "loss_num": 0.033935546875, "loss_xval": 1.2734375, "num_input_tokens_seen": 197361704, "step": 2980 }, { "epoch": 0.27902840829316233, "grad_norm": 29.813678741455078, "learning_rate": 5e-05, "loss": 1.3685, "num_input_tokens_seen": 197428532, "step": 2981 }, { "epoch": 0.27902840829316233, "loss": 1.3627123832702637, "loss_ce": 0.006755331996828318, "loss_iou": 0.578125, "loss_num": 0.0400390625, "loss_xval": 1.359375, "num_input_tokens_seen": 197428532, "step": 2981 }, { "epoch": 0.2791220105770581, "grad_norm": 28.789997100830078, "learning_rate": 5e-05, "loss": 1.5348, "num_input_tokens_seen": 197494760, "step": 2982 }, { "epoch": 0.2791220105770581, "loss": 1.7443268299102783, "loss_ce": 0.003115879837423563, "loss_iou": 0.7109375, "loss_num": 0.0634765625, "loss_xval": 1.7421875, "num_input_tokens_seen": 197494760, "step": 2982 }, { "epoch": 0.2792156128609538, "grad_norm": 27.926149368286133, "learning_rate": 5e-05, "loss": 1.3637, "num_input_tokens_seen": 197561264, "step": 2983 }, { "epoch": 0.2792156128609538, "loss": 1.33914315700531, "loss_ce": 0.007111984305083752, "loss_iou": 0.52734375, "loss_num": 0.055908203125, "loss_xval": 1.328125, "num_input_tokens_seen": 197561264, "step": 2983 }, { "epoch": 0.2793092151448495, "grad_norm": 27.39906883239746, "learning_rate": 5e-05, "loss": 1.3701, "num_input_tokens_seen": 197628624, "step": 2984 }, { "epoch": 0.2793092151448495, "loss": 1.346161127090454, "loss_ce": 0.003387751057744026, "loss_iou": 0.55078125, "loss_num": 0.048828125, "loss_xval": 1.34375, "num_input_tokens_seen": 197628624, "step": 2984 }, { "epoch": 0.2794028174287453, "grad_norm": 23.686147689819336, "learning_rate": 5e-05, "loss": 1.147, "num_input_tokens_seen": 197695456, "step": 2985 }, { "epoch": 0.2794028174287453, "loss": 0.9516488313674927, "loss_ce": 0.0029184240847826004, "loss_iou": 0.390625, "loss_num": 0.033447265625, "loss_xval": 0.94921875, "num_input_tokens_seen": 197695456, "step": 2985 }, { "epoch": 0.279496419712641, "grad_norm": 41.54317092895508, "learning_rate": 5e-05, "loss": 1.2913, "num_input_tokens_seen": 197760268, "step": 2986 }, { "epoch": 0.279496419712641, "loss": 1.451521396636963, "loss_ce": 0.004744125995784998, "loss_iou": 0.6015625, "loss_num": 0.048095703125, "loss_xval": 1.4453125, "num_input_tokens_seen": 197760268, "step": 2986 }, { "epoch": 0.2795900219965367, "grad_norm": 30.887428283691406, "learning_rate": 5e-05, "loss": 1.2778, "num_input_tokens_seen": 197826376, "step": 2987 }, { "epoch": 0.2795900219965367, "loss": 1.0921101570129395, "loss_ce": 0.006050621159374714, "loss_iou": 0.46484375, "loss_num": 0.031005859375, "loss_xval": 1.0859375, "num_input_tokens_seen": 197826376, "step": 2987 }, { "epoch": 0.2796836242804324, "grad_norm": 54.29893112182617, "learning_rate": 5e-05, "loss": 1.4286, "num_input_tokens_seen": 197893984, "step": 2988 }, { "epoch": 0.2796836242804324, "loss": 1.2299041748046875, "loss_ce": 0.0028534168377518654, "loss_iou": 0.5, "loss_num": 0.04541015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 197893984, "step": 2988 }, { "epoch": 0.2797772265643282, "grad_norm": 12.72755241394043, "learning_rate": 5e-05, "loss": 1.3166, "num_input_tokens_seen": 197959892, "step": 2989 }, { "epoch": 0.2797772265643282, "loss": 1.489954948425293, "loss_ce": 0.007533114403486252, "loss_iou": 0.53125, "loss_num": 0.08447265625, "loss_xval": 1.484375, "num_input_tokens_seen": 197959892, "step": 2989 }, { "epoch": 0.2798708288482239, "grad_norm": 16.953954696655273, "learning_rate": 5e-05, "loss": 1.0609, "num_input_tokens_seen": 198027220, "step": 2990 }, { "epoch": 0.2798708288482239, "loss": 0.993353545665741, "loss_ce": 0.0011660554446280003, "loss_iou": 0.431640625, "loss_num": 0.0255126953125, "loss_xval": 0.9921875, "num_input_tokens_seen": 198027220, "step": 2990 }, { "epoch": 0.2799644311321196, "grad_norm": 20.705690383911133, "learning_rate": 5e-05, "loss": 1.1586, "num_input_tokens_seen": 198093512, "step": 2991 }, { "epoch": 0.2799644311321196, "loss": 1.2157834768295288, "loss_ce": 0.009591442532837391, "loss_iou": 0.482421875, "loss_num": 0.04833984375, "loss_xval": 1.203125, "num_input_tokens_seen": 198093512, "step": 2991 }, { "epoch": 0.2800580334160154, "grad_norm": 49.34597396850586, "learning_rate": 5e-05, "loss": 1.3502, "num_input_tokens_seen": 198160084, "step": 2992 }, { "epoch": 0.2800580334160154, "loss": 1.2780870199203491, "loss_ce": 0.0036730102729052305, "loss_iou": 0.5625, "loss_num": 0.02978515625, "loss_xval": 1.2734375, "num_input_tokens_seen": 198160084, "step": 2992 }, { "epoch": 0.2801516356999111, "grad_norm": 24.546768188476562, "learning_rate": 5e-05, "loss": 1.5367, "num_input_tokens_seen": 198226756, "step": 2993 }, { "epoch": 0.2801516356999111, "loss": 1.599737286567688, "loss_ce": 0.009405290707945824, "loss_iou": 0.6640625, "loss_num": 0.052490234375, "loss_xval": 1.59375, "num_input_tokens_seen": 198226756, "step": 2993 }, { "epoch": 0.2802452379838068, "grad_norm": 36.58216857910156, "learning_rate": 5e-05, "loss": 1.3061, "num_input_tokens_seen": 198292888, "step": 2994 }, { "epoch": 0.2802452379838068, "loss": 1.2798948287963867, "loss_ce": 0.0069457050412893295, "loss_iou": 0.51171875, "loss_num": 0.049560546875, "loss_xval": 1.2734375, "num_input_tokens_seen": 198292888, "step": 2994 }, { "epoch": 0.2803388402677025, "grad_norm": 24.769245147705078, "learning_rate": 5e-05, "loss": 1.5064, "num_input_tokens_seen": 198359084, "step": 2995 }, { "epoch": 0.2803388402677025, "loss": 1.5779725313186646, "loss_ce": 0.003753824159502983, "loss_iou": 0.6328125, "loss_num": 0.0625, "loss_xval": 1.578125, "num_input_tokens_seen": 198359084, "step": 2995 }, { "epoch": 0.28043244255159827, "grad_norm": 49.46518325805664, "learning_rate": 5e-05, "loss": 1.2283, "num_input_tokens_seen": 198424884, "step": 2996 }, { "epoch": 0.28043244255159827, "loss": 1.0733356475830078, "loss_ce": 0.003511441173031926, "loss_iou": 0.453125, "loss_num": 0.033203125, "loss_xval": 1.0703125, "num_input_tokens_seen": 198424884, "step": 2996 }, { "epoch": 0.280526044835494, "grad_norm": 21.528493881225586, "learning_rate": 5e-05, "loss": 1.3659, "num_input_tokens_seen": 198492052, "step": 2997 }, { "epoch": 0.280526044835494, "loss": 1.4436354637145996, "loss_ce": 0.0071120294742286205, "loss_iou": 0.59765625, "loss_num": 0.048095703125, "loss_xval": 1.4375, "num_input_tokens_seen": 198492052, "step": 2997 }, { "epoch": 0.2806196471193897, "grad_norm": 14.494715690612793, "learning_rate": 5e-05, "loss": 1.3066, "num_input_tokens_seen": 198558456, "step": 2998 }, { "epoch": 0.2806196471193897, "loss": 1.400672435760498, "loss_ce": 0.007361851632595062, "loss_iou": 0.54296875, "loss_num": 0.061279296875, "loss_xval": 1.390625, "num_input_tokens_seen": 198558456, "step": 2998 }, { "epoch": 0.28071324940328546, "grad_norm": 18.866601943969727, "learning_rate": 5e-05, "loss": 1.34, "num_input_tokens_seen": 198625364, "step": 2999 }, { "epoch": 0.28071324940328546, "loss": 1.5606812238693237, "loss_ce": 0.007946882396936417, "loss_iou": 0.60546875, "loss_num": 0.068359375, "loss_xval": 1.5546875, "num_input_tokens_seen": 198625364, "step": 2999 }, { "epoch": 0.28080685168718117, "grad_norm": 38.40473556518555, "learning_rate": 5e-05, "loss": 1.4858, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.28080685168718117, "eval_seeclick_CIoU": 0.1211421936750412, "eval_seeclick_GIoU": 0.1370389722287655, "eval_seeclick_IoU": 0.25907187163829803, "eval_seeclick_MAE_all": 0.1756228357553482, "eval_seeclick_MAE_h": 0.07288940250873566, "eval_seeclick_MAE_w": 0.12167743965983391, "eval_seeclick_MAE_x_boxes": 0.2986074537038803, "eval_seeclick_MAE_y_boxes": 0.16409114748239517, "eval_seeclick_NUM_probability": 0.9997413158416748, "eval_seeclick_inside_bbox": 0.4072916805744171, "eval_seeclick_loss": 2.6142280101776123, "eval_seeclick_loss_ce": 0.014518491458147764, "eval_seeclick_loss_iou": 0.8895263671875, "eval_seeclick_loss_num": 0.17076873779296875, "eval_seeclick_loss_xval": 2.63232421875, "eval_seeclick_runtime": 73.1545, "eval_seeclick_samples_per_second": 0.642, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.28080685168718117, "eval_icons_CIoU": -0.06424631550908089, "eval_icons_GIoU": 0.05589934252202511, "eval_icons_IoU": 0.11858085170388222, "eval_icons_MAE_all": 0.1848635897040367, "eval_icons_MAE_h": 0.13594580814242363, "eval_icons_MAE_w": 0.17704887688159943, "eval_icons_MAE_x_boxes": 0.15803752839565277, "eval_icons_MAE_y_boxes": 0.09891069307923317, "eval_icons_NUM_probability": 0.999882310628891, "eval_icons_inside_bbox": 0.1770833358168602, "eval_icons_loss": 2.838784694671631, "eval_icons_loss_ce": 4.0777424146654084e-05, "eval_icons_loss_iou": 0.958251953125, "eval_icons_loss_num": 0.1959228515625, "eval_icons_loss_xval": 2.896484375, "eval_icons_runtime": 75.6626, "eval_icons_samples_per_second": 0.661, "eval_icons_steps_per_second": 0.026, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.28080685168718117, "eval_screenspot_CIoU": 0.04999221861362457, "eval_screenspot_GIoU": 0.07912557261685531, "eval_screenspot_IoU": 0.20241647958755493, "eval_screenspot_MAE_all": 0.18727018435796103, "eval_screenspot_MAE_h": 0.0899810865521431, "eval_screenspot_MAE_w": 0.13406882186730704, "eval_screenspot_MAE_x_boxes": 0.2660200943549474, "eval_screenspot_MAE_y_boxes": 0.1705679049094518, "eval_screenspot_NUM_probability": 0.9999146262804667, "eval_screenspot_inside_bbox": 0.39958332975705463, "eval_screenspot_loss": 2.8131790161132812, "eval_screenspot_loss_ce": 0.00783678920318683, "eval_screenspot_loss_iou": 0.9337565104166666, "eval_screenspot_loss_num": 0.18758138020833334, "eval_screenspot_loss_xval": 2.8033854166666665, "eval_screenspot_runtime": 128.2036, "eval_screenspot_samples_per_second": 0.694, "eval_screenspot_steps_per_second": 0.023, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.28080685168718117, "eval_compot_CIoU": -0.06844502128660679, "eval_compot_GIoU": -0.005227629095315933, "eval_compot_IoU": 0.09907376766204834, "eval_compot_MAE_all": 0.187107652425766, "eval_compot_MAE_h": 0.09452997334301472, "eval_compot_MAE_w": 0.16281145438551903, "eval_compot_MAE_x_boxes": 0.16504347324371338, "eval_compot_MAE_y_boxes": 0.17207197099924088, "eval_compot_NUM_probability": 0.9998281896114349, "eval_compot_inside_bbox": 0.1736111119389534, "eval_compot_loss": 3.0248312950134277, "eval_compot_loss_ce": 0.0022454506251960993, "eval_compot_loss_iou": 1.035888671875, "eval_compot_loss_num": 0.2079315185546875, "eval_compot_loss_xval": 3.11083984375, "eval_compot_runtime": 69.8459, "eval_compot_samples_per_second": 0.716, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.28080685168718117, "eval_custom_ui_MAE_all": 0.14128626137971878, "eval_custom_ui_MAE_x": 0.114876639097929, "eval_custom_ui_MAE_y": 0.16769587993621826, "eval_custom_ui_NUM_probability": 0.9999775290489197, "eval_custom_ui_loss": 0.8501137495040894, "eval_custom_ui_loss_ce": 0.19192413985729218, "eval_custom_ui_loss_num": 0.14447021484375, "eval_custom_ui_loss_xval": 0.7225341796875, "eval_custom_ui_runtime": 58.6663, "eval_custom_ui_samples_per_second": 0.852, "eval_custom_ui_steps_per_second": 0.034, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.28080685168718117, "loss": 0.904617190361023, "loss_ce": 0.21296684443950653, "loss_iou": 0.0, "loss_num": 0.138671875, "loss_xval": 0.69140625, "num_input_tokens_seen": 198691704, "step": 3000 }, { "epoch": 0.2809004539710769, "grad_norm": 22.667634963989258, "learning_rate": 5e-05, "loss": 1.5151, "num_input_tokens_seen": 198757224, "step": 3001 }, { "epoch": 0.2809004539710769, "loss": 1.5522687435150146, "loss_ce": 0.005393712781369686, "loss_iou": 0.6328125, "loss_num": 0.054931640625, "loss_xval": 1.546875, "num_input_tokens_seen": 198757224, "step": 3001 }, { "epoch": 0.28099405625497265, "grad_norm": 16.118173599243164, "learning_rate": 5e-05, "loss": 1.435, "num_input_tokens_seen": 198823776, "step": 3002 }, { "epoch": 0.28099405625497265, "loss": 1.3930273056030273, "loss_ce": 0.00923829060047865, "loss_iou": 0.5859375, "loss_num": 0.042236328125, "loss_xval": 1.3828125, "num_input_tokens_seen": 198823776, "step": 3002 }, { "epoch": 0.28108765853886836, "grad_norm": 39.46125793457031, "learning_rate": 5e-05, "loss": 1.2334, "num_input_tokens_seen": 198889656, "step": 3003 }, { "epoch": 0.28108765853886836, "loss": 1.145675778388977, "loss_ce": 0.004074192140251398, "loss_iou": 0.50390625, "loss_num": 0.02685546875, "loss_xval": 1.140625, "num_input_tokens_seen": 198889656, "step": 3003 }, { "epoch": 0.28118126082276407, "grad_norm": 23.5291748046875, "learning_rate": 5e-05, "loss": 1.2286, "num_input_tokens_seen": 198956604, "step": 3004 }, { "epoch": 0.28118126082276407, "loss": 1.3669800758361816, "loss_ce": 0.004675284028053284, "loss_iou": 0.55859375, "loss_num": 0.048828125, "loss_xval": 1.359375, "num_input_tokens_seen": 198956604, "step": 3004 }, { "epoch": 0.2812748631066598, "grad_norm": 24.502634048461914, "learning_rate": 5e-05, "loss": 1.3943, "num_input_tokens_seen": 199022784, "step": 3005 }, { "epoch": 0.2812748631066598, "loss": 1.472860336303711, "loss_ce": 0.01070205494761467, "loss_iou": 0.5859375, "loss_num": 0.05810546875, "loss_xval": 1.4609375, "num_input_tokens_seen": 199022784, "step": 3005 }, { "epoch": 0.28136846539055554, "grad_norm": 20.00545883178711, "learning_rate": 5e-05, "loss": 1.1291, "num_input_tokens_seen": 199089608, "step": 3006 }, { "epoch": 0.28136846539055554, "loss": 1.335639476776123, "loss_ce": 0.005561283323913813, "loss_iou": 0.55078125, "loss_num": 0.045654296875, "loss_xval": 1.328125, "num_input_tokens_seen": 199089608, "step": 3006 }, { "epoch": 0.28146206767445126, "grad_norm": 25.719745635986328, "learning_rate": 5e-05, "loss": 1.1503, "num_input_tokens_seen": 199156128, "step": 3007 }, { "epoch": 0.28146206767445126, "loss": 1.2904092073440552, "loss_ce": 0.0032998290844261646, "loss_iou": 0.57421875, "loss_num": 0.028076171875, "loss_xval": 1.2890625, "num_input_tokens_seen": 199156128, "step": 3007 }, { "epoch": 0.28155566995834697, "grad_norm": 27.325746536254883, "learning_rate": 5e-05, "loss": 1.4628, "num_input_tokens_seen": 199221836, "step": 3008 }, { "epoch": 0.28155566995834697, "loss": 1.4212934970855713, "loss_ce": 0.004301358945667744, "loss_iou": 0.625, "loss_num": 0.033447265625, "loss_xval": 1.4140625, "num_input_tokens_seen": 199221836, "step": 3008 }, { "epoch": 0.28164927224224273, "grad_norm": 18.342214584350586, "learning_rate": 5e-05, "loss": 1.0876, "num_input_tokens_seen": 199287648, "step": 3009 }, { "epoch": 0.28164927224224273, "loss": 1.2151768207550049, "loss_ce": 0.0037509948015213013, "loss_iou": 0.51953125, "loss_num": 0.0341796875, "loss_xval": 1.2109375, "num_input_tokens_seen": 199287648, "step": 3009 }, { "epoch": 0.28174287452613844, "grad_norm": 21.493207931518555, "learning_rate": 5e-05, "loss": 1.5262, "num_input_tokens_seen": 199354116, "step": 3010 }, { "epoch": 0.28174287452613844, "loss": 1.3997085094451904, "loss_ce": 0.0022476729936897755, "loss_iou": 0.6171875, "loss_num": 0.033203125, "loss_xval": 1.3984375, "num_input_tokens_seen": 199354116, "step": 3010 }, { "epoch": 0.28183647681003415, "grad_norm": 21.33682632446289, "learning_rate": 5e-05, "loss": 1.1049, "num_input_tokens_seen": 199420316, "step": 3011 }, { "epoch": 0.28183647681003415, "loss": 0.9362991452217102, "loss_ce": 0.006611632648855448, "loss_iou": 0.380859375, "loss_num": 0.033447265625, "loss_xval": 0.9296875, "num_input_tokens_seen": 199420316, "step": 3011 }, { "epoch": 0.2819300790939299, "grad_norm": 32.78297805786133, "learning_rate": 5e-05, "loss": 1.318, "num_input_tokens_seen": 199487272, "step": 3012 }, { "epoch": 0.2819300790939299, "loss": 1.1804206371307373, "loss_ce": 0.00659254239872098, "loss_iou": 0.5, "loss_num": 0.033935546875, "loss_xval": 1.171875, "num_input_tokens_seen": 199487272, "step": 3012 }, { "epoch": 0.28202368137782563, "grad_norm": 26.72673797607422, "learning_rate": 5e-05, "loss": 1.2979, "num_input_tokens_seen": 199553644, "step": 3013 }, { "epoch": 0.28202368137782563, "loss": 1.1061081886291504, "loss_ce": 0.00747529324144125, "loss_iou": 0.466796875, "loss_num": 0.033203125, "loss_xval": 1.1015625, "num_input_tokens_seen": 199553644, "step": 3013 }, { "epoch": 0.28211728366172134, "grad_norm": 25.93937110900879, "learning_rate": 5e-05, "loss": 1.4116, "num_input_tokens_seen": 199619412, "step": 3014 }, { "epoch": 0.28211728366172134, "loss": 1.2846951484680176, "loss_ce": 0.004909959621727467, "loss_iou": 0.53515625, "loss_num": 0.041748046875, "loss_xval": 1.28125, "num_input_tokens_seen": 199619412, "step": 3014 }, { "epoch": 0.28221088594561705, "grad_norm": 33.346580505371094, "learning_rate": 5e-05, "loss": 1.5906, "num_input_tokens_seen": 199686320, "step": 3015 }, { "epoch": 0.28221088594561705, "loss": 1.7591780424118042, "loss_ce": 0.004783453419804573, "loss_iou": 0.73828125, "loss_num": 0.056396484375, "loss_xval": 1.7578125, "num_input_tokens_seen": 199686320, "step": 3015 }, { "epoch": 0.2823044882295128, "grad_norm": 26.67934799194336, "learning_rate": 5e-05, "loss": 1.5406, "num_input_tokens_seen": 199752700, "step": 3016 }, { "epoch": 0.2823044882295128, "loss": 1.3743374347686768, "loss_ce": 0.003243792802095413, "loss_iou": 0.6015625, "loss_num": 0.033203125, "loss_xval": 1.375, "num_input_tokens_seen": 199752700, "step": 3016 }, { "epoch": 0.28239809051340853, "grad_norm": 75.38923645019531, "learning_rate": 5e-05, "loss": 1.0007, "num_input_tokens_seen": 199817348, "step": 3017 }, { "epoch": 0.28239809051340853, "loss": 0.8193593621253967, "loss_ce": 0.007103499956429005, "loss_iou": 0.33203125, "loss_num": 0.0296630859375, "loss_xval": 0.8125, "num_input_tokens_seen": 199817348, "step": 3017 }, { "epoch": 0.28249169279730424, "grad_norm": 25.729848861694336, "learning_rate": 5e-05, "loss": 1.4244, "num_input_tokens_seen": 199883940, "step": 3018 }, { "epoch": 0.28249169279730424, "loss": 1.4630744457244873, "loss_ce": 0.006043207366019487, "loss_iou": 0.5859375, "loss_num": 0.056884765625, "loss_xval": 1.453125, "num_input_tokens_seen": 199883940, "step": 3018 }, { "epoch": 0.2825852950812, "grad_norm": 76.04930114746094, "learning_rate": 5e-05, "loss": 1.4571, "num_input_tokens_seen": 199950904, "step": 3019 }, { "epoch": 0.2825852950812, "loss": 1.5464365482330322, "loss_ce": 0.0054209185764193535, "loss_iou": 0.6640625, "loss_num": 0.042724609375, "loss_xval": 1.5390625, "num_input_tokens_seen": 199950904, "step": 3019 }, { "epoch": 0.2826788973650957, "grad_norm": 105.80778503417969, "learning_rate": 5e-05, "loss": 1.5398, "num_input_tokens_seen": 200017584, "step": 3020 }, { "epoch": 0.2826788973650957, "loss": 1.6474591493606567, "loss_ce": 0.0034162087831646204, "loss_iou": 0.671875, "loss_num": 0.060546875, "loss_xval": 1.640625, "num_input_tokens_seen": 200017584, "step": 3020 }, { "epoch": 0.2827724996489914, "grad_norm": 24.542709350585938, "learning_rate": 5e-05, "loss": 1.4423, "num_input_tokens_seen": 200083628, "step": 3021 }, { "epoch": 0.2827724996489914, "loss": 1.3654754161834717, "loss_ce": 0.008297751657664776, "loss_iou": 0.58984375, "loss_num": 0.035888671875, "loss_xval": 1.359375, "num_input_tokens_seen": 200083628, "step": 3021 }, { "epoch": 0.28286610193288714, "grad_norm": 23.94529151916504, "learning_rate": 5e-05, "loss": 1.3947, "num_input_tokens_seen": 200150140, "step": 3022 }, { "epoch": 0.28286610193288714, "loss": 1.3242911100387573, "loss_ce": 0.003490261733531952, "loss_iou": 0.61328125, "loss_num": 0.01904296875, "loss_xval": 1.3203125, "num_input_tokens_seen": 200150140, "step": 3022 }, { "epoch": 0.2829597042167829, "grad_norm": 26.056406021118164, "learning_rate": 5e-05, "loss": 1.2948, "num_input_tokens_seen": 200216752, "step": 3023 }, { "epoch": 0.2829597042167829, "loss": 1.2260204553604126, "loss_ce": 0.0023876808118075132, "loss_iou": 0.53125, "loss_num": 0.031982421875, "loss_xval": 1.2265625, "num_input_tokens_seen": 200216752, "step": 3023 }, { "epoch": 0.2830533065006786, "grad_norm": 37.56977081298828, "learning_rate": 5e-05, "loss": 1.694, "num_input_tokens_seen": 200284168, "step": 3024 }, { "epoch": 0.2830533065006786, "loss": 1.7354222536087036, "loss_ce": 0.0030004363507032394, "loss_iou": 0.72265625, "loss_num": 0.057373046875, "loss_xval": 1.734375, "num_input_tokens_seen": 200284168, "step": 3024 }, { "epoch": 0.2831469087845743, "grad_norm": 21.75420570373535, "learning_rate": 5e-05, "loss": 1.2698, "num_input_tokens_seen": 200350508, "step": 3025 }, { "epoch": 0.2831469087845743, "loss": 1.2544282674789429, "loss_ce": 0.004428272135555744, "loss_iou": 0.46875, "loss_num": 0.062255859375, "loss_xval": 1.25, "num_input_tokens_seen": 200350508, "step": 3025 }, { "epoch": 0.2832405110684701, "grad_norm": 27.25312614440918, "learning_rate": 5e-05, "loss": 1.4511, "num_input_tokens_seen": 200417128, "step": 3026 }, { "epoch": 0.2832405110684701, "loss": 1.2957229614257812, "loss_ce": 0.00812535360455513, "loss_iou": 0.546875, "loss_num": 0.038330078125, "loss_xval": 1.2890625, "num_input_tokens_seen": 200417128, "step": 3026 }, { "epoch": 0.2833341133523658, "grad_norm": 31.485076904296875, "learning_rate": 5e-05, "loss": 1.3466, "num_input_tokens_seen": 200483580, "step": 3027 }, { "epoch": 0.2833341133523658, "loss": 1.3404892683029175, "loss_ce": 0.006993155926465988, "loss_iou": 0.5703125, "loss_num": 0.038818359375, "loss_xval": 1.3359375, "num_input_tokens_seen": 200483580, "step": 3027 }, { "epoch": 0.2834277156362615, "grad_norm": 23.445817947387695, "learning_rate": 5e-05, "loss": 1.2481, "num_input_tokens_seen": 200548228, "step": 3028 }, { "epoch": 0.2834277156362615, "loss": 1.1979817152023315, "loss_ce": 0.006087195128202438, "loss_iou": 0.47265625, "loss_num": 0.0498046875, "loss_xval": 1.1953125, "num_input_tokens_seen": 200548228, "step": 3028 }, { "epoch": 0.2835213179201573, "grad_norm": 9.60848331451416, "learning_rate": 5e-05, "loss": 1.07, "num_input_tokens_seen": 200614080, "step": 3029 }, { "epoch": 0.2835213179201573, "loss": 1.2597129344940186, "loss_ce": 0.009712908416986465, "loss_iou": 0.50390625, "loss_num": 0.04833984375, "loss_xval": 1.25, "num_input_tokens_seen": 200614080, "step": 3029 }, { "epoch": 0.283614920204053, "grad_norm": 56.132328033447266, "learning_rate": 5e-05, "loss": 1.3437, "num_input_tokens_seen": 200681156, "step": 3030 }, { "epoch": 0.283614920204053, "loss": 1.1957062482833862, "loss_ce": 0.007229625713080168, "loss_iou": 0.484375, "loss_num": 0.043701171875, "loss_xval": 1.1875, "num_input_tokens_seen": 200681156, "step": 3030 }, { "epoch": 0.2837085224879487, "grad_norm": 29.766399383544922, "learning_rate": 5e-05, "loss": 1.4205, "num_input_tokens_seen": 200747064, "step": 3031 }, { "epoch": 0.2837085224879487, "loss": 1.3620655536651611, "loss_ce": 0.0026904933620244265, "loss_iou": 0.48828125, "loss_num": 0.07666015625, "loss_xval": 1.359375, "num_input_tokens_seen": 200747064, "step": 3031 }, { "epoch": 0.2838021247718444, "grad_norm": 36.39285659790039, "learning_rate": 5e-05, "loss": 1.3372, "num_input_tokens_seen": 200813232, "step": 3032 }, { "epoch": 0.2838021247718444, "loss": 1.2890684604644775, "loss_ce": 0.004156413953751326, "loss_iou": 0.55859375, "loss_num": 0.03369140625, "loss_xval": 1.28125, "num_input_tokens_seen": 200813232, "step": 3032 }, { "epoch": 0.2838957270557402, "grad_norm": 21.098520278930664, "learning_rate": 5e-05, "loss": 1.7289, "num_input_tokens_seen": 200879408, "step": 3033 }, { "epoch": 0.2838957270557402, "loss": 1.7756386995315552, "loss_ce": 0.006107440683990717, "loss_iou": 0.734375, "loss_num": 0.06005859375, "loss_xval": 1.765625, "num_input_tokens_seen": 200879408, "step": 3033 }, { "epoch": 0.2839893293396359, "grad_norm": 24.60450553894043, "learning_rate": 5e-05, "loss": 1.5552, "num_input_tokens_seen": 200945764, "step": 3034 }, { "epoch": 0.2839893293396359, "loss": 1.7291457653045654, "loss_ce": 0.004536377266049385, "loss_iou": 0.6796875, "loss_num": 0.0732421875, "loss_xval": 1.7265625, "num_input_tokens_seen": 200945764, "step": 3034 }, { "epoch": 0.2840829316235316, "grad_norm": 32.24265670776367, "learning_rate": 5e-05, "loss": 1.3851, "num_input_tokens_seen": 201012184, "step": 3035 }, { "epoch": 0.2840829316235316, "loss": 1.4628403186798096, "loss_ce": 0.002879311330616474, "loss_iou": 0.6171875, "loss_num": 0.0458984375, "loss_xval": 1.4609375, "num_input_tokens_seen": 201012184, "step": 3035 }, { "epoch": 0.28417653390742736, "grad_norm": 22.662155151367188, "learning_rate": 5e-05, "loss": 1.2645, "num_input_tokens_seen": 201078792, "step": 3036 }, { "epoch": 0.28417653390742736, "loss": 1.3859742879867554, "loss_ce": 0.006091486196964979, "loss_iou": 0.55859375, "loss_num": 0.05224609375, "loss_xval": 1.3828125, "num_input_tokens_seen": 201078792, "step": 3036 }, { "epoch": 0.2842701361913231, "grad_norm": 62.40760803222656, "learning_rate": 5e-05, "loss": 1.3122, "num_input_tokens_seen": 201144312, "step": 3037 }, { "epoch": 0.2842701361913231, "loss": 1.3730086088180542, "loss_ce": 0.0038679810240864754, "loss_iou": 0.58203125, "loss_num": 0.040771484375, "loss_xval": 1.3671875, "num_input_tokens_seen": 201144312, "step": 3037 }, { "epoch": 0.2843637384752188, "grad_norm": 19.97221565246582, "learning_rate": 5e-05, "loss": 1.2313, "num_input_tokens_seen": 201210704, "step": 3038 }, { "epoch": 0.2843637384752188, "loss": 1.2948195934295654, "loss_ce": 0.005757001228630543, "loss_iou": 0.5390625, "loss_num": 0.04296875, "loss_xval": 1.2890625, "num_input_tokens_seen": 201210704, "step": 3038 }, { "epoch": 0.2844573407591145, "grad_norm": 18.19036293029785, "learning_rate": 5e-05, "loss": 1.261, "num_input_tokens_seen": 201277684, "step": 3039 }, { "epoch": 0.2844573407591145, "loss": 1.2203940153121948, "loss_ce": 0.00262056733481586, "loss_iou": 0.52734375, "loss_num": 0.032470703125, "loss_xval": 1.21875, "num_input_tokens_seen": 201277684, "step": 3039 }, { "epoch": 0.28455094304301026, "grad_norm": 44.580387115478516, "learning_rate": 5e-05, "loss": 1.2232, "num_input_tokens_seen": 201343984, "step": 3040 }, { "epoch": 0.28455094304301026, "loss": 1.2985180616378784, "loss_ce": 0.006037507671862841, "loss_iou": 0.53125, "loss_num": 0.046142578125, "loss_xval": 1.2890625, "num_input_tokens_seen": 201343984, "step": 3040 }, { "epoch": 0.284644545326906, "grad_norm": 21.293792724609375, "learning_rate": 5e-05, "loss": 1.9223, "num_input_tokens_seen": 201410180, "step": 3041 }, { "epoch": 0.284644545326906, "loss": 1.825531244277954, "loss_ce": 0.008148333057761192, "loss_iou": 0.7578125, "loss_num": 0.059814453125, "loss_xval": 1.8203125, "num_input_tokens_seen": 201410180, "step": 3041 }, { "epoch": 0.2847381476108017, "grad_norm": 24.56401252746582, "learning_rate": 5e-05, "loss": 1.665, "num_input_tokens_seen": 201476288, "step": 3042 }, { "epoch": 0.2847381476108017, "loss": 1.6926859617233276, "loss_ce": 0.010068817995488644, "loss_iou": 0.7109375, "loss_num": 0.05224609375, "loss_xval": 1.6796875, "num_input_tokens_seen": 201476288, "step": 3042 }, { "epoch": 0.28483174989469745, "grad_norm": 67.64642333984375, "learning_rate": 5e-05, "loss": 0.9365, "num_input_tokens_seen": 201541224, "step": 3043 }, { "epoch": 0.28483174989469745, "loss": 1.1663177013397217, "loss_ce": 0.006253093481063843, "loss_iou": 0.48828125, "loss_num": 0.036865234375, "loss_xval": 1.15625, "num_input_tokens_seen": 201541224, "step": 3043 }, { "epoch": 0.28492535217859316, "grad_norm": 18.67448616027832, "learning_rate": 5e-05, "loss": 1.2049, "num_input_tokens_seen": 201607664, "step": 3044 }, { "epoch": 0.28492535217859316, "loss": 1.2268407344818115, "loss_ce": 0.006259709130972624, "loss_iou": 0.5078125, "loss_num": 0.041015625, "loss_xval": 1.21875, "num_input_tokens_seen": 201607664, "step": 3044 }, { "epoch": 0.28501895446248887, "grad_norm": 26.925634384155273, "learning_rate": 5e-05, "loss": 1.314, "num_input_tokens_seen": 201674108, "step": 3045 }, { "epoch": 0.28501895446248887, "loss": 1.528472900390625, "loss_ce": 0.005035434849560261, "loss_iou": 0.62890625, "loss_num": 0.053466796875, "loss_xval": 1.5234375, "num_input_tokens_seen": 201674108, "step": 3045 }, { "epoch": 0.28511255674638464, "grad_norm": 30.826522827148438, "learning_rate": 5e-05, "loss": 1.5354, "num_input_tokens_seen": 201740684, "step": 3046 }, { "epoch": 0.28511255674638464, "loss": 1.604750633239746, "loss_ce": 0.0075826384127140045, "loss_iou": 0.6640625, "loss_num": 0.05419921875, "loss_xval": 1.59375, "num_input_tokens_seen": 201740684, "step": 3046 }, { "epoch": 0.28520615903028035, "grad_norm": 21.951374053955078, "learning_rate": 5e-05, "loss": 1.4433, "num_input_tokens_seen": 201807480, "step": 3047 }, { "epoch": 0.28520615903028035, "loss": 1.377296805381775, "loss_ce": 0.008156189695000648, "loss_iou": 0.59375, "loss_num": 0.0361328125, "loss_xval": 1.3671875, "num_input_tokens_seen": 201807480, "step": 3047 }, { "epoch": 0.28529976131417606, "grad_norm": 39.95161437988281, "learning_rate": 5e-05, "loss": 1.5745, "num_input_tokens_seen": 201874104, "step": 3048 }, { "epoch": 0.28529976131417606, "loss": 1.4472811222076416, "loss_ce": 0.0029452352318912745, "loss_iou": 0.60546875, "loss_num": 0.047119140625, "loss_xval": 1.4453125, "num_input_tokens_seen": 201874104, "step": 3048 }, { "epoch": 0.28539336359807177, "grad_norm": 40.374271392822266, "learning_rate": 5e-05, "loss": 1.5279, "num_input_tokens_seen": 201940068, "step": 3049 }, { "epoch": 0.28539336359807177, "loss": 1.4617282152175903, "loss_ce": 0.006161773111671209, "loss_iou": 0.57421875, "loss_num": 0.061767578125, "loss_xval": 1.453125, "num_input_tokens_seen": 201940068, "step": 3049 }, { "epoch": 0.28548696588196754, "grad_norm": 20.22443962097168, "learning_rate": 5e-05, "loss": 1.4602, "num_input_tokens_seen": 202006508, "step": 3050 }, { "epoch": 0.28548696588196754, "loss": 1.4855684041976929, "loss_ce": 0.004123128484934568, "loss_iou": 0.6484375, "loss_num": 0.037109375, "loss_xval": 1.484375, "num_input_tokens_seen": 202006508, "step": 3050 }, { "epoch": 0.28558056816586325, "grad_norm": 34.23317337036133, "learning_rate": 5e-05, "loss": 1.28, "num_input_tokens_seen": 202072496, "step": 3051 }, { "epoch": 0.28558056816586325, "loss": 1.418210506439209, "loss_ce": 0.006101028528064489, "loss_iou": 0.5703125, "loss_num": 0.055419921875, "loss_xval": 1.4140625, "num_input_tokens_seen": 202072496, "step": 3051 }, { "epoch": 0.28567417044975896, "grad_norm": 37.79703140258789, "learning_rate": 5e-05, "loss": 1.4676, "num_input_tokens_seen": 202138636, "step": 3052 }, { "epoch": 0.28567417044975896, "loss": 1.6764185428619385, "loss_ce": 0.0055201370269060135, "loss_iou": 0.6796875, "loss_num": 0.0625, "loss_xval": 1.671875, "num_input_tokens_seen": 202138636, "step": 3052 }, { "epoch": 0.2857677727336547, "grad_norm": 21.096168518066406, "learning_rate": 5e-05, "loss": 1.6136, "num_input_tokens_seen": 202204992, "step": 3053 }, { "epoch": 0.2857677727336547, "loss": 1.8560161590576172, "loss_ce": 0.006406885571777821, "loss_iou": 0.75, "loss_num": 0.06884765625, "loss_xval": 1.8515625, "num_input_tokens_seen": 202204992, "step": 3053 }, { "epoch": 0.28586137501755043, "grad_norm": 18.430843353271484, "learning_rate": 5e-05, "loss": 1.2172, "num_input_tokens_seen": 202271552, "step": 3054 }, { "epoch": 0.28586137501755043, "loss": 1.1434144973754883, "loss_ce": 0.00523093156516552, "loss_iou": 0.48046875, "loss_num": 0.035400390625, "loss_xval": 1.140625, "num_input_tokens_seen": 202271552, "step": 3054 }, { "epoch": 0.28595497730144614, "grad_norm": 17.02061653137207, "learning_rate": 5e-05, "loss": 1.2744, "num_input_tokens_seen": 202337792, "step": 3055 }, { "epoch": 0.28595497730144614, "loss": 1.2045364379882812, "loss_ce": 0.008735625073313713, "loss_iou": 0.49609375, "loss_num": 0.041015625, "loss_xval": 1.1953125, "num_input_tokens_seen": 202337792, "step": 3055 }, { "epoch": 0.28604857958534186, "grad_norm": 33.488468170166016, "learning_rate": 5e-05, "loss": 1.1886, "num_input_tokens_seen": 202405424, "step": 3056 }, { "epoch": 0.28604857958534186, "loss": 1.312138557434082, "loss_ce": 0.0045213233679533005, "loss_iou": 0.54296875, "loss_num": 0.0439453125, "loss_xval": 1.3046875, "num_input_tokens_seen": 202405424, "step": 3056 }, { "epoch": 0.2861421818692376, "grad_norm": 23.083343505859375, "learning_rate": 5e-05, "loss": 1.4977, "num_input_tokens_seen": 202471848, "step": 3057 }, { "epoch": 0.2861421818692376, "loss": 1.3633100986480713, "loss_ce": 0.002958590630441904, "loss_iou": 0.60546875, "loss_num": 0.029541015625, "loss_xval": 1.359375, "num_input_tokens_seen": 202471848, "step": 3057 }, { "epoch": 0.28623578415313333, "grad_norm": 21.281354904174805, "learning_rate": 5e-05, "loss": 1.2358, "num_input_tokens_seen": 202538592, "step": 3058 }, { "epoch": 0.28623578415313333, "loss": 1.132489562034607, "loss_ce": 0.004559872671961784, "loss_iou": 0.47265625, "loss_num": 0.036376953125, "loss_xval": 1.125, "num_input_tokens_seen": 202538592, "step": 3058 }, { "epoch": 0.28632938643702904, "grad_norm": 20.42490005493164, "learning_rate": 5e-05, "loss": 1.2342, "num_input_tokens_seen": 202605584, "step": 3059 }, { "epoch": 0.28632938643702904, "loss": 1.2003170251846313, "loss_ce": 0.002074801828712225, "loss_iou": 0.52734375, "loss_num": 0.029052734375, "loss_xval": 1.1953125, "num_input_tokens_seen": 202605584, "step": 3059 }, { "epoch": 0.2864229887209248, "grad_norm": 35.977149963378906, "learning_rate": 5e-05, "loss": 1.1665, "num_input_tokens_seen": 202670260, "step": 3060 }, { "epoch": 0.2864229887209248, "loss": 1.1984047889709473, "loss_ce": 0.00504543911665678, "loss_iou": 0.478515625, "loss_num": 0.04736328125, "loss_xval": 1.1953125, "num_input_tokens_seen": 202670260, "step": 3060 }, { "epoch": 0.2865165910048205, "grad_norm": 30.093826293945312, "learning_rate": 5e-05, "loss": 1.4555, "num_input_tokens_seen": 202735752, "step": 3061 }, { "epoch": 0.2865165910048205, "loss": 1.424020528793335, "loss_ce": 0.0070283068343997, "loss_iou": 0.5234375, "loss_num": 0.07373046875, "loss_xval": 1.4140625, "num_input_tokens_seen": 202735752, "step": 3061 }, { "epoch": 0.28661019328871623, "grad_norm": 37.46656799316406, "learning_rate": 5e-05, "loss": 1.4962, "num_input_tokens_seen": 202802616, "step": 3062 }, { "epoch": 0.28661019328871623, "loss": 1.8160855770111084, "loss_ce": 0.003585491795092821, "loss_iou": 0.76171875, "loss_num": 0.0576171875, "loss_xval": 1.8125, "num_input_tokens_seen": 202802616, "step": 3062 }, { "epoch": 0.286703795572612, "grad_norm": 19.977628707885742, "learning_rate": 5e-05, "loss": 1.5627, "num_input_tokens_seen": 202869176, "step": 3063 }, { "epoch": 0.286703795572612, "loss": 1.564035177230835, "loss_ce": 0.007882889360189438, "loss_iou": 0.65234375, "loss_num": 0.05078125, "loss_xval": 1.5546875, "num_input_tokens_seen": 202869176, "step": 3063 }, { "epoch": 0.2867973978565077, "grad_norm": 185.08062744140625, "learning_rate": 5e-05, "loss": 1.4283, "num_input_tokens_seen": 202935300, "step": 3064 }, { "epoch": 0.2867973978565077, "loss": 1.2464423179626465, "loss_ce": 0.004254757426679134, "loss_iou": 0.54296875, "loss_num": 0.0311279296875, "loss_xval": 1.2421875, "num_input_tokens_seen": 202935300, "step": 3064 }, { "epoch": 0.2868910001404034, "grad_norm": 23.055692672729492, "learning_rate": 5e-05, "loss": 1.3364, "num_input_tokens_seen": 203001632, "step": 3065 }, { "epoch": 0.2868910001404034, "loss": 1.378543496131897, "loss_ce": 0.006473171524703503, "loss_iou": 0.59765625, "loss_num": 0.03466796875, "loss_xval": 1.375, "num_input_tokens_seen": 203001632, "step": 3065 }, { "epoch": 0.28698460242429913, "grad_norm": 30.766271591186523, "learning_rate": 5e-05, "loss": 1.2575, "num_input_tokens_seen": 203068964, "step": 3066 }, { "epoch": 0.28698460242429913, "loss": 1.3517169952392578, "loss_ce": 0.0030842546839267015, "loss_iou": 0.58984375, "loss_num": 0.034423828125, "loss_xval": 1.3515625, "num_input_tokens_seen": 203068964, "step": 3066 }, { "epoch": 0.2870782047081949, "grad_norm": 35.7118034362793, "learning_rate": 5e-05, "loss": 1.3582, "num_input_tokens_seen": 203135280, "step": 3067 }, { "epoch": 0.2870782047081949, "loss": 1.181583285331726, "loss_ce": 0.006290363147854805, "loss_iou": 0.5234375, "loss_num": 0.0263671875, "loss_xval": 1.171875, "num_input_tokens_seen": 203135280, "step": 3067 }, { "epoch": 0.2871718069920906, "grad_norm": 17.745744705200195, "learning_rate": 5e-05, "loss": 1.3908, "num_input_tokens_seen": 203200880, "step": 3068 }, { "epoch": 0.2871718069920906, "loss": 1.23586905002594, "loss_ce": 0.00552239827811718, "loss_iou": 0.482421875, "loss_num": 0.053466796875, "loss_xval": 1.2265625, "num_input_tokens_seen": 203200880, "step": 3068 }, { "epoch": 0.2872654092759863, "grad_norm": 21.535432815551758, "learning_rate": 5e-05, "loss": 1.3066, "num_input_tokens_seen": 203267708, "step": 3069 }, { "epoch": 0.2872654092759863, "loss": 1.5727143287658691, "loss_ce": 0.008261275477707386, "loss_iou": 0.6171875, "loss_num": 0.06591796875, "loss_xval": 1.5625, "num_input_tokens_seen": 203267708, "step": 3069 }, { "epoch": 0.2873590115598821, "grad_norm": 24.41970443725586, "learning_rate": 5e-05, "loss": 1.3044, "num_input_tokens_seen": 203334544, "step": 3070 }, { "epoch": 0.2873590115598821, "loss": 1.2093100547790527, "loss_ce": 0.005696695763617754, "loss_iou": 0.5078125, "loss_num": 0.03759765625, "loss_xval": 1.203125, "num_input_tokens_seen": 203334544, "step": 3070 }, { "epoch": 0.2874526138437778, "grad_norm": 35.85791015625, "learning_rate": 5e-05, "loss": 1.4133, "num_input_tokens_seen": 203401620, "step": 3071 }, { "epoch": 0.2874526138437778, "loss": 1.2298303842544556, "loss_ce": 0.003267889376729727, "loss_iou": 0.515625, "loss_num": 0.038818359375, "loss_xval": 1.2265625, "num_input_tokens_seen": 203401620, "step": 3071 }, { "epoch": 0.2875462161276735, "grad_norm": 186.5697784423828, "learning_rate": 5e-05, "loss": 1.3013, "num_input_tokens_seen": 203466636, "step": 3072 }, { "epoch": 0.2875462161276735, "loss": 1.560002326965332, "loss_ce": 0.005314810201525688, "loss_iou": 0.609375, "loss_num": 0.06787109375, "loss_xval": 1.5546875, "num_input_tokens_seen": 203466636, "step": 3072 }, { "epoch": 0.2876398184115692, "grad_norm": 21.278427124023438, "learning_rate": 5e-05, "loss": 1.1918, "num_input_tokens_seen": 203532620, "step": 3073 }, { "epoch": 0.2876398184115692, "loss": 1.1815638542175293, "loss_ce": 0.0032802638597786427, "loss_iou": 0.484375, "loss_num": 0.041748046875, "loss_xval": 1.1796875, "num_input_tokens_seen": 203532620, "step": 3073 }, { "epoch": 0.287733420695465, "grad_norm": 22.485498428344727, "learning_rate": 5e-05, "loss": 1.4908, "num_input_tokens_seen": 203599092, "step": 3074 }, { "epoch": 0.287733420695465, "loss": 1.5560481548309326, "loss_ce": 0.004290410317480564, "loss_iou": 0.62109375, "loss_num": 0.061279296875, "loss_xval": 1.5546875, "num_input_tokens_seen": 203599092, "step": 3074 }, { "epoch": 0.2878270229793607, "grad_norm": 37.10167694091797, "learning_rate": 5e-05, "loss": 1.7451, "num_input_tokens_seen": 203666568, "step": 3075 }, { "epoch": 0.2878270229793607, "loss": 1.476119041442871, "loss_ce": 0.003462723223492503, "loss_iou": 0.6015625, "loss_num": 0.053955078125, "loss_xval": 1.46875, "num_input_tokens_seen": 203666568, "step": 3075 }, { "epoch": 0.2879206252632564, "grad_norm": 20.182947158813477, "learning_rate": 5e-05, "loss": 1.4946, "num_input_tokens_seen": 203732500, "step": 3076 }, { "epoch": 0.2879206252632564, "loss": 1.595193862915039, "loss_ce": 0.006326563656330109, "loss_iou": 0.6640625, "loss_num": 0.05126953125, "loss_xval": 1.5859375, "num_input_tokens_seen": 203732500, "step": 3076 }, { "epoch": 0.28801422754715217, "grad_norm": 30.674283981323242, "learning_rate": 5e-05, "loss": 1.2873, "num_input_tokens_seen": 203799168, "step": 3077 }, { "epoch": 0.28801422754715217, "loss": 1.2293314933776855, "loss_ce": 0.004722068086266518, "loss_iou": 0.51171875, "loss_num": 0.040283203125, "loss_xval": 1.2265625, "num_input_tokens_seen": 203799168, "step": 3077 }, { "epoch": 0.2881078298310479, "grad_norm": 31.837526321411133, "learning_rate": 5e-05, "loss": 1.3774, "num_input_tokens_seen": 203865784, "step": 3078 }, { "epoch": 0.2881078298310479, "loss": 1.2486945390701294, "loss_ce": 0.0021125266794115305, "loss_iou": 0.51953125, "loss_num": 0.040771484375, "loss_xval": 1.25, "num_input_tokens_seen": 203865784, "step": 3078 }, { "epoch": 0.2882014321149436, "grad_norm": 23.467811584472656, "learning_rate": 5e-05, "loss": 1.7556, "num_input_tokens_seen": 203930564, "step": 3079 }, { "epoch": 0.2882014321149436, "loss": 1.7434748411178589, "loss_ce": 0.0032404691446572542, "loss_iou": 0.69140625, "loss_num": 0.07177734375, "loss_xval": 1.7421875, "num_input_tokens_seen": 203930564, "step": 3079 }, { "epoch": 0.28829503439883936, "grad_norm": 28.333711624145508, "learning_rate": 5e-05, "loss": 1.1921, "num_input_tokens_seen": 203997980, "step": 3080 }, { "epoch": 0.28829503439883936, "loss": 1.235658884048462, "loss_ce": 0.004701939411461353, "loss_iou": 0.515625, "loss_num": 0.03955078125, "loss_xval": 1.234375, "num_input_tokens_seen": 203997980, "step": 3080 }, { "epoch": 0.28838863668273507, "grad_norm": 23.09746742248535, "learning_rate": 5e-05, "loss": 1.3955, "num_input_tokens_seen": 204063484, "step": 3081 }, { "epoch": 0.28838863668273507, "loss": 1.5260694026947021, "loss_ce": 0.0075147938914597034, "loss_iou": 0.59375, "loss_num": 0.06689453125, "loss_xval": 1.515625, "num_input_tokens_seen": 204063484, "step": 3081 }, { "epoch": 0.2884822389666308, "grad_norm": 32.867923736572266, "learning_rate": 5e-05, "loss": 1.5281, "num_input_tokens_seen": 204129316, "step": 3082 }, { "epoch": 0.2884822389666308, "loss": 1.2687959671020508, "loss_ce": 0.005124174989759922, "loss_iou": 0.546875, "loss_num": 0.033935546875, "loss_xval": 1.265625, "num_input_tokens_seen": 204129316, "step": 3082 }, { "epoch": 0.2885758412505265, "grad_norm": 29.352083206176758, "learning_rate": 5e-05, "loss": 1.4065, "num_input_tokens_seen": 204196324, "step": 3083 }, { "epoch": 0.2885758412505265, "loss": 1.4482903480529785, "loss_ce": 0.005419221706688404, "loss_iou": 0.640625, "loss_num": 0.03173828125, "loss_xval": 1.4453125, "num_input_tokens_seen": 204196324, "step": 3083 }, { "epoch": 0.28866944353442225, "grad_norm": 12.212352752685547, "learning_rate": 5e-05, "loss": 1.0674, "num_input_tokens_seen": 204263408, "step": 3084 }, { "epoch": 0.28866944353442225, "loss": 0.9747363328933716, "loss_ce": 0.005986327771097422, "loss_iou": 0.408203125, "loss_num": 0.0303955078125, "loss_xval": 0.96875, "num_input_tokens_seen": 204263408, "step": 3084 }, { "epoch": 0.28876304581831796, "grad_norm": 28.14958381652832, "learning_rate": 5e-05, "loss": 1.4128, "num_input_tokens_seen": 204330552, "step": 3085 }, { "epoch": 0.28876304581831796, "loss": 1.2859044075012207, "loss_ce": 0.004654408432543278, "loss_iou": 0.5234375, "loss_num": 0.046875, "loss_xval": 1.28125, "num_input_tokens_seen": 204330552, "step": 3085 }, { "epoch": 0.2888566481022137, "grad_norm": 30.277748107910156, "learning_rate": 5e-05, "loss": 1.4481, "num_input_tokens_seen": 204398292, "step": 3086 }, { "epoch": 0.2888566481022137, "loss": 1.2599860429763794, "loss_ce": 0.0016852568369358778, "loss_iou": 0.54296875, "loss_num": 0.034423828125, "loss_xval": 1.2578125, "num_input_tokens_seen": 204398292, "step": 3086 }, { "epoch": 0.28895025038610944, "grad_norm": 27.69671058654785, "learning_rate": 5e-05, "loss": 1.2157, "num_input_tokens_seen": 204463368, "step": 3087 }, { "epoch": 0.28895025038610944, "loss": 1.235903024673462, "loss_ce": 0.004442431032657623, "loss_iou": 0.51953125, "loss_num": 0.038818359375, "loss_xval": 1.234375, "num_input_tokens_seen": 204463368, "step": 3087 }, { "epoch": 0.28904385267000515, "grad_norm": 34.46951675415039, "learning_rate": 5e-05, "loss": 1.267, "num_input_tokens_seen": 204529540, "step": 3088 }, { "epoch": 0.28904385267000515, "loss": 1.1254793405532837, "loss_ce": 0.005362133029848337, "loss_iou": 0.484375, "loss_num": 0.0303955078125, "loss_xval": 1.1171875, "num_input_tokens_seen": 204529540, "step": 3088 }, { "epoch": 0.28913745495390086, "grad_norm": 26.055301666259766, "learning_rate": 5e-05, "loss": 1.419, "num_input_tokens_seen": 204595392, "step": 3089 }, { "epoch": 0.28913745495390086, "loss": 1.349999189376831, "loss_ce": 0.0042960201390087605, "loss_iou": 0.55078125, "loss_num": 0.048095703125, "loss_xval": 1.34375, "num_input_tokens_seen": 204595392, "step": 3089 }, { "epoch": 0.28923105723779663, "grad_norm": 34.675899505615234, "learning_rate": 5e-05, "loss": 1.2665, "num_input_tokens_seen": 204661588, "step": 3090 }, { "epoch": 0.28923105723779663, "loss": 1.447137475013733, "loss_ce": 0.003289783839136362, "loss_iou": 0.625, "loss_num": 0.03955078125, "loss_xval": 1.4453125, "num_input_tokens_seen": 204661588, "step": 3090 }, { "epoch": 0.28932465952169234, "grad_norm": 21.149599075317383, "learning_rate": 5e-05, "loss": 1.4622, "num_input_tokens_seen": 204727372, "step": 3091 }, { "epoch": 0.28932465952169234, "loss": 1.498487949371338, "loss_ce": 0.0053239320404827595, "loss_iou": 0.62890625, "loss_num": 0.046875, "loss_xval": 1.4921875, "num_input_tokens_seen": 204727372, "step": 3091 }, { "epoch": 0.28941826180558805, "grad_norm": 18.59107780456543, "learning_rate": 5e-05, "loss": 1.3873, "num_input_tokens_seen": 204793768, "step": 3092 }, { "epoch": 0.28941826180558805, "loss": 1.2568249702453613, "loss_ce": 0.008045699447393417, "loss_iou": 0.48046875, "loss_num": 0.057373046875, "loss_xval": 1.25, "num_input_tokens_seen": 204793768, "step": 3092 }, { "epoch": 0.28951186408948376, "grad_norm": 15.865816116333008, "learning_rate": 5e-05, "loss": 1.2732, "num_input_tokens_seen": 204859644, "step": 3093 }, { "epoch": 0.28951186408948376, "loss": 1.2384302616119385, "loss_ce": 0.014309210702776909, "loss_iou": 0.51171875, "loss_num": 0.0400390625, "loss_xval": 1.2265625, "num_input_tokens_seen": 204859644, "step": 3093 }, { "epoch": 0.2896054663733795, "grad_norm": 44.99957275390625, "learning_rate": 5e-05, "loss": 1.2151, "num_input_tokens_seen": 204927096, "step": 3094 }, { "epoch": 0.2896054663733795, "loss": 1.2121392488479614, "loss_ce": 0.007549415808171034, "loss_iou": 0.50390625, "loss_num": 0.0390625, "loss_xval": 1.203125, "num_input_tokens_seen": 204927096, "step": 3094 }, { "epoch": 0.28969906865727524, "grad_norm": 33.015228271484375, "learning_rate": 5e-05, "loss": 1.4204, "num_input_tokens_seen": 204993976, "step": 3095 }, { "epoch": 0.28969906865727524, "loss": 1.4124513864517212, "loss_ce": 0.006201413460075855, "loss_iou": 0.57421875, "loss_num": 0.051513671875, "loss_xval": 1.40625, "num_input_tokens_seen": 204993976, "step": 3095 }, { "epoch": 0.28979267094117095, "grad_norm": 29.203514099121094, "learning_rate": 5e-05, "loss": 1.5594, "num_input_tokens_seen": 205059972, "step": 3096 }, { "epoch": 0.28979267094117095, "loss": 1.5423778295516968, "loss_ce": 0.007221626117825508, "loss_iou": 0.64453125, "loss_num": 0.0498046875, "loss_xval": 1.53125, "num_input_tokens_seen": 205059972, "step": 3096 }, { "epoch": 0.2898862732250667, "grad_norm": 42.84995651245117, "learning_rate": 5e-05, "loss": 1.4387, "num_input_tokens_seen": 205126732, "step": 3097 }, { "epoch": 0.2898862732250667, "loss": 1.4812512397766113, "loss_ce": 0.004688835237175226, "loss_iou": 0.55859375, "loss_num": 0.07177734375, "loss_xval": 1.4765625, "num_input_tokens_seen": 205126732, "step": 3097 }, { "epoch": 0.2899798755089624, "grad_norm": 14.987178802490234, "learning_rate": 5e-05, "loss": 1.1734, "num_input_tokens_seen": 205191992, "step": 3098 }, { "epoch": 0.2899798755089624, "loss": 1.015680193901062, "loss_ce": 0.0032289689406752586, "loss_iou": 0.412109375, "loss_num": 0.03759765625, "loss_xval": 1.015625, "num_input_tokens_seen": 205191992, "step": 3098 }, { "epoch": 0.29007347779285814, "grad_norm": 29.289453506469727, "learning_rate": 5e-05, "loss": 1.2393, "num_input_tokens_seen": 205259848, "step": 3099 }, { "epoch": 0.29007347779285814, "loss": 1.2359817028045654, "loss_ce": 0.002094983123242855, "loss_iou": 0.55859375, "loss_num": 0.02294921875, "loss_xval": 1.234375, "num_input_tokens_seen": 205259848, "step": 3099 }, { "epoch": 0.29016708007675385, "grad_norm": 33.743431091308594, "learning_rate": 5e-05, "loss": 1.5154, "num_input_tokens_seen": 205326768, "step": 3100 }, { "epoch": 0.29016708007675385, "loss": 1.6340134143829346, "loss_ce": 0.010966559872031212, "loss_iou": 0.65234375, "loss_num": 0.06396484375, "loss_xval": 1.625, "num_input_tokens_seen": 205326768, "step": 3100 }, { "epoch": 0.2902606823606496, "grad_norm": 27.555036544799805, "learning_rate": 5e-05, "loss": 1.3413, "num_input_tokens_seen": 205392152, "step": 3101 }, { "epoch": 0.2902606823606496, "loss": 1.3167400360107422, "loss_ce": 0.0032635091338306665, "loss_iou": 0.5625, "loss_num": 0.03857421875, "loss_xval": 1.3125, "num_input_tokens_seen": 205392152, "step": 3101 }, { "epoch": 0.2903542846445453, "grad_norm": 78.1739273071289, "learning_rate": 5e-05, "loss": 1.2446, "num_input_tokens_seen": 205458220, "step": 3102 }, { "epoch": 0.2903542846445453, "loss": 1.2450942993164062, "loss_ce": 0.004859840031713247, "loss_iou": 0.53125, "loss_num": 0.03564453125, "loss_xval": 1.2421875, "num_input_tokens_seen": 205458220, "step": 3102 }, { "epoch": 0.29044788692844103, "grad_norm": 41.430233001708984, "learning_rate": 5e-05, "loss": 1.3613, "num_input_tokens_seen": 205525000, "step": 3103 }, { "epoch": 0.29044788692844103, "loss": 1.4686214923858643, "loss_ce": 0.008111229166388512, "loss_iou": 0.609375, "loss_num": 0.048828125, "loss_xval": 1.4609375, "num_input_tokens_seen": 205525000, "step": 3103 }, { "epoch": 0.2905414892123368, "grad_norm": 48.65542984008789, "learning_rate": 5e-05, "loss": 1.1957, "num_input_tokens_seen": 205591060, "step": 3104 }, { "epoch": 0.2905414892123368, "loss": 1.1052707433700562, "loss_ce": 0.004196567926555872, "loss_iou": 0.470703125, "loss_num": 0.03173828125, "loss_xval": 1.1015625, "num_input_tokens_seen": 205591060, "step": 3104 }, { "epoch": 0.2906350914962325, "grad_norm": 23.19552993774414, "learning_rate": 5e-05, "loss": 1.4354, "num_input_tokens_seen": 205657600, "step": 3105 }, { "epoch": 0.2906350914962325, "loss": 1.42152738571167, "loss_ce": 0.008929755538702011, "loss_iou": 0.609375, "loss_num": 0.039306640625, "loss_xval": 1.4140625, "num_input_tokens_seen": 205657600, "step": 3105 }, { "epoch": 0.2907286937801282, "grad_norm": 47.86252212524414, "learning_rate": 5e-05, "loss": 1.2118, "num_input_tokens_seen": 205724056, "step": 3106 }, { "epoch": 0.2907286937801282, "loss": 1.1531574726104736, "loss_ce": 0.006673100404441357, "loss_iou": 0.478515625, "loss_num": 0.0380859375, "loss_xval": 1.1484375, "num_input_tokens_seen": 205724056, "step": 3106 }, { "epoch": 0.290822296064024, "grad_norm": 26.101696014404297, "learning_rate": 5e-05, "loss": 1.4201, "num_input_tokens_seen": 205790660, "step": 3107 }, { "epoch": 0.290822296064024, "loss": 1.4461697340011597, "loss_ce": 0.005251740105450153, "loss_iou": 0.62890625, "loss_num": 0.036865234375, "loss_xval": 1.4375, "num_input_tokens_seen": 205790660, "step": 3107 }, { "epoch": 0.2909158983479197, "grad_norm": 31.460783004760742, "learning_rate": 5e-05, "loss": 1.2205, "num_input_tokens_seen": 205857132, "step": 3108 }, { "epoch": 0.2909158983479197, "loss": 1.2419980764389038, "loss_ce": 0.005669943522661924, "loss_iou": 0.51953125, "loss_num": 0.03955078125, "loss_xval": 1.234375, "num_input_tokens_seen": 205857132, "step": 3108 }, { "epoch": 0.2910095006318154, "grad_norm": 21.015045166015625, "learning_rate": 5e-05, "loss": 1.5869, "num_input_tokens_seen": 205922040, "step": 3109 }, { "epoch": 0.2910095006318154, "loss": 1.64469575881958, "loss_ce": 0.008953599259257317, "loss_iou": 0.66015625, "loss_num": 0.0634765625, "loss_xval": 1.6328125, "num_input_tokens_seen": 205922040, "step": 3109 }, { "epoch": 0.2911031029157111, "grad_norm": 17.923660278320312, "learning_rate": 5e-05, "loss": 1.4023, "num_input_tokens_seen": 205988840, "step": 3110 }, { "epoch": 0.2911031029157111, "loss": 1.5299301147460938, "loss_ce": 0.004539435263723135, "loss_iou": 0.578125, "loss_num": 0.07421875, "loss_xval": 1.5234375, "num_input_tokens_seen": 205988840, "step": 3110 }, { "epoch": 0.2911967051996069, "grad_norm": 26.57171058654785, "learning_rate": 5e-05, "loss": 1.1806, "num_input_tokens_seen": 206055384, "step": 3111 }, { "epoch": 0.2911967051996069, "loss": 1.19579017162323, "loss_ce": 0.006336994934827089, "loss_iou": 0.51171875, "loss_num": 0.033935546875, "loss_xval": 1.1875, "num_input_tokens_seen": 206055384, "step": 3111 }, { "epoch": 0.2912903074835026, "grad_norm": 18.2117862701416, "learning_rate": 5e-05, "loss": 1.2085, "num_input_tokens_seen": 206121116, "step": 3112 }, { "epoch": 0.2912903074835026, "loss": 1.0964746475219727, "loss_ce": 0.005654362961649895, "loss_iou": 0.455078125, "loss_num": 0.0361328125, "loss_xval": 1.09375, "num_input_tokens_seen": 206121116, "step": 3112 }, { "epoch": 0.2913839097673983, "grad_norm": 11.866911888122559, "learning_rate": 5e-05, "loss": 1.1127, "num_input_tokens_seen": 206188104, "step": 3113 }, { "epoch": 0.2913839097673983, "loss": 0.9775887727737427, "loss_ce": 0.003955947235226631, "loss_iou": 0.357421875, "loss_num": 0.05224609375, "loss_xval": 0.97265625, "num_input_tokens_seen": 206188104, "step": 3113 }, { "epoch": 0.2914775120512941, "grad_norm": 24.979978561401367, "learning_rate": 5e-05, "loss": 1.4213, "num_input_tokens_seen": 206253328, "step": 3114 }, { "epoch": 0.2914775120512941, "loss": 1.524076223373413, "loss_ce": 0.004545051604509354, "loss_iou": 0.640625, "loss_num": 0.047607421875, "loss_xval": 1.515625, "num_input_tokens_seen": 206253328, "step": 3114 }, { "epoch": 0.2915711143351898, "grad_norm": 63.859676361083984, "learning_rate": 5e-05, "loss": 1.2491, "num_input_tokens_seen": 206319064, "step": 3115 }, { "epoch": 0.2915711143351898, "loss": 1.3842346668243408, "loss_ce": 0.005328364670276642, "loss_iou": 0.625, "loss_num": 0.0263671875, "loss_xval": 1.375, "num_input_tokens_seen": 206319064, "step": 3115 }, { "epoch": 0.2916647166190855, "grad_norm": 25.727230072021484, "learning_rate": 5e-05, "loss": 1.65, "num_input_tokens_seen": 206385376, "step": 3116 }, { "epoch": 0.2916647166190855, "loss": 1.6381309032440186, "loss_ce": 0.0062949820421636105, "loss_iou": 0.6796875, "loss_num": 0.054931640625, "loss_xval": 1.6328125, "num_input_tokens_seen": 206385376, "step": 3116 }, { "epoch": 0.2917583189029812, "grad_norm": 29.74369239807129, "learning_rate": 5e-05, "loss": 1.2582, "num_input_tokens_seen": 206452084, "step": 3117 }, { "epoch": 0.2917583189029812, "loss": 1.0707981586456299, "loss_ce": 0.007809832692146301, "loss_iou": 0.43359375, "loss_num": 0.0390625, "loss_xval": 1.0625, "num_input_tokens_seen": 206452084, "step": 3117 }, { "epoch": 0.291851921186877, "grad_norm": 41.445213317871094, "learning_rate": 5e-05, "loss": 1.6517, "num_input_tokens_seen": 206519040, "step": 3118 }, { "epoch": 0.291851921186877, "loss": 1.6897058486938477, "loss_ce": 0.007088775746524334, "loss_iou": 0.67578125, "loss_num": 0.06591796875, "loss_xval": 1.6796875, "num_input_tokens_seen": 206519040, "step": 3118 }, { "epoch": 0.2919455234707727, "grad_norm": 20.62035369873047, "learning_rate": 5e-05, "loss": 1.2956, "num_input_tokens_seen": 206584924, "step": 3119 }, { "epoch": 0.2919455234707727, "loss": 1.4671156406402588, "loss_ce": 0.0032485707197338343, "loss_iou": 0.56640625, "loss_num": 0.0673828125, "loss_xval": 1.4609375, "num_input_tokens_seen": 206584924, "step": 3119 }, { "epoch": 0.2920391257546684, "grad_norm": 28.10143280029297, "learning_rate": 5e-05, "loss": 1.4116, "num_input_tokens_seen": 206651260, "step": 3120 }, { "epoch": 0.2920391257546684, "loss": 1.3090413808822632, "loss_ce": 0.0024007961619645357, "loss_iou": 0.546875, "loss_num": 0.0419921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 206651260, "step": 3120 }, { "epoch": 0.29213272803856416, "grad_norm": 31.868993759155273, "learning_rate": 5e-05, "loss": 1.3152, "num_input_tokens_seen": 206717704, "step": 3121 }, { "epoch": 0.29213272803856416, "loss": 1.4792391061782837, "loss_ce": 0.0046297162771224976, "loss_iou": 0.6015625, "loss_num": 0.05419921875, "loss_xval": 1.4765625, "num_input_tokens_seen": 206717704, "step": 3121 }, { "epoch": 0.29222633032245987, "grad_norm": 51.99101638793945, "learning_rate": 5e-05, "loss": 1.7342, "num_input_tokens_seen": 206784936, "step": 3122 }, { "epoch": 0.29222633032245987, "loss": 1.75465726852417, "loss_ce": 0.00661042146384716, "loss_iou": 0.6953125, "loss_num": 0.0712890625, "loss_xval": 1.75, "num_input_tokens_seen": 206784936, "step": 3122 }, { "epoch": 0.2923199326063556, "grad_norm": 25.420894622802734, "learning_rate": 5e-05, "loss": 1.2445, "num_input_tokens_seen": 206850900, "step": 3123 }, { "epoch": 0.2923199326063556, "loss": 1.2743898630142212, "loss_ce": 0.005346850026398897, "loss_iou": 0.52734375, "loss_num": 0.042724609375, "loss_xval": 1.265625, "num_input_tokens_seen": 206850900, "step": 3123 }, { "epoch": 0.29241353489025135, "grad_norm": 28.05361557006836, "learning_rate": 5e-05, "loss": 1.1967, "num_input_tokens_seen": 206916476, "step": 3124 }, { "epoch": 0.29241353489025135, "loss": 0.8975913524627686, "loss_ce": 0.0030601024627685547, "loss_iou": 0.37890625, "loss_num": 0.027587890625, "loss_xval": 0.89453125, "num_input_tokens_seen": 206916476, "step": 3124 }, { "epoch": 0.29250713717414706, "grad_norm": 50.064666748046875, "learning_rate": 5e-05, "loss": 1.2817, "num_input_tokens_seen": 206982304, "step": 3125 }, { "epoch": 0.29250713717414706, "loss": 1.3855113983154297, "loss_ce": 0.006605205126106739, "loss_iou": 0.55859375, "loss_num": 0.052978515625, "loss_xval": 1.375, "num_input_tokens_seen": 206982304, "step": 3125 }, { "epoch": 0.29260073945804277, "grad_norm": 25.029882431030273, "learning_rate": 5e-05, "loss": 1.5905, "num_input_tokens_seen": 207049428, "step": 3126 }, { "epoch": 0.29260073945804277, "loss": 1.6488122940063477, "loss_ce": 0.003304490353912115, "loss_iou": 0.6796875, "loss_num": 0.057373046875, "loss_xval": 1.6484375, "num_input_tokens_seen": 207049428, "step": 3126 }, { "epoch": 0.2926943417419385, "grad_norm": 17.093364715576172, "learning_rate": 5e-05, "loss": 1.0913, "num_input_tokens_seen": 207115056, "step": 3127 }, { "epoch": 0.2926943417419385, "loss": 1.0598585605621338, "loss_ce": 0.001509041991084814, "loss_iou": 0.43359375, "loss_num": 0.03857421875, "loss_xval": 1.0546875, "num_input_tokens_seen": 207115056, "step": 3127 }, { "epoch": 0.29278794402583425, "grad_norm": 118.50226593017578, "learning_rate": 5e-05, "loss": 1.4114, "num_input_tokens_seen": 207181452, "step": 3128 }, { "epoch": 0.29278794402583425, "loss": 1.3256511688232422, "loss_ce": 0.008268414065241814, "loss_iou": 0.53515625, "loss_num": 0.04931640625, "loss_xval": 1.3203125, "num_input_tokens_seen": 207181452, "step": 3128 }, { "epoch": 0.29288154630972996, "grad_norm": 22.6712589263916, "learning_rate": 5e-05, "loss": 1.3039, "num_input_tokens_seen": 207247408, "step": 3129 }, { "epoch": 0.29288154630972996, "loss": 1.2579773664474487, "loss_ce": 0.005047669634222984, "loss_iou": 0.5078125, "loss_num": 0.04736328125, "loss_xval": 1.25, "num_input_tokens_seen": 207247408, "step": 3129 }, { "epoch": 0.29297514859362567, "grad_norm": 31.113643646240234, "learning_rate": 5e-05, "loss": 1.2598, "num_input_tokens_seen": 207312496, "step": 3130 }, { "epoch": 0.29297514859362567, "loss": 1.1999468803405762, "loss_ce": 0.004634324926882982, "loss_iou": 0.515625, "loss_num": 0.03369140625, "loss_xval": 1.1953125, "num_input_tokens_seen": 207312496, "step": 3130 }, { "epoch": 0.29306875087752143, "grad_norm": 22.540021896362305, "learning_rate": 5e-05, "loss": 1.474, "num_input_tokens_seen": 207379652, "step": 3131 }, { "epoch": 0.29306875087752143, "loss": 1.3370617628097534, "loss_ce": 0.004542237147688866, "loss_iou": 0.53125, "loss_num": 0.0537109375, "loss_xval": 1.3359375, "num_input_tokens_seen": 207379652, "step": 3131 }, { "epoch": 0.29316235316141714, "grad_norm": 20.153026580810547, "learning_rate": 5e-05, "loss": 1.2633, "num_input_tokens_seen": 207445852, "step": 3132 }, { "epoch": 0.29316235316141714, "loss": 1.3043044805526733, "loss_ce": 0.009382598102092743, "loss_iou": 0.52734375, "loss_num": 0.048095703125, "loss_xval": 1.296875, "num_input_tokens_seen": 207445852, "step": 3132 }, { "epoch": 0.29325595544531285, "grad_norm": 17.833038330078125, "learning_rate": 5e-05, "loss": 1.2594, "num_input_tokens_seen": 207511536, "step": 3133 }, { "epoch": 0.29325595544531285, "loss": 1.2994135618209839, "loss_ce": 0.003026842838153243, "loss_iou": 0.46875, "loss_num": 0.07177734375, "loss_xval": 1.296875, "num_input_tokens_seen": 207511536, "step": 3133 }, { "epoch": 0.29334955772920857, "grad_norm": 25.51861572265625, "learning_rate": 5e-05, "loss": 1.1744, "num_input_tokens_seen": 207579976, "step": 3134 }, { "epoch": 0.29334955772920857, "loss": 1.2061455249786377, "loss_ce": 0.0054619936272501945, "loss_iou": 0.5234375, "loss_num": 0.03125, "loss_xval": 1.203125, "num_input_tokens_seen": 207579976, "step": 3134 }, { "epoch": 0.29344316001310433, "grad_norm": 24.305742263793945, "learning_rate": 5e-05, "loss": 1.4683, "num_input_tokens_seen": 207646368, "step": 3135 }, { "epoch": 0.29344316001310433, "loss": 1.6683286428451538, "loss_ce": 0.0032895775511860847, "loss_iou": 0.66015625, "loss_num": 0.0693359375, "loss_xval": 1.6640625, "num_input_tokens_seen": 207646368, "step": 3135 }, { "epoch": 0.29353676229700004, "grad_norm": 29.360382080078125, "learning_rate": 5e-05, "loss": 1.3109, "num_input_tokens_seen": 207711772, "step": 3136 }, { "epoch": 0.29353676229700004, "loss": 1.3256011009216309, "loss_ce": 0.005288603715598583, "loss_iou": 0.55078125, "loss_num": 0.0439453125, "loss_xval": 1.3203125, "num_input_tokens_seen": 207711772, "step": 3136 }, { "epoch": 0.29363036458089575, "grad_norm": 23.152889251708984, "learning_rate": 5e-05, "loss": 1.3689, "num_input_tokens_seen": 207778708, "step": 3137 }, { "epoch": 0.29363036458089575, "loss": 1.370178461074829, "loss_ce": 0.003479274455457926, "loss_iou": 0.5625, "loss_num": 0.048583984375, "loss_xval": 1.3671875, "num_input_tokens_seen": 207778708, "step": 3137 }, { "epoch": 0.2937239668647915, "grad_norm": 21.715951919555664, "learning_rate": 5e-05, "loss": 1.1348, "num_input_tokens_seen": 207844300, "step": 3138 }, { "epoch": 0.2937239668647915, "loss": 1.3429853916168213, "loss_ce": 0.005094733089208603, "loss_iou": 0.5546875, "loss_num": 0.045654296875, "loss_xval": 1.3359375, "num_input_tokens_seen": 207844300, "step": 3138 }, { "epoch": 0.29381756914868723, "grad_norm": 21.789844512939453, "learning_rate": 5e-05, "loss": 1.3594, "num_input_tokens_seen": 207910844, "step": 3139 }, { "epoch": 0.29381756914868723, "loss": 1.5190027952194214, "loss_ce": 0.007284093182533979, "loss_iou": 0.6328125, "loss_num": 0.049560546875, "loss_xval": 1.515625, "num_input_tokens_seen": 207910844, "step": 3139 }, { "epoch": 0.29391117143258294, "grad_norm": 22.1756534576416, "learning_rate": 5e-05, "loss": 1.1716, "num_input_tokens_seen": 207976996, "step": 3140 }, { "epoch": 0.29391117143258294, "loss": 1.0930942296981812, "loss_ce": 0.003250464564189315, "loss_iou": 0.46875, "loss_num": 0.0303955078125, "loss_xval": 1.09375, "num_input_tokens_seen": 207976996, "step": 3140 }, { "epoch": 0.2940047737164787, "grad_norm": 26.880090713500977, "learning_rate": 5e-05, "loss": 1.3201, "num_input_tokens_seen": 208042332, "step": 3141 }, { "epoch": 0.2940047737164787, "loss": 1.4008922576904297, "loss_ce": 0.0053844391368329525, "loss_iou": 0.5390625, "loss_num": 0.06396484375, "loss_xval": 1.3984375, "num_input_tokens_seen": 208042332, "step": 3141 }, { "epoch": 0.2940983760003744, "grad_norm": 23.85240936279297, "learning_rate": 5e-05, "loss": 1.3306, "num_input_tokens_seen": 208107356, "step": 3142 }, { "epoch": 0.2940983760003744, "loss": 1.4011750221252441, "loss_ce": 0.0037140091881155968, "loss_iou": 0.59765625, "loss_num": 0.04052734375, "loss_xval": 1.3984375, "num_input_tokens_seen": 208107356, "step": 3142 }, { "epoch": 0.29419197828427013, "grad_norm": 31.882375717163086, "learning_rate": 5e-05, "loss": 1.4407, "num_input_tokens_seen": 208174744, "step": 3143 }, { "epoch": 0.29419197828427013, "loss": 1.424073576927185, "loss_ce": 0.0075696613639593124, "loss_iou": 0.5625, "loss_num": 0.057861328125, "loss_xval": 1.4140625, "num_input_tokens_seen": 208174744, "step": 3143 }, { "epoch": 0.29428558056816584, "grad_norm": 21.217817306518555, "learning_rate": 5e-05, "loss": 1.3887, "num_input_tokens_seen": 208239860, "step": 3144 }, { "epoch": 0.29428558056816584, "loss": 1.3767788410186768, "loss_ce": 0.0022671385668218136, "loss_iou": 0.578125, "loss_num": 0.04345703125, "loss_xval": 1.375, "num_input_tokens_seen": 208239860, "step": 3144 }, { "epoch": 0.2943791828520616, "grad_norm": 327.39508056640625, "learning_rate": 5e-05, "loss": 1.3545, "num_input_tokens_seen": 208306112, "step": 3145 }, { "epoch": 0.2943791828520616, "loss": 1.4406671524047852, "loss_ce": 0.001214096206240356, "loss_iou": 0.59765625, "loss_num": 0.048583984375, "loss_xval": 1.4375, "num_input_tokens_seen": 208306112, "step": 3145 }, { "epoch": 0.2944727851359573, "grad_norm": 32.474185943603516, "learning_rate": 5e-05, "loss": 1.5501, "num_input_tokens_seen": 208372224, "step": 3146 }, { "epoch": 0.2944727851359573, "loss": 1.6570346355438232, "loss_ce": 0.006643939297646284, "loss_iou": 0.65234375, "loss_num": 0.06982421875, "loss_xval": 1.6484375, "num_input_tokens_seen": 208372224, "step": 3146 }, { "epoch": 0.294566387419853, "grad_norm": 34.72795104980469, "learning_rate": 5e-05, "loss": 1.3922, "num_input_tokens_seen": 208439076, "step": 3147 }, { "epoch": 0.294566387419853, "loss": 1.3625001907348633, "loss_ce": 0.004101745784282684, "loss_iou": 0.59765625, "loss_num": 0.032470703125, "loss_xval": 1.359375, "num_input_tokens_seen": 208439076, "step": 3147 }, { "epoch": 0.2946599897037488, "grad_norm": 29.345046997070312, "learning_rate": 5e-05, "loss": 1.4084, "num_input_tokens_seen": 208505016, "step": 3148 }, { "epoch": 0.2946599897037488, "loss": 1.1651557683944702, "loss_ce": 0.004999512806534767, "loss_iou": 0.474609375, "loss_num": 0.0419921875, "loss_xval": 1.15625, "num_input_tokens_seen": 208505016, "step": 3148 }, { "epoch": 0.2947535919876445, "grad_norm": 22.391761779785156, "learning_rate": 5e-05, "loss": 1.3775, "num_input_tokens_seen": 208572248, "step": 3149 }, { "epoch": 0.2947535919876445, "loss": 1.5035743713378906, "loss_ce": 0.003086050506681204, "loss_iou": 0.59765625, "loss_num": 0.061279296875, "loss_xval": 1.5, "num_input_tokens_seen": 208572248, "step": 3149 }, { "epoch": 0.2948471942715402, "grad_norm": 11.164349555969238, "learning_rate": 5e-05, "loss": 1.2155, "num_input_tokens_seen": 208639124, "step": 3150 }, { "epoch": 0.2948471942715402, "loss": 1.2595999240875244, "loss_ce": 0.0026419798377901316, "loss_iou": 0.5, "loss_num": 0.051025390625, "loss_xval": 1.2578125, "num_input_tokens_seen": 208639124, "step": 3150 }, { "epoch": 0.294940796555436, "grad_norm": 11.988375663757324, "learning_rate": 5e-05, "loss": 1.067, "num_input_tokens_seen": 208705720, "step": 3151 }, { "epoch": 0.294940796555436, "loss": 1.0030790567398071, "loss_ce": 0.004421837627887726, "loss_iou": 0.369140625, "loss_num": 0.05224609375, "loss_xval": 1.0, "num_input_tokens_seen": 208705720, "step": 3151 }, { "epoch": 0.2950343988393317, "grad_norm": 18.118045806884766, "learning_rate": 5e-05, "loss": 1.2427, "num_input_tokens_seen": 208773172, "step": 3152 }, { "epoch": 0.2950343988393317, "loss": 1.4260506629943848, "loss_ce": 0.005152285099029541, "loss_iou": 0.5625, "loss_num": 0.05859375, "loss_xval": 1.421875, "num_input_tokens_seen": 208773172, "step": 3152 }, { "epoch": 0.2951280011232274, "grad_norm": 21.028003692626953, "learning_rate": 5e-05, "loss": 1.3926, "num_input_tokens_seen": 208838888, "step": 3153 }, { "epoch": 0.2951280011232274, "loss": 1.443208932876587, "loss_ce": 0.005220574326813221, "loss_iou": 0.6015625, "loss_num": 0.047607421875, "loss_xval": 1.4375, "num_input_tokens_seen": 208838888, "step": 3153 }, { "epoch": 0.2952216034071231, "grad_norm": 38.488521575927734, "learning_rate": 5e-05, "loss": 1.4908, "num_input_tokens_seen": 208905076, "step": 3154 }, { "epoch": 0.2952216034071231, "loss": 1.5528995990753174, "loss_ce": 0.004071405157446861, "loss_iou": 0.61328125, "loss_num": 0.06396484375, "loss_xval": 1.546875, "num_input_tokens_seen": 208905076, "step": 3154 }, { "epoch": 0.2953152056910189, "grad_norm": 30.399873733520508, "learning_rate": 5e-05, "loss": 1.2547, "num_input_tokens_seen": 208971024, "step": 3155 }, { "epoch": 0.2953152056910189, "loss": 1.2003772258758545, "loss_ce": 0.007017870899289846, "loss_iou": 0.5078125, "loss_num": 0.036376953125, "loss_xval": 1.1953125, "num_input_tokens_seen": 208971024, "step": 3155 }, { "epoch": 0.2954088079749146, "grad_norm": 22.53354835510254, "learning_rate": 5e-05, "loss": 1.4043, "num_input_tokens_seen": 209037196, "step": 3156 }, { "epoch": 0.2954088079749146, "loss": 1.3473820686340332, "loss_ce": 0.0021671669092029333, "loss_iou": 0.58984375, "loss_num": 0.033203125, "loss_xval": 1.34375, "num_input_tokens_seen": 209037196, "step": 3156 }, { "epoch": 0.2955024102588103, "grad_norm": 21.677459716796875, "learning_rate": 5e-05, "loss": 1.3033, "num_input_tokens_seen": 209102704, "step": 3157 }, { "epoch": 0.2955024102588103, "loss": 1.211876392364502, "loss_ce": 0.004845119547098875, "loss_iou": 0.40234375, "loss_num": 0.08056640625, "loss_xval": 1.203125, "num_input_tokens_seen": 209102704, "step": 3157 }, { "epoch": 0.29559601254270607, "grad_norm": 25.992403030395508, "learning_rate": 5e-05, "loss": 1.5325, "num_input_tokens_seen": 209168692, "step": 3158 }, { "epoch": 0.29559601254270607, "loss": 1.4381616115570068, "loss_ce": 0.003591251326724887, "loss_iou": 0.55078125, "loss_num": 0.06640625, "loss_xval": 1.4375, "num_input_tokens_seen": 209168692, "step": 3158 }, { "epoch": 0.2956896148266018, "grad_norm": 35.65980529785156, "learning_rate": 5e-05, "loss": 1.3906, "num_input_tokens_seen": 209234808, "step": 3159 }, { "epoch": 0.2956896148266018, "loss": 1.2261383533477783, "loss_ce": 0.006900131702423096, "loss_iou": 0.52734375, "loss_num": 0.032470703125, "loss_xval": 1.21875, "num_input_tokens_seen": 209234808, "step": 3159 }, { "epoch": 0.2957832171104975, "grad_norm": 25.198034286499023, "learning_rate": 5e-05, "loss": 1.6574, "num_input_tokens_seen": 209301688, "step": 3160 }, { "epoch": 0.2957832171104975, "loss": 1.6029188632965088, "loss_ce": 0.007215700577944517, "loss_iou": 0.6484375, "loss_num": 0.059814453125, "loss_xval": 1.59375, "num_input_tokens_seen": 209301688, "step": 3160 }, { "epoch": 0.2958768193943932, "grad_norm": 18.281625747680664, "learning_rate": 5e-05, "loss": 1.5526, "num_input_tokens_seen": 209367668, "step": 3161 }, { "epoch": 0.2958768193943932, "loss": 1.4791182279586792, "loss_ce": 0.007926858961582184, "loss_iou": 0.578125, "loss_num": 0.06396484375, "loss_xval": 1.46875, "num_input_tokens_seen": 209367668, "step": 3161 }, { "epoch": 0.29597042167828896, "grad_norm": 17.44465446472168, "learning_rate": 5e-05, "loss": 1.2163, "num_input_tokens_seen": 209434884, "step": 3162 }, { "epoch": 0.29597042167828896, "loss": 1.0572702884674072, "loss_ce": 0.0040476699359714985, "loss_iou": 0.427734375, "loss_num": 0.039306640625, "loss_xval": 1.0546875, "num_input_tokens_seen": 209434884, "step": 3162 }, { "epoch": 0.2960640239621847, "grad_norm": 36.99618148803711, "learning_rate": 5e-05, "loss": 1.4565, "num_input_tokens_seen": 209500584, "step": 3163 }, { "epoch": 0.2960640239621847, "loss": 1.5771830081939697, "loss_ce": 0.0029641787987202406, "loss_iou": 0.60546875, "loss_num": 0.0732421875, "loss_xval": 1.578125, "num_input_tokens_seen": 209500584, "step": 3163 }, { "epoch": 0.2961576262460804, "grad_norm": 19.906404495239258, "learning_rate": 5e-05, "loss": 1.4188, "num_input_tokens_seen": 209566176, "step": 3164 }, { "epoch": 0.2961576262460804, "loss": 1.2771902084350586, "loss_ce": 0.008635531179606915, "loss_iou": 0.5234375, "loss_num": 0.04443359375, "loss_xval": 1.265625, "num_input_tokens_seen": 209566176, "step": 3164 }, { "epoch": 0.29625122852997615, "grad_norm": 507.19354248046875, "learning_rate": 5e-05, "loss": 1.3143, "num_input_tokens_seen": 209631572, "step": 3165 }, { "epoch": 0.29625122852997615, "loss": 1.2409627437591553, "loss_ce": 0.007076114881783724, "loss_iou": 0.51953125, "loss_num": 0.0390625, "loss_xval": 1.234375, "num_input_tokens_seen": 209631572, "step": 3165 }, { "epoch": 0.29634483081387186, "grad_norm": 58.572933197021484, "learning_rate": 5e-05, "loss": 1.4134, "num_input_tokens_seen": 209697820, "step": 3166 }, { "epoch": 0.29634483081387186, "loss": 1.5384180545806885, "loss_ce": 0.010097688063979149, "loss_iou": 0.62109375, "loss_num": 0.0576171875, "loss_xval": 1.53125, "num_input_tokens_seen": 209697820, "step": 3166 }, { "epoch": 0.2964384330977676, "grad_norm": 25.61594581604004, "learning_rate": 5e-05, "loss": 1.3639, "num_input_tokens_seen": 209764664, "step": 3167 }, { "epoch": 0.2964384330977676, "loss": 1.5066044330596924, "loss_ce": 0.007581004872918129, "loss_iou": 0.625, "loss_num": 0.049560546875, "loss_xval": 1.5, "num_input_tokens_seen": 209764664, "step": 3167 }, { "epoch": 0.29653203538166334, "grad_norm": 36.73101806640625, "learning_rate": 5e-05, "loss": 1.3057, "num_input_tokens_seen": 209831260, "step": 3168 }, { "epoch": 0.29653203538166334, "loss": 1.3140450716018677, "loss_ce": 0.01228724978864193, "loss_iou": 0.5078125, "loss_num": 0.0576171875, "loss_xval": 1.3046875, "num_input_tokens_seen": 209831260, "step": 3168 }, { "epoch": 0.29662563766555905, "grad_norm": 26.18838882446289, "learning_rate": 5e-05, "loss": 1.4561, "num_input_tokens_seen": 209896908, "step": 3169 }, { "epoch": 0.29662563766555905, "loss": 1.4335148334503174, "loss_ce": 0.007367314770817757, "loss_iou": 0.56640625, "loss_num": 0.05908203125, "loss_xval": 1.4296875, "num_input_tokens_seen": 209896908, "step": 3169 }, { "epoch": 0.29671923994945476, "grad_norm": 30.429698944091797, "learning_rate": 5e-05, "loss": 1.1071, "num_input_tokens_seen": 209963180, "step": 3170 }, { "epoch": 0.29671923994945476, "loss": 1.004789113998413, "loss_ce": 0.004026209469884634, "loss_iou": 0.400390625, "loss_num": 0.0400390625, "loss_xval": 1.0, "num_input_tokens_seen": 209963180, "step": 3170 }, { "epoch": 0.29681284223335047, "grad_norm": 13.63418960571289, "learning_rate": 5e-05, "loss": 1.125, "num_input_tokens_seen": 210029628, "step": 3171 }, { "epoch": 0.29681284223335047, "loss": 0.8942599296569824, "loss_ce": 0.0036349084693938494, "loss_iou": 0.369140625, "loss_num": 0.0302734375, "loss_xval": 0.890625, "num_input_tokens_seen": 210029628, "step": 3171 }, { "epoch": 0.29690644451724624, "grad_norm": 23.76020622253418, "learning_rate": 5e-05, "loss": 1.3733, "num_input_tokens_seen": 210095444, "step": 3172 }, { "epoch": 0.29690644451724624, "loss": 1.2906464338302612, "loss_ce": 0.002560456981882453, "loss_iou": 0.52734375, "loss_num": 0.046142578125, "loss_xval": 1.2890625, "num_input_tokens_seen": 210095444, "step": 3172 }, { "epoch": 0.29700004680114195, "grad_norm": 19.09914207458496, "learning_rate": 5e-05, "loss": 1.2956, "num_input_tokens_seen": 210162084, "step": 3173 }, { "epoch": 0.29700004680114195, "loss": 1.334072470664978, "loss_ce": 0.004970910027623177, "loss_iou": 0.53125, "loss_num": 0.052490234375, "loss_xval": 1.328125, "num_input_tokens_seen": 210162084, "step": 3173 }, { "epoch": 0.29709364908503766, "grad_norm": 30.996551513671875, "learning_rate": 5e-05, "loss": 1.3614, "num_input_tokens_seen": 210230100, "step": 3174 }, { "epoch": 0.29709364908503766, "loss": 1.333590030670166, "loss_ce": 0.011324452236294746, "loss_iou": 0.546875, "loss_num": 0.045166015625, "loss_xval": 1.3203125, "num_input_tokens_seen": 210230100, "step": 3174 }, { "epoch": 0.2971872513689334, "grad_norm": 72.84232330322266, "learning_rate": 5e-05, "loss": 1.6517, "num_input_tokens_seen": 210295896, "step": 3175 }, { "epoch": 0.2971872513689334, "loss": 1.7026541233062744, "loss_ce": 0.0034353560768067837, "loss_iou": 0.71875, "loss_num": 0.05322265625, "loss_xval": 1.703125, "num_input_tokens_seen": 210295896, "step": 3175 }, { "epoch": 0.29728085365282914, "grad_norm": 33.01655197143555, "learning_rate": 5e-05, "loss": 1.5121, "num_input_tokens_seen": 210363516, "step": 3176 }, { "epoch": 0.29728085365282914, "loss": 1.541312575340271, "loss_ce": 0.007132841739803553, "loss_iou": 0.58984375, "loss_num": 0.0703125, "loss_xval": 1.53125, "num_input_tokens_seen": 210363516, "step": 3176 }, { "epoch": 0.29737445593672485, "grad_norm": 26.523902893066406, "learning_rate": 5e-05, "loss": 1.5613, "num_input_tokens_seen": 210429380, "step": 3177 }, { "epoch": 0.29737445593672485, "loss": 1.5439693927764893, "loss_ce": 0.003930393140763044, "loss_iou": 0.66796875, "loss_num": 0.04052734375, "loss_xval": 1.5390625, "num_input_tokens_seen": 210429380, "step": 3177 }, { "epoch": 0.29746805822062056, "grad_norm": 142.507080078125, "learning_rate": 5e-05, "loss": 1.2231, "num_input_tokens_seen": 210497064, "step": 3178 }, { "epoch": 0.29746805822062056, "loss": 1.1566380262374878, "loss_ce": 0.005759131163358688, "loss_iou": 0.478515625, "loss_num": 0.03857421875, "loss_xval": 1.1484375, "num_input_tokens_seen": 210497064, "step": 3178 }, { "epoch": 0.2975616605045163, "grad_norm": 26.93979835510254, "learning_rate": 5e-05, "loss": 1.3947, "num_input_tokens_seen": 210563260, "step": 3179 }, { "epoch": 0.2975616605045163, "loss": 1.2275421619415283, "loss_ce": 0.003909416031092405, "loss_iou": 0.458984375, "loss_num": 0.061279296875, "loss_xval": 1.2265625, "num_input_tokens_seen": 210563260, "step": 3179 }, { "epoch": 0.29765526278841203, "grad_norm": 32.10329055786133, "learning_rate": 5e-05, "loss": 1.6622, "num_input_tokens_seen": 210629748, "step": 3180 }, { "epoch": 0.29765526278841203, "loss": 1.7176234722137451, "loss_ce": 0.0037563140504062176, "loss_iou": 0.68359375, "loss_num": 0.06884765625, "loss_xval": 1.7109375, "num_input_tokens_seen": 210629748, "step": 3180 }, { "epoch": 0.29774886507230774, "grad_norm": 22.704957962036133, "learning_rate": 5e-05, "loss": 1.426, "num_input_tokens_seen": 210696296, "step": 3181 }, { "epoch": 0.29774886507230774, "loss": 1.5410833358764648, "loss_ce": 0.005927057005465031, "loss_iou": 0.58984375, "loss_num": 0.0712890625, "loss_xval": 1.53125, "num_input_tokens_seen": 210696296, "step": 3181 }, { "epoch": 0.2978424673562035, "grad_norm": 13.9943208694458, "learning_rate": 5e-05, "loss": 1.1389, "num_input_tokens_seen": 210762440, "step": 3182 }, { "epoch": 0.2978424673562035, "loss": 1.323799967765808, "loss_ce": 0.002510875929147005, "loss_iou": 0.51953125, "loss_num": 0.056396484375, "loss_xval": 1.3203125, "num_input_tokens_seen": 210762440, "step": 3182 }, { "epoch": 0.2979360696400992, "grad_norm": 31.6590633392334, "learning_rate": 5e-05, "loss": 1.3181, "num_input_tokens_seen": 210827516, "step": 3183 }, { "epoch": 0.2979360696400992, "loss": 1.3236525058746338, "loss_ce": 0.007734533865004778, "loss_iou": 0.5625, "loss_num": 0.03759765625, "loss_xval": 1.3125, "num_input_tokens_seen": 210827516, "step": 3183 }, { "epoch": 0.29802967192399493, "grad_norm": 79.19822692871094, "learning_rate": 5e-05, "loss": 1.429, "num_input_tokens_seen": 210893532, "step": 3184 }, { "epoch": 0.29802967192399493, "loss": 1.205948829650879, "loss_ce": 0.0072183674201369286, "loss_iou": 0.47265625, "loss_num": 0.05126953125, "loss_xval": 1.1953125, "num_input_tokens_seen": 210893532, "step": 3184 }, { "epoch": 0.2981232742078907, "grad_norm": 26.083589553833008, "learning_rate": 5e-05, "loss": 1.2951, "num_input_tokens_seen": 210960092, "step": 3185 }, { "epoch": 0.2981232742078907, "loss": 1.2965949773788452, "loss_ce": 0.007379880174994469, "loss_iou": 0.5703125, "loss_num": 0.03076171875, "loss_xval": 1.2890625, "num_input_tokens_seen": 210960092, "step": 3185 }, { "epoch": 0.2982168764917864, "grad_norm": 36.118099212646484, "learning_rate": 5e-05, "loss": 1.3877, "num_input_tokens_seen": 211027020, "step": 3186 }, { "epoch": 0.2982168764917864, "loss": 1.588663101196289, "loss_ce": 0.006631866097450256, "loss_iou": 0.59765625, "loss_num": 0.0771484375, "loss_xval": 1.578125, "num_input_tokens_seen": 211027020, "step": 3186 }, { "epoch": 0.2983104787756821, "grad_norm": 18.90194320678711, "learning_rate": 5e-05, "loss": 1.4396, "num_input_tokens_seen": 211091992, "step": 3187 }, { "epoch": 0.2983104787756821, "loss": 1.517781138420105, "loss_ce": 0.004109271802008152, "loss_iou": 0.625, "loss_num": 0.052001953125, "loss_xval": 1.515625, "num_input_tokens_seen": 211091992, "step": 3187 }, { "epoch": 0.29840408105957783, "grad_norm": 14.602373123168945, "learning_rate": 5e-05, "loss": 1.1811, "num_input_tokens_seen": 211157576, "step": 3188 }, { "epoch": 0.29840408105957783, "loss": 1.405256748199463, "loss_ce": 0.007307431660592556, "loss_iou": 0.56640625, "loss_num": 0.05322265625, "loss_xval": 1.3984375, "num_input_tokens_seen": 211157576, "step": 3188 }, { "epoch": 0.2984976833434736, "grad_norm": 17.76519203186035, "learning_rate": 5e-05, "loss": 1.2077, "num_input_tokens_seen": 211223808, "step": 3189 }, { "epoch": 0.2984976833434736, "loss": 1.30693781375885, "loss_ce": 0.0027385344728827477, "loss_iou": 0.546875, "loss_num": 0.041259765625, "loss_xval": 1.3046875, "num_input_tokens_seen": 211223808, "step": 3189 }, { "epoch": 0.2985912856273693, "grad_norm": 23.347253799438477, "learning_rate": 5e-05, "loss": 1.2762, "num_input_tokens_seen": 211290196, "step": 3190 }, { "epoch": 0.2985912856273693, "loss": 1.3785088062286377, "loss_ce": 0.00399717316031456, "loss_iou": 0.52734375, "loss_num": 0.06396484375, "loss_xval": 1.375, "num_input_tokens_seen": 211290196, "step": 3190 }, { "epoch": 0.298684887911265, "grad_norm": 11.815969467163086, "learning_rate": 5e-05, "loss": 1.2287, "num_input_tokens_seen": 211355016, "step": 3191 }, { "epoch": 0.298684887911265, "loss": 1.1194970607757568, "loss_ce": 0.007100821007043123, "loss_iou": 0.44140625, "loss_num": 0.045654296875, "loss_xval": 1.109375, "num_input_tokens_seen": 211355016, "step": 3191 }, { "epoch": 0.2987784901951608, "grad_norm": 12.142919540405273, "learning_rate": 5e-05, "loss": 1.0354, "num_input_tokens_seen": 211421372, "step": 3192 }, { "epoch": 0.2987784901951608, "loss": 1.2642282247543335, "loss_ce": 0.010321984067559242, "loss_iou": 0.478515625, "loss_num": 0.05908203125, "loss_xval": 1.25, "num_input_tokens_seen": 211421372, "step": 3192 }, { "epoch": 0.2988720924790565, "grad_norm": 31.494945526123047, "learning_rate": 5e-05, "loss": 1.2807, "num_input_tokens_seen": 211488196, "step": 3193 }, { "epoch": 0.2988720924790565, "loss": 1.1928824186325073, "loss_ce": 0.00538242980837822, "loss_iou": 0.52734375, "loss_num": 0.0269775390625, "loss_xval": 1.1875, "num_input_tokens_seen": 211488196, "step": 3193 }, { "epoch": 0.2989656947629522, "grad_norm": 23.278404235839844, "learning_rate": 5e-05, "loss": 1.3216, "num_input_tokens_seen": 211554372, "step": 3194 }, { "epoch": 0.2989656947629522, "loss": 1.3579224348068237, "loss_ce": 0.003186051268130541, "loss_iou": 0.53515625, "loss_num": 0.056640625, "loss_xval": 1.3515625, "num_input_tokens_seen": 211554372, "step": 3194 }, { "epoch": 0.2990592970468479, "grad_norm": 45.67925262451172, "learning_rate": 5e-05, "loss": 1.2529, "num_input_tokens_seen": 211620852, "step": 3195 }, { "epoch": 0.2990592970468479, "loss": 1.220914363861084, "loss_ce": 0.005582316778600216, "loss_iou": 0.50390625, "loss_num": 0.0419921875, "loss_xval": 1.21875, "num_input_tokens_seen": 211620852, "step": 3195 }, { "epoch": 0.2991528993307437, "grad_norm": 29.797014236450195, "learning_rate": 5e-05, "loss": 1.2184, "num_input_tokens_seen": 211686040, "step": 3196 }, { "epoch": 0.2991528993307437, "loss": 0.9787318110466003, "loss_ce": 0.003878271207213402, "loss_iou": 0.419921875, "loss_num": 0.027099609375, "loss_xval": 0.9765625, "num_input_tokens_seen": 211686040, "step": 3196 }, { "epoch": 0.2992465016146394, "grad_norm": 27.623170852661133, "learning_rate": 5e-05, "loss": 1.4251, "num_input_tokens_seen": 211751316, "step": 3197 }, { "epoch": 0.2992465016146394, "loss": 1.499799370765686, "loss_ce": 0.006879471242427826, "loss_iou": 0.609375, "loss_num": 0.05419921875, "loss_xval": 1.4921875, "num_input_tokens_seen": 211751316, "step": 3197 }, { "epoch": 0.2993401038985351, "grad_norm": 30.4111328125, "learning_rate": 5e-05, "loss": 1.2305, "num_input_tokens_seen": 211817220, "step": 3198 }, { "epoch": 0.2993401038985351, "loss": 1.3019930124282837, "loss_ce": 0.005118002183735371, "loss_iou": 0.51953125, "loss_num": 0.05126953125, "loss_xval": 1.296875, "num_input_tokens_seen": 211817220, "step": 3198 }, { "epoch": 0.29943370618243087, "grad_norm": 26.70878028869629, "learning_rate": 5e-05, "loss": 1.2807, "num_input_tokens_seen": 211883500, "step": 3199 }, { "epoch": 0.29943370618243087, "loss": 1.4420101642608643, "loss_ce": 0.003045236924663186, "loss_iou": 0.6171875, "loss_num": 0.041259765625, "loss_xval": 1.4375, "num_input_tokens_seen": 211883500, "step": 3199 }, { "epoch": 0.2995273084663266, "grad_norm": 32.45808792114258, "learning_rate": 5e-05, "loss": 1.4446, "num_input_tokens_seen": 211949328, "step": 3200 }, { "epoch": 0.2995273084663266, "loss": 1.4723834991455078, "loss_ce": 0.002657028380781412, "loss_iou": 0.59375, "loss_num": 0.05712890625, "loss_xval": 1.46875, "num_input_tokens_seen": 211949328, "step": 3200 }, { "epoch": 0.2996209107502223, "grad_norm": 64.23419189453125, "learning_rate": 5e-05, "loss": 1.3893, "num_input_tokens_seen": 212016048, "step": 3201 }, { "epoch": 0.2996209107502223, "loss": 1.1797385215759277, "loss_ce": 0.004933887626975775, "loss_iou": 0.50390625, "loss_num": 0.033935546875, "loss_xval": 1.171875, "num_input_tokens_seen": 212016048, "step": 3201 }, { "epoch": 0.29971451303411806, "grad_norm": 28.914752960205078, "learning_rate": 5e-05, "loss": 1.2071, "num_input_tokens_seen": 212082736, "step": 3202 }, { "epoch": 0.29971451303411806, "loss": 1.3583087921142578, "loss_ce": 0.005281480029225349, "loss_iou": 0.54296875, "loss_num": 0.053955078125, "loss_xval": 1.3515625, "num_input_tokens_seen": 212082736, "step": 3202 }, { "epoch": 0.29980811531801377, "grad_norm": 23.79576873779297, "learning_rate": 5e-05, "loss": 1.285, "num_input_tokens_seen": 212148536, "step": 3203 }, { "epoch": 0.29980811531801377, "loss": 1.318683385848999, "loss_ce": 0.008136525750160217, "loss_iou": 0.53515625, "loss_num": 0.0478515625, "loss_xval": 1.3125, "num_input_tokens_seen": 212148536, "step": 3203 }, { "epoch": 0.2999017176019095, "grad_norm": 16.102449417114258, "learning_rate": 5e-05, "loss": 1.1803, "num_input_tokens_seen": 212215436, "step": 3204 }, { "epoch": 0.2999017176019095, "loss": 1.541203260421753, "loss_ce": 0.008976730518043041, "loss_iou": 0.59765625, "loss_num": 0.0673828125, "loss_xval": 1.53125, "num_input_tokens_seen": 212215436, "step": 3204 }, { "epoch": 0.2999953198858052, "grad_norm": 15.068942070007324, "learning_rate": 5e-05, "loss": 1.4287, "num_input_tokens_seen": 212282248, "step": 3205 }, { "epoch": 0.2999953198858052, "loss": 1.438281536102295, "loss_ce": 0.00224632048048079, "loss_iou": 0.546875, "loss_num": 0.068359375, "loss_xval": 1.4375, "num_input_tokens_seen": 212282248, "step": 3205 }, { "epoch": 0.30008892216970096, "grad_norm": 38.77875518798828, "learning_rate": 5e-05, "loss": 1.2581, "num_input_tokens_seen": 212349404, "step": 3206 }, { "epoch": 0.30008892216970096, "loss": 1.3109136819839478, "loss_ce": 0.006226138211786747, "loss_iou": 0.5078125, "loss_num": 0.057373046875, "loss_xval": 1.3046875, "num_input_tokens_seen": 212349404, "step": 3206 }, { "epoch": 0.30018252445359667, "grad_norm": 31.315486907958984, "learning_rate": 5e-05, "loss": 1.3004, "num_input_tokens_seen": 212415328, "step": 3207 }, { "epoch": 0.30018252445359667, "loss": 1.4697887897491455, "loss_ce": 0.007386452984064817, "loss_iou": 0.5859375, "loss_num": 0.057373046875, "loss_xval": 1.4609375, "num_input_tokens_seen": 212415328, "step": 3207 }, { "epoch": 0.3002761267374924, "grad_norm": 24.298362731933594, "learning_rate": 5e-05, "loss": 1.088, "num_input_tokens_seen": 212480916, "step": 3208 }, { "epoch": 0.3002761267374924, "loss": 1.2162508964538574, "loss_ce": 0.0048251026310026646, "loss_iou": 0.515625, "loss_num": 0.035888671875, "loss_xval": 1.2109375, "num_input_tokens_seen": 212480916, "step": 3208 }, { "epoch": 0.30036972902138814, "grad_norm": 26.170442581176758, "learning_rate": 5e-05, "loss": 1.263, "num_input_tokens_seen": 212547208, "step": 3209 }, { "epoch": 0.30036972902138814, "loss": 1.1825824975967407, "loss_ce": 0.0043597957119345665, "loss_iou": 0.52734375, "loss_num": 0.02490234375, "loss_xval": 1.1796875, "num_input_tokens_seen": 212547208, "step": 3209 }, { "epoch": 0.30046333130528385, "grad_norm": 19.814950942993164, "learning_rate": 5e-05, "loss": 1.6239, "num_input_tokens_seen": 212613312, "step": 3210 }, { "epoch": 0.30046333130528385, "loss": 1.767000675201416, "loss_ce": 0.006258486304432154, "loss_iou": 0.671875, "loss_num": 0.083984375, "loss_xval": 1.7578125, "num_input_tokens_seen": 212613312, "step": 3210 }, { "epoch": 0.30055693358917956, "grad_norm": 10.765375137329102, "learning_rate": 5e-05, "loss": 1.3205, "num_input_tokens_seen": 212680120, "step": 3211 }, { "epoch": 0.30055693358917956, "loss": 1.3583877086639404, "loss_ce": 0.004383737687021494, "loss_iou": 0.57421875, "loss_num": 0.041015625, "loss_xval": 1.3515625, "num_input_tokens_seen": 212680120, "step": 3211 }, { "epoch": 0.30065053587307533, "grad_norm": 16.67058753967285, "learning_rate": 5e-05, "loss": 1.0711, "num_input_tokens_seen": 212746068, "step": 3212 }, { "epoch": 0.30065053587307533, "loss": 1.119780421257019, "loss_ce": 0.010405439883470535, "loss_iou": 0.48828125, "loss_num": 0.026123046875, "loss_xval": 1.109375, "num_input_tokens_seen": 212746068, "step": 3212 }, { "epoch": 0.30074413815697104, "grad_norm": 26.18053436279297, "learning_rate": 5e-05, "loss": 1.3412, "num_input_tokens_seen": 212812188, "step": 3213 }, { "epoch": 0.30074413815697104, "loss": 1.483103632926941, "loss_ce": 0.0021465462632477283, "loss_iou": 0.60546875, "loss_num": 0.0546875, "loss_xval": 1.484375, "num_input_tokens_seen": 212812188, "step": 3213 }, { "epoch": 0.30083774044086675, "grad_norm": 55.32423782348633, "learning_rate": 5e-05, "loss": 1.338, "num_input_tokens_seen": 212878832, "step": 3214 }, { "epoch": 0.30083774044086675, "loss": 1.2190561294555664, "loss_ce": 0.007141960319131613, "loss_iou": 0.51171875, "loss_num": 0.037109375, "loss_xval": 1.2109375, "num_input_tokens_seen": 212878832, "step": 3214 }, { "epoch": 0.30093134272476246, "grad_norm": 22.433765411376953, "learning_rate": 5e-05, "loss": 1.3973, "num_input_tokens_seen": 212944732, "step": 3215 }, { "epoch": 0.30093134272476246, "loss": 1.596411108970642, "loss_ce": 0.0046141850762069225, "loss_iou": 0.671875, "loss_num": 0.05029296875, "loss_xval": 1.59375, "num_input_tokens_seen": 212944732, "step": 3215 }, { "epoch": 0.30102494500865823, "grad_norm": 23.633569717407227, "learning_rate": 5e-05, "loss": 1.1616, "num_input_tokens_seen": 213011500, "step": 3216 }, { "epoch": 0.30102494500865823, "loss": 1.1757826805114746, "loss_ce": 0.004395921714603901, "loss_iou": 0.453125, "loss_num": 0.052734375, "loss_xval": 1.171875, "num_input_tokens_seen": 213011500, "step": 3216 }, { "epoch": 0.30111854729255394, "grad_norm": 39.09695053100586, "learning_rate": 5e-05, "loss": 1.5004, "num_input_tokens_seen": 213077480, "step": 3217 }, { "epoch": 0.30111854729255394, "loss": 1.3216581344604492, "loss_ce": 0.0032987878657877445, "loss_iou": 0.484375, "loss_num": 0.0703125, "loss_xval": 1.3203125, "num_input_tokens_seen": 213077480, "step": 3217 }, { "epoch": 0.30121214957644965, "grad_norm": 18.81067657470703, "learning_rate": 5e-05, "loss": 1.6657, "num_input_tokens_seen": 213143548, "step": 3218 }, { "epoch": 0.30121214957644965, "loss": 1.7820554971694946, "loss_ce": 0.005688296630978584, "loss_iou": 0.7578125, "loss_num": 0.052001953125, "loss_xval": 1.7734375, "num_input_tokens_seen": 213143548, "step": 3218 }, { "epoch": 0.3013057518603454, "grad_norm": 13.473433494567871, "learning_rate": 5e-05, "loss": 1.276, "num_input_tokens_seen": 213209108, "step": 3219 }, { "epoch": 0.3013057518603454, "loss": 1.4027621746063232, "loss_ce": 0.00823102705180645, "loss_iou": 0.52734375, "loss_num": 0.068359375, "loss_xval": 1.390625, "num_input_tokens_seen": 213209108, "step": 3219 }, { "epoch": 0.3013993541442411, "grad_norm": 35.56060791015625, "learning_rate": 5e-05, "loss": 1.4559, "num_input_tokens_seen": 213274852, "step": 3220 }, { "epoch": 0.3013993541442411, "loss": 1.5947089195251465, "loss_ce": 0.005353348329663277, "loss_iou": 0.6171875, "loss_num": 0.0703125, "loss_xval": 1.5859375, "num_input_tokens_seen": 213274852, "step": 3220 }, { "epoch": 0.30149295642813684, "grad_norm": 22.169809341430664, "learning_rate": 5e-05, "loss": 1.4235, "num_input_tokens_seen": 213341776, "step": 3221 }, { "epoch": 0.30149295642813684, "loss": 1.308863878250122, "loss_ce": 0.004176258575171232, "loss_iou": 0.546875, "loss_num": 0.0419921875, "loss_xval": 1.3046875, "num_input_tokens_seen": 213341776, "step": 3221 }, { "epoch": 0.30158655871203255, "grad_norm": 25.523279190063477, "learning_rate": 5e-05, "loss": 1.1305, "num_input_tokens_seen": 213407452, "step": 3222 }, { "epoch": 0.30158655871203255, "loss": 1.0799659490585327, "loss_ce": 0.006723684258759022, "loss_iou": 0.4140625, "loss_num": 0.04931640625, "loss_xval": 1.0703125, "num_input_tokens_seen": 213407452, "step": 3222 }, { "epoch": 0.3016801609959283, "grad_norm": 20.90241241455078, "learning_rate": 5e-05, "loss": 1.561, "num_input_tokens_seen": 213474412, "step": 3223 }, { "epoch": 0.3016801609959283, "loss": 1.497004747390747, "loss_ce": 0.006770439445972443, "loss_iou": 0.59765625, "loss_num": 0.05810546875, "loss_xval": 1.4921875, "num_input_tokens_seen": 213474412, "step": 3223 }, { "epoch": 0.301773763279824, "grad_norm": 31.53831672668457, "learning_rate": 5e-05, "loss": 1.4469, "num_input_tokens_seen": 213540040, "step": 3224 }, { "epoch": 0.301773763279824, "loss": 1.5138611793518066, "loss_ce": 0.004583851899951696, "loss_iou": 0.6328125, "loss_num": 0.049072265625, "loss_xval": 1.5078125, "num_input_tokens_seen": 213540040, "step": 3224 }, { "epoch": 0.30186736556371974, "grad_norm": 54.882659912109375, "learning_rate": 5e-05, "loss": 1.5197, "num_input_tokens_seen": 213606588, "step": 3225 }, { "epoch": 0.30186736556371974, "loss": 1.3511512279510498, "loss_ce": 0.004959766753017902, "loss_iou": 0.578125, "loss_num": 0.03857421875, "loss_xval": 1.34375, "num_input_tokens_seen": 213606588, "step": 3225 }, { "epoch": 0.3019609678476155, "grad_norm": 28.352731704711914, "learning_rate": 5e-05, "loss": 1.4563, "num_input_tokens_seen": 213673088, "step": 3226 }, { "epoch": 0.3019609678476155, "loss": 1.3883702754974365, "loss_ce": 0.006534261628985405, "loss_iou": 0.5703125, "loss_num": 0.048095703125, "loss_xval": 1.3828125, "num_input_tokens_seen": 213673088, "step": 3226 }, { "epoch": 0.3020545701315112, "grad_norm": 24.50950050354004, "learning_rate": 5e-05, "loss": 1.4034, "num_input_tokens_seen": 213738492, "step": 3227 }, { "epoch": 0.3020545701315112, "loss": 1.476147174835205, "loss_ce": 0.010326847434043884, "loss_iou": 0.60546875, "loss_num": 0.05029296875, "loss_xval": 1.46875, "num_input_tokens_seen": 213738492, "step": 3227 }, { "epoch": 0.3021481724154069, "grad_norm": 18.68236541748047, "learning_rate": 5e-05, "loss": 1.3179, "num_input_tokens_seen": 213805016, "step": 3228 }, { "epoch": 0.3021481724154069, "loss": 1.3263987302780151, "loss_ce": 0.008039271458983421, "loss_iou": 0.51953125, "loss_num": 0.055419921875, "loss_xval": 1.3203125, "num_input_tokens_seen": 213805016, "step": 3228 }, { "epoch": 0.3022417746993027, "grad_norm": 25.224550247192383, "learning_rate": 5e-05, "loss": 1.4273, "num_input_tokens_seen": 213871088, "step": 3229 }, { "epoch": 0.3022417746993027, "loss": 1.4278255701065063, "loss_ce": 0.004485721699893475, "loss_iou": 0.53515625, "loss_num": 0.0703125, "loss_xval": 1.421875, "num_input_tokens_seen": 213871088, "step": 3229 }, { "epoch": 0.3023353769831984, "grad_norm": 35.3027458190918, "learning_rate": 5e-05, "loss": 1.3905, "num_input_tokens_seen": 213937156, "step": 3230 }, { "epoch": 0.3023353769831984, "loss": 1.3928260803222656, "loss_ce": 0.005130731966346502, "loss_iou": 0.6015625, "loss_num": 0.037841796875, "loss_xval": 1.390625, "num_input_tokens_seen": 213937156, "step": 3230 }, { "epoch": 0.3024289792670941, "grad_norm": 33.67833709716797, "learning_rate": 5e-05, "loss": 1.59, "num_input_tokens_seen": 214003308, "step": 3231 }, { "epoch": 0.3024289792670941, "loss": 1.752487301826477, "loss_ce": 0.005417039152234793, "loss_iou": 0.69140625, "loss_num": 0.0732421875, "loss_xval": 1.75, "num_input_tokens_seen": 214003308, "step": 3231 }, { "epoch": 0.3025225815509898, "grad_norm": 20.47861671447754, "learning_rate": 5e-05, "loss": 1.4846, "num_input_tokens_seen": 214068572, "step": 3232 }, { "epoch": 0.3025225815509898, "loss": 1.7175261974334717, "loss_ce": 0.007076878100633621, "loss_iou": 0.6875, "loss_num": 0.06640625, "loss_xval": 1.7109375, "num_input_tokens_seen": 214068572, "step": 3232 }, { "epoch": 0.3026161838348856, "grad_norm": 13.899136543273926, "learning_rate": 5e-05, "loss": 1.1464, "num_input_tokens_seen": 214134372, "step": 3233 }, { "epoch": 0.3026161838348856, "loss": 1.2238168716430664, "loss_ce": 0.005974735599011183, "loss_iou": 0.474609375, "loss_num": 0.053466796875, "loss_xval": 1.21875, "num_input_tokens_seen": 214134372, "step": 3233 }, { "epoch": 0.3027097861187813, "grad_norm": 11.780744552612305, "learning_rate": 5e-05, "loss": 0.9685, "num_input_tokens_seen": 214200328, "step": 3234 }, { "epoch": 0.3027097861187813, "loss": 0.8649457693099976, "loss_ce": 0.004105925559997559, "loss_iou": 0.34375, "loss_num": 0.034912109375, "loss_xval": 0.859375, "num_input_tokens_seen": 214200328, "step": 3234 }, { "epoch": 0.302803388402677, "grad_norm": 20.04199981689453, "learning_rate": 5e-05, "loss": 1.3809, "num_input_tokens_seen": 214266488, "step": 3235 }, { "epoch": 0.302803388402677, "loss": 1.6200001239776611, "loss_ce": 0.007695465348660946, "loss_iou": 0.65625, "loss_num": 0.059326171875, "loss_xval": 1.609375, "num_input_tokens_seen": 214266488, "step": 3235 }, { "epoch": 0.3028969906865728, "grad_norm": 31.310379028320312, "learning_rate": 5e-05, "loss": 1.5926, "num_input_tokens_seen": 214333240, "step": 3236 }, { "epoch": 0.3028969906865728, "loss": 1.5808790922164917, "loss_ce": 0.0047071995213627815, "loss_iou": 0.65234375, "loss_num": 0.05517578125, "loss_xval": 1.578125, "num_input_tokens_seen": 214333240, "step": 3236 }, { "epoch": 0.3029905929704685, "grad_norm": 45.15845489501953, "learning_rate": 5e-05, "loss": 1.3997, "num_input_tokens_seen": 214400204, "step": 3237 }, { "epoch": 0.3029905929704685, "loss": 1.3295578956604004, "loss_ce": 0.0048508960753679276, "loss_iou": 0.546875, "loss_num": 0.047119140625, "loss_xval": 1.328125, "num_input_tokens_seen": 214400204, "step": 3237 }, { "epoch": 0.3030841952543642, "grad_norm": 34.50333786010742, "learning_rate": 5e-05, "loss": 1.646, "num_input_tokens_seen": 214466076, "step": 3238 }, { "epoch": 0.3030841952543642, "loss": 1.7696900367736816, "loss_ce": 0.008459556847810745, "loss_iou": 0.625, "loss_num": 0.1025390625, "loss_xval": 1.7578125, "num_input_tokens_seen": 214466076, "step": 3238 }, { "epoch": 0.3031777975382599, "grad_norm": 24.536317825317383, "learning_rate": 5e-05, "loss": 1.2985, "num_input_tokens_seen": 214531960, "step": 3239 }, { "epoch": 0.3031777975382599, "loss": 1.2813217639923096, "loss_ce": 0.0045273685827851295, "loss_iou": 0.482421875, "loss_num": 0.062255859375, "loss_xval": 1.2734375, "num_input_tokens_seen": 214531960, "step": 3239 }, { "epoch": 0.3032713998221557, "grad_norm": 67.76273345947266, "learning_rate": 5e-05, "loss": 1.2146, "num_input_tokens_seen": 214597236, "step": 3240 }, { "epoch": 0.3032713998221557, "loss": 1.2255988121032715, "loss_ce": 0.005872255191206932, "loss_iou": 0.494140625, "loss_num": 0.0458984375, "loss_xval": 1.21875, "num_input_tokens_seen": 214597236, "step": 3240 }, { "epoch": 0.3033650021060514, "grad_norm": 16.857189178466797, "learning_rate": 5e-05, "loss": 1.2838, "num_input_tokens_seen": 214662984, "step": 3241 }, { "epoch": 0.3033650021060514, "loss": 1.1721733808517456, "loss_ce": 0.009758800268173218, "loss_iou": 0.455078125, "loss_num": 0.05029296875, "loss_xval": 1.1640625, "num_input_tokens_seen": 214662984, "step": 3241 }, { "epoch": 0.3034586043899471, "grad_norm": 17.222375869750977, "learning_rate": 5e-05, "loss": 1.4831, "num_input_tokens_seen": 214729136, "step": 3242 }, { "epoch": 0.3034586043899471, "loss": 1.3036366701126099, "loss_ce": 0.004320286214351654, "loss_iou": 0.5390625, "loss_num": 0.043701171875, "loss_xval": 1.296875, "num_input_tokens_seen": 214729136, "step": 3242 }, { "epoch": 0.30355220667384286, "grad_norm": 25.268510818481445, "learning_rate": 5e-05, "loss": 1.2756, "num_input_tokens_seen": 214797088, "step": 3243 }, { "epoch": 0.30355220667384286, "loss": 1.2916339635849, "loss_ce": 0.006966054905205965, "loss_iou": 0.52734375, "loss_num": 0.045654296875, "loss_xval": 1.28125, "num_input_tokens_seen": 214797088, "step": 3243 }, { "epoch": 0.30364580895773857, "grad_norm": 19.913009643554688, "learning_rate": 5e-05, "loss": 1.2999, "num_input_tokens_seen": 214862864, "step": 3244 }, { "epoch": 0.30364580895773857, "loss": 1.1519443988800049, "loss_ce": 0.004971779882907867, "loss_iou": 0.478515625, "loss_num": 0.0380859375, "loss_xval": 1.1484375, "num_input_tokens_seen": 214862864, "step": 3244 }, { "epoch": 0.3037394112416343, "grad_norm": 28.684814453125, "learning_rate": 5e-05, "loss": 1.3074, "num_input_tokens_seen": 214929032, "step": 3245 }, { "epoch": 0.3037394112416343, "loss": 1.4903351068496704, "loss_ce": 0.00614324351772666, "loss_iou": 0.60546875, "loss_num": 0.053955078125, "loss_xval": 1.484375, "num_input_tokens_seen": 214929032, "step": 3245 }, { "epoch": 0.30383301352553005, "grad_norm": 25.049110412597656, "learning_rate": 5e-05, "loss": 1.5934, "num_input_tokens_seen": 214995796, "step": 3246 }, { "epoch": 0.30383301352553005, "loss": 1.5179600715637207, "loss_ce": 0.002335167722776532, "loss_iou": 0.609375, "loss_num": 0.05908203125, "loss_xval": 1.515625, "num_input_tokens_seen": 214995796, "step": 3246 }, { "epoch": 0.30392661580942576, "grad_norm": 23.012149810791016, "learning_rate": 5e-05, "loss": 1.2905, "num_input_tokens_seen": 215061024, "step": 3247 }, { "epoch": 0.30392661580942576, "loss": 1.074303388595581, "loss_ce": 0.0028923354111611843, "loss_iou": 0.478515625, "loss_num": 0.0230712890625, "loss_xval": 1.0703125, "num_input_tokens_seen": 215061024, "step": 3247 }, { "epoch": 0.30402021809332147, "grad_norm": 27.734107971191406, "learning_rate": 5e-05, "loss": 1.4956, "num_input_tokens_seen": 215126792, "step": 3248 }, { "epoch": 0.30402021809332147, "loss": 1.4625927209854126, "loss_ce": 0.006049771327525377, "loss_iou": 0.56640625, "loss_num": 0.064453125, "loss_xval": 1.453125, "num_input_tokens_seen": 215126792, "step": 3248 }, { "epoch": 0.3041138203772172, "grad_norm": 31.783884048461914, "learning_rate": 5e-05, "loss": 1.3002, "num_input_tokens_seen": 215192640, "step": 3249 }, { "epoch": 0.3041138203772172, "loss": 1.352917194366455, "loss_ce": 0.010143849067389965, "loss_iou": 0.5703125, "loss_num": 0.0400390625, "loss_xval": 1.34375, "num_input_tokens_seen": 215192640, "step": 3249 }, { "epoch": 0.30420742266111295, "grad_norm": 29.57720375061035, "learning_rate": 5e-05, "loss": 1.5682, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30420742266111295, "eval_seeclick_CIoU": 0.1455652415752411, "eval_seeclick_GIoU": 0.1675873100757599, "eval_seeclick_IoU": 0.28123968839645386, "eval_seeclick_MAE_all": 0.1613713800907135, "eval_seeclick_MAE_h": 0.0649598129093647, "eval_seeclick_MAE_w": 0.11758418381214142, "eval_seeclick_MAE_x_boxes": 0.25382599234580994, "eval_seeclick_MAE_y_boxes": 0.1627851352095604, "eval_seeclick_NUM_probability": 0.9999281167984009, "eval_seeclick_inside_bbox": 0.4229166805744171, "eval_seeclick_loss": 2.525183916091919, "eval_seeclick_loss_ce": 0.013766091782599688, "eval_seeclick_loss_iou": 0.8785400390625, "eval_seeclick_loss_num": 0.16729736328125, "eval_seeclick_loss_xval": 2.59326171875, "eval_seeclick_runtime": 64.7086, "eval_seeclick_samples_per_second": 0.726, "eval_seeclick_steps_per_second": 0.031, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30420742266111295, "eval_icons_CIoU": -0.10694961249828339, "eval_icons_GIoU": 0.029194827191531658, "eval_icons_IoU": 0.09008727595210075, "eval_icons_MAE_all": 0.18352647870779037, "eval_icons_MAE_h": 0.12440169230103493, "eval_icons_MAE_w": 0.1849340870976448, "eval_icons_MAE_x_boxes": 0.17155595868825912, "eval_icons_MAE_y_boxes": 0.08004642091691494, "eval_icons_NUM_probability": 0.9999755620956421, "eval_icons_inside_bbox": 0.1493055559694767, "eval_icons_loss": 2.8794074058532715, "eval_icons_loss_ce": 3.1658116313337814e-05, "eval_icons_loss_iou": 0.975830078125, "eval_icons_loss_num": 0.200042724609375, "eval_icons_loss_xval": 2.9501953125, "eval_icons_runtime": 67.4453, "eval_icons_samples_per_second": 0.741, "eval_icons_steps_per_second": 0.03, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30420742266111295, "eval_screenspot_CIoU": -0.025577165186405182, "eval_screenspot_GIoU": 0.006226751953363419, "eval_screenspot_IoU": 0.15358526011308035, "eval_screenspot_MAE_all": 0.22168662150700888, "eval_screenspot_MAE_h": 0.14507206281026205, "eval_screenspot_MAE_w": 0.17006426552931467, "eval_screenspot_MAE_x_boxes": 0.34340771039326984, "eval_screenspot_MAE_y_boxes": 0.14694421738386154, "eval_screenspot_NUM_probability": 0.9999379515647888, "eval_screenspot_inside_bbox": 0.2941666642824809, "eval_screenspot_loss": 3.1456081867218018, "eval_screenspot_loss_ce": 0.005427776525417964, "eval_screenspot_loss_iou": 1.0172526041666667, "eval_screenspot_loss_num": 0.2325439453125, "eval_screenspot_loss_xval": 3.1966145833333335, "eval_screenspot_runtime": 117.4274, "eval_screenspot_samples_per_second": 0.758, "eval_screenspot_steps_per_second": 0.026, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30420742266111295, "eval_compot_CIoU": -0.01815701834857464, "eval_compot_GIoU": 0.0553207378834486, "eval_compot_IoU": 0.14277129620313644, "eval_compot_MAE_all": 0.1962691694498062, "eval_compot_MAE_h": 0.1110198013484478, "eval_compot_MAE_w": 0.16523808985948563, "eval_compot_MAE_x_boxes": 0.2056160643696785, "eval_compot_MAE_y_boxes": 0.14012055471539497, "eval_compot_NUM_probability": 0.9998982548713684, "eval_compot_inside_bbox": 0.1736111119389534, "eval_compot_loss": 2.8881747722625732, "eval_compot_loss_ce": 0.001730227144435048, "eval_compot_loss_iou": 0.95166015625, "eval_compot_loss_num": 0.1961212158203125, "eval_compot_loss_xval": 2.8837890625, "eval_compot_runtime": 68.3336, "eval_compot_samples_per_second": 0.732, "eval_compot_steps_per_second": 0.029, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30420742266111295, "eval_custom_ui_MAE_all": 0.17363159358501434, "eval_custom_ui_MAE_x": 0.15588220953941345, "eval_custom_ui_MAE_y": 0.19138099253177643, "eval_custom_ui_NUM_probability": 0.9998042583465576, "eval_custom_ui_loss": 0.9675492644309998, "eval_custom_ui_loss_ce": 0.1515451893210411, "eval_custom_ui_loss_num": 0.17529296875, "eval_custom_ui_loss_xval": 0.87548828125, "eval_custom_ui_runtime": 62.3142, "eval_custom_ui_samples_per_second": 0.802, "eval_custom_ui_steps_per_second": 0.032, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30420742266111295, "loss": 1.0495203733444214, "loss_ce": 0.175008624792099, "loss_iou": 0.0, "loss_num": 0.1748046875, "loss_xval": 0.875, "num_input_tokens_seen": 215259140, "step": 3250 }, { "epoch": 0.30430102494500866, "grad_norm": 16.616992950439453, "learning_rate": 5e-05, "loss": 1.3986, "num_input_tokens_seen": 215325076, "step": 3251 }, { "epoch": 0.30430102494500866, "loss": 1.1711382865905762, "loss_ce": 0.004146065097302198, "loss_iou": 0.490234375, "loss_num": 0.037841796875, "loss_xval": 1.1640625, "num_input_tokens_seen": 215325076, "step": 3251 }, { "epoch": 0.30439462722890437, "grad_norm": 24.00971221923828, "learning_rate": 5e-05, "loss": 1.1938, "num_input_tokens_seen": 215391292, "step": 3252 }, { "epoch": 0.30439462722890437, "loss": 1.2654268741607666, "loss_ce": 0.00614947360008955, "loss_iou": 0.5390625, "loss_num": 0.036376953125, "loss_xval": 1.2578125, "num_input_tokens_seen": 215391292, "step": 3252 }, { "epoch": 0.30448822951280013, "grad_norm": 27.957509994506836, "learning_rate": 5e-05, "loss": 1.4233, "num_input_tokens_seen": 215455908, "step": 3253 }, { "epoch": 0.30448822951280013, "loss": 1.4892008304595947, "loss_ce": 0.009708629921078682, "loss_iou": 0.51953125, "loss_num": 0.0888671875, "loss_xval": 1.4765625, "num_input_tokens_seen": 215455908, "step": 3253 }, { "epoch": 0.30458183179669585, "grad_norm": 129.54901123046875, "learning_rate": 5e-05, "loss": 1.4067, "num_input_tokens_seen": 215522272, "step": 3254 }, { "epoch": 0.30458183179669585, "loss": 1.354069471359253, "loss_ce": 0.009343021549284458, "loss_iou": 0.58984375, "loss_num": 0.032958984375, "loss_xval": 1.34375, "num_input_tokens_seen": 215522272, "step": 3254 }, { "epoch": 0.30467543408059156, "grad_norm": 26.872020721435547, "learning_rate": 5e-05, "loss": 1.3032, "num_input_tokens_seen": 215589168, "step": 3255 }, { "epoch": 0.30467543408059156, "loss": 1.2833125591278076, "loss_ce": 0.00767773762345314, "loss_iou": 0.5, "loss_num": 0.054443359375, "loss_xval": 1.2734375, "num_input_tokens_seen": 215589168, "step": 3255 }, { "epoch": 0.30476903636448727, "grad_norm": 24.30497932434082, "learning_rate": 5e-05, "loss": 1.3866, "num_input_tokens_seen": 215656732, "step": 3256 }, { "epoch": 0.30476903636448727, "loss": 1.5192744731903076, "loss_ce": 0.00316119147464633, "loss_iou": 0.62890625, "loss_num": 0.051025390625, "loss_xval": 1.515625, "num_input_tokens_seen": 215656732, "step": 3256 }, { "epoch": 0.30486263864838303, "grad_norm": 29.15682029724121, "learning_rate": 5e-05, "loss": 1.4388, "num_input_tokens_seen": 215721492, "step": 3257 }, { "epoch": 0.30486263864838303, "loss": 1.6248369216918945, "loss_ce": 0.006184583995491266, "loss_iou": 0.6796875, "loss_num": 0.052978515625, "loss_xval": 1.6171875, "num_input_tokens_seen": 215721492, "step": 3257 }, { "epoch": 0.30495624093227874, "grad_norm": 34.21500778198242, "learning_rate": 5e-05, "loss": 1.2608, "num_input_tokens_seen": 215787724, "step": 3258 }, { "epoch": 0.30495624093227874, "loss": 1.2550270557403564, "loss_ce": 0.003562240395694971, "loss_iou": 0.55078125, "loss_num": 0.0299072265625, "loss_xval": 1.25, "num_input_tokens_seen": 215787724, "step": 3258 }, { "epoch": 0.30504984321617445, "grad_norm": 46.901206970214844, "learning_rate": 5e-05, "loss": 1.4256, "num_input_tokens_seen": 215853812, "step": 3259 }, { "epoch": 0.30504984321617445, "loss": 1.5135836601257324, "loss_ce": 0.011630570515990257, "loss_iou": 0.56640625, "loss_num": 0.0732421875, "loss_xval": 1.5, "num_input_tokens_seen": 215853812, "step": 3259 }, { "epoch": 0.3051434455000702, "grad_norm": 23.632041931152344, "learning_rate": 5e-05, "loss": 1.6057, "num_input_tokens_seen": 215920840, "step": 3260 }, { "epoch": 0.3051434455000702, "loss": 1.6945726871490479, "loss_ce": 0.004143000580370426, "loss_iou": 0.6875, "loss_num": 0.062255859375, "loss_xval": 1.6875, "num_input_tokens_seen": 215920840, "step": 3260 }, { "epoch": 0.30523704778396593, "grad_norm": 24.994848251342773, "learning_rate": 5e-05, "loss": 1.1699, "num_input_tokens_seen": 215987696, "step": 3261 }, { "epoch": 0.30523704778396593, "loss": 1.244982123374939, "loss_ce": 0.0027946701738983393, "loss_iou": 0.5078125, "loss_num": 0.04541015625, "loss_xval": 1.2421875, "num_input_tokens_seen": 215987696, "step": 3261 }, { "epoch": 0.30533065006786164, "grad_norm": 21.05302619934082, "learning_rate": 5e-05, "loss": 1.2648, "num_input_tokens_seen": 216053572, "step": 3262 }, { "epoch": 0.30533065006786164, "loss": 0.926849365234375, "loss_ce": 0.005950903054326773, "loss_iou": 0.384765625, "loss_num": 0.0301513671875, "loss_xval": 0.921875, "num_input_tokens_seen": 216053572, "step": 3262 }, { "epoch": 0.3054242523517574, "grad_norm": 28.137279510498047, "learning_rate": 5e-05, "loss": 1.4048, "num_input_tokens_seen": 216119900, "step": 3263 }, { "epoch": 0.3054242523517574, "loss": 1.5935150384902954, "loss_ce": 0.003671366721391678, "loss_iou": 0.61328125, "loss_num": 0.0732421875, "loss_xval": 1.59375, "num_input_tokens_seen": 216119900, "step": 3263 }, { "epoch": 0.3055178546356531, "grad_norm": 103.77515411376953, "learning_rate": 5e-05, "loss": 1.5608, "num_input_tokens_seen": 216185992, "step": 3264 }, { "epoch": 0.3055178546356531, "loss": 1.6350138187408447, "loss_ce": 0.005130899604409933, "loss_iou": 0.7109375, "loss_num": 0.04248046875, "loss_xval": 1.6328125, "num_input_tokens_seen": 216185992, "step": 3264 }, { "epoch": 0.30561145691954883, "grad_norm": 41.907386779785156, "learning_rate": 5e-05, "loss": 1.1478, "num_input_tokens_seen": 216251256, "step": 3265 }, { "epoch": 0.30561145691954883, "loss": 1.234153389930725, "loss_ce": 0.0034404967445880175, "loss_iou": 0.4765625, "loss_num": 0.0556640625, "loss_xval": 1.234375, "num_input_tokens_seen": 216251256, "step": 3265 }, { "epoch": 0.30570505920344454, "grad_norm": 21.314462661743164, "learning_rate": 5e-05, "loss": 1.3408, "num_input_tokens_seen": 216318780, "step": 3266 }, { "epoch": 0.30570505920344454, "loss": 1.218451976776123, "loss_ce": 0.006049755960702896, "loss_iou": 0.5234375, "loss_num": 0.0322265625, "loss_xval": 1.2109375, "num_input_tokens_seen": 216318780, "step": 3266 }, { "epoch": 0.3057986614873403, "grad_norm": 43.160789489746094, "learning_rate": 5e-05, "loss": 1.3229, "num_input_tokens_seen": 216385228, "step": 3267 }, { "epoch": 0.3057986614873403, "loss": 1.3583934307098389, "loss_ce": 0.005854426883161068, "loss_iou": 0.5859375, "loss_num": 0.036376953125, "loss_xval": 1.3515625, "num_input_tokens_seen": 216385228, "step": 3267 }, { "epoch": 0.305892263771236, "grad_norm": 26.396848678588867, "learning_rate": 5e-05, "loss": 1.2981, "num_input_tokens_seen": 216450812, "step": 3268 }, { "epoch": 0.305892263771236, "loss": 1.1707066297531128, "loss_ce": 0.004446827806532383, "loss_iou": 0.44140625, "loss_num": 0.056884765625, "loss_xval": 1.1640625, "num_input_tokens_seen": 216450812, "step": 3268 }, { "epoch": 0.30598586605513173, "grad_norm": 15.987907409667969, "learning_rate": 5e-05, "loss": 1.2681, "num_input_tokens_seen": 216516500, "step": 3269 }, { "epoch": 0.30598586605513173, "loss": 1.4139719009399414, "loss_ce": 0.005768820643424988, "loss_iou": 0.5625, "loss_num": 0.056396484375, "loss_xval": 1.40625, "num_input_tokens_seen": 216516500, "step": 3269 }, { "epoch": 0.3060794683390275, "grad_norm": 14.837997436523438, "learning_rate": 5e-05, "loss": 1.2282, "num_input_tokens_seen": 216583640, "step": 3270 }, { "epoch": 0.3060794683390275, "loss": 1.2314945459365845, "loss_ce": 0.00493203941732645, "loss_iou": 0.48828125, "loss_num": 0.050048828125, "loss_xval": 1.2265625, "num_input_tokens_seen": 216583640, "step": 3270 }, { "epoch": 0.3061730706229232, "grad_norm": 44.174407958984375, "learning_rate": 5e-05, "loss": 1.2446, "num_input_tokens_seen": 216649352, "step": 3271 }, { "epoch": 0.3061730706229232, "loss": 1.2967551946640015, "loss_ce": 0.004763028584420681, "loss_iou": 0.51953125, "loss_num": 0.050537109375, "loss_xval": 1.2890625, "num_input_tokens_seen": 216649352, "step": 3271 }, { "epoch": 0.3062666729068189, "grad_norm": 20.741392135620117, "learning_rate": 5e-05, "loss": 1.4702, "num_input_tokens_seen": 216717100, "step": 3272 }, { "epoch": 0.3062666729068189, "loss": 1.4455691576004028, "loss_ce": 0.004651239141821861, "loss_iou": 0.5703125, "loss_num": 0.06103515625, "loss_xval": 1.4375, "num_input_tokens_seen": 216717100, "step": 3272 }, { "epoch": 0.3063602751907146, "grad_norm": 24.962942123413086, "learning_rate": 5e-05, "loss": 1.292, "num_input_tokens_seen": 216783644, "step": 3273 }, { "epoch": 0.3063602751907146, "loss": 1.1587369441986084, "loss_ce": 0.00419585220515728, "loss_iou": 0.453125, "loss_num": 0.050048828125, "loss_xval": 1.15625, "num_input_tokens_seen": 216783644, "step": 3273 }, { "epoch": 0.3064538774746104, "grad_norm": 31.80834197998047, "learning_rate": 5e-05, "loss": 1.4634, "num_input_tokens_seen": 216850756, "step": 3274 }, { "epoch": 0.3064538774746104, "loss": 1.445806622505188, "loss_ce": 0.0034238863736391068, "loss_iou": 0.609375, "loss_num": 0.04443359375, "loss_xval": 1.4453125, "num_input_tokens_seen": 216850756, "step": 3274 }, { "epoch": 0.3065474797585061, "grad_norm": 41.55213928222656, "learning_rate": 5e-05, "loss": 1.4646, "num_input_tokens_seen": 216917756, "step": 3275 }, { "epoch": 0.3065474797585061, "loss": 1.6461317539215088, "loss_ce": 0.008436314761638641, "loss_iou": 0.69921875, "loss_num": 0.048095703125, "loss_xval": 1.640625, "num_input_tokens_seen": 216917756, "step": 3275 }, { "epoch": 0.3066410820424018, "grad_norm": 46.32811737060547, "learning_rate": 5e-05, "loss": 1.3779, "num_input_tokens_seen": 216984336, "step": 3276 }, { "epoch": 0.3066410820424018, "loss": 1.4777175188064575, "loss_ce": 0.004572918172925711, "loss_iou": 0.6328125, "loss_num": 0.041259765625, "loss_xval": 1.4765625, "num_input_tokens_seen": 216984336, "step": 3276 }, { "epoch": 0.3067346843262976, "grad_norm": 26.338844299316406, "learning_rate": 5e-05, "loss": 1.2143, "num_input_tokens_seen": 217050360, "step": 3277 }, { "epoch": 0.3067346843262976, "loss": 1.189054250717163, "loss_ce": 0.0030038978438824415, "loss_iou": 0.462890625, "loss_num": 0.052001953125, "loss_xval": 1.1875, "num_input_tokens_seen": 217050360, "step": 3277 }, { "epoch": 0.3068282866101933, "grad_norm": 16.650480270385742, "learning_rate": 5e-05, "loss": 1.0909, "num_input_tokens_seen": 217115916, "step": 3278 }, { "epoch": 0.3068282866101933, "loss": 1.0408778190612793, "loss_ce": 0.0029140515252947807, "loss_iou": 0.396484375, "loss_num": 0.04931640625, "loss_xval": 1.0390625, "num_input_tokens_seen": 217115916, "step": 3278 }, { "epoch": 0.306921888894089, "grad_norm": 12.268118858337402, "learning_rate": 5e-05, "loss": 1.0564, "num_input_tokens_seen": 217182468, "step": 3279 }, { "epoch": 0.306921888894089, "loss": 1.0205044746398926, "loss_ce": 0.006344387773424387, "loss_iou": 0.45703125, "loss_num": 0.0205078125, "loss_xval": 1.015625, "num_input_tokens_seen": 217182468, "step": 3279 }, { "epoch": 0.30701549117798477, "grad_norm": 23.875041961669922, "learning_rate": 5e-05, "loss": 1.3049, "num_input_tokens_seen": 217249680, "step": 3280 }, { "epoch": 0.30701549117798477, "loss": 1.3264915943145752, "loss_ce": 0.005690774414688349, "loss_iou": 0.55859375, "loss_num": 0.0400390625, "loss_xval": 1.3203125, "num_input_tokens_seen": 217249680, "step": 3280 }, { "epoch": 0.3071090934618805, "grad_norm": 49.74822998046875, "learning_rate": 5e-05, "loss": 1.6603, "num_input_tokens_seen": 217315084, "step": 3281 }, { "epoch": 0.3071090934618805, "loss": 1.8198919296264648, "loss_ce": 0.005438767373561859, "loss_iou": 0.70703125, "loss_num": 0.07958984375, "loss_xval": 1.8125, "num_input_tokens_seen": 217315084, "step": 3281 }, { "epoch": 0.3072026957457762, "grad_norm": 26.989891052246094, "learning_rate": 5e-05, "loss": 1.3183, "num_input_tokens_seen": 217381132, "step": 3282 }, { "epoch": 0.3072026957457762, "loss": 1.295100450515747, "loss_ce": 0.0031082441564649343, "loss_iou": 0.56640625, "loss_num": 0.031005859375, "loss_xval": 1.2890625, "num_input_tokens_seen": 217381132, "step": 3282 }, { "epoch": 0.3072962980296719, "grad_norm": 106.77875518798828, "learning_rate": 5e-05, "loss": 1.3803, "num_input_tokens_seen": 217447956, "step": 3283 }, { "epoch": 0.3072962980296719, "loss": 1.539642333984375, "loss_ce": 0.0015564555069431663, "loss_iou": 0.609375, "loss_num": 0.0634765625, "loss_xval": 1.5390625, "num_input_tokens_seen": 217447956, "step": 3283 }, { "epoch": 0.30738990031356767, "grad_norm": 42.692012786865234, "learning_rate": 5e-05, "loss": 1.3951, "num_input_tokens_seen": 217515024, "step": 3284 }, { "epoch": 0.30738990031356767, "loss": 1.3151333332061768, "loss_ce": 0.006539489608258009, "loss_iou": 0.51953125, "loss_num": 0.0537109375, "loss_xval": 1.3125, "num_input_tokens_seen": 217515024, "step": 3284 }, { "epoch": 0.3074835025974634, "grad_norm": 23.102754592895508, "learning_rate": 5e-05, "loss": 1.2996, "num_input_tokens_seen": 217580552, "step": 3285 }, { "epoch": 0.3074835025974634, "loss": 1.2344200611114502, "loss_ce": 0.00907817017287016, "loss_iou": 0.4375, "loss_num": 0.0703125, "loss_xval": 1.2265625, "num_input_tokens_seen": 217580552, "step": 3285 }, { "epoch": 0.3075771048813591, "grad_norm": 21.07726287841797, "learning_rate": 5e-05, "loss": 1.2692, "num_input_tokens_seen": 217647536, "step": 3286 }, { "epoch": 0.3075771048813591, "loss": 1.365906000137329, "loss_ce": 0.00457786163315177, "loss_iou": 0.546875, "loss_num": 0.05419921875, "loss_xval": 1.359375, "num_input_tokens_seen": 217647536, "step": 3286 }, { "epoch": 0.30767070716525485, "grad_norm": 24.006851196289062, "learning_rate": 5e-05, "loss": 1.4339, "num_input_tokens_seen": 217712684, "step": 3287 }, { "epoch": 0.30767070716525485, "loss": 1.4776030778884888, "loss_ce": 0.008731073699891567, "loss_iou": 0.57421875, "loss_num": 0.06396484375, "loss_xval": 1.46875, "num_input_tokens_seen": 217712684, "step": 3287 }, { "epoch": 0.30776430944915056, "grad_norm": 37.91904067993164, "learning_rate": 5e-05, "loss": 1.3971, "num_input_tokens_seen": 217779820, "step": 3288 }, { "epoch": 0.30776430944915056, "loss": 1.4429993629455566, "loss_ce": 0.007940869778394699, "loss_iou": 0.5859375, "loss_num": 0.052734375, "loss_xval": 1.4375, "num_input_tokens_seen": 217779820, "step": 3288 }, { "epoch": 0.3078579117330463, "grad_norm": 20.647216796875, "learning_rate": 5e-05, "loss": 1.603, "num_input_tokens_seen": 217846684, "step": 3289 }, { "epoch": 0.3078579117330463, "loss": 1.3160533905029297, "loss_ce": 0.004529984667897224, "loss_iou": 0.56640625, "loss_num": 0.034912109375, "loss_xval": 1.3125, "num_input_tokens_seen": 217846684, "step": 3289 }, { "epoch": 0.30795151401694204, "grad_norm": 14.967142105102539, "learning_rate": 5e-05, "loss": 1.2879, "num_input_tokens_seen": 217912200, "step": 3290 }, { "epoch": 0.30795151401694204, "loss": 1.2119174003601074, "loss_ce": 0.007815740071237087, "loss_iou": 0.5078125, "loss_num": 0.037353515625, "loss_xval": 1.203125, "num_input_tokens_seen": 217912200, "step": 3290 }, { "epoch": 0.30804511630083775, "grad_norm": 26.79849624633789, "learning_rate": 5e-05, "loss": 1.4611, "num_input_tokens_seen": 217979080, "step": 3291 }, { "epoch": 0.30804511630083775, "loss": 1.554707646369934, "loss_ce": 0.0063678547739982605, "loss_iou": 0.62109375, "loss_num": 0.06103515625, "loss_xval": 1.546875, "num_input_tokens_seen": 217979080, "step": 3291 }, { "epoch": 0.30813871858473346, "grad_norm": 28.4384822845459, "learning_rate": 5e-05, "loss": 1.629, "num_input_tokens_seen": 218046008, "step": 3292 }, { "epoch": 0.30813871858473346, "loss": 1.515143632888794, "loss_ce": 0.006354586221277714, "loss_iou": 0.59375, "loss_num": 0.064453125, "loss_xval": 1.5078125, "num_input_tokens_seen": 218046008, "step": 3292 }, { "epoch": 0.3082323208686292, "grad_norm": 43.775630950927734, "learning_rate": 5e-05, "loss": 1.4897, "num_input_tokens_seen": 218111388, "step": 3293 }, { "epoch": 0.3082323208686292, "loss": 1.4442451000213623, "loss_ce": 0.006012683734297752, "loss_iou": 0.5859375, "loss_num": 0.052978515625, "loss_xval": 1.4375, "num_input_tokens_seen": 218111388, "step": 3293 }, { "epoch": 0.30832592315252494, "grad_norm": 23.99239158630371, "learning_rate": 5e-05, "loss": 1.5258, "num_input_tokens_seen": 218176880, "step": 3294 }, { "epoch": 0.30832592315252494, "loss": 1.530544638633728, "loss_ce": 0.007107154466211796, "loss_iou": 0.6484375, "loss_num": 0.0458984375, "loss_xval": 1.5234375, "num_input_tokens_seen": 218176880, "step": 3294 }, { "epoch": 0.30841952543642065, "grad_norm": 33.39753723144531, "learning_rate": 5e-05, "loss": 1.4145, "num_input_tokens_seen": 218242624, "step": 3295 }, { "epoch": 0.30841952543642065, "loss": 1.3502821922302246, "loss_ce": 0.0021375836804509163, "loss_iou": 0.55859375, "loss_num": 0.0458984375, "loss_xval": 1.3515625, "num_input_tokens_seen": 218242624, "step": 3295 }, { "epoch": 0.30851312772031636, "grad_norm": 24.41062355041504, "learning_rate": 5e-05, "loss": 1.372, "num_input_tokens_seen": 218308764, "step": 3296 }, { "epoch": 0.30851312772031636, "loss": 1.1369571685791016, "loss_ce": 0.008050918579101562, "loss_iou": 0.408203125, "loss_num": 0.062255859375, "loss_xval": 1.125, "num_input_tokens_seen": 218308764, "step": 3296 }, { "epoch": 0.3086067300042121, "grad_norm": 24.676944732666016, "learning_rate": 5e-05, "loss": 1.5436, "num_input_tokens_seen": 218374720, "step": 3297 }, { "epoch": 0.3086067300042121, "loss": 1.6637136936187744, "loss_ce": 0.004045591689646244, "loss_iou": 0.63671875, "loss_num": 0.078125, "loss_xval": 1.65625, "num_input_tokens_seen": 218374720, "step": 3297 }, { "epoch": 0.30870033228810784, "grad_norm": 30.753559112548828, "learning_rate": 5e-05, "loss": 1.2207, "num_input_tokens_seen": 218440992, "step": 3298 }, { "epoch": 0.30870033228810784, "loss": 1.268710970878601, "loss_ce": 0.0050390600226819515, "loss_iou": 0.5078125, "loss_num": 0.049560546875, "loss_xval": 1.265625, "num_input_tokens_seen": 218440992, "step": 3298 }, { "epoch": 0.30879393457200355, "grad_norm": 21.44145965576172, "learning_rate": 5e-05, "loss": 1.5016, "num_input_tokens_seen": 218507732, "step": 3299 }, { "epoch": 0.30879393457200355, "loss": 1.3323389291763306, "loss_ce": 0.006167007610201836, "loss_iou": 0.5703125, "loss_num": 0.037353515625, "loss_xval": 1.328125, "num_input_tokens_seen": 218507732, "step": 3299 }, { "epoch": 0.30888753685589926, "grad_norm": 36.36735534667969, "learning_rate": 5e-05, "loss": 1.4294, "num_input_tokens_seen": 218573172, "step": 3300 }, { "epoch": 0.30888753685589926, "loss": 1.4702776670455933, "loss_ce": 0.004945619031786919, "loss_iou": 0.58984375, "loss_num": 0.0576171875, "loss_xval": 1.46875, "num_input_tokens_seen": 218573172, "step": 3300 }, { "epoch": 0.308981139139795, "grad_norm": 24.736528396606445, "learning_rate": 5e-05, "loss": 1.1731, "num_input_tokens_seen": 218638484, "step": 3301 }, { "epoch": 0.308981139139795, "loss": 1.0958927869796753, "loss_ce": 0.0038518088404089212, "loss_iou": 0.423828125, "loss_num": 0.049072265625, "loss_xval": 1.09375, "num_input_tokens_seen": 218638484, "step": 3301 }, { "epoch": 0.30907474142369074, "grad_norm": 42.26384353637695, "learning_rate": 5e-05, "loss": 1.405, "num_input_tokens_seen": 218705076, "step": 3302 }, { "epoch": 0.30907474142369074, "loss": 1.3072919845581055, "loss_ce": 0.005045905243605375, "loss_iou": 0.55078125, "loss_num": 0.03955078125, "loss_xval": 1.3046875, "num_input_tokens_seen": 218705076, "step": 3302 }, { "epoch": 0.30916834370758645, "grad_norm": 35.38228988647461, "learning_rate": 5e-05, "loss": 1.3409, "num_input_tokens_seen": 218769772, "step": 3303 }, { "epoch": 0.30916834370758645, "loss": 1.3389854431152344, "loss_ce": 0.003292145673185587, "loss_iou": 0.52734375, "loss_num": 0.056396484375, "loss_xval": 1.3359375, "num_input_tokens_seen": 218769772, "step": 3303 }, { "epoch": 0.3092619459914822, "grad_norm": 18.485929489135742, "learning_rate": 5e-05, "loss": 1.2342, "num_input_tokens_seen": 218836916, "step": 3304 }, { "epoch": 0.3092619459914822, "loss": 1.1005358695983887, "loss_ce": 0.006297651678323746, "loss_iou": 0.466796875, "loss_num": 0.032470703125, "loss_xval": 1.09375, "num_input_tokens_seen": 218836916, "step": 3304 }, { "epoch": 0.3093555482753779, "grad_norm": 34.8907585144043, "learning_rate": 5e-05, "loss": 1.1725, "num_input_tokens_seen": 218902848, "step": 3305 }, { "epoch": 0.3093555482753779, "loss": 1.3086987733840942, "loss_ce": 0.00852786935865879, "loss_iou": 0.5078125, "loss_num": 0.057373046875, "loss_xval": 1.296875, "num_input_tokens_seen": 218902848, "step": 3305 }, { "epoch": 0.30944915055927363, "grad_norm": 23.96047592163086, "learning_rate": 5e-05, "loss": 1.3576, "num_input_tokens_seen": 218968712, "step": 3306 }, { "epoch": 0.30944915055927363, "loss": 1.3991553783416748, "loss_ce": 0.008042062632739544, "loss_iou": 0.625, "loss_num": 0.0286865234375, "loss_xval": 1.390625, "num_input_tokens_seen": 218968712, "step": 3306 }, { "epoch": 0.3095427528431694, "grad_norm": 19.13654136657715, "learning_rate": 5e-05, "loss": 1.3225, "num_input_tokens_seen": 219035900, "step": 3307 }, { "epoch": 0.3095427528431694, "loss": 1.3272422552108765, "loss_ce": 0.003023592522367835, "loss_iou": 0.5625, "loss_num": 0.0390625, "loss_xval": 1.328125, "num_input_tokens_seen": 219035900, "step": 3307 }, { "epoch": 0.3096363551270651, "grad_norm": 29.159271240234375, "learning_rate": 5e-05, "loss": 1.4599, "num_input_tokens_seen": 219101832, "step": 3308 }, { "epoch": 0.3096363551270651, "loss": 1.5176429748535156, "loss_ce": 0.004459417425096035, "loss_iou": 0.63671875, "loss_num": 0.04736328125, "loss_xval": 1.515625, "num_input_tokens_seen": 219101832, "step": 3308 }, { "epoch": 0.3097299574109608, "grad_norm": 35.58757019042969, "learning_rate": 5e-05, "loss": 1.4017, "num_input_tokens_seen": 219167928, "step": 3309 }, { "epoch": 0.3097299574109608, "loss": 1.4925603866577148, "loss_ce": 0.006476448383182287, "loss_iou": 0.59765625, "loss_num": 0.0576171875, "loss_xval": 1.484375, "num_input_tokens_seen": 219167928, "step": 3309 }, { "epoch": 0.30982355969485653, "grad_norm": 32.41355895996094, "learning_rate": 5e-05, "loss": 1.8041, "num_input_tokens_seen": 219234704, "step": 3310 }, { "epoch": 0.30982355969485653, "loss": 1.7300525903701782, "loss_ce": 0.005443126894533634, "loss_iou": 0.7109375, "loss_num": 0.06005859375, "loss_xval": 1.7265625, "num_input_tokens_seen": 219234704, "step": 3310 }, { "epoch": 0.3099171619787523, "grad_norm": 16.624296188354492, "learning_rate": 5e-05, "loss": 1.4817, "num_input_tokens_seen": 219300252, "step": 3311 }, { "epoch": 0.3099171619787523, "loss": 1.4687089920043945, "loss_ce": 0.0038653004448860884, "loss_iou": 0.5703125, "loss_num": 0.064453125, "loss_xval": 1.46875, "num_input_tokens_seen": 219300252, "step": 3311 }, { "epoch": 0.310010764262648, "grad_norm": 17.57419776916504, "learning_rate": 5e-05, "loss": 1.2537, "num_input_tokens_seen": 219366304, "step": 3312 }, { "epoch": 0.310010764262648, "loss": 1.2607388496398926, "loss_ce": 0.006344245281070471, "loss_iou": 0.54296875, "loss_num": 0.0341796875, "loss_xval": 1.2578125, "num_input_tokens_seen": 219366304, "step": 3312 }, { "epoch": 0.3101043665465437, "grad_norm": 34.53327560424805, "learning_rate": 5e-05, "loss": 1.1657, "num_input_tokens_seen": 219431020, "step": 3313 }, { "epoch": 0.3101043665465437, "loss": 1.180055022239685, "loss_ce": 0.006715222727507353, "loss_iou": 0.4375, "loss_num": 0.0595703125, "loss_xval": 1.171875, "num_input_tokens_seen": 219431020, "step": 3313 }, { "epoch": 0.3101979688304395, "grad_norm": 43.260459899902344, "learning_rate": 5e-05, "loss": 1.4908, "num_input_tokens_seen": 219496608, "step": 3314 }, { "epoch": 0.3101979688304395, "loss": 1.4541683197021484, "loss_ce": 0.003973029553890228, "loss_iou": 0.55859375, "loss_num": 0.06591796875, "loss_xval": 1.453125, "num_input_tokens_seen": 219496608, "step": 3314 }, { "epoch": 0.3102915711143352, "grad_norm": 54.946163177490234, "learning_rate": 5e-05, "loss": 1.3666, "num_input_tokens_seen": 219562448, "step": 3315 }, { "epoch": 0.3102915711143352, "loss": 1.3770301342010498, "loss_ce": 0.003983244299888611, "loss_iou": 0.5546875, "loss_num": 0.052734375, "loss_xval": 1.375, "num_input_tokens_seen": 219562448, "step": 3315 }, { "epoch": 0.3103851733982309, "grad_norm": 23.593570709228516, "learning_rate": 5e-05, "loss": 1.569, "num_input_tokens_seen": 219628164, "step": 3316 }, { "epoch": 0.3103851733982309, "loss": 1.5762691497802734, "loss_ce": 0.005956693552434444, "loss_iou": 0.6875, "loss_num": 0.038818359375, "loss_xval": 1.5703125, "num_input_tokens_seen": 219628164, "step": 3316 }, { "epoch": 0.3104787756821266, "grad_norm": 19.556455612182617, "learning_rate": 5e-05, "loss": 1.1861, "num_input_tokens_seen": 219694576, "step": 3317 }, { "epoch": 0.3104787756821266, "loss": 1.2614383697509766, "loss_ce": 0.01143829058855772, "loss_iou": 0.46875, "loss_num": 0.0625, "loss_xval": 1.25, "num_input_tokens_seen": 219694576, "step": 3317 }, { "epoch": 0.3105723779660224, "grad_norm": 25.187734603881836, "learning_rate": 5e-05, "loss": 1.4119, "num_input_tokens_seen": 219761096, "step": 3318 }, { "epoch": 0.3105723779660224, "loss": 1.5257301330566406, "loss_ce": 0.0022926232777535915, "loss_iou": 0.625, "loss_num": 0.05419921875, "loss_xval": 1.5234375, "num_input_tokens_seen": 219761096, "step": 3318 }, { "epoch": 0.3106659802499181, "grad_norm": 31.622879028320312, "learning_rate": 5e-05, "loss": 1.571, "num_input_tokens_seen": 219826468, "step": 3319 }, { "epoch": 0.3106659802499181, "loss": 1.4179356098175049, "loss_ce": 0.005826190579682589, "loss_iou": 0.53515625, "loss_num": 0.06787109375, "loss_xval": 1.4140625, "num_input_tokens_seen": 219826468, "step": 3319 }, { "epoch": 0.3107595825338138, "grad_norm": 23.731725692749023, "learning_rate": 5e-05, "loss": 1.3923, "num_input_tokens_seen": 219892660, "step": 3320 }, { "epoch": 0.3107595825338138, "loss": 1.3493430614471436, "loss_ce": 0.00315176323056221, "loss_iou": 0.6015625, "loss_num": 0.0286865234375, "loss_xval": 1.34375, "num_input_tokens_seen": 219892660, "step": 3320 }, { "epoch": 0.31085318481770957, "grad_norm": 32.589866638183594, "learning_rate": 5e-05, "loss": 1.5546, "num_input_tokens_seen": 219958688, "step": 3321 }, { "epoch": 0.31085318481770957, "loss": 1.2926206588745117, "loss_ce": 0.004046455956995487, "loss_iou": 0.515625, "loss_num": 0.051513671875, "loss_xval": 1.2890625, "num_input_tokens_seen": 219958688, "step": 3321 }, { "epoch": 0.3109467871016053, "grad_norm": 18.977188110351562, "learning_rate": 5e-05, "loss": 1.4326, "num_input_tokens_seen": 220025348, "step": 3322 }, { "epoch": 0.3109467871016053, "loss": 1.5848474502563477, "loss_ce": 0.00867548305541277, "loss_iou": 0.66796875, "loss_num": 0.04736328125, "loss_xval": 1.578125, "num_input_tokens_seen": 220025348, "step": 3322 }, { "epoch": 0.311040389385501, "grad_norm": 20.86507797241211, "learning_rate": 5e-05, "loss": 1.2166, "num_input_tokens_seen": 220091376, "step": 3323 }, { "epoch": 0.311040389385501, "loss": 1.0473569631576538, "loss_ce": 0.004266200587153435, "loss_iou": 0.396484375, "loss_num": 0.0498046875, "loss_xval": 1.046875, "num_input_tokens_seen": 220091376, "step": 3323 }, { "epoch": 0.31113399166939676, "grad_norm": 20.868701934814453, "learning_rate": 5e-05, "loss": 1.4512, "num_input_tokens_seen": 220156540, "step": 3324 }, { "epoch": 0.31113399166939676, "loss": 1.4235413074493408, "loss_ce": 0.0065492019057273865, "loss_iou": 0.56640625, "loss_num": 0.056396484375, "loss_xval": 1.4140625, "num_input_tokens_seen": 220156540, "step": 3324 }, { "epoch": 0.31122759395329247, "grad_norm": 62.60850524902344, "learning_rate": 5e-05, "loss": 1.519, "num_input_tokens_seen": 220223020, "step": 3325 }, { "epoch": 0.31122759395329247, "loss": 1.3537803888320923, "loss_ce": 0.009053874760866165, "loss_iou": 0.5625, "loss_num": 0.04443359375, "loss_xval": 1.34375, "num_input_tokens_seen": 220223020, "step": 3325 }, { "epoch": 0.3113211962371882, "grad_norm": 27.41938018798828, "learning_rate": 5e-05, "loss": 1.4091, "num_input_tokens_seen": 220290112, "step": 3326 }, { "epoch": 0.3113211962371882, "loss": 1.1774846315383911, "loss_ce": 0.005121318623423576, "loss_iou": 0.515625, "loss_num": 0.028076171875, "loss_xval": 1.171875, "num_input_tokens_seen": 220290112, "step": 3326 }, { "epoch": 0.3114147985210839, "grad_norm": 22.340200424194336, "learning_rate": 5e-05, "loss": 1.2139, "num_input_tokens_seen": 220356420, "step": 3327 }, { "epoch": 0.3114147985210839, "loss": 1.0492682456970215, "loss_ce": 0.004102273844182491, "loss_iou": 0.41796875, "loss_num": 0.041748046875, "loss_xval": 1.046875, "num_input_tokens_seen": 220356420, "step": 3327 }, { "epoch": 0.31150840080497966, "grad_norm": 16.401081085205078, "learning_rate": 5e-05, "loss": 1.3767, "num_input_tokens_seen": 220423168, "step": 3328 }, { "epoch": 0.31150840080497966, "loss": 1.5529004335403442, "loss_ce": 0.007734448648989201, "loss_iou": 0.52734375, "loss_num": 0.09814453125, "loss_xval": 1.546875, "num_input_tokens_seen": 220423168, "step": 3328 }, { "epoch": 0.31160200308887537, "grad_norm": 34.76987075805664, "learning_rate": 5e-05, "loss": 1.1955, "num_input_tokens_seen": 220488936, "step": 3329 }, { "epoch": 0.31160200308887537, "loss": 1.1450514793395996, "loss_ce": 0.0068679628893733025, "loss_iou": 0.484375, "loss_num": 0.033935546875, "loss_xval": 1.140625, "num_input_tokens_seen": 220488936, "step": 3329 }, { "epoch": 0.3116956053727711, "grad_norm": 18.02237319946289, "learning_rate": 5e-05, "loss": 1.3261, "num_input_tokens_seen": 220555808, "step": 3330 }, { "epoch": 0.3116956053727711, "loss": 1.1664884090423584, "loss_ce": 0.004897744860500097, "loss_iou": 0.484375, "loss_num": 0.0380859375, "loss_xval": 1.1640625, "num_input_tokens_seen": 220555808, "step": 3330 }, { "epoch": 0.31178920765666684, "grad_norm": 39.48426818847656, "learning_rate": 5e-05, "loss": 1.3595, "num_input_tokens_seen": 220621040, "step": 3331 }, { "epoch": 0.31178920765666684, "loss": 1.3606083393096924, "loss_ce": 0.009045885875821114, "loss_iou": 0.55078125, "loss_num": 0.0498046875, "loss_xval": 1.3515625, "num_input_tokens_seen": 220621040, "step": 3331 }, { "epoch": 0.31188280994056256, "grad_norm": 29.691120147705078, "learning_rate": 5e-05, "loss": 1.3407, "num_input_tokens_seen": 220687548, "step": 3332 }, { "epoch": 0.31188280994056256, "loss": 1.0321433544158936, "loss_ce": 0.0018699432257562876, "loss_iou": 0.45703125, "loss_num": 0.023193359375, "loss_xval": 1.03125, "num_input_tokens_seen": 220687548, "step": 3332 }, { "epoch": 0.31197641222445827, "grad_norm": 27.59664535522461, "learning_rate": 5e-05, "loss": 1.1249, "num_input_tokens_seen": 220754168, "step": 3333 }, { "epoch": 0.31197641222445827, "loss": 1.1480791568756104, "loss_ce": 0.006965956185013056, "loss_iou": 0.431640625, "loss_num": 0.055908203125, "loss_xval": 1.140625, "num_input_tokens_seen": 220754168, "step": 3333 }, { "epoch": 0.312070014508354, "grad_norm": 26.381746292114258, "learning_rate": 5e-05, "loss": 1.5325, "num_input_tokens_seen": 220819884, "step": 3334 }, { "epoch": 0.312070014508354, "loss": 1.3252267837524414, "loss_ce": 0.003937665373086929, "loss_iou": 0.5625, "loss_num": 0.03857421875, "loss_xval": 1.3203125, "num_input_tokens_seen": 220819884, "step": 3334 }, { "epoch": 0.31216361679224974, "grad_norm": 23.994365692138672, "learning_rate": 5e-05, "loss": 1.1033, "num_input_tokens_seen": 220885232, "step": 3335 }, { "epoch": 0.31216361679224974, "loss": 1.0954842567443848, "loss_ce": 0.008081864565610886, "loss_iou": 0.392578125, "loss_num": 0.060302734375, "loss_xval": 1.0859375, "num_input_tokens_seen": 220885232, "step": 3335 }, { "epoch": 0.31225721907614545, "grad_norm": 28.305471420288086, "learning_rate": 5e-05, "loss": 1.4438, "num_input_tokens_seen": 220951272, "step": 3336 }, { "epoch": 0.31225721907614545, "loss": 1.2502145767211914, "loss_ce": 0.009003687649965286, "loss_iou": 0.515625, "loss_num": 0.042236328125, "loss_xval": 1.2421875, "num_input_tokens_seen": 220951272, "step": 3336 }, { "epoch": 0.31235082136004116, "grad_norm": 35.367069244384766, "learning_rate": 5e-05, "loss": 1.3877, "num_input_tokens_seen": 221017748, "step": 3337 }, { "epoch": 0.31235082136004116, "loss": 1.3979169130325317, "loss_ce": 0.008268485777080059, "loss_iou": 0.53515625, "loss_num": 0.06494140625, "loss_xval": 1.390625, "num_input_tokens_seen": 221017748, "step": 3337 }, { "epoch": 0.31244442364393693, "grad_norm": 19.60552978515625, "learning_rate": 5e-05, "loss": 1.4168, "num_input_tokens_seen": 221083068, "step": 3338 }, { "epoch": 0.31244442364393693, "loss": 1.6001744270324707, "loss_ce": 0.008133322931826115, "loss_iou": 0.64453125, "loss_num": 0.060546875, "loss_xval": 1.59375, "num_input_tokens_seen": 221083068, "step": 3338 }, { "epoch": 0.31253802592783264, "grad_norm": 52.10533905029297, "learning_rate": 5e-05, "loss": 1.4384, "num_input_tokens_seen": 221149408, "step": 3339 }, { "epoch": 0.31253802592783264, "loss": 1.5534617900848389, "loss_ce": 0.007807568646967411, "loss_iou": 0.58984375, "loss_num": 0.0732421875, "loss_xval": 1.546875, "num_input_tokens_seen": 221149408, "step": 3339 }, { "epoch": 0.31263162821172835, "grad_norm": 29.24900245666504, "learning_rate": 5e-05, "loss": 1.48, "num_input_tokens_seen": 221215192, "step": 3340 }, { "epoch": 0.31263162821172835, "loss": 1.3427841663360596, "loss_ce": 0.0053817350417375565, "loss_iou": 0.55078125, "loss_num": 0.04736328125, "loss_xval": 1.3359375, "num_input_tokens_seen": 221215192, "step": 3340 }, { "epoch": 0.3127252304956241, "grad_norm": 23.47211456298828, "learning_rate": 5e-05, "loss": 1.5811, "num_input_tokens_seen": 221279480, "step": 3341 }, { "epoch": 0.3127252304956241, "loss": 1.5039896965026855, "loss_ce": 0.004355897195637226, "loss_iou": 0.6015625, "loss_num": 0.0595703125, "loss_xval": 1.5, "num_input_tokens_seen": 221279480, "step": 3341 }, { "epoch": 0.31281883277951983, "grad_norm": 17.825998306274414, "learning_rate": 5e-05, "loss": 1.3731, "num_input_tokens_seen": 221345552, "step": 3342 }, { "epoch": 0.31281883277951983, "loss": 1.1372429132461548, "loss_ce": 0.0061393496580421925, "loss_iou": 0.48046875, "loss_num": 0.033935546875, "loss_xval": 1.1328125, "num_input_tokens_seen": 221345552, "step": 3342 }, { "epoch": 0.31291243506341554, "grad_norm": 30.159902572631836, "learning_rate": 5e-05, "loss": 1.453, "num_input_tokens_seen": 221411564, "step": 3343 }, { "epoch": 0.31291243506341554, "loss": 1.4636118412017822, "loss_ce": 0.005115772597491741, "loss_iou": 0.58984375, "loss_num": 0.05517578125, "loss_xval": 1.4609375, "num_input_tokens_seen": 221411564, "step": 3343 }, { "epoch": 0.31300603734731125, "grad_norm": 26.943056106567383, "learning_rate": 5e-05, "loss": 1.3874, "num_input_tokens_seen": 221478344, "step": 3344 }, { "epoch": 0.31300603734731125, "loss": 1.2727259397506714, "loss_ce": 0.007589232176542282, "loss_iou": 0.53125, "loss_num": 0.041015625, "loss_xval": 1.265625, "num_input_tokens_seen": 221478344, "step": 3344 }, { "epoch": 0.313099639631207, "grad_norm": 38.17118453979492, "learning_rate": 5e-05, "loss": 1.5091, "num_input_tokens_seen": 221543836, "step": 3345 }, { "epoch": 0.313099639631207, "loss": 1.5436885356903076, "loss_ce": 0.0056026773527264595, "loss_iou": 0.67578125, "loss_num": 0.03662109375, "loss_xval": 1.5390625, "num_input_tokens_seen": 221543836, "step": 3345 }, { "epoch": 0.3131932419151027, "grad_norm": 23.127803802490234, "learning_rate": 5e-05, "loss": 1.1563, "num_input_tokens_seen": 221609172, "step": 3346 }, { "epoch": 0.3131932419151027, "loss": 1.0623043775558472, "loss_ce": 0.003710642922669649, "loss_iou": 0.4375, "loss_num": 0.037353515625, "loss_xval": 1.0625, "num_input_tokens_seen": 221609172, "step": 3346 }, { "epoch": 0.31328684419899844, "grad_norm": 105.68035888671875, "learning_rate": 5e-05, "loss": 1.3641, "num_input_tokens_seen": 221675368, "step": 3347 }, { "epoch": 0.31328684419899844, "loss": 1.3974735736846924, "loss_ce": 0.0029422855004668236, "loss_iou": 0.55859375, "loss_num": 0.054931640625, "loss_xval": 1.390625, "num_input_tokens_seen": 221675368, "step": 3347 }, { "epoch": 0.3133804464828942, "grad_norm": 19.81547737121582, "learning_rate": 5e-05, "loss": 1.36, "num_input_tokens_seen": 221741076, "step": 3348 }, { "epoch": 0.3133804464828942, "loss": 1.4273831844329834, "loss_ce": 0.00648471899330616, "loss_iou": 0.57421875, "loss_num": 0.05517578125, "loss_xval": 1.421875, "num_input_tokens_seen": 221741076, "step": 3348 }, { "epoch": 0.3134740487667899, "grad_norm": 77.74214935302734, "learning_rate": 5e-05, "loss": 1.3075, "num_input_tokens_seen": 221807116, "step": 3349 }, { "epoch": 0.3134740487667899, "loss": 1.2954914569854736, "loss_ce": 0.004841960500925779, "loss_iou": 0.546875, "loss_num": 0.0390625, "loss_xval": 1.2890625, "num_input_tokens_seen": 221807116, "step": 3349 }, { "epoch": 0.3135676510506856, "grad_norm": 34.61111831665039, "learning_rate": 5e-05, "loss": 1.272, "num_input_tokens_seen": 221873352, "step": 3350 }, { "epoch": 0.3135676510506856, "loss": 1.359041690826416, "loss_ce": 0.004549413453787565, "loss_iou": 0.5546875, "loss_num": 0.04833984375, "loss_xval": 1.3515625, "num_input_tokens_seen": 221873352, "step": 3350 }, { "epoch": 0.3136612533345814, "grad_norm": 19.94378662109375, "learning_rate": 5e-05, "loss": 1.5089, "num_input_tokens_seen": 221938976, "step": 3351 }, { "epoch": 0.3136612533345814, "loss": 1.492689847946167, "loss_ce": 0.0034320466220378876, "loss_iou": 0.63671875, "loss_num": 0.042236328125, "loss_xval": 1.4921875, "num_input_tokens_seen": 221938976, "step": 3351 }, { "epoch": 0.3137548556184771, "grad_norm": 37.82259750366211, "learning_rate": 5e-05, "loss": 1.2216, "num_input_tokens_seen": 222005316, "step": 3352 }, { "epoch": 0.3137548556184771, "loss": 1.036719799041748, "loss_ce": 0.003608190920203924, "loss_iou": 0.400390625, "loss_num": 0.04638671875, "loss_xval": 1.03125, "num_input_tokens_seen": 222005316, "step": 3352 }, { "epoch": 0.3138484579023728, "grad_norm": 22.122650146484375, "learning_rate": 5e-05, "loss": 0.9921, "num_input_tokens_seen": 222070772, "step": 3353 }, { "epoch": 0.3138484579023728, "loss": 0.9437713623046875, "loss_ce": 0.0027923593297600746, "loss_iou": 0.373046875, "loss_num": 0.038818359375, "loss_xval": 0.94140625, "num_input_tokens_seen": 222070772, "step": 3353 }, { "epoch": 0.3139420601862685, "grad_norm": 58.61040115356445, "learning_rate": 5e-05, "loss": 1.3761, "num_input_tokens_seen": 222135596, "step": 3354 }, { "epoch": 0.3139420601862685, "loss": 1.4548401832580566, "loss_ce": 0.004065017215907574, "loss_iou": 0.62890625, "loss_num": 0.0390625, "loss_xval": 1.453125, "num_input_tokens_seen": 222135596, "step": 3354 }, { "epoch": 0.3140356624701643, "grad_norm": 22.35420036315918, "learning_rate": 5e-05, "loss": 1.2741, "num_input_tokens_seen": 222200868, "step": 3355 }, { "epoch": 0.3140356624701643, "loss": 1.1652780771255493, "loss_ce": 0.0026804166845977306, "loss_iou": 0.41796875, "loss_num": 0.0654296875, "loss_xval": 1.1640625, "num_input_tokens_seen": 222200868, "step": 3355 }, { "epoch": 0.31412926475406, "grad_norm": 61.23613739013672, "learning_rate": 5e-05, "loss": 1.5002, "num_input_tokens_seen": 222267444, "step": 3356 }, { "epoch": 0.31412926475406, "loss": 1.6423583030700684, "loss_ce": 0.00710450392216444, "loss_iou": 0.65234375, "loss_num": 0.06689453125, "loss_xval": 1.6328125, "num_input_tokens_seen": 222267444, "step": 3356 }, { "epoch": 0.3142228670379557, "grad_norm": 32.940921783447266, "learning_rate": 5e-05, "loss": 1.4766, "num_input_tokens_seen": 222333788, "step": 3357 }, { "epoch": 0.3142228670379557, "loss": 1.3237287998199463, "loss_ce": 0.006346018519252539, "loss_iou": 0.5390625, "loss_num": 0.04833984375, "loss_xval": 1.3203125, "num_input_tokens_seen": 222333788, "step": 3357 }, { "epoch": 0.3143164693218515, "grad_norm": 20.063535690307617, "learning_rate": 5e-05, "loss": 1.5623, "num_input_tokens_seen": 222400256, "step": 3358 }, { "epoch": 0.3143164693218515, "loss": 1.6014404296875, "loss_ce": 0.005004804581403732, "loss_iou": 0.640625, "loss_num": 0.06298828125, "loss_xval": 1.59375, "num_input_tokens_seen": 222400256, "step": 3358 }, { "epoch": 0.3144100716057472, "grad_norm": 17.425281524658203, "learning_rate": 5e-05, "loss": 1.1413, "num_input_tokens_seen": 222465900, "step": 3359 }, { "epoch": 0.3144100716057472, "loss": 1.0840343236923218, "loss_ce": 0.00542107131332159, "loss_iou": 0.45703125, "loss_num": 0.032958984375, "loss_xval": 1.078125, "num_input_tokens_seen": 222465900, "step": 3359 }, { "epoch": 0.3145036738896429, "grad_norm": 20.119585037231445, "learning_rate": 5e-05, "loss": 1.2671, "num_input_tokens_seen": 222532788, "step": 3360 }, { "epoch": 0.3145036738896429, "loss": 1.2474873065948486, "loss_ce": 0.0048115793615579605, "loss_iou": 0.51953125, "loss_num": 0.040283203125, "loss_xval": 1.2421875, "num_input_tokens_seen": 222532788, "step": 3360 }, { "epoch": 0.3145972761735386, "grad_norm": 15.529327392578125, "learning_rate": 5e-05, "loss": 1.4399, "num_input_tokens_seen": 222599480, "step": 3361 }, { "epoch": 0.3145972761735386, "loss": 1.5852491855621338, "loss_ce": 0.0022413067054003477, "loss_iou": 0.6171875, "loss_num": 0.0703125, "loss_xval": 1.5859375, "num_input_tokens_seen": 222599480, "step": 3361 }, { "epoch": 0.3146908784574344, "grad_norm": 17.559532165527344, "learning_rate": 5e-05, "loss": 1.1172, "num_input_tokens_seen": 222665860, "step": 3362 }, { "epoch": 0.3146908784574344, "loss": 1.0267374515533447, "loss_ce": 0.002079309429973364, "loss_iou": 0.416015625, "loss_num": 0.0380859375, "loss_xval": 1.0234375, "num_input_tokens_seen": 222665860, "step": 3362 }, { "epoch": 0.3147844807413301, "grad_norm": 24.45988655090332, "learning_rate": 5e-05, "loss": 1.4585, "num_input_tokens_seen": 222731708, "step": 3363 }, { "epoch": 0.3147844807413301, "loss": 1.4786128997802734, "loss_ce": 0.0044917454943060875, "loss_iou": 0.59375, "loss_num": 0.056396484375, "loss_xval": 1.4765625, "num_input_tokens_seen": 222731708, "step": 3363 }, { "epoch": 0.3148780830252258, "grad_norm": 37.90068054199219, "learning_rate": 5e-05, "loss": 1.3609, "num_input_tokens_seen": 222798276, "step": 3364 }, { "epoch": 0.3148780830252258, "loss": 1.4048511981964111, "loss_ce": 0.007390298880636692, "loss_iou": 0.59765625, "loss_num": 0.04150390625, "loss_xval": 1.3984375, "num_input_tokens_seen": 222798276, "step": 3364 }, { "epoch": 0.31497168530912156, "grad_norm": 80.03910827636719, "learning_rate": 5e-05, "loss": 1.4927, "num_input_tokens_seen": 222864148, "step": 3365 }, { "epoch": 0.31497168530912156, "loss": 1.597489595413208, "loss_ce": 0.0017864289693534374, "loss_iou": 0.6953125, "loss_num": 0.040283203125, "loss_xval": 1.59375, "num_input_tokens_seen": 222864148, "step": 3365 }, { "epoch": 0.3150652875930173, "grad_norm": 18.732847213745117, "learning_rate": 5e-05, "loss": 1.4418, "num_input_tokens_seen": 222929604, "step": 3366 }, { "epoch": 0.3150652875930173, "loss": 1.1654069423675537, "loss_ce": 0.002809304278343916, "loss_iou": 0.400390625, "loss_num": 0.072265625, "loss_xval": 1.1640625, "num_input_tokens_seen": 222929604, "step": 3366 }, { "epoch": 0.315158889876913, "grad_norm": 24.935232162475586, "learning_rate": 5e-05, "loss": 1.3883, "num_input_tokens_seen": 222996012, "step": 3367 }, { "epoch": 0.315158889876913, "loss": 1.4018690586090088, "loss_ce": 0.004896492697298527, "loss_iou": 0.546875, "loss_num": 0.06103515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 222996012, "step": 3367 }, { "epoch": 0.31525249216080875, "grad_norm": 18.91539764404297, "learning_rate": 5e-05, "loss": 1.5116, "num_input_tokens_seen": 223061952, "step": 3368 }, { "epoch": 0.31525249216080875, "loss": 1.7186686992645264, "loss_ce": 0.00480151642113924, "loss_iou": 0.70703125, "loss_num": 0.059326171875, "loss_xval": 1.7109375, "num_input_tokens_seen": 223061952, "step": 3368 }, { "epoch": 0.31534609444470446, "grad_norm": 32.9536247253418, "learning_rate": 5e-05, "loss": 1.2714, "num_input_tokens_seen": 223128572, "step": 3369 }, { "epoch": 0.31534609444470446, "loss": 1.2080504894256592, "loss_ce": 0.0063903434202075005, "loss_iou": 0.5234375, "loss_num": 0.030517578125, "loss_xval": 1.203125, "num_input_tokens_seen": 223128572, "step": 3369 }, { "epoch": 0.31543969672860017, "grad_norm": 23.890426635742188, "learning_rate": 5e-05, "loss": 1.3285, "num_input_tokens_seen": 223195320, "step": 3370 }, { "epoch": 0.31543969672860017, "loss": 1.3790334463119507, "loss_ce": 0.0064747813157737255, "loss_iou": 0.60546875, "loss_num": 0.03271484375, "loss_xval": 1.375, "num_input_tokens_seen": 223195320, "step": 3370 }, { "epoch": 0.3155332990124959, "grad_norm": 21.61827850341797, "learning_rate": 5e-05, "loss": 1.1121, "num_input_tokens_seen": 223263108, "step": 3371 }, { "epoch": 0.3155332990124959, "loss": 1.2329776287078857, "loss_ce": 0.0034855089616030455, "loss_iou": 0.546875, "loss_num": 0.0264892578125, "loss_xval": 1.2265625, "num_input_tokens_seen": 223263108, "step": 3371 }, { "epoch": 0.31562690129639165, "grad_norm": 16.282045364379883, "learning_rate": 5e-05, "loss": 1.0576, "num_input_tokens_seen": 223329144, "step": 3372 }, { "epoch": 0.31562690129639165, "loss": 0.9404612183570862, "loss_ce": 0.0044260770082473755, "loss_iou": 0.38671875, "loss_num": 0.032958984375, "loss_xval": 0.9375, "num_input_tokens_seen": 223329144, "step": 3372 }, { "epoch": 0.31572050358028736, "grad_norm": 24.378602981567383, "learning_rate": 5e-05, "loss": 1.4535, "num_input_tokens_seen": 223395040, "step": 3373 }, { "epoch": 0.31572050358028736, "loss": 1.4120330810546875, "loss_ce": 0.008712712675333023, "loss_iou": 0.55078125, "loss_num": 0.060546875, "loss_xval": 1.40625, "num_input_tokens_seen": 223395040, "step": 3373 }, { "epoch": 0.31581410586418307, "grad_norm": 37.420528411865234, "learning_rate": 5e-05, "loss": 1.4691, "num_input_tokens_seen": 223460284, "step": 3374 }, { "epoch": 0.31581410586418307, "loss": 1.5664467811584473, "loss_ce": 0.01078267302364111, "loss_iou": 0.6484375, "loss_num": 0.0517578125, "loss_xval": 1.5546875, "num_input_tokens_seen": 223460284, "step": 3374 }, { "epoch": 0.31590770814807884, "grad_norm": 20.938661575317383, "learning_rate": 5e-05, "loss": 1.5368, "num_input_tokens_seen": 223528588, "step": 3375 }, { "epoch": 0.31590770814807884, "loss": 1.5774849653244019, "loss_ce": 0.008148998953402042, "loss_iou": 0.6484375, "loss_num": 0.0546875, "loss_xval": 1.5703125, "num_input_tokens_seen": 223528588, "step": 3375 }, { "epoch": 0.31600131043197455, "grad_norm": 29.081159591674805, "learning_rate": 5e-05, "loss": 1.325, "num_input_tokens_seen": 223594368, "step": 3376 }, { "epoch": 0.31600131043197455, "loss": 1.1612496376037598, "loss_ce": 0.0054878611117601395, "loss_iou": 0.486328125, "loss_num": 0.03662109375, "loss_xval": 1.15625, "num_input_tokens_seen": 223594368, "step": 3376 }, { "epoch": 0.31609491271587026, "grad_norm": 15.730676651000977, "learning_rate": 5e-05, "loss": 1.2645, "num_input_tokens_seen": 223660236, "step": 3377 }, { "epoch": 0.31609491271587026, "loss": 1.3302812576293945, "loss_ce": 0.005086025223135948, "loss_iou": 0.546875, "loss_num": 0.046142578125, "loss_xval": 1.328125, "num_input_tokens_seen": 223660236, "step": 3377 }, { "epoch": 0.31618851499976597, "grad_norm": 28.429227828979492, "learning_rate": 5e-05, "loss": 1.2494, "num_input_tokens_seen": 223726984, "step": 3378 }, { "epoch": 0.31618851499976597, "loss": 1.4166882038116455, "loss_ce": 0.00653198454529047, "loss_iou": 0.59765625, "loss_num": 0.043701171875, "loss_xval": 1.40625, "num_input_tokens_seen": 223726984, "step": 3378 }, { "epoch": 0.31628211728366173, "grad_norm": 27.458473205566406, "learning_rate": 5e-05, "loss": 1.521, "num_input_tokens_seen": 223795444, "step": 3379 }, { "epoch": 0.31628211728366173, "loss": 1.5506752729415894, "loss_ce": 0.008683143183588982, "loss_iou": 0.625, "loss_num": 0.057861328125, "loss_xval": 1.5390625, "num_input_tokens_seen": 223795444, "step": 3379 }, { "epoch": 0.31637571956755745, "grad_norm": 40.49128723144531, "learning_rate": 5e-05, "loss": 1.357, "num_input_tokens_seen": 223860836, "step": 3380 }, { "epoch": 0.31637571956755745, "loss": 1.21141517162323, "loss_ce": 0.0024307710118591785, "loss_iou": 0.484375, "loss_num": 0.047607421875, "loss_xval": 1.2109375, "num_input_tokens_seen": 223860836, "step": 3380 }, { "epoch": 0.31646932185145316, "grad_norm": 21.579544067382812, "learning_rate": 5e-05, "loss": 1.4706, "num_input_tokens_seen": 223927608, "step": 3381 }, { "epoch": 0.31646932185145316, "loss": 1.4991039037704468, "loss_ce": 0.008869605138897896, "loss_iou": 0.6640625, "loss_num": 0.0322265625, "loss_xval": 1.4921875, "num_input_tokens_seen": 223927608, "step": 3381 }, { "epoch": 0.3165629241353489, "grad_norm": 18.886295318603516, "learning_rate": 5e-05, "loss": 1.3051, "num_input_tokens_seen": 223994040, "step": 3382 }, { "epoch": 0.3165629241353489, "loss": 1.3103877305984497, "loss_ce": 0.003258854616433382, "loss_iou": 0.5, "loss_num": 0.061279296875, "loss_xval": 1.3046875, "num_input_tokens_seen": 223994040, "step": 3382 }, { "epoch": 0.31665652641924463, "grad_norm": 16.55956268310547, "learning_rate": 5e-05, "loss": 1.0806, "num_input_tokens_seen": 224059484, "step": 3383 }, { "epoch": 0.31665652641924463, "loss": 1.2830233573913574, "loss_ce": 0.00372649310156703, "loss_iou": 0.515625, "loss_num": 0.050048828125, "loss_xval": 1.28125, "num_input_tokens_seen": 224059484, "step": 3383 }, { "epoch": 0.31675012870314034, "grad_norm": 29.22968101501465, "learning_rate": 5e-05, "loss": 1.4675, "num_input_tokens_seen": 224125676, "step": 3384 }, { "epoch": 0.31675012870314034, "loss": 1.3845795392990112, "loss_ce": 0.004208347760140896, "loss_iou": 0.5546875, "loss_num": 0.054443359375, "loss_xval": 1.3828125, "num_input_tokens_seen": 224125676, "step": 3384 }, { "epoch": 0.3168437309870361, "grad_norm": 45.01770782470703, "learning_rate": 5e-05, "loss": 1.7641, "num_input_tokens_seen": 224192464, "step": 3385 }, { "epoch": 0.3168437309870361, "loss": 1.734407663345337, "loss_ce": 0.0044272299855947495, "loss_iou": 0.703125, "loss_num": 0.06396484375, "loss_xval": 1.7265625, "num_input_tokens_seen": 224192464, "step": 3385 }, { "epoch": 0.3169373332709318, "grad_norm": 131.20025634765625, "learning_rate": 5e-05, "loss": 1.6422, "num_input_tokens_seen": 224259108, "step": 3386 }, { "epoch": 0.3169373332709318, "loss": 1.508176326751709, "loss_ce": 0.005246670916676521, "loss_iou": 0.6171875, "loss_num": 0.05322265625, "loss_xval": 1.5, "num_input_tokens_seen": 224259108, "step": 3386 }, { "epoch": 0.31703093555482753, "grad_norm": 27.698530197143555, "learning_rate": 5e-05, "loss": 1.5159, "num_input_tokens_seen": 224325392, "step": 3387 }, { "epoch": 0.31703093555482753, "loss": 1.4979016780853271, "loss_ce": 0.003272692672908306, "loss_iou": 0.6015625, "loss_num": 0.05810546875, "loss_xval": 1.4921875, "num_input_tokens_seen": 224325392, "step": 3387 }, { "epoch": 0.31712453783872324, "grad_norm": 44.2696418762207, "learning_rate": 5e-05, "loss": 1.4865, "num_input_tokens_seen": 224391304, "step": 3388 }, { "epoch": 0.31712453783872324, "loss": 1.6612162590026855, "loss_ce": 0.004966217093169689, "loss_iou": 0.64453125, "loss_num": 0.07373046875, "loss_xval": 1.65625, "num_input_tokens_seen": 224391304, "step": 3388 }, { "epoch": 0.317218140122619, "grad_norm": 42.36366271972656, "learning_rate": 5e-05, "loss": 1.5931, "num_input_tokens_seen": 224457064, "step": 3389 }, { "epoch": 0.317218140122619, "loss": 1.5112340450286865, "loss_ce": 0.004886340349912643, "loss_iou": 0.5546875, "loss_num": 0.07958984375, "loss_xval": 1.5078125, "num_input_tokens_seen": 224457064, "step": 3389 }, { "epoch": 0.3173117424065147, "grad_norm": 17.78668975830078, "learning_rate": 5e-05, "loss": 1.3423, "num_input_tokens_seen": 224523372, "step": 3390 }, { "epoch": 0.3173117424065147, "loss": 1.2294988632202148, "loss_ce": 0.0044012125581502914, "loss_iou": 0.53515625, "loss_num": 0.031494140625, "loss_xval": 1.2265625, "num_input_tokens_seen": 224523372, "step": 3390 }, { "epoch": 0.31740534469041043, "grad_norm": 35.49195098876953, "learning_rate": 5e-05, "loss": 1.2822, "num_input_tokens_seen": 224590064, "step": 3391 }, { "epoch": 0.31740534469041043, "loss": 1.2628147602081299, "loss_ce": 0.00805398728698492, "loss_iou": 0.486328125, "loss_num": 0.056884765625, "loss_xval": 1.2578125, "num_input_tokens_seen": 224590064, "step": 3391 }, { "epoch": 0.3174989469743062, "grad_norm": 29.660625457763672, "learning_rate": 5e-05, "loss": 1.7155, "num_input_tokens_seen": 224656460, "step": 3392 }, { "epoch": 0.3174989469743062, "loss": 1.8898135423660278, "loss_ce": 0.006024497095495462, "loss_iou": 0.7890625, "loss_num": 0.061767578125, "loss_xval": 1.8828125, "num_input_tokens_seen": 224656460, "step": 3392 }, { "epoch": 0.3175925492582019, "grad_norm": 33.64856719970703, "learning_rate": 5e-05, "loss": 1.2783, "num_input_tokens_seen": 224722244, "step": 3393 }, { "epoch": 0.3175925492582019, "loss": 1.1382837295532227, "loss_ce": 0.010598192922770977, "loss_iou": 0.419921875, "loss_num": 0.0576171875, "loss_xval": 1.125, "num_input_tokens_seen": 224722244, "step": 3393 }, { "epoch": 0.3176861515420976, "grad_norm": 62.0196418762207, "learning_rate": 5e-05, "loss": 1.2391, "num_input_tokens_seen": 224788924, "step": 3394 }, { "epoch": 0.3176861515420976, "loss": 1.3180869817733765, "loss_ce": 0.004122164100408554, "loss_iou": 0.5625, "loss_num": 0.037841796875, "loss_xval": 1.3125, "num_input_tokens_seen": 224788924, "step": 3394 }, { "epoch": 0.3177797538259933, "grad_norm": 62.28541564941406, "learning_rate": 5e-05, "loss": 1.5308, "num_input_tokens_seen": 224855448, "step": 3395 }, { "epoch": 0.3177797538259933, "loss": 1.6843470335006714, "loss_ce": 0.003682971466332674, "loss_iou": 0.65234375, "loss_num": 0.07470703125, "loss_xval": 1.6796875, "num_input_tokens_seen": 224855448, "step": 3395 }, { "epoch": 0.3178733561098891, "grad_norm": 50.235809326171875, "learning_rate": 5e-05, "loss": 1.5338, "num_input_tokens_seen": 224920948, "step": 3396 }, { "epoch": 0.3178733561098891, "loss": 1.6580981016159058, "loss_ce": 0.007707488723099232, "loss_iou": 0.66015625, "loss_num": 0.06494140625, "loss_xval": 1.6484375, "num_input_tokens_seen": 224920948, "step": 3396 }, { "epoch": 0.3179669583937848, "grad_norm": 23.435644149780273, "learning_rate": 5e-05, "loss": 1.6499, "num_input_tokens_seen": 224986096, "step": 3397 }, { "epoch": 0.3179669583937848, "loss": 1.7257496118545532, "loss_ce": 0.007487837225198746, "loss_iou": 0.72265625, "loss_num": 0.055419921875, "loss_xval": 1.71875, "num_input_tokens_seen": 224986096, "step": 3397 }, { "epoch": 0.3180605606776805, "grad_norm": 16.558685302734375, "learning_rate": 5e-05, "loss": 1.2941, "num_input_tokens_seen": 225051804, "step": 3398 }, { "epoch": 0.3180605606776805, "loss": 1.201046347618103, "loss_ce": 0.004757278133183718, "loss_iou": 0.49609375, "loss_num": 0.040771484375, "loss_xval": 1.1953125, "num_input_tokens_seen": 225051804, "step": 3398 }, { "epoch": 0.3181541629615763, "grad_norm": 35.21195602416992, "learning_rate": 5e-05, "loss": 1.3751, "num_input_tokens_seen": 225117612, "step": 3399 }, { "epoch": 0.3181541629615763, "loss": 1.3540635108947754, "loss_ce": 0.0073838382959365845, "loss_iou": 0.5078125, "loss_num": 0.06689453125, "loss_xval": 1.34375, "num_input_tokens_seen": 225117612, "step": 3399 }, { "epoch": 0.318247765245472, "grad_norm": 26.134944915771484, "learning_rate": 5e-05, "loss": 1.3678, "num_input_tokens_seen": 225184460, "step": 3400 }, { "epoch": 0.318247765245472, "loss": 1.3506264686584473, "loss_ce": 0.0034585148096084595, "loss_iou": 0.59375, "loss_num": 0.031494140625, "loss_xval": 1.34375, "num_input_tokens_seen": 225184460, "step": 3400 }, { "epoch": 0.3183413675293677, "grad_norm": 36.55769729614258, "learning_rate": 5e-05, "loss": 1.3126, "num_input_tokens_seen": 225251700, "step": 3401 }, { "epoch": 0.3183413675293677, "loss": 1.3989133834838867, "loss_ce": 0.011218111030757427, "loss_iou": 0.53515625, "loss_num": 0.06396484375, "loss_xval": 1.390625, "num_input_tokens_seen": 225251700, "step": 3401 }, { "epoch": 0.31843496981326347, "grad_norm": 22.841083526611328, "learning_rate": 5e-05, "loss": 1.2714, "num_input_tokens_seen": 225317468, "step": 3402 }, { "epoch": 0.31843496981326347, "loss": 1.4070237874984741, "loss_ce": 0.006633143872022629, "loss_iou": 0.57421875, "loss_num": 0.05126953125, "loss_xval": 1.3984375, "num_input_tokens_seen": 225317468, "step": 3402 }, { "epoch": 0.3185285720971592, "grad_norm": 23.36016845703125, "learning_rate": 5e-05, "loss": 1.4898, "num_input_tokens_seen": 225383468, "step": 3403 }, { "epoch": 0.3185285720971592, "loss": 1.3079955577850342, "loss_ce": 0.007549961097538471, "loss_iou": 0.51953125, "loss_num": 0.052734375, "loss_xval": 1.296875, "num_input_tokens_seen": 225383468, "step": 3403 }, { "epoch": 0.3186221743810549, "grad_norm": 30.088396072387695, "learning_rate": 5e-05, "loss": 1.5034, "num_input_tokens_seen": 225449732, "step": 3404 }, { "epoch": 0.3186221743810549, "loss": 1.3973443508148193, "loss_ce": 0.004278029780834913, "loss_iou": 0.5625, "loss_num": 0.05419921875, "loss_xval": 1.390625, "num_input_tokens_seen": 225449732, "step": 3404 }, { "epoch": 0.3187157766649506, "grad_norm": 20.8262882232666, "learning_rate": 5e-05, "loss": 1.4677, "num_input_tokens_seen": 225515332, "step": 3405 }, { "epoch": 0.3187157766649506, "loss": 1.4387317895889282, "loss_ce": 0.0031849045772105455, "loss_iou": 0.64453125, "loss_num": 0.0289306640625, "loss_xval": 1.4375, "num_input_tokens_seen": 225515332, "step": 3405 }, { "epoch": 0.31880937894884637, "grad_norm": 15.956134796142578, "learning_rate": 5e-05, "loss": 1.2472, "num_input_tokens_seen": 225582580, "step": 3406 }, { "epoch": 0.31880937894884637, "loss": 1.289783000946045, "loss_ce": 0.003161872271448374, "loss_iou": 0.52734375, "loss_num": 0.046142578125, "loss_xval": 1.2890625, "num_input_tokens_seen": 225582580, "step": 3406 }, { "epoch": 0.3189029812327421, "grad_norm": 27.982378005981445, "learning_rate": 5e-05, "loss": 1.2075, "num_input_tokens_seen": 225649324, "step": 3407 }, { "epoch": 0.3189029812327421, "loss": 1.2502256631851196, "loss_ce": 0.0055967336520552635, "loss_iou": 0.48828125, "loss_num": 0.05419921875, "loss_xval": 1.2421875, "num_input_tokens_seen": 225649324, "step": 3407 }, { "epoch": 0.3189965835166378, "grad_norm": 25.254352569580078, "learning_rate": 5e-05, "loss": 1.5822, "num_input_tokens_seen": 225715472, "step": 3408 }, { "epoch": 0.3189965835166378, "loss": 1.3646240234375, "loss_ce": 0.008178685791790485, "loss_iou": 0.52734375, "loss_num": 0.060546875, "loss_xval": 1.359375, "num_input_tokens_seen": 225715472, "step": 3408 }, { "epoch": 0.31909018580053355, "grad_norm": 29.55914878845215, "learning_rate": 5e-05, "loss": 1.3099, "num_input_tokens_seen": 225781784, "step": 3409 }, { "epoch": 0.31909018580053355, "loss": 1.1618421077728271, "loss_ce": 0.004127358552068472, "loss_iou": 0.4765625, "loss_num": 0.041015625, "loss_xval": 1.15625, "num_input_tokens_seen": 225781784, "step": 3409 }, { "epoch": 0.31918378808442927, "grad_norm": 105.87574768066406, "learning_rate": 5e-05, "loss": 1.4326, "num_input_tokens_seen": 225848616, "step": 3410 }, { "epoch": 0.31918378808442927, "loss": 1.558828353881836, "loss_ce": 0.010000256821513176, "loss_iou": 0.6328125, "loss_num": 0.056884765625, "loss_xval": 1.546875, "num_input_tokens_seen": 225848616, "step": 3410 }, { "epoch": 0.319277390368325, "grad_norm": 41.374290466308594, "learning_rate": 5e-05, "loss": 1.4072, "num_input_tokens_seen": 225915488, "step": 3411 }, { "epoch": 0.319277390368325, "loss": 1.2789338827133179, "loss_ce": 0.0057404618710279465, "loss_iou": 0.5, "loss_num": 0.053955078125, "loss_xval": 1.2734375, "num_input_tokens_seen": 225915488, "step": 3411 }, { "epoch": 0.3193709926522207, "grad_norm": 83.49828338623047, "learning_rate": 5e-05, "loss": 1.2053, "num_input_tokens_seen": 225981768, "step": 3412 }, { "epoch": 0.3193709926522207, "loss": 1.3126630783081055, "loss_ce": 0.0050458889454603195, "loss_iou": 0.515625, "loss_num": 0.0556640625, "loss_xval": 1.3046875, "num_input_tokens_seen": 225981768, "step": 3412 }, { "epoch": 0.31946459493611645, "grad_norm": 13.549079895019531, "learning_rate": 5e-05, "loss": 1.1658, "num_input_tokens_seen": 226048652, "step": 3413 }, { "epoch": 0.31946459493611645, "loss": 1.2378922700881958, "loss_ce": 0.005958714056760073, "loss_iou": 0.490234375, "loss_num": 0.05029296875, "loss_xval": 1.234375, "num_input_tokens_seen": 226048652, "step": 3413 }, { "epoch": 0.31955819722001216, "grad_norm": 36.342227935791016, "learning_rate": 5e-05, "loss": 1.499, "num_input_tokens_seen": 226114952, "step": 3414 }, { "epoch": 0.31955819722001216, "loss": 1.4749104976654053, "loss_ce": 0.004207334015518427, "loss_iou": 0.578125, "loss_num": 0.062255859375, "loss_xval": 1.46875, "num_input_tokens_seen": 226114952, "step": 3414 }, { "epoch": 0.3196517995039079, "grad_norm": 13.046402931213379, "learning_rate": 5e-05, "loss": 1.1797, "num_input_tokens_seen": 226181568, "step": 3415 }, { "epoch": 0.3196517995039079, "loss": 1.1467552185058594, "loss_ce": 0.005397803150117397, "loss_iou": 0.5, "loss_num": 0.0283203125, "loss_xval": 1.140625, "num_input_tokens_seen": 226181568, "step": 3415 }, { "epoch": 0.31974540178780364, "grad_norm": 42.7341423034668, "learning_rate": 5e-05, "loss": 1.2816, "num_input_tokens_seen": 226247452, "step": 3416 }, { "epoch": 0.31974540178780364, "loss": 1.2671403884887695, "loss_ce": 0.01176926214247942, "loss_iou": 0.4921875, "loss_num": 0.05419921875, "loss_xval": 1.2578125, "num_input_tokens_seen": 226247452, "step": 3416 }, { "epoch": 0.31983900407169935, "grad_norm": 19.674161911010742, "learning_rate": 5e-05, "loss": 1.3266, "num_input_tokens_seen": 226314116, "step": 3417 }, { "epoch": 0.31983900407169935, "loss": 1.302283763885498, "loss_ce": 0.003943867515772581, "loss_iou": 0.5078125, "loss_num": 0.057373046875, "loss_xval": 1.296875, "num_input_tokens_seen": 226314116, "step": 3417 }, { "epoch": 0.31993260635559506, "grad_norm": 32.49250411987305, "learning_rate": 5e-05, "loss": 1.2044, "num_input_tokens_seen": 226380248, "step": 3418 }, { "epoch": 0.31993260635559506, "loss": 1.338599443435669, "loss_ce": 0.010474497452378273, "loss_iou": 0.55859375, "loss_num": 0.042236328125, "loss_xval": 1.328125, "num_input_tokens_seen": 226380248, "step": 3418 }, { "epoch": 0.32002620863949083, "grad_norm": 20.575708389282227, "learning_rate": 5e-05, "loss": 1.2589, "num_input_tokens_seen": 226447452, "step": 3419 }, { "epoch": 0.32002620863949083, "loss": 1.138270616531372, "loss_ce": 0.0033829244785010815, "loss_iou": 0.47265625, "loss_num": 0.0380859375, "loss_xval": 1.1328125, "num_input_tokens_seen": 226447452, "step": 3419 }, { "epoch": 0.32011981092338654, "grad_norm": 16.22533416748047, "learning_rate": 5e-05, "loss": 1.2253, "num_input_tokens_seen": 226513744, "step": 3420 }, { "epoch": 0.32011981092338654, "loss": 1.3674187660217285, "loss_ce": 0.003160955850034952, "loss_iou": 0.5625, "loss_num": 0.04833984375, "loss_xval": 1.3671875, "num_input_tokens_seen": 226513744, "step": 3420 }, { "epoch": 0.32021341320728225, "grad_norm": 24.764902114868164, "learning_rate": 5e-05, "loss": 1.2549, "num_input_tokens_seen": 226581080, "step": 3421 }, { "epoch": 0.32021341320728225, "loss": 1.2663476467132568, "loss_ce": 0.0046288808807730675, "loss_iou": 0.484375, "loss_num": 0.058837890625, "loss_xval": 1.265625, "num_input_tokens_seen": 226581080, "step": 3421 }, { "epoch": 0.32030701549117796, "grad_norm": 41.2261962890625, "learning_rate": 5e-05, "loss": 1.5733, "num_input_tokens_seen": 226647828, "step": 3422 }, { "epoch": 0.32030701549117796, "loss": 1.6071685552597046, "loss_ce": 0.008535739034414291, "loss_iou": 0.66796875, "loss_num": 0.052490234375, "loss_xval": 1.6015625, "num_input_tokens_seen": 226647828, "step": 3422 }, { "epoch": 0.3204006177750737, "grad_norm": 23.013776779174805, "learning_rate": 5e-05, "loss": 1.5592, "num_input_tokens_seen": 226714868, "step": 3423 }, { "epoch": 0.3204006177750737, "loss": 1.6581511497497559, "loss_ce": 0.005319178104400635, "loss_iou": 0.6875, "loss_num": 0.056396484375, "loss_xval": 1.65625, "num_input_tokens_seen": 226714868, "step": 3423 }, { "epoch": 0.32049422005896944, "grad_norm": 17.5068359375, "learning_rate": 5e-05, "loss": 0.9892, "num_input_tokens_seen": 226780336, "step": 3424 }, { "epoch": 0.32049422005896944, "loss": 0.9788135290145874, "loss_ce": 0.00542483339086175, "loss_iou": 0.390625, "loss_num": 0.03857421875, "loss_xval": 0.97265625, "num_input_tokens_seen": 226780336, "step": 3424 }, { "epoch": 0.32058782234286515, "grad_norm": 22.214794158935547, "learning_rate": 5e-05, "loss": 1.282, "num_input_tokens_seen": 226846332, "step": 3425 }, { "epoch": 0.32058782234286515, "loss": 1.4596538543701172, "loss_ce": 0.007505323737859726, "loss_iou": 0.57421875, "loss_num": 0.060546875, "loss_xval": 1.453125, "num_input_tokens_seen": 226846332, "step": 3425 }, { "epoch": 0.3206814246267609, "grad_norm": 23.288240432739258, "learning_rate": 5e-05, "loss": 1.4074, "num_input_tokens_seen": 226912540, "step": 3426 }, { "epoch": 0.3206814246267609, "loss": 1.1024901866912842, "loss_ce": 0.0038573648780584335, "loss_iou": 0.482421875, "loss_num": 0.027099609375, "loss_xval": 1.1015625, "num_input_tokens_seen": 226912540, "step": 3426 }, { "epoch": 0.3207750269106566, "grad_norm": 28.18408966064453, "learning_rate": 5e-05, "loss": 1.3961, "num_input_tokens_seen": 226979152, "step": 3427 }, { "epoch": 0.3207750269106566, "loss": 1.4781715869903564, "loss_ce": 0.006369719281792641, "loss_iou": 0.57421875, "loss_num": 0.06494140625, "loss_xval": 1.46875, "num_input_tokens_seen": 226979152, "step": 3427 }, { "epoch": 0.32086862919455234, "grad_norm": 24.60514259338379, "learning_rate": 5e-05, "loss": 1.099, "num_input_tokens_seen": 227044600, "step": 3428 }, { "epoch": 0.32086862919455234, "loss": 1.233229637145996, "loss_ce": 0.005690678488463163, "loss_iou": 0.5390625, "loss_num": 0.0291748046875, "loss_xval": 1.2265625, "num_input_tokens_seen": 227044600, "step": 3428 }, { "epoch": 0.3209622314784481, "grad_norm": 21.425580978393555, "learning_rate": 5e-05, "loss": 1.3, "num_input_tokens_seen": 227110888, "step": 3429 }, { "epoch": 0.3209622314784481, "loss": 1.3242912292480469, "loss_ce": 0.0044670719653368, "loss_iou": 0.51953125, "loss_num": 0.056640625, "loss_xval": 1.3203125, "num_input_tokens_seen": 227110888, "step": 3429 }, { "epoch": 0.3210558337623438, "grad_norm": 27.703676223754883, "learning_rate": 5e-05, "loss": 1.2957, "num_input_tokens_seen": 227176600, "step": 3430 }, { "epoch": 0.3210558337623438, "loss": 1.097275972366333, "loss_ce": 0.0035258883144706488, "loss_iou": 0.486328125, "loss_num": 0.0240478515625, "loss_xval": 1.09375, "num_input_tokens_seen": 227176600, "step": 3430 }, { "epoch": 0.3211494360462395, "grad_norm": 37.83911895751953, "learning_rate": 5e-05, "loss": 1.3346, "num_input_tokens_seen": 227242196, "step": 3431 }, { "epoch": 0.3211494360462395, "loss": 1.5346314907073975, "loss_ce": 0.008264346979558468, "loss_iou": 0.578125, "loss_num": 0.07421875, "loss_xval": 1.5234375, "num_input_tokens_seen": 227242196, "step": 3431 }, { "epoch": 0.32124303833013523, "grad_norm": 37.454063415527344, "learning_rate": 5e-05, "loss": 1.413, "num_input_tokens_seen": 227309504, "step": 3432 }, { "epoch": 0.32124303833013523, "loss": 1.4191879034042358, "loss_ce": 0.005125434137880802, "loss_iou": 0.6015625, "loss_num": 0.0419921875, "loss_xval": 1.4140625, "num_input_tokens_seen": 227309504, "step": 3432 }, { "epoch": 0.321336640614031, "grad_norm": 25.266143798828125, "learning_rate": 5e-05, "loss": 1.3904, "num_input_tokens_seen": 227375672, "step": 3433 }, { "epoch": 0.321336640614031, "loss": 1.439277172088623, "loss_ce": 0.004218521527945995, "loss_iou": 0.57421875, "loss_num": 0.0576171875, "loss_xval": 1.4375, "num_input_tokens_seen": 227375672, "step": 3433 }, { "epoch": 0.3214302428979267, "grad_norm": 33.49772644042969, "learning_rate": 5e-05, "loss": 1.4032, "num_input_tokens_seen": 227441280, "step": 3434 }, { "epoch": 0.3214302428979267, "loss": 1.3680124282836914, "loss_ce": 0.012055323459208012, "loss_iou": 0.5703125, "loss_num": 0.042724609375, "loss_xval": 1.359375, "num_input_tokens_seen": 227441280, "step": 3434 }, { "epoch": 0.3215238451818224, "grad_norm": 39.83012771606445, "learning_rate": 5e-05, "loss": 1.585, "num_input_tokens_seen": 227507384, "step": 3435 }, { "epoch": 0.3215238451818224, "loss": 1.7486791610717773, "loss_ce": 0.004538507200777531, "loss_iou": 0.671875, "loss_num": 0.08056640625, "loss_xval": 1.7421875, "num_input_tokens_seen": 227507384, "step": 3435 }, { "epoch": 0.3216174474657182, "grad_norm": 108.656005859375, "learning_rate": 5e-05, "loss": 1.8334, "num_input_tokens_seen": 227574156, "step": 3436 }, { "epoch": 0.3216174474657182, "loss": 1.5953823328018188, "loss_ce": 0.004073723219335079, "loss_iou": 0.7109375, "loss_num": 0.033935546875, "loss_xval": 1.59375, "num_input_tokens_seen": 227574156, "step": 3436 }, { "epoch": 0.3217110497496139, "grad_norm": 32.74374008178711, "learning_rate": 5e-05, "loss": 1.408, "num_input_tokens_seen": 227640060, "step": 3437 }, { "epoch": 0.3217110497496139, "loss": 1.5385544300079346, "loss_ce": 0.006327945739030838, "loss_iou": 0.6015625, "loss_num": 0.06494140625, "loss_xval": 1.53125, "num_input_tokens_seen": 227640060, "step": 3437 }, { "epoch": 0.3218046520335096, "grad_norm": 22.27479362487793, "learning_rate": 5e-05, "loss": 1.311, "num_input_tokens_seen": 227704916, "step": 3438 }, { "epoch": 0.3218046520335096, "loss": 1.1508591175079346, "loss_ce": 0.006083630956709385, "loss_iou": 0.4765625, "loss_num": 0.038818359375, "loss_xval": 1.1484375, "num_input_tokens_seen": 227704916, "step": 3438 }, { "epoch": 0.3218982543174053, "grad_norm": 163.22607421875, "learning_rate": 5e-05, "loss": 1.4206, "num_input_tokens_seen": 227771108, "step": 3439 }, { "epoch": 0.3218982543174053, "loss": 1.1678868532180786, "loss_ce": 0.006754089146852493, "loss_iou": 0.4296875, "loss_num": 0.060546875, "loss_xval": 1.1640625, "num_input_tokens_seen": 227771108, "step": 3439 }, { "epoch": 0.3219918566013011, "grad_norm": 24.382034301757812, "learning_rate": 5e-05, "loss": 1.4159, "num_input_tokens_seen": 227837164, "step": 3440 }, { "epoch": 0.3219918566013011, "loss": 1.493950366973877, "loss_ce": 0.002373296767473221, "loss_iou": 0.5625, "loss_num": 0.0732421875, "loss_xval": 1.4921875, "num_input_tokens_seen": 227837164, "step": 3440 }, { "epoch": 0.3220854588851968, "grad_norm": 20.726337432861328, "learning_rate": 5e-05, "loss": 1.4626, "num_input_tokens_seen": 227903688, "step": 3441 }, { "epoch": 0.3220854588851968, "loss": 1.488643765449524, "loss_ce": 0.004757056478410959, "loss_iou": 0.59375, "loss_num": 0.059814453125, "loss_xval": 1.484375, "num_input_tokens_seen": 227903688, "step": 3441 }, { "epoch": 0.3221790611690925, "grad_norm": 22.031131744384766, "learning_rate": 5e-05, "loss": 1.703, "num_input_tokens_seen": 227970208, "step": 3442 }, { "epoch": 0.3221790611690925, "loss": 1.785135269165039, "loss_ce": 0.005838434211909771, "loss_iou": 0.6484375, "loss_num": 0.0966796875, "loss_xval": 1.78125, "num_input_tokens_seen": 227970208, "step": 3442 }, { "epoch": 0.3222726634529883, "grad_norm": 20.484830856323242, "learning_rate": 5e-05, "loss": 1.5101, "num_input_tokens_seen": 228036752, "step": 3443 }, { "epoch": 0.3222726634529883, "loss": 1.3310368061065674, "loss_ce": 0.0019352753879502416, "loss_iou": 0.515625, "loss_num": 0.05908203125, "loss_xval": 1.328125, "num_input_tokens_seen": 228036752, "step": 3443 }, { "epoch": 0.322366265736884, "grad_norm": 23.667524337768555, "learning_rate": 5e-05, "loss": 1.2262, "num_input_tokens_seen": 228103348, "step": 3444 }, { "epoch": 0.322366265736884, "loss": 1.22589111328125, "loss_ce": 0.005676334723830223, "loss_iou": 0.50390625, "loss_num": 0.0419921875, "loss_xval": 1.21875, "num_input_tokens_seen": 228103348, "step": 3444 }, { "epoch": 0.3224598680207797, "grad_norm": 23.98842430114746, "learning_rate": 5e-05, "loss": 1.4692, "num_input_tokens_seen": 228168600, "step": 3445 }, { "epoch": 0.3224598680207797, "loss": 1.5688738822937012, "loss_ce": 0.008815182372927666, "loss_iou": 0.6015625, "loss_num": 0.0712890625, "loss_xval": 1.5625, "num_input_tokens_seen": 228168600, "step": 3445 }, { "epoch": 0.32255347030467546, "grad_norm": 31.966474533081055, "learning_rate": 5e-05, "loss": 1.4171, "num_input_tokens_seen": 228234136, "step": 3446 }, { "epoch": 0.32255347030467546, "loss": 1.3432668447494507, "loss_ce": 0.008305962197482586, "loss_iou": 0.57421875, "loss_num": 0.037841796875, "loss_xval": 1.3359375, "num_input_tokens_seen": 228234136, "step": 3446 }, { "epoch": 0.32264707258857117, "grad_norm": 25.92433738708496, "learning_rate": 5e-05, "loss": 1.6404, "num_input_tokens_seen": 228300360, "step": 3447 }, { "epoch": 0.32264707258857117, "loss": 1.4639012813568115, "loss_ce": 0.008823145180940628, "loss_iou": 0.6171875, "loss_num": 0.044189453125, "loss_xval": 1.453125, "num_input_tokens_seen": 228300360, "step": 3447 }, { "epoch": 0.3227406748724669, "grad_norm": 24.386028289794922, "learning_rate": 5e-05, "loss": 1.1537, "num_input_tokens_seen": 228367160, "step": 3448 }, { "epoch": 0.3227406748724669, "loss": 1.1790634393692017, "loss_ce": 0.00523528503254056, "loss_iou": 0.482421875, "loss_num": 0.041748046875, "loss_xval": 1.171875, "num_input_tokens_seen": 228367160, "step": 3448 }, { "epoch": 0.3228342771563626, "grad_norm": 36.26959228515625, "learning_rate": 5e-05, "loss": 1.2385, "num_input_tokens_seen": 228434172, "step": 3449 }, { "epoch": 0.3228342771563626, "loss": 1.2111108303070068, "loss_ce": 0.007497443817555904, "loss_iou": 0.48046875, "loss_num": 0.048583984375, "loss_xval": 1.203125, "num_input_tokens_seen": 228434172, "step": 3449 }, { "epoch": 0.32292787944025836, "grad_norm": 24.693557739257812, "learning_rate": 5e-05, "loss": 1.3832, "num_input_tokens_seen": 228501244, "step": 3450 }, { "epoch": 0.32292787944025836, "loss": 1.470740556716919, "loss_ce": 0.001990544144064188, "loss_iou": 0.5546875, "loss_num": 0.07275390625, "loss_xval": 1.46875, "num_input_tokens_seen": 228501244, "step": 3450 }, { "epoch": 0.32302148172415407, "grad_norm": 20.144039154052734, "learning_rate": 5e-05, "loss": 1.4975, "num_input_tokens_seen": 228566904, "step": 3451 }, { "epoch": 0.32302148172415407, "loss": 1.4781911373138428, "loss_ce": 0.010417640209197998, "loss_iou": 0.56640625, "loss_num": 0.06689453125, "loss_xval": 1.46875, "num_input_tokens_seen": 228566904, "step": 3451 }, { "epoch": 0.3231150840080498, "grad_norm": 22.918781280517578, "learning_rate": 5e-05, "loss": 1.4302, "num_input_tokens_seen": 228631652, "step": 3452 }, { "epoch": 0.3231150840080498, "loss": 1.281588077545166, "loss_ce": 0.010103719308972359, "loss_iou": 0.44921875, "loss_num": 0.07470703125, "loss_xval": 1.2734375, "num_input_tokens_seen": 228631652, "step": 3452 }, { "epoch": 0.32320868629194555, "grad_norm": 32.87624740600586, "learning_rate": 5e-05, "loss": 1.4831, "num_input_tokens_seen": 228698160, "step": 3453 }, { "epoch": 0.32320868629194555, "loss": 1.4957568645477295, "loss_ce": 0.005522478371858597, "loss_iou": 0.65625, "loss_num": 0.034912109375, "loss_xval": 1.4921875, "num_input_tokens_seen": 228698160, "step": 3453 }, { "epoch": 0.32330228857584126, "grad_norm": 27.107770919799805, "learning_rate": 5e-05, "loss": 1.5809, "num_input_tokens_seen": 228765000, "step": 3454 }, { "epoch": 0.32330228857584126, "loss": 1.6188907623291016, "loss_ce": 0.003656448097899556, "loss_iou": 0.640625, "loss_num": 0.06689453125, "loss_xval": 1.6171875, "num_input_tokens_seen": 228765000, "step": 3454 }, { "epoch": 0.32339589085973697, "grad_norm": 44.22768783569336, "learning_rate": 5e-05, "loss": 1.3453, "num_input_tokens_seen": 228831448, "step": 3455 }, { "epoch": 0.32339589085973697, "loss": 1.2090684175491333, "loss_ce": 0.0020371791906654835, "loss_iou": 0.4765625, "loss_num": 0.050537109375, "loss_xval": 1.203125, "num_input_tokens_seen": 228831448, "step": 3455 }, { "epoch": 0.3234894931436327, "grad_norm": 27.273752212524414, "learning_rate": 5e-05, "loss": 1.4877, "num_input_tokens_seen": 228898152, "step": 3456 }, { "epoch": 0.3234894931436327, "loss": 1.4453917741775513, "loss_ce": 0.007891766726970673, "loss_iou": 0.60546875, "loss_num": 0.046142578125, "loss_xval": 1.4375, "num_input_tokens_seen": 228898152, "step": 3456 }, { "epoch": 0.32358309542752844, "grad_norm": 21.556808471679688, "learning_rate": 5e-05, "loss": 1.3753, "num_input_tokens_seen": 228964848, "step": 3457 }, { "epoch": 0.32358309542752844, "loss": 1.4642695188522339, "loss_ce": 0.008214849047362804, "loss_iou": 0.58984375, "loss_num": 0.0546875, "loss_xval": 1.453125, "num_input_tokens_seen": 228964848, "step": 3457 }, { "epoch": 0.32367669771142416, "grad_norm": 19.960359573364258, "learning_rate": 5e-05, "loss": 1.3073, "num_input_tokens_seen": 229031824, "step": 3458 }, { "epoch": 0.32367669771142416, "loss": 1.2399299144744873, "loss_ce": 0.004090023692697287, "loss_iou": 0.53515625, "loss_num": 0.03271484375, "loss_xval": 1.234375, "num_input_tokens_seen": 229031824, "step": 3458 }, { "epoch": 0.32377029999531987, "grad_norm": 18.274446487426758, "learning_rate": 5e-05, "loss": 1.3985, "num_input_tokens_seen": 229098888, "step": 3459 }, { "epoch": 0.32377029999531987, "loss": 1.463757038116455, "loss_ce": 0.0033078021369874477, "loss_iou": 0.60546875, "loss_num": 0.049560546875, "loss_xval": 1.4609375, "num_input_tokens_seen": 229098888, "step": 3459 }, { "epoch": 0.32386390227921563, "grad_norm": 26.443449020385742, "learning_rate": 5e-05, "loss": 1.2965, "num_input_tokens_seen": 229164808, "step": 3460 }, { "epoch": 0.32386390227921563, "loss": 1.220699667930603, "loss_ce": 0.0039027612656354904, "loss_iou": 0.5234375, "loss_num": 0.033203125, "loss_xval": 1.21875, "num_input_tokens_seen": 229164808, "step": 3460 }, { "epoch": 0.32395750456311134, "grad_norm": 26.293176651000977, "learning_rate": 5e-05, "loss": 1.5823, "num_input_tokens_seen": 229230496, "step": 3461 }, { "epoch": 0.32395750456311134, "loss": 1.5072904825210571, "loss_ce": 0.009243616834282875, "loss_iou": 0.59375, "loss_num": 0.0615234375, "loss_xval": 1.5, "num_input_tokens_seen": 229230496, "step": 3461 }, { "epoch": 0.32405110684700705, "grad_norm": 39.32388687133789, "learning_rate": 5e-05, "loss": 1.4236, "num_input_tokens_seen": 229296584, "step": 3462 }, { "epoch": 0.32405110684700705, "loss": 1.4249300956726074, "loss_ce": 0.00500829890370369, "loss_iou": 0.55859375, "loss_num": 0.060302734375, "loss_xval": 1.421875, "num_input_tokens_seen": 229296584, "step": 3462 }, { "epoch": 0.3241447091309028, "grad_norm": 24.357038497924805, "learning_rate": 5e-05, "loss": 1.5255, "num_input_tokens_seen": 229363504, "step": 3463 }, { "epoch": 0.3241447091309028, "loss": 1.4392220973968506, "loss_ce": 0.008069687522947788, "loss_iou": 0.5625, "loss_num": 0.060791015625, "loss_xval": 1.4296875, "num_input_tokens_seen": 229363504, "step": 3463 }, { "epoch": 0.32423831141479853, "grad_norm": 35.12007522583008, "learning_rate": 5e-05, "loss": 1.4583, "num_input_tokens_seen": 229429140, "step": 3464 }, { "epoch": 0.32423831141479853, "loss": 1.6169214248657227, "loss_ce": 0.0048608784563839436, "loss_iou": 0.6015625, "loss_num": 0.08203125, "loss_xval": 1.609375, "num_input_tokens_seen": 229429140, "step": 3464 }, { "epoch": 0.32433191369869424, "grad_norm": 21.183223724365234, "learning_rate": 5e-05, "loss": 1.1281, "num_input_tokens_seen": 229495512, "step": 3465 }, { "epoch": 0.32433191369869424, "loss": 1.2203271389007568, "loss_ce": 0.004262746311724186, "loss_iou": 0.462890625, "loss_num": 0.05810546875, "loss_xval": 1.21875, "num_input_tokens_seen": 229495512, "step": 3465 }, { "epoch": 0.32442551598258995, "grad_norm": 20.533761978149414, "learning_rate": 5e-05, "loss": 1.3212, "num_input_tokens_seen": 229562096, "step": 3466 }, { "epoch": 0.32442551598258995, "loss": 1.3959980010986328, "loss_ce": 0.006349561735987663, "loss_iou": 0.58984375, "loss_num": 0.0419921875, "loss_xval": 1.390625, "num_input_tokens_seen": 229562096, "step": 3466 }, { "epoch": 0.3245191182664857, "grad_norm": 20.71112632751465, "learning_rate": 5e-05, "loss": 1.1819, "num_input_tokens_seen": 229628540, "step": 3467 }, { "epoch": 0.3245191182664857, "loss": 0.9793422818183899, "loss_ce": 0.0071743205189704895, "loss_iou": 0.42578125, "loss_num": 0.0242919921875, "loss_xval": 0.97265625, "num_input_tokens_seen": 229628540, "step": 3467 }, { "epoch": 0.32461272055038143, "grad_norm": 25.044464111328125, "learning_rate": 5e-05, "loss": 1.1751, "num_input_tokens_seen": 229693956, "step": 3468 }, { "epoch": 0.32461272055038143, "loss": 1.39524245262146, "loss_ce": 0.003640956711024046, "loss_iou": 0.61328125, "loss_num": 0.03271484375, "loss_xval": 1.390625, "num_input_tokens_seen": 229693956, "step": 3468 }, { "epoch": 0.32470632283427714, "grad_norm": 19.4493408203125, "learning_rate": 5e-05, "loss": 1.2971, "num_input_tokens_seen": 229759644, "step": 3469 }, { "epoch": 0.32470632283427714, "loss": 1.3415346145629883, "loss_ce": 0.006085404194891453, "loss_iou": 0.5234375, "loss_num": 0.058349609375, "loss_xval": 1.3359375, "num_input_tokens_seen": 229759644, "step": 3469 }, { "epoch": 0.3247999251181729, "grad_norm": 23.106149673461914, "learning_rate": 5e-05, "loss": 1.2264, "num_input_tokens_seen": 229826152, "step": 3470 }, { "epoch": 0.3247999251181729, "loss": 1.2581514120101929, "loss_ce": 0.009127913974225521, "loss_iou": 0.494140625, "loss_num": 0.052001953125, "loss_xval": 1.25, "num_input_tokens_seen": 229826152, "step": 3470 }, { "epoch": 0.3248935274020686, "grad_norm": 25.765979766845703, "learning_rate": 5e-05, "loss": 1.503, "num_input_tokens_seen": 229891876, "step": 3471 }, { "epoch": 0.3248935274020686, "loss": 1.4254307746887207, "loss_ce": 0.0030675381422042847, "loss_iou": 0.625, "loss_num": 0.03515625, "loss_xval": 1.421875, "num_input_tokens_seen": 229891876, "step": 3471 }, { "epoch": 0.3249871296859643, "grad_norm": 35.62071990966797, "learning_rate": 5e-05, "loss": 1.4101, "num_input_tokens_seen": 229958552, "step": 3472 }, { "epoch": 0.3249871296859643, "loss": 1.3482264280319214, "loss_ce": 0.008382691070437431, "loss_iou": 0.5390625, "loss_num": 0.05224609375, "loss_xval": 1.34375, "num_input_tokens_seen": 229958552, "step": 3472 }, { "epoch": 0.32508073196986004, "grad_norm": 23.053340911865234, "learning_rate": 5e-05, "loss": 1.4256, "num_input_tokens_seen": 230024216, "step": 3473 }, { "epoch": 0.32508073196986004, "loss": 1.2874468564987183, "loss_ce": 0.005708581767976284, "loss_iou": 0.5078125, "loss_num": 0.05322265625, "loss_xval": 1.28125, "num_input_tokens_seen": 230024216, "step": 3473 }, { "epoch": 0.3251743342537558, "grad_norm": 29.020132064819336, "learning_rate": 5e-05, "loss": 1.2004, "num_input_tokens_seen": 230090000, "step": 3474 }, { "epoch": 0.3251743342537558, "loss": 1.4137232303619385, "loss_ce": 0.00356693915091455, "loss_iou": 0.578125, "loss_num": 0.050048828125, "loss_xval": 1.40625, "num_input_tokens_seen": 230090000, "step": 3474 }, { "epoch": 0.3252679365376515, "grad_norm": 27.151784896850586, "learning_rate": 5e-05, "loss": 1.2197, "num_input_tokens_seen": 230157512, "step": 3475 }, { "epoch": 0.3252679365376515, "loss": 1.3149802684783936, "loss_ce": 0.006386419292539358, "loss_iou": 0.53125, "loss_num": 0.04931640625, "loss_xval": 1.3125, "num_input_tokens_seen": 230157512, "step": 3475 }, { "epoch": 0.3253615388215472, "grad_norm": 28.80777359008789, "learning_rate": 5e-05, "loss": 1.4261, "num_input_tokens_seen": 230222736, "step": 3476 }, { "epoch": 0.3253615388215472, "loss": 1.5021616220474243, "loss_ce": 0.00435891468077898, "loss_iou": 0.61328125, "loss_num": 0.0546875, "loss_xval": 1.5, "num_input_tokens_seen": 230222736, "step": 3476 }, { "epoch": 0.325455141105443, "grad_norm": 30.501630783081055, "learning_rate": 5e-05, "loss": 1.4853, "num_input_tokens_seen": 230289736, "step": 3477 }, { "epoch": 0.325455141105443, "loss": 1.626631259918213, "loss_ce": 0.006514023058116436, "loss_iou": 0.625, "loss_num": 0.0732421875, "loss_xval": 1.6171875, "num_input_tokens_seen": 230289736, "step": 3477 }, { "epoch": 0.3255487433893387, "grad_norm": 27.62831687927246, "learning_rate": 5e-05, "loss": 1.4628, "num_input_tokens_seen": 230357544, "step": 3478 }, { "epoch": 0.3255487433893387, "loss": 1.2684128284454346, "loss_ce": 0.004252570681273937, "loss_iou": 0.5, "loss_num": 0.052490234375, "loss_xval": 1.265625, "num_input_tokens_seen": 230357544, "step": 3478 }, { "epoch": 0.3256423456732344, "grad_norm": 17.582786560058594, "learning_rate": 5e-05, "loss": 1.2282, "num_input_tokens_seen": 230424296, "step": 3479 }, { "epoch": 0.3256423456732344, "loss": 1.0699564218521118, "loss_ce": 0.006479851435869932, "loss_iou": 0.421875, "loss_num": 0.043701171875, "loss_xval": 1.0625, "num_input_tokens_seen": 230424296, "step": 3479 }, { "epoch": 0.3257359479571302, "grad_norm": 50.17025375366211, "learning_rate": 5e-05, "loss": 1.3563, "num_input_tokens_seen": 230491124, "step": 3480 }, { "epoch": 0.3257359479571302, "loss": 1.348001480102539, "loss_ce": 0.008157771080732346, "loss_iou": 0.5546875, "loss_num": 0.046142578125, "loss_xval": 1.34375, "num_input_tokens_seen": 230491124, "step": 3480 }, { "epoch": 0.3258295502410259, "grad_norm": 28.259422302246094, "learning_rate": 5e-05, "loss": 1.1982, "num_input_tokens_seen": 230557228, "step": 3481 }, { "epoch": 0.3258295502410259, "loss": 1.3323099613189697, "loss_ce": 0.005649839527904987, "loss_iou": 0.515625, "loss_num": 0.058837890625, "loss_xval": 1.328125, "num_input_tokens_seen": 230557228, "step": 3481 }, { "epoch": 0.3259231525249216, "grad_norm": 42.412147521972656, "learning_rate": 5e-05, "loss": 1.5952, "num_input_tokens_seen": 230623592, "step": 3482 }, { "epoch": 0.3259231525249216, "loss": 1.6786226034164429, "loss_ce": 0.007724075112491846, "loss_iou": 0.6640625, "loss_num": 0.0693359375, "loss_xval": 1.671875, "num_input_tokens_seen": 230623592, "step": 3482 }, { "epoch": 0.3260167548088173, "grad_norm": 28.540069580078125, "learning_rate": 5e-05, "loss": 1.2564, "num_input_tokens_seen": 230690916, "step": 3483 }, { "epoch": 0.3260167548088173, "loss": 0.9094557166099548, "loss_ce": 0.0036939966958016157, "loss_iou": 0.40625, "loss_num": 0.01904296875, "loss_xval": 0.90625, "num_input_tokens_seen": 230690916, "step": 3483 }, { "epoch": 0.3261103570927131, "grad_norm": 23.447139739990234, "learning_rate": 5e-05, "loss": 1.4811, "num_input_tokens_seen": 230756944, "step": 3484 }, { "epoch": 0.3261103570927131, "loss": 1.3724188804626465, "loss_ce": 0.006207851227372885, "loss_iou": 0.55078125, "loss_num": 0.052978515625, "loss_xval": 1.3671875, "num_input_tokens_seen": 230756944, "step": 3484 }, { "epoch": 0.3262039593766088, "grad_norm": 22.987640380859375, "learning_rate": 5e-05, "loss": 1.5972, "num_input_tokens_seen": 230822520, "step": 3485 }, { "epoch": 0.3262039593766088, "loss": 1.3434700965881348, "loss_ce": 0.007044360972940922, "loss_iou": 0.5625, "loss_num": 0.042236328125, "loss_xval": 1.3359375, "num_input_tokens_seen": 230822520, "step": 3485 }, { "epoch": 0.3262975616605045, "grad_norm": 29.072772979736328, "learning_rate": 5e-05, "loss": 1.4876, "num_input_tokens_seen": 230889188, "step": 3486 }, { "epoch": 0.3262975616605045, "loss": 1.4115326404571533, "loss_ce": 0.009188849478960037, "loss_iou": 0.58984375, "loss_num": 0.043701171875, "loss_xval": 1.40625, "num_input_tokens_seen": 230889188, "step": 3486 }, { "epoch": 0.32639116394440026, "grad_norm": 54.610877990722656, "learning_rate": 5e-05, "loss": 1.7372, "num_input_tokens_seen": 230954712, "step": 3487 }, { "epoch": 0.32639116394440026, "loss": 1.5750269889831543, "loss_ce": 0.007644145283848047, "loss_iou": 0.640625, "loss_num": 0.05712890625, "loss_xval": 1.5703125, "num_input_tokens_seen": 230954712, "step": 3487 }, { "epoch": 0.326484766228296, "grad_norm": 25.548595428466797, "learning_rate": 5e-05, "loss": 1.6614, "num_input_tokens_seen": 231021184, "step": 3488 }, { "epoch": 0.326484766228296, "loss": 1.8577330112457275, "loss_ce": 0.009100107476115227, "loss_iou": 0.7578125, "loss_num": 0.06640625, "loss_xval": 1.8515625, "num_input_tokens_seen": 231021184, "step": 3488 }, { "epoch": 0.3265783685121917, "grad_norm": 22.18245506286621, "learning_rate": 5e-05, "loss": 1.257, "num_input_tokens_seen": 231087584, "step": 3489 }, { "epoch": 0.3265783685121917, "loss": 1.3807964324951172, "loss_ce": 0.007749560289084911, "loss_iou": 0.578125, "loss_num": 0.044189453125, "loss_xval": 1.375, "num_input_tokens_seen": 231087584, "step": 3489 }, { "epoch": 0.32667197079608745, "grad_norm": 17.516324996948242, "learning_rate": 5e-05, "loss": 1.1775, "num_input_tokens_seen": 231154032, "step": 3490 }, { "epoch": 0.32667197079608745, "loss": 1.2251875400543213, "loss_ce": 0.006651208270341158, "loss_iou": 0.486328125, "loss_num": 0.049072265625, "loss_xval": 1.21875, "num_input_tokens_seen": 231154032, "step": 3490 }, { "epoch": 0.32676557307998316, "grad_norm": 23.85236167907715, "learning_rate": 5e-05, "loss": 1.3247, "num_input_tokens_seen": 231221056, "step": 3491 }, { "epoch": 0.32676557307998316, "loss": 1.2391154766082764, "loss_ce": 0.005228670779615641, "loss_iou": 0.55078125, "loss_num": 0.0262451171875, "loss_xval": 1.234375, "num_input_tokens_seen": 231221056, "step": 3491 }, { "epoch": 0.3268591753638789, "grad_norm": 22.00735855102539, "learning_rate": 5e-05, "loss": 1.3966, "num_input_tokens_seen": 231288024, "step": 3492 }, { "epoch": 0.3268591753638789, "loss": 1.255366325378418, "loss_ce": 0.006342868786305189, "loss_iou": 0.54296875, "loss_num": 0.031982421875, "loss_xval": 1.25, "num_input_tokens_seen": 231288024, "step": 3492 }, { "epoch": 0.3269527776477746, "grad_norm": 52.37733840942383, "learning_rate": 5e-05, "loss": 1.508, "num_input_tokens_seen": 231354548, "step": 3493 }, { "epoch": 0.3269527776477746, "loss": 1.467725396156311, "loss_ce": 0.008252738043665886, "loss_iou": 0.5859375, "loss_num": 0.056396484375, "loss_xval": 1.4609375, "num_input_tokens_seen": 231354548, "step": 3493 }, { "epoch": 0.32704637993167035, "grad_norm": 28.013761520385742, "learning_rate": 5e-05, "loss": 1.4415, "num_input_tokens_seen": 231421152, "step": 3494 }, { "epoch": 0.32704637993167035, "loss": 1.332025408744812, "loss_ce": 0.005365224555134773, "loss_iou": 0.5859375, "loss_num": 0.031005859375, "loss_xval": 1.328125, "num_input_tokens_seen": 231421152, "step": 3494 }, { "epoch": 0.32713998221556606, "grad_norm": 26.436290740966797, "learning_rate": 5e-05, "loss": 1.2413, "num_input_tokens_seen": 231487444, "step": 3495 }, { "epoch": 0.32713998221556606, "loss": 1.2978804111480713, "loss_ce": 0.006376506295055151, "loss_iou": 0.54296875, "loss_num": 0.041015625, "loss_xval": 1.2890625, "num_input_tokens_seen": 231487444, "step": 3495 }, { "epoch": 0.32723358449946177, "grad_norm": 19.611053466796875, "learning_rate": 5e-05, "loss": 1.2601, "num_input_tokens_seen": 231553776, "step": 3496 }, { "epoch": 0.32723358449946177, "loss": 1.4286245107650757, "loss_ce": 0.005772912874817848, "loss_iou": 0.56640625, "loss_num": 0.058349609375, "loss_xval": 1.421875, "num_input_tokens_seen": 231553776, "step": 3496 }, { "epoch": 0.32732718678335754, "grad_norm": 26.257793426513672, "learning_rate": 5e-05, "loss": 1.5374, "num_input_tokens_seen": 231620348, "step": 3497 }, { "epoch": 0.32732718678335754, "loss": 1.6531095504760742, "loss_ce": 0.006625092122703791, "loss_iou": 0.6328125, "loss_num": 0.07666015625, "loss_xval": 1.6484375, "num_input_tokens_seen": 231620348, "step": 3497 }, { "epoch": 0.32742078906725325, "grad_norm": 40.22646713256836, "learning_rate": 5e-05, "loss": 1.4268, "num_input_tokens_seen": 231686712, "step": 3498 }, { "epoch": 0.32742078906725325, "loss": 1.428750991821289, "loss_ce": 0.004922924097627401, "loss_iou": 0.609375, "loss_num": 0.04052734375, "loss_xval": 1.421875, "num_input_tokens_seen": 231686712, "step": 3498 }, { "epoch": 0.32751439135114896, "grad_norm": 26.511734008789062, "learning_rate": 5e-05, "loss": 1.2521, "num_input_tokens_seen": 231753180, "step": 3499 }, { "epoch": 0.32751439135114896, "loss": 1.258674144744873, "loss_ce": 0.007697533816099167, "loss_iou": 0.50390625, "loss_num": 0.04833984375, "loss_xval": 1.25, "num_input_tokens_seen": 231753180, "step": 3499 }, { "epoch": 0.32760799363504467, "grad_norm": 34.28484344482422, "learning_rate": 5e-05, "loss": 1.5833, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32760799363504467, "eval_seeclick_CIoU": 0.15782961249351501, "eval_seeclick_GIoU": 0.16827442497015, "eval_seeclick_IoU": 0.2891116961836815, "eval_seeclick_MAE_all": 0.1535521298646927, "eval_seeclick_MAE_h": 0.06641276739537716, "eval_seeclick_MAE_w": 0.13540880382061005, "eval_seeclick_MAE_x_boxes": 0.2435053288936615, "eval_seeclick_MAE_y_boxes": 0.1485762670636177, "eval_seeclick_NUM_probability": 0.9999370872974396, "eval_seeclick_inside_bbox": 0.4364583343267441, "eval_seeclick_loss": 2.501735210418701, "eval_seeclick_loss_ce": 0.0145033891312778, "eval_seeclick_loss_iou": 0.864501953125, "eval_seeclick_loss_num": 0.15167999267578125, "eval_seeclick_loss_xval": 2.48779296875, "eval_seeclick_runtime": 65.6185, "eval_seeclick_samples_per_second": 0.716, "eval_seeclick_steps_per_second": 0.03, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32760799363504467, "eval_icons_CIoU": -0.11122158542275429, "eval_icons_GIoU": 0.01340615563094616, "eval_icons_IoU": 0.09425394982099533, "eval_icons_MAE_all": 0.20621279627084732, "eval_icons_MAE_h": 0.22439535707235336, "eval_icons_MAE_w": 0.19666418433189392, "eval_icons_MAE_x_boxes": 0.13402969017624855, "eval_icons_MAE_y_boxes": 0.08551926910877228, "eval_icons_NUM_probability": 0.9997999966144562, "eval_icons_inside_bbox": 0.1614583358168602, "eval_icons_loss": 3.042537212371826, "eval_icons_loss_ce": 3.3613167943258304e-05, "eval_icons_loss_iou": 1.005615234375, "eval_icons_loss_num": 0.22802734375, "eval_icons_loss_xval": 3.15087890625, "eval_icons_runtime": 73.5505, "eval_icons_samples_per_second": 0.68, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32760799363504467, "eval_screenspot_CIoU": -0.04845000927646955, "eval_screenspot_GIoU": -0.01256128524740537, "eval_screenspot_IoU": 0.14887459576129913, "eval_screenspot_MAE_all": 0.22817763686180115, "eval_screenspot_MAE_h": 0.179355318347613, "eval_screenspot_MAE_w": 0.18963390588760376, "eval_screenspot_MAE_x_boxes": 0.3209350109100342, "eval_screenspot_MAE_y_boxes": 0.13483184576034546, "eval_screenspot_NUM_probability": 0.9999322891235352, "eval_screenspot_inside_bbox": 0.2970833381017049, "eval_screenspot_loss": 3.1953461170196533, "eval_screenspot_loss_ce": 0.0071808453649282455, "eval_screenspot_loss_iou": 1.0323893229166667, "eval_screenspot_loss_num": 0.23490397135416666, "eval_screenspot_loss_xval": 3.2389322916666665, "eval_screenspot_runtime": 122.7704, "eval_screenspot_samples_per_second": 0.725, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32760799363504467, "eval_compot_CIoU": -0.07480915263295174, "eval_compot_GIoU": -0.026930993422865868, "eval_compot_IoU": 0.11974719911813736, "eval_compot_MAE_all": 0.2226223200559616, "eval_compot_MAE_h": 0.19979272037744522, "eval_compot_MAE_w": 0.22494399547576904, "eval_compot_MAE_x_boxes": 0.19063614308834076, "eval_compot_MAE_y_boxes": 0.1315009444952011, "eval_compot_NUM_probability": 0.9999416470527649, "eval_compot_inside_bbox": 0.2204861119389534, "eval_compot_loss": 3.1513233184814453, "eval_compot_loss_ce": 0.0035701930755749345, "eval_compot_loss_iou": 1.043212890625, "eval_compot_loss_num": 0.227264404296875, "eval_compot_loss_xval": 3.22216796875, "eval_compot_runtime": 67.5686, "eval_compot_samples_per_second": 0.74, "eval_compot_steps_per_second": 0.03, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32760799363504467, "eval_custom_ui_MAE_all": 0.16447503119707108, "eval_custom_ui_MAE_x": 0.16267096251249313, "eval_custom_ui_MAE_y": 0.16627910733222961, "eval_custom_ui_NUM_probability": 0.9999749958515167, "eval_custom_ui_loss": 0.9318787455558777, "eval_custom_ui_loss_ce": 0.18506206572055817, "eval_custom_ui_loss_num": 0.15753173828125, "eval_custom_ui_loss_xval": 0.787353515625, "eval_custom_ui_runtime": 53.7817, "eval_custom_ui_samples_per_second": 0.93, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32760799363504467, "loss": 1.0177785158157349, "loss_ce": 0.20918476581573486, "loss_iou": 0.0, "loss_num": 0.162109375, "loss_xval": 0.80859375, "num_input_tokens_seen": 231819164, "step": 3500 }, { "epoch": 0.32770159591894044, "grad_norm": 20.540634155273438, "learning_rate": 5e-05, "loss": 1.1884, "num_input_tokens_seen": 231885948, "step": 3501 }, { "epoch": 0.32770159591894044, "loss": 1.1003904342651367, "loss_ce": 0.007617032155394554, "loss_iou": 0.470703125, "loss_num": 0.03076171875, "loss_xval": 1.09375, "num_input_tokens_seen": 231885948, "step": 3501 }, { "epoch": 0.32779519820283615, "grad_norm": 15.167670249938965, "learning_rate": 5e-05, "loss": 1.261, "num_input_tokens_seen": 231952416, "step": 3502 }, { "epoch": 0.32779519820283615, "loss": 0.9802899956703186, "loss_ce": 0.004704044200479984, "loss_iou": 0.416015625, "loss_num": 0.02880859375, "loss_xval": 0.9765625, "num_input_tokens_seen": 231952416, "step": 3502 }, { "epoch": 0.32788880048673186, "grad_norm": 19.73589324951172, "learning_rate": 5e-05, "loss": 1.2712, "num_input_tokens_seen": 232017704, "step": 3503 }, { "epoch": 0.32788880048673186, "loss": 1.255335807800293, "loss_ce": 0.008265480399131775, "loss_iou": 0.515625, "loss_num": 0.043701171875, "loss_xval": 1.25, "num_input_tokens_seen": 232017704, "step": 3503 }, { "epoch": 0.3279824027706276, "grad_norm": 23.826526641845703, "learning_rate": 5e-05, "loss": 1.5358, "num_input_tokens_seen": 232083420, "step": 3504 }, { "epoch": 0.3279824027706276, "loss": 1.756319284439087, "loss_ce": 0.011202055960893631, "loss_iou": 0.703125, "loss_num": 0.0673828125, "loss_xval": 1.7421875, "num_input_tokens_seen": 232083420, "step": 3504 }, { "epoch": 0.32807600505452333, "grad_norm": 24.73110580444336, "learning_rate": 5e-05, "loss": 1.3299, "num_input_tokens_seen": 232149428, "step": 3505 }, { "epoch": 0.32807600505452333, "loss": 1.3241204023361206, "loss_ce": 0.006737629882991314, "loss_iou": 0.5546875, "loss_num": 0.042724609375, "loss_xval": 1.3203125, "num_input_tokens_seen": 232149428, "step": 3505 }, { "epoch": 0.32816960733841904, "grad_norm": 39.34742736816406, "learning_rate": 5e-05, "loss": 1.4139, "num_input_tokens_seen": 232215984, "step": 3506 }, { "epoch": 0.32816960733841904, "loss": 1.528343915939331, "loss_ce": 0.006859532557427883, "loss_iou": 0.609375, "loss_num": 0.060546875, "loss_xval": 1.5234375, "num_input_tokens_seen": 232215984, "step": 3506 }, { "epoch": 0.3282632096223148, "grad_norm": 79.795166015625, "learning_rate": 5e-05, "loss": 1.562, "num_input_tokens_seen": 232282256, "step": 3507 }, { "epoch": 0.3282632096223148, "loss": 1.648977518081665, "loss_ce": 0.003469696966931224, "loss_iou": 0.66015625, "loss_num": 0.0654296875, "loss_xval": 1.6484375, "num_input_tokens_seen": 232282256, "step": 3507 }, { "epoch": 0.3283568119062105, "grad_norm": 16.693782806396484, "learning_rate": 5e-05, "loss": 1.0874, "num_input_tokens_seen": 232347496, "step": 3508 }, { "epoch": 0.3283568119062105, "loss": 1.1710684299468994, "loss_ce": 0.008226733654737473, "loss_iou": 0.462890625, "loss_num": 0.047607421875, "loss_xval": 1.1640625, "num_input_tokens_seen": 232347496, "step": 3508 }, { "epoch": 0.32845041419010623, "grad_norm": 17.354244232177734, "learning_rate": 5e-05, "loss": 1.3225, "num_input_tokens_seen": 232413608, "step": 3509 }, { "epoch": 0.32845041419010623, "loss": 1.2690069675445557, "loss_ce": 0.011194521561264992, "loss_iou": 0.484375, "loss_num": 0.057861328125, "loss_xval": 1.2578125, "num_input_tokens_seen": 232413608, "step": 3509 }, { "epoch": 0.32854401647400194, "grad_norm": 29.071693420410156, "learning_rate": 5e-05, "loss": 1.3773, "num_input_tokens_seen": 232479400, "step": 3510 }, { "epoch": 0.32854401647400194, "loss": 1.4294288158416748, "loss_ce": 0.008042161352932453, "loss_iou": 0.59375, "loss_num": 0.046875, "loss_xval": 1.421875, "num_input_tokens_seen": 232479400, "step": 3510 }, { "epoch": 0.3286376187578977, "grad_norm": 20.38787841796875, "learning_rate": 5e-05, "loss": 1.7461, "num_input_tokens_seen": 232545720, "step": 3511 }, { "epoch": 0.3286376187578977, "loss": 1.6558558940887451, "loss_ce": 0.006441822741180658, "loss_iou": 0.7109375, "loss_num": 0.044677734375, "loss_xval": 1.6484375, "num_input_tokens_seen": 232545720, "step": 3511 }, { "epoch": 0.3287312210417934, "grad_norm": 20.92414665222168, "learning_rate": 5e-05, "loss": 1.2691, "num_input_tokens_seen": 232614272, "step": 3512 }, { "epoch": 0.3287312210417934, "loss": 1.2079880237579346, "loss_ce": 0.010234096087515354, "loss_iou": 0.498046875, "loss_num": 0.040283203125, "loss_xval": 1.1953125, "num_input_tokens_seen": 232614272, "step": 3512 }, { "epoch": 0.32882482332568913, "grad_norm": 20.686065673828125, "learning_rate": 5e-05, "loss": 1.4184, "num_input_tokens_seen": 232680532, "step": 3513 }, { "epoch": 0.32882482332568913, "loss": 1.3474245071411133, "loss_ce": 0.0036745343822985888, "loss_iou": 0.59375, "loss_num": 0.03173828125, "loss_xval": 1.34375, "num_input_tokens_seen": 232680532, "step": 3513 }, { "epoch": 0.3289184256095849, "grad_norm": 15.067450523376465, "learning_rate": 5e-05, "loss": 1.2326, "num_input_tokens_seen": 232747440, "step": 3514 }, { "epoch": 0.3289184256095849, "loss": 1.2044594287872314, "loss_ce": 0.007682022638618946, "loss_iou": 0.46875, "loss_num": 0.052001953125, "loss_xval": 1.1953125, "num_input_tokens_seen": 232747440, "step": 3514 }, { "epoch": 0.3290120278934806, "grad_norm": 23.287527084350586, "learning_rate": 5e-05, "loss": 1.3241, "num_input_tokens_seen": 232813848, "step": 3515 }, { "epoch": 0.3290120278934806, "loss": 1.3500601053237915, "loss_ce": 0.0033803777769207954, "loss_iou": 0.53515625, "loss_num": 0.0556640625, "loss_xval": 1.34375, "num_input_tokens_seen": 232813848, "step": 3515 }, { "epoch": 0.3291056301773763, "grad_norm": 36.191043853759766, "learning_rate": 5e-05, "loss": 1.2991, "num_input_tokens_seen": 232879480, "step": 3516 }, { "epoch": 0.3291056301773763, "loss": 1.3669815063476562, "loss_ce": 0.004676827695220709, "loss_iou": 0.578125, "loss_num": 0.041259765625, "loss_xval": 1.359375, "num_input_tokens_seen": 232879480, "step": 3516 }, { "epoch": 0.32919923246127203, "grad_norm": 137.72544860839844, "learning_rate": 5e-05, "loss": 1.6931, "num_input_tokens_seen": 232945264, "step": 3517 }, { "epoch": 0.32919923246127203, "loss": 1.6234352588653564, "loss_ce": 0.0024635731242597103, "loss_iou": 0.609375, "loss_num": 0.080078125, "loss_xval": 1.6171875, "num_input_tokens_seen": 232945264, "step": 3517 }, { "epoch": 0.3292928347451678, "grad_norm": 58.17002487182617, "learning_rate": 5e-05, "loss": 1.3909, "num_input_tokens_seen": 233011808, "step": 3518 }, { "epoch": 0.3292928347451678, "loss": 1.3913087844848633, "loss_ce": 0.00556666124612093, "loss_iou": 0.59375, "loss_num": 0.0400390625, "loss_xval": 1.3828125, "num_input_tokens_seen": 233011808, "step": 3518 }, { "epoch": 0.3293864370290635, "grad_norm": 46.489566802978516, "learning_rate": 5e-05, "loss": 1.4017, "num_input_tokens_seen": 233078812, "step": 3519 }, { "epoch": 0.3293864370290635, "loss": 1.4041200876235962, "loss_ce": 0.004706018604338169, "loss_iou": 0.578125, "loss_num": 0.04833984375, "loss_xval": 1.3984375, "num_input_tokens_seen": 233078812, "step": 3519 }, { "epoch": 0.3294800393129592, "grad_norm": 23.331544876098633, "learning_rate": 5e-05, "loss": 1.5535, "num_input_tokens_seen": 233145912, "step": 3520 }, { "epoch": 0.3294800393129592, "loss": 1.7464112043380737, "loss_ce": 0.007153346668928862, "loss_iou": 0.734375, "loss_num": 0.054443359375, "loss_xval": 1.7421875, "num_input_tokens_seen": 233145912, "step": 3520 }, { "epoch": 0.329573641596855, "grad_norm": 25.761667251586914, "learning_rate": 5e-05, "loss": 1.1247, "num_input_tokens_seen": 233212740, "step": 3521 }, { "epoch": 0.329573641596855, "loss": 1.111678123474121, "loss_ce": 0.003279757220298052, "loss_iou": 0.44140625, "loss_num": 0.044921875, "loss_xval": 1.109375, "num_input_tokens_seen": 233212740, "step": 3521 }, { "epoch": 0.3296672438807507, "grad_norm": 32.18890380859375, "learning_rate": 5e-05, "loss": 1.3776, "num_input_tokens_seen": 233279872, "step": 3522 }, { "epoch": 0.3296672438807507, "loss": 1.3662781715393066, "loss_ce": 0.006414836272597313, "loss_iou": 0.5703125, "loss_num": 0.044677734375, "loss_xval": 1.359375, "num_input_tokens_seen": 233279872, "step": 3522 }, { "epoch": 0.3297608461646464, "grad_norm": 22.280868530273438, "learning_rate": 5e-05, "loss": 1.5736, "num_input_tokens_seen": 233346380, "step": 3523 }, { "epoch": 0.3297608461646464, "loss": 1.5078675746917725, "loss_ce": 0.002984786406159401, "loss_iou": 0.6484375, "loss_num": 0.04150390625, "loss_xval": 1.5078125, "num_input_tokens_seen": 233346380, "step": 3523 }, { "epoch": 0.32985444844854217, "grad_norm": 15.169958114624023, "learning_rate": 5e-05, "loss": 1.1784, "num_input_tokens_seen": 233411988, "step": 3524 }, { "epoch": 0.32985444844854217, "loss": 1.1074304580688477, "loss_ce": 0.004647225607186556, "loss_iou": 0.451171875, "loss_num": 0.040283203125, "loss_xval": 1.1015625, "num_input_tokens_seen": 233411988, "step": 3524 }, { "epoch": 0.3299480507324379, "grad_norm": 39.23529052734375, "learning_rate": 5e-05, "loss": 1.3854, "num_input_tokens_seen": 233478972, "step": 3525 }, { "epoch": 0.3299480507324379, "loss": 1.3963702917099, "loss_ce": 0.008186709135770798, "loss_iou": 0.51953125, "loss_num": 0.06982421875, "loss_xval": 1.390625, "num_input_tokens_seen": 233478972, "step": 3525 }, { "epoch": 0.3300416530163336, "grad_norm": 26.459463119506836, "learning_rate": 5e-05, "loss": 1.2726, "num_input_tokens_seen": 233545680, "step": 3526 }, { "epoch": 0.3300416530163336, "loss": 1.198699951171875, "loss_ce": 0.0053405482321977615, "loss_iou": 0.5234375, "loss_num": 0.0302734375, "loss_xval": 1.1953125, "num_input_tokens_seen": 233545680, "step": 3526 }, { "epoch": 0.3301352553002293, "grad_norm": 19.502931594848633, "learning_rate": 5e-05, "loss": 1.494, "num_input_tokens_seen": 233611424, "step": 3527 }, { "epoch": 0.3301352553002293, "loss": 1.7368769645690918, "loss_ce": 0.007384879048913717, "loss_iou": 0.6875, "loss_num": 0.07080078125, "loss_xval": 1.7265625, "num_input_tokens_seen": 233611424, "step": 3527 }, { "epoch": 0.33022885758412507, "grad_norm": 20.541210174560547, "learning_rate": 5e-05, "loss": 1.2868, "num_input_tokens_seen": 233677900, "step": 3528 }, { "epoch": 0.33022885758412507, "loss": 1.276645302772522, "loss_ce": 0.0036960765719413757, "loss_iou": 0.5625, "loss_num": 0.02880859375, "loss_xval": 1.2734375, "num_input_tokens_seen": 233677900, "step": 3528 }, { "epoch": 0.3303224598680208, "grad_norm": 38.70900344848633, "learning_rate": 5e-05, "loss": 1.6072, "num_input_tokens_seen": 233744480, "step": 3529 }, { "epoch": 0.3303224598680208, "loss": 1.5182149410247803, "loss_ce": 0.009914163500070572, "loss_iou": 0.62109375, "loss_num": 0.053466796875, "loss_xval": 1.5078125, "num_input_tokens_seen": 233744480, "step": 3529 }, { "epoch": 0.3304160621519165, "grad_norm": 29.879817962646484, "learning_rate": 5e-05, "loss": 1.4821, "num_input_tokens_seen": 233810300, "step": 3530 }, { "epoch": 0.3304160621519165, "loss": 1.501889944076538, "loss_ce": 0.003354812040925026, "loss_iou": 0.640625, "loss_num": 0.0439453125, "loss_xval": 1.5, "num_input_tokens_seen": 233810300, "step": 3530 }, { "epoch": 0.33050966443581226, "grad_norm": 17.80263328552246, "learning_rate": 5e-05, "loss": 1.3036, "num_input_tokens_seen": 233876360, "step": 3531 }, { "epoch": 0.33050966443581226, "loss": 1.0629225969314575, "loss_ce": 0.004817101173102856, "loss_iou": 0.408203125, "loss_num": 0.04833984375, "loss_xval": 1.0546875, "num_input_tokens_seen": 233876360, "step": 3531 }, { "epoch": 0.33060326671970797, "grad_norm": 39.266719818115234, "learning_rate": 5e-05, "loss": 1.3206, "num_input_tokens_seen": 233943072, "step": 3532 }, { "epoch": 0.33060326671970797, "loss": 1.2983803749084473, "loss_ce": 0.007364715449512005, "loss_iou": 0.5234375, "loss_num": 0.049072265625, "loss_xval": 1.2890625, "num_input_tokens_seen": 233943072, "step": 3532 }, { "epoch": 0.3306968690036037, "grad_norm": 27.344268798828125, "learning_rate": 5e-05, "loss": 1.5397, "num_input_tokens_seen": 234008400, "step": 3533 }, { "epoch": 0.3306968690036037, "loss": 1.5955493450164795, "loss_ce": 0.003264123573899269, "loss_iou": 0.640625, "loss_num": 0.06201171875, "loss_xval": 1.59375, "num_input_tokens_seen": 234008400, "step": 3533 }, { "epoch": 0.3307904712874994, "grad_norm": 24.001371383666992, "learning_rate": 5e-05, "loss": 1.4408, "num_input_tokens_seen": 234074216, "step": 3534 }, { "epoch": 0.3307904712874994, "loss": 1.3274438381195068, "loss_ce": 0.004445748869329691, "loss_iou": 0.5859375, "loss_num": 0.0302734375, "loss_xval": 1.3203125, "num_input_tokens_seen": 234074216, "step": 3534 }, { "epoch": 0.33088407357139515, "grad_norm": 18.622882843017578, "learning_rate": 5e-05, "loss": 1.3406, "num_input_tokens_seen": 234140852, "step": 3535 }, { "epoch": 0.33088407357139515, "loss": 1.2333338260650635, "loss_ce": 0.0048182448372244835, "loss_iou": 0.515625, "loss_num": 0.0400390625, "loss_xval": 1.2265625, "num_input_tokens_seen": 234140852, "step": 3535 }, { "epoch": 0.33097767585529086, "grad_norm": 25.497224807739258, "learning_rate": 5e-05, "loss": 1.1527, "num_input_tokens_seen": 234206304, "step": 3536 }, { "epoch": 0.33097767585529086, "loss": 1.274425983428955, "loss_ce": 0.0024532973766326904, "loss_iou": 0.56640625, "loss_num": 0.02734375, "loss_xval": 1.2734375, "num_input_tokens_seen": 234206304, "step": 3536 }, { "epoch": 0.3310712781391866, "grad_norm": 24.679630279541016, "learning_rate": 5e-05, "loss": 1.2641, "num_input_tokens_seen": 234272740, "step": 3537 }, { "epoch": 0.3310712781391866, "loss": 1.2440797090530396, "loss_ce": 0.007995770312845707, "loss_iou": 0.48046875, "loss_num": 0.054931640625, "loss_xval": 1.234375, "num_input_tokens_seen": 234272740, "step": 3537 }, { "epoch": 0.33116488042308234, "grad_norm": 26.851369857788086, "learning_rate": 5e-05, "loss": 1.6072, "num_input_tokens_seen": 234338928, "step": 3538 }, { "epoch": 0.33116488042308234, "loss": 1.5216840505599976, "loss_ce": 0.00996539369225502, "loss_iou": 0.51953125, "loss_num": 0.0947265625, "loss_xval": 1.515625, "num_input_tokens_seen": 234338928, "step": 3538 }, { "epoch": 0.33125848270697805, "grad_norm": 26.35730743408203, "learning_rate": 5e-05, "loss": 1.2772, "num_input_tokens_seen": 234405568, "step": 3539 }, { "epoch": 0.33125848270697805, "loss": 1.2963889837265015, "loss_ce": 0.003908491227775812, "loss_iou": 0.54296875, "loss_num": 0.041259765625, "loss_xval": 1.2890625, "num_input_tokens_seen": 234405568, "step": 3539 }, { "epoch": 0.33135208499087376, "grad_norm": 28.880949020385742, "learning_rate": 5e-05, "loss": 1.3157, "num_input_tokens_seen": 234472604, "step": 3540 }, { "epoch": 0.33135208499087376, "loss": 1.4409079551696777, "loss_ce": 0.004384455271065235, "loss_iou": 0.59765625, "loss_num": 0.048828125, "loss_xval": 1.4375, "num_input_tokens_seen": 234472604, "step": 3540 }, { "epoch": 0.33144568727476953, "grad_norm": 38.752498626708984, "learning_rate": 5e-05, "loss": 1.1753, "num_input_tokens_seen": 234537884, "step": 3541 }, { "epoch": 0.33144568727476953, "loss": 1.1052851676940918, "loss_ce": 0.010558545589447021, "loss_iou": 0.40625, "loss_num": 0.05615234375, "loss_xval": 1.09375, "num_input_tokens_seen": 234537884, "step": 3541 }, { "epoch": 0.33153928955866524, "grad_norm": 14.769792556762695, "learning_rate": 5e-05, "loss": 1.1402, "num_input_tokens_seen": 234603852, "step": 3542 }, { "epoch": 0.33153928955866524, "loss": 1.3644130229949951, "loss_ce": 0.0030848467722535133, "loss_iou": 0.57421875, "loss_num": 0.041748046875, "loss_xval": 1.359375, "num_input_tokens_seen": 234603852, "step": 3542 }, { "epoch": 0.33163289184256095, "grad_norm": 20.41305923461914, "learning_rate": 5e-05, "loss": 1.2194, "num_input_tokens_seen": 234669296, "step": 3543 }, { "epoch": 0.33163289184256095, "loss": 1.207371473312378, "loss_ce": 0.006199490278959274, "loss_iou": 0.494140625, "loss_num": 0.04296875, "loss_xval": 1.203125, "num_input_tokens_seen": 234669296, "step": 3543 }, { "epoch": 0.33172649412645666, "grad_norm": 17.607336044311523, "learning_rate": 5e-05, "loss": 1.2414, "num_input_tokens_seen": 234736068, "step": 3544 }, { "epoch": 0.33172649412645666, "loss": 1.2859746217727661, "loss_ce": 0.004724621307104826, "loss_iou": 0.55078125, "loss_num": 0.03564453125, "loss_xval": 1.28125, "num_input_tokens_seen": 234736068, "step": 3544 }, { "epoch": 0.3318200964103524, "grad_norm": 40.402427673339844, "learning_rate": 5e-05, "loss": 1.115, "num_input_tokens_seen": 234801856, "step": 3545 }, { "epoch": 0.3318200964103524, "loss": 1.352484941482544, "loss_ce": 0.005317067727446556, "loss_iou": 0.546875, "loss_num": 0.050537109375, "loss_xval": 1.34375, "num_input_tokens_seen": 234801856, "step": 3545 }, { "epoch": 0.33191369869424814, "grad_norm": 38.34578323364258, "learning_rate": 5e-05, "loss": 1.4438, "num_input_tokens_seen": 234868392, "step": 3546 }, { "epoch": 0.33191369869424814, "loss": 1.2742551565170288, "loss_ce": 0.009606714360415936, "loss_iou": 0.5, "loss_num": 0.052978515625, "loss_xval": 1.265625, "num_input_tokens_seen": 234868392, "step": 3546 }, { "epoch": 0.33200730097814385, "grad_norm": 26.828020095825195, "learning_rate": 5e-05, "loss": 1.7167, "num_input_tokens_seen": 234934924, "step": 3547 }, { "epoch": 0.33200730097814385, "loss": 1.8022754192352295, "loss_ce": 0.004423964768648148, "loss_iou": 0.7578125, "loss_num": 0.056396484375, "loss_xval": 1.796875, "num_input_tokens_seen": 234934924, "step": 3547 }, { "epoch": 0.3321009032620396, "grad_norm": 73.9911880493164, "learning_rate": 5e-05, "loss": 1.4153, "num_input_tokens_seen": 235001172, "step": 3548 }, { "epoch": 0.3321009032620396, "loss": 1.5545170307159424, "loss_ce": 0.002759157679975033, "loss_iou": 0.578125, "loss_num": 0.07958984375, "loss_xval": 1.5546875, "num_input_tokens_seen": 235001172, "step": 3548 }, { "epoch": 0.3321945055459353, "grad_norm": 12.100743293762207, "learning_rate": 5e-05, "loss": 1.3101, "num_input_tokens_seen": 235067556, "step": 3549 }, { "epoch": 0.3321945055459353, "loss": 1.3225867748260498, "loss_ce": 0.009110218845307827, "loss_iou": 0.515625, "loss_num": 0.056396484375, "loss_xval": 1.3125, "num_input_tokens_seen": 235067556, "step": 3549 }, { "epoch": 0.33228810782983104, "grad_norm": 38.98204803466797, "learning_rate": 5e-05, "loss": 1.1906, "num_input_tokens_seen": 235133284, "step": 3550 }, { "epoch": 0.33228810782983104, "loss": 1.3154993057250977, "loss_ce": 0.014718085527420044, "loss_iou": 0.4921875, "loss_num": 0.06298828125, "loss_xval": 1.296875, "num_input_tokens_seen": 235133284, "step": 3550 }, { "epoch": 0.3323817101137268, "grad_norm": 19.8131160736084, "learning_rate": 5e-05, "loss": 1.5927, "num_input_tokens_seen": 235199728, "step": 3551 }, { "epoch": 0.3323817101137268, "loss": 1.679270625114441, "loss_ce": 0.005442567635327578, "loss_iou": 0.62890625, "loss_num": 0.08349609375, "loss_xval": 1.671875, "num_input_tokens_seen": 235199728, "step": 3551 }, { "epoch": 0.3324753123976225, "grad_norm": 34.68616485595703, "learning_rate": 5e-05, "loss": 1.3493, "num_input_tokens_seen": 235266408, "step": 3552 }, { "epoch": 0.3324753123976225, "loss": 1.4741802215576172, "loss_ce": 0.007383421529084444, "loss_iou": 0.578125, "loss_num": 0.06201171875, "loss_xval": 1.46875, "num_input_tokens_seen": 235266408, "step": 3552 }, { "epoch": 0.3325689146815182, "grad_norm": 25.491535186767578, "learning_rate": 5e-05, "loss": 1.3788, "num_input_tokens_seen": 235330740, "step": 3553 }, { "epoch": 0.3325689146815182, "loss": 1.0573961734771729, "loss_ce": 0.004173534922301769, "loss_iou": 0.453125, "loss_num": 0.0294189453125, "loss_xval": 1.0546875, "num_input_tokens_seen": 235330740, "step": 3553 }, { "epoch": 0.33266251696541393, "grad_norm": 18.788177490234375, "learning_rate": 5e-05, "loss": 1.1589, "num_input_tokens_seen": 235396160, "step": 3554 }, { "epoch": 0.33266251696541393, "loss": 1.2340214252471924, "loss_ce": 0.006970584392547607, "loss_iou": 0.48828125, "loss_num": 0.0498046875, "loss_xval": 1.2265625, "num_input_tokens_seen": 235396160, "step": 3554 }, { "epoch": 0.3327561192493097, "grad_norm": 22.256479263305664, "learning_rate": 5e-05, "loss": 1.3504, "num_input_tokens_seen": 235461560, "step": 3555 }, { "epoch": 0.3327561192493097, "loss": 1.5194861888885498, "loss_ce": 0.004349506925791502, "loss_iou": 0.5703125, "loss_num": 0.07421875, "loss_xval": 1.515625, "num_input_tokens_seen": 235461560, "step": 3555 }, { "epoch": 0.3328497215332054, "grad_norm": 29.759389877319336, "learning_rate": 5e-05, "loss": 1.2314, "num_input_tokens_seen": 235527944, "step": 3556 }, { "epoch": 0.3328497215332054, "loss": 1.3256677389144897, "loss_ce": 0.007796605117619038, "loss_iou": 0.53515625, "loss_num": 0.050048828125, "loss_xval": 1.3203125, "num_input_tokens_seen": 235527944, "step": 3556 }, { "epoch": 0.3329433238171011, "grad_norm": 25.504209518432617, "learning_rate": 5e-05, "loss": 1.3557, "num_input_tokens_seen": 235593500, "step": 3557 }, { "epoch": 0.3329433238171011, "loss": 1.3016202449798584, "loss_ce": 0.005721805617213249, "loss_iou": 0.57421875, "loss_num": 0.029296875, "loss_xval": 1.296875, "num_input_tokens_seen": 235593500, "step": 3557 }, { "epoch": 0.3330369261009969, "grad_norm": 14.457237243652344, "learning_rate": 5e-05, "loss": 1.4084, "num_input_tokens_seen": 235659492, "step": 3558 }, { "epoch": 0.3330369261009969, "loss": 1.2264822721481323, "loss_ce": 0.007244064472615719, "loss_iou": 0.51953125, "loss_num": 0.035888671875, "loss_xval": 1.21875, "num_input_tokens_seen": 235659492, "step": 3558 }, { "epoch": 0.3331305283848926, "grad_norm": 29.743091583251953, "learning_rate": 5e-05, "loss": 1.0858, "num_input_tokens_seen": 235725756, "step": 3559 }, { "epoch": 0.3331305283848926, "loss": 0.768072247505188, "loss_ce": 0.007543675601482391, "loss_iou": 0.32421875, "loss_num": 0.0223388671875, "loss_xval": 0.76171875, "num_input_tokens_seen": 235725756, "step": 3559 }, { "epoch": 0.3332241306687883, "grad_norm": 19.524080276489258, "learning_rate": 5e-05, "loss": 1.1747, "num_input_tokens_seen": 235791336, "step": 3560 }, { "epoch": 0.3332241306687883, "loss": 1.2219960689544678, "loss_ce": 0.004222680814564228, "loss_iou": 0.51171875, "loss_num": 0.03955078125, "loss_xval": 1.21875, "num_input_tokens_seen": 235791336, "step": 3560 }, { "epoch": 0.333317732952684, "grad_norm": 18.797630310058594, "learning_rate": 5e-05, "loss": 1.3116, "num_input_tokens_seen": 235858364, "step": 3561 }, { "epoch": 0.333317732952684, "loss": 1.25065016746521, "loss_ce": 0.005533017683774233, "loss_iou": 0.53515625, "loss_num": 0.034912109375, "loss_xval": 1.2421875, "num_input_tokens_seen": 235858364, "step": 3561 }, { "epoch": 0.3334113352365798, "grad_norm": 15.471199035644531, "learning_rate": 5e-05, "loss": 1.1297, "num_input_tokens_seen": 235925348, "step": 3562 }, { "epoch": 0.3334113352365798, "loss": 0.9816075563430786, "loss_ce": 0.004068493843078613, "loss_iou": 0.435546875, "loss_num": 0.0211181640625, "loss_xval": 0.9765625, "num_input_tokens_seen": 235925348, "step": 3562 }, { "epoch": 0.3335049375204755, "grad_norm": 59.01436233520508, "learning_rate": 5e-05, "loss": 1.1061, "num_input_tokens_seen": 235989724, "step": 3563 }, { "epoch": 0.3335049375204755, "loss": 1.0877387523651123, "loss_ce": 0.01107865758240223, "loss_iou": 0.4296875, "loss_num": 0.04345703125, "loss_xval": 1.078125, "num_input_tokens_seen": 235989724, "step": 3563 }, { "epoch": 0.3335985398043712, "grad_norm": 19.76849365234375, "learning_rate": 5e-05, "loss": 1.3762, "num_input_tokens_seen": 236055836, "step": 3564 }, { "epoch": 0.3335985398043712, "loss": 1.3018929958343506, "loss_ce": 0.00428555253893137, "loss_iou": 0.4921875, "loss_num": 0.06298828125, "loss_xval": 1.296875, "num_input_tokens_seen": 236055836, "step": 3564 }, { "epoch": 0.333692142088267, "grad_norm": 42.39924240112305, "learning_rate": 5e-05, "loss": 1.3063, "num_input_tokens_seen": 236121944, "step": 3565 }, { "epoch": 0.333692142088267, "loss": 1.2431130409240723, "loss_ce": 0.009226251393556595, "loss_iou": 0.55859375, "loss_num": 0.023193359375, "loss_xval": 1.234375, "num_input_tokens_seen": 236121944, "step": 3565 }, { "epoch": 0.3337857443721627, "grad_norm": 53.350154876708984, "learning_rate": 5e-05, "loss": 1.5743, "num_input_tokens_seen": 236187768, "step": 3566 }, { "epoch": 0.3337857443721627, "loss": 1.5111441612243652, "loss_ce": 0.0028433436527848244, "loss_iou": 0.671875, "loss_num": 0.033203125, "loss_xval": 1.5078125, "num_input_tokens_seen": 236187768, "step": 3566 }, { "epoch": 0.3338793466560584, "grad_norm": 25.451549530029297, "learning_rate": 5e-05, "loss": 1.3672, "num_input_tokens_seen": 236254516, "step": 3567 }, { "epoch": 0.3338793466560584, "loss": 1.3634276390075684, "loss_ce": 0.0050291623920202255, "loss_iou": 0.5703125, "loss_num": 0.044677734375, "loss_xval": 1.359375, "num_input_tokens_seen": 236254516, "step": 3567 }, { "epoch": 0.33397294893995416, "grad_norm": 36.89323043823242, "learning_rate": 5e-05, "loss": 1.2016, "num_input_tokens_seen": 236320584, "step": 3568 }, { "epoch": 0.33397294893995416, "loss": 1.2182739973068237, "loss_ce": 0.009777914732694626, "loss_iou": 0.470703125, "loss_num": 0.0537109375, "loss_xval": 1.2109375, "num_input_tokens_seen": 236320584, "step": 3568 }, { "epoch": 0.3340665512238499, "grad_norm": 24.950538635253906, "learning_rate": 5e-05, "loss": 1.1386, "num_input_tokens_seen": 236387368, "step": 3569 }, { "epoch": 0.3340665512238499, "loss": 1.2318916320800781, "loss_ce": 0.00777044054120779, "loss_iou": 0.51171875, "loss_num": 0.041015625, "loss_xval": 1.2265625, "num_input_tokens_seen": 236387368, "step": 3569 }, { "epoch": 0.3341601535077456, "grad_norm": 28.292020797729492, "learning_rate": 5e-05, "loss": 1.4136, "num_input_tokens_seen": 236454328, "step": 3570 }, { "epoch": 0.3341601535077456, "loss": 1.3414260149002075, "loss_ce": 0.0069532981142401695, "loss_iou": 0.5234375, "loss_num": 0.0576171875, "loss_xval": 1.3359375, "num_input_tokens_seen": 236454328, "step": 3570 }, { "epoch": 0.3342537557916413, "grad_norm": 35.358177185058594, "learning_rate": 5e-05, "loss": 1.4104, "num_input_tokens_seen": 236519344, "step": 3571 }, { "epoch": 0.3342537557916413, "loss": 1.4984736442565918, "loss_ce": 0.004821363370865583, "loss_iou": 0.6328125, "loss_num": 0.044921875, "loss_xval": 1.4921875, "num_input_tokens_seen": 236519344, "step": 3571 }, { "epoch": 0.33434735807553706, "grad_norm": 34.11064910888672, "learning_rate": 5e-05, "loss": 1.4699, "num_input_tokens_seen": 236585556, "step": 3572 }, { "epoch": 0.33434735807553706, "loss": 1.519061803817749, "loss_ce": 0.00294864852912724, "loss_iou": 0.59375, "loss_num": 0.0654296875, "loss_xval": 1.515625, "num_input_tokens_seen": 236585556, "step": 3572 }, { "epoch": 0.33444096035943277, "grad_norm": 25.50047492980957, "learning_rate": 5e-05, "loss": 1.2505, "num_input_tokens_seen": 236651276, "step": 3573 }, { "epoch": 0.33444096035943277, "loss": 1.2307844161987305, "loss_ce": 0.006175089627504349, "loss_iou": 0.5234375, "loss_num": 0.034912109375, "loss_xval": 1.2265625, "num_input_tokens_seen": 236651276, "step": 3573 }, { "epoch": 0.3345345626433285, "grad_norm": 21.302656173706055, "learning_rate": 5e-05, "loss": 1.3939, "num_input_tokens_seen": 236717856, "step": 3574 }, { "epoch": 0.3345345626433285, "loss": 1.4176924228668213, "loss_ce": 0.005582941696047783, "loss_iou": 0.546875, "loss_num": 0.06396484375, "loss_xval": 1.4140625, "num_input_tokens_seen": 236717856, "step": 3574 }, { "epoch": 0.33462816492722425, "grad_norm": 69.69845581054688, "learning_rate": 5e-05, "loss": 1.1162, "num_input_tokens_seen": 236784504, "step": 3575 }, { "epoch": 0.33462816492722425, "loss": 1.1865599155426025, "loss_ce": 0.006384147331118584, "loss_iou": 0.50390625, "loss_num": 0.03515625, "loss_xval": 1.1796875, "num_input_tokens_seen": 236784504, "step": 3575 }, { "epoch": 0.33472176721111996, "grad_norm": 37.370361328125, "learning_rate": 5e-05, "loss": 1.2367, "num_input_tokens_seen": 236849824, "step": 3576 }, { "epoch": 0.33472176721111996, "loss": 1.2631953954696655, "loss_ce": 0.001476649777032435, "loss_iou": 0.52734375, "loss_num": 0.042236328125, "loss_xval": 1.265625, "num_input_tokens_seen": 236849824, "step": 3576 }, { "epoch": 0.33481536949501567, "grad_norm": 34.37902069091797, "learning_rate": 5e-05, "loss": 1.416, "num_input_tokens_seen": 236914832, "step": 3577 }, { "epoch": 0.33481536949501567, "loss": 1.3424489498138428, "loss_ce": 0.006023182068020105, "loss_iou": 0.54296875, "loss_num": 0.050048828125, "loss_xval": 1.3359375, "num_input_tokens_seen": 236914832, "step": 3577 }, { "epoch": 0.3349089717789114, "grad_norm": 25.483211517333984, "learning_rate": 5e-05, "loss": 1.3453, "num_input_tokens_seen": 236980828, "step": 3578 }, { "epoch": 0.3349089717789114, "loss": 1.3032865524291992, "loss_ce": 0.010806070640683174, "loss_iou": 0.52734375, "loss_num": 0.0478515625, "loss_xval": 1.2890625, "num_input_tokens_seen": 236980828, "step": 3578 }, { "epoch": 0.33500257406280715, "grad_norm": 16.853036880493164, "learning_rate": 5e-05, "loss": 1.4174, "num_input_tokens_seen": 237046216, "step": 3579 }, { "epoch": 0.33500257406280715, "loss": 1.5309834480285645, "loss_ce": 0.007545987144112587, "loss_iou": 0.625, "loss_num": 0.055419921875, "loss_xval": 1.5234375, "num_input_tokens_seen": 237046216, "step": 3579 }, { "epoch": 0.33509617634670286, "grad_norm": 20.91730499267578, "learning_rate": 5e-05, "loss": 1.2171, "num_input_tokens_seen": 237112608, "step": 3580 }, { "epoch": 0.33509617634670286, "loss": 1.0730029344558716, "loss_ce": 0.005498023703694344, "loss_iou": 0.453125, "loss_num": 0.031982421875, "loss_xval": 1.0703125, "num_input_tokens_seen": 237112608, "step": 3580 }, { "epoch": 0.33518977863059857, "grad_norm": 18.459970474243164, "learning_rate": 5e-05, "loss": 1.4292, "num_input_tokens_seen": 237178012, "step": 3581 }, { "epoch": 0.33518977863059857, "loss": 1.3331146240234375, "loss_ce": 0.0040131378918886185, "loss_iou": 0.55859375, "loss_num": 0.04296875, "loss_xval": 1.328125, "num_input_tokens_seen": 237178012, "step": 3581 }, { "epoch": 0.33528338091449433, "grad_norm": 27.11981773376465, "learning_rate": 5e-05, "loss": 1.4251, "num_input_tokens_seen": 237244296, "step": 3582 }, { "epoch": 0.33528338091449433, "loss": 1.3270272016525269, "loss_ce": 0.013062350451946259, "loss_iou": 0.5078125, "loss_num": 0.060791015625, "loss_xval": 1.3125, "num_input_tokens_seen": 237244296, "step": 3582 }, { "epoch": 0.33537698319839004, "grad_norm": 31.337844848632812, "learning_rate": 5e-05, "loss": 1.3725, "num_input_tokens_seen": 237308300, "step": 3583 }, { "epoch": 0.33537698319839004, "loss": 1.4365966320037842, "loss_ce": 0.009350612759590149, "loss_iou": 0.60546875, "loss_num": 0.043701171875, "loss_xval": 1.4296875, "num_input_tokens_seen": 237308300, "step": 3583 }, { "epoch": 0.33547058548228575, "grad_norm": 25.37666893005371, "learning_rate": 5e-05, "loss": 1.3299, "num_input_tokens_seen": 237373976, "step": 3584 }, { "epoch": 0.33547058548228575, "loss": 1.2955013513565063, "loss_ce": 0.00790372584015131, "loss_iou": 0.50390625, "loss_num": 0.056640625, "loss_xval": 1.2890625, "num_input_tokens_seen": 237373976, "step": 3584 }, { "epoch": 0.3355641877661815, "grad_norm": 19.894668579101562, "learning_rate": 5e-05, "loss": 1.1274, "num_input_tokens_seen": 237440044, "step": 3585 }, { "epoch": 0.3355641877661815, "loss": 1.1329090595245361, "loss_ce": 0.005574485287070274, "loss_iou": 0.470703125, "loss_num": 0.036865234375, "loss_xval": 1.125, "num_input_tokens_seen": 237440044, "step": 3585 }, { "epoch": 0.33565779005007723, "grad_norm": 31.73177146911621, "learning_rate": 5e-05, "loss": 1.5391, "num_input_tokens_seen": 237506328, "step": 3586 }, { "epoch": 0.33565779005007723, "loss": 1.4331083297729492, "loss_ce": 0.0053739119321107864, "loss_iou": 0.56640625, "loss_num": 0.058349609375, "loss_xval": 1.4296875, "num_input_tokens_seen": 237506328, "step": 3586 }, { "epoch": 0.33575139233397294, "grad_norm": 125.68413543701172, "learning_rate": 5e-05, "loss": 1.3737, "num_input_tokens_seen": 237573624, "step": 3587 }, { "epoch": 0.33575139233397294, "loss": 1.424976110458374, "loss_ce": 0.006519145332276821, "loss_iou": 0.60546875, "loss_num": 0.041259765625, "loss_xval": 1.421875, "num_input_tokens_seen": 237573624, "step": 3587 }, { "epoch": 0.33584499461786865, "grad_norm": 140.8628387451172, "learning_rate": 5e-05, "loss": 1.3593, "num_input_tokens_seen": 237640176, "step": 3588 }, { "epoch": 0.33584499461786865, "loss": 1.4855228662490845, "loss_ce": 0.007983766496181488, "loss_iou": 0.6328125, "loss_num": 0.041748046875, "loss_xval": 1.4765625, "num_input_tokens_seen": 237640176, "step": 3588 }, { "epoch": 0.3359385969017644, "grad_norm": 28.658716201782227, "learning_rate": 5e-05, "loss": 1.5553, "num_input_tokens_seen": 237706232, "step": 3589 }, { "epoch": 0.3359385969017644, "loss": 1.4980051517486572, "loss_ce": 0.00484110414981842, "loss_iou": 0.58203125, "loss_num": 0.06591796875, "loss_xval": 1.4921875, "num_input_tokens_seen": 237706232, "step": 3589 }, { "epoch": 0.33603219918566013, "grad_norm": 39.05143737792969, "learning_rate": 5e-05, "loss": 1.428, "num_input_tokens_seen": 237772796, "step": 3590 }, { "epoch": 0.33603219918566013, "loss": 1.4196934700012207, "loss_ce": 0.0056308722123503685, "loss_iou": 0.57421875, "loss_num": 0.052734375, "loss_xval": 1.4140625, "num_input_tokens_seen": 237772796, "step": 3590 }, { "epoch": 0.33612580146955584, "grad_norm": 31.278472900390625, "learning_rate": 5e-05, "loss": 1.3124, "num_input_tokens_seen": 237839628, "step": 3591 }, { "epoch": 0.33612580146955584, "loss": 1.3004555702209473, "loss_ce": 0.004068867303431034, "loss_iou": 0.5625, "loss_num": 0.033935546875, "loss_xval": 1.296875, "num_input_tokens_seen": 237839628, "step": 3591 }, { "epoch": 0.3362194037534516, "grad_norm": 22.228548049926758, "learning_rate": 5e-05, "loss": 1.5376, "num_input_tokens_seen": 237906620, "step": 3592 }, { "epoch": 0.3362194037534516, "loss": 1.4838358163833618, "loss_ce": 0.005320260301232338, "loss_iou": 0.6484375, "loss_num": 0.0361328125, "loss_xval": 1.4765625, "num_input_tokens_seen": 237906620, "step": 3592 }, { "epoch": 0.3363130060373473, "grad_norm": 30.330821990966797, "learning_rate": 5e-05, "loss": 1.3088, "num_input_tokens_seen": 237973000, "step": 3593 }, { "epoch": 0.3363130060373473, "loss": 1.115976095199585, "loss_ce": 0.008951054885983467, "loss_iou": 0.45703125, "loss_num": 0.03857421875, "loss_xval": 1.109375, "num_input_tokens_seen": 237973000, "step": 3593 }, { "epoch": 0.33640660832124303, "grad_norm": 21.70915985107422, "learning_rate": 5e-05, "loss": 1.0476, "num_input_tokens_seen": 238038964, "step": 3594 }, { "epoch": 0.33640660832124303, "loss": 1.1406142711639404, "loss_ce": 0.005604444537311792, "loss_iou": 0.4765625, "loss_num": 0.035888671875, "loss_xval": 1.1328125, "num_input_tokens_seen": 238038964, "step": 3594 }, { "epoch": 0.33650021060513874, "grad_norm": 57.008792877197266, "learning_rate": 5e-05, "loss": 1.3146, "num_input_tokens_seen": 238105212, "step": 3595 }, { "epoch": 0.33650021060513874, "loss": 1.3627915382385254, "loss_ce": 0.008299377746880054, "loss_iou": 0.5625, "loss_num": 0.04541015625, "loss_xval": 1.3515625, "num_input_tokens_seen": 238105212, "step": 3595 }, { "epoch": 0.3365938128890345, "grad_norm": 20.6258487701416, "learning_rate": 5e-05, "loss": 1.2524, "num_input_tokens_seen": 238171928, "step": 3596 }, { "epoch": 0.3365938128890345, "loss": 1.2348411083221436, "loss_ce": 0.007302008103579283, "loss_iou": 0.515625, "loss_num": 0.039794921875, "loss_xval": 1.2265625, "num_input_tokens_seen": 238171928, "step": 3596 }, { "epoch": 0.3366874151729302, "grad_norm": 22.634859085083008, "learning_rate": 5e-05, "loss": 1.3681, "num_input_tokens_seen": 238239024, "step": 3597 }, { "epoch": 0.3366874151729302, "loss": 1.4914647340774536, "loss_ce": 0.00611317390576005, "loss_iou": 0.609375, "loss_num": 0.0537109375, "loss_xval": 1.484375, "num_input_tokens_seen": 238239024, "step": 3597 }, { "epoch": 0.3367810174568259, "grad_norm": 23.674104690551758, "learning_rate": 5e-05, "loss": 1.2735, "num_input_tokens_seen": 238305176, "step": 3598 }, { "epoch": 0.3367810174568259, "loss": 1.3926182985305786, "loss_ce": 0.004923042841255665, "loss_iou": 0.56640625, "loss_num": 0.051513671875, "loss_xval": 1.390625, "num_input_tokens_seen": 238305176, "step": 3598 }, { "epoch": 0.3368746197407217, "grad_norm": 22.08316421508789, "learning_rate": 5e-05, "loss": 1.336, "num_input_tokens_seen": 238371956, "step": 3599 }, { "epoch": 0.3368746197407217, "loss": 1.3277714252471924, "loss_ce": 0.004529254510998726, "loss_iou": 0.53125, "loss_num": 0.0517578125, "loss_xval": 1.3203125, "num_input_tokens_seen": 238371956, "step": 3599 }, { "epoch": 0.3369682220246174, "grad_norm": 23.223655700683594, "learning_rate": 5e-05, "loss": 1.3301, "num_input_tokens_seen": 238438224, "step": 3600 }, { "epoch": 0.3369682220246174, "loss": 1.1885607242584229, "loss_ce": 0.01033812016248703, "loss_iou": 0.474609375, "loss_num": 0.04541015625, "loss_xval": 1.1796875, "num_input_tokens_seen": 238438224, "step": 3600 }, { "epoch": 0.3370618243085131, "grad_norm": 23.214136123657227, "learning_rate": 5e-05, "loss": 1.3091, "num_input_tokens_seen": 238504992, "step": 3601 }, { "epoch": 0.3370618243085131, "loss": 1.3695616722106934, "loss_ce": 0.0062803965993225574, "loss_iou": 0.54296875, "loss_num": 0.05615234375, "loss_xval": 1.359375, "num_input_tokens_seen": 238504992, "step": 3601 }, { "epoch": 0.3371554265924089, "grad_norm": 32.63926696777344, "learning_rate": 5e-05, "loss": 1.5392, "num_input_tokens_seen": 238570780, "step": 3602 }, { "epoch": 0.3371554265924089, "loss": 1.5200865268707275, "loss_ce": 0.004949766211211681, "loss_iou": 0.62890625, "loss_num": 0.051513671875, "loss_xval": 1.515625, "num_input_tokens_seen": 238570780, "step": 3602 }, { "epoch": 0.3372490288763046, "grad_norm": 40.30387496948242, "learning_rate": 5e-05, "loss": 1.6432, "num_input_tokens_seen": 238637600, "step": 3603 }, { "epoch": 0.3372490288763046, "loss": 1.5404798984527588, "loss_ce": 0.010206448845565319, "loss_iou": 0.6484375, "loss_num": 0.04638671875, "loss_xval": 1.53125, "num_input_tokens_seen": 238637600, "step": 3603 }, { "epoch": 0.3373426311602003, "grad_norm": 22.046640396118164, "learning_rate": 5e-05, "loss": 1.6809, "num_input_tokens_seen": 238703548, "step": 3604 }, { "epoch": 0.3373426311602003, "loss": 1.6503952741622925, "loss_ce": 0.003910881467163563, "loss_iou": 0.70703125, "loss_num": 0.046875, "loss_xval": 1.6484375, "num_input_tokens_seen": 238703548, "step": 3604 }, { "epoch": 0.337436233444096, "grad_norm": 10.93876838684082, "learning_rate": 5e-05, "loss": 1.301, "num_input_tokens_seen": 238770456, "step": 3605 }, { "epoch": 0.337436233444096, "loss": 1.4645330905914307, "loss_ce": 0.007501896936446428, "loss_iou": 0.58984375, "loss_num": 0.05517578125, "loss_xval": 1.453125, "num_input_tokens_seen": 238770456, "step": 3605 }, { "epoch": 0.3375298357279918, "grad_norm": 91.18254852294922, "learning_rate": 5e-05, "loss": 1.2765, "num_input_tokens_seen": 238837304, "step": 3606 }, { "epoch": 0.3375298357279918, "loss": 1.205566167831421, "loss_ce": 0.006347442977130413, "loss_iou": 0.478515625, "loss_num": 0.048583984375, "loss_xval": 1.203125, "num_input_tokens_seen": 238837304, "step": 3606 }, { "epoch": 0.3376234380118875, "grad_norm": 32.3939323425293, "learning_rate": 5e-05, "loss": 1.4283, "num_input_tokens_seen": 238904392, "step": 3607 }, { "epoch": 0.3376234380118875, "loss": 1.3657934665679932, "loss_ce": 0.005441934801638126, "loss_iou": 0.578125, "loss_num": 0.04150390625, "loss_xval": 1.359375, "num_input_tokens_seen": 238904392, "step": 3607 }, { "epoch": 0.3377170402957832, "grad_norm": 48.57048416137695, "learning_rate": 5e-05, "loss": 1.4798, "num_input_tokens_seen": 238969948, "step": 3608 }, { "epoch": 0.3377170402957832, "loss": 1.5589087009429932, "loss_ce": 0.005197668448090553, "loss_iou": 0.6328125, "loss_num": 0.05859375, "loss_xval": 1.5546875, "num_input_tokens_seen": 238969948, "step": 3608 }, { "epoch": 0.33781064257967897, "grad_norm": 25.70297622680664, "learning_rate": 5e-05, "loss": 1.3151, "num_input_tokens_seen": 239035680, "step": 3609 }, { "epoch": 0.33781064257967897, "loss": 1.46964430809021, "loss_ce": 0.006265445612370968, "loss_iou": 0.6015625, "loss_num": 0.0517578125, "loss_xval": 1.4609375, "num_input_tokens_seen": 239035680, "step": 3609 }, { "epoch": 0.3379042448635747, "grad_norm": 23.70758628845215, "learning_rate": 5e-05, "loss": 1.4996, "num_input_tokens_seen": 239102592, "step": 3610 }, { "epoch": 0.3379042448635747, "loss": 1.3945214748382568, "loss_ce": 0.0058496082201600075, "loss_iou": 0.55859375, "loss_num": 0.0546875, "loss_xval": 1.390625, "num_input_tokens_seen": 239102592, "step": 3610 }, { "epoch": 0.3379978471474704, "grad_norm": 31.09476089477539, "learning_rate": 5e-05, "loss": 1.3197, "num_input_tokens_seen": 239168452, "step": 3611 }, { "epoch": 0.3379978471474704, "loss": 1.6598644256591797, "loss_ce": 0.00898553803563118, "loss_iou": 0.671875, "loss_num": 0.061767578125, "loss_xval": 1.6484375, "num_input_tokens_seen": 239168452, "step": 3611 }, { "epoch": 0.3380914494313661, "grad_norm": 28.302125930786133, "learning_rate": 5e-05, "loss": 1.4817, "num_input_tokens_seen": 239235148, "step": 3612 }, { "epoch": 0.3380914494313661, "loss": 1.4059725999832153, "loss_ce": 0.004605429247021675, "loss_iou": 0.5859375, "loss_num": 0.0458984375, "loss_xval": 1.3984375, "num_input_tokens_seen": 239235148, "step": 3612 }, { "epoch": 0.33818505171526186, "grad_norm": 35.3673095703125, "learning_rate": 5e-05, "loss": 1.2683, "num_input_tokens_seen": 239300912, "step": 3613 }, { "epoch": 0.33818505171526186, "loss": 1.2753794193267822, "loss_ce": 0.0043832845985889435, "loss_iou": 0.515625, "loss_num": 0.04736328125, "loss_xval": 1.2734375, "num_input_tokens_seen": 239300912, "step": 3613 }, { "epoch": 0.3382786539991576, "grad_norm": 28.36713981628418, "learning_rate": 5e-05, "loss": 1.5699, "num_input_tokens_seen": 239367352, "step": 3614 }, { "epoch": 0.3382786539991576, "loss": 1.4123287200927734, "loss_ce": 0.007055333815515041, "loss_iou": 0.5625, "loss_num": 0.05615234375, "loss_xval": 1.40625, "num_input_tokens_seen": 239367352, "step": 3614 }, { "epoch": 0.3383722562830533, "grad_norm": 14.07665729522705, "learning_rate": 5e-05, "loss": 1.3271, "num_input_tokens_seen": 239433836, "step": 3615 }, { "epoch": 0.3383722562830533, "loss": 1.2867792844772339, "loss_ce": 0.0065058632753789425, "loss_iou": 0.486328125, "loss_num": 0.06103515625, "loss_xval": 1.28125, "num_input_tokens_seen": 239433836, "step": 3615 }, { "epoch": 0.33846585856694905, "grad_norm": 25.80449104309082, "learning_rate": 5e-05, "loss": 1.2404, "num_input_tokens_seen": 239500736, "step": 3616 }, { "epoch": 0.33846585856694905, "loss": 1.094995141029358, "loss_ce": 0.005639633163809776, "loss_iou": 0.494140625, "loss_num": 0.02001953125, "loss_xval": 1.0859375, "num_input_tokens_seen": 239500736, "step": 3616 }, { "epoch": 0.33855946085084476, "grad_norm": 38.415252685546875, "learning_rate": 5e-05, "loss": 1.2547, "num_input_tokens_seen": 239566704, "step": 3617 }, { "epoch": 0.33855946085084476, "loss": 1.4759812355041504, "loss_ce": 0.005766476970165968, "loss_iou": 0.6171875, "loss_num": 0.04638671875, "loss_xval": 1.46875, "num_input_tokens_seen": 239566704, "step": 3617 }, { "epoch": 0.3386530631347405, "grad_norm": 24.202896118164062, "learning_rate": 5e-05, "loss": 1.4835, "num_input_tokens_seen": 239633692, "step": 3618 }, { "epoch": 0.3386530631347405, "loss": 1.4221255779266357, "loss_ce": 0.0022036314476281404, "loss_iou": 0.625, "loss_num": 0.03466796875, "loss_xval": 1.421875, "num_input_tokens_seen": 239633692, "step": 3618 }, { "epoch": 0.33874666541863624, "grad_norm": 23.281497955322266, "learning_rate": 5e-05, "loss": 1.2984, "num_input_tokens_seen": 239699932, "step": 3619 }, { "epoch": 0.33874666541863624, "loss": 1.2486505508422852, "loss_ce": 0.004021669737994671, "loss_iou": 0.486328125, "loss_num": 0.054443359375, "loss_xval": 1.2421875, "num_input_tokens_seen": 239699932, "step": 3619 }, { "epoch": 0.33884026770253195, "grad_norm": 26.224349975585938, "learning_rate": 5e-05, "loss": 1.1741, "num_input_tokens_seen": 239766256, "step": 3620 }, { "epoch": 0.33884026770253195, "loss": 1.2733521461486816, "loss_ce": 0.006262177601456642, "loss_iou": 0.53125, "loss_num": 0.041748046875, "loss_xval": 1.265625, "num_input_tokens_seen": 239766256, "step": 3620 }, { "epoch": 0.33893386998642766, "grad_norm": 36.211002349853516, "learning_rate": 5e-05, "loss": 1.5066, "num_input_tokens_seen": 239833364, "step": 3621 }, { "epoch": 0.33893386998642766, "loss": 1.398667812347412, "loss_ce": 0.004136573988944292, "loss_iou": 0.55859375, "loss_num": 0.0546875, "loss_xval": 1.390625, "num_input_tokens_seen": 239833364, "step": 3621 }, { "epoch": 0.33902747227032337, "grad_norm": 19.57887840270996, "learning_rate": 5e-05, "loss": 1.5203, "num_input_tokens_seen": 239900024, "step": 3622 }, { "epoch": 0.33902747227032337, "loss": 1.3430957794189453, "loss_ce": 0.005205217748880386, "loss_iou": 0.5625, "loss_num": 0.04248046875, "loss_xval": 1.3359375, "num_input_tokens_seen": 239900024, "step": 3622 }, { "epoch": 0.33912107455421914, "grad_norm": 21.123933792114258, "learning_rate": 5e-05, "loss": 1.3294, "num_input_tokens_seen": 239966632, "step": 3623 }, { "epoch": 0.33912107455421914, "loss": 1.5254234075546265, "loss_ce": 0.0024741683155298233, "loss_iou": 0.6171875, "loss_num": 0.05859375, "loss_xval": 1.5234375, "num_input_tokens_seen": 239966632, "step": 3623 }, { "epoch": 0.33921467683811485, "grad_norm": 23.80128288269043, "learning_rate": 5e-05, "loss": 1.3695, "num_input_tokens_seen": 240033332, "step": 3624 }, { "epoch": 0.33921467683811485, "loss": 1.3639514446258545, "loss_ce": 0.006041194777935743, "loss_iou": 0.56640625, "loss_num": 0.044921875, "loss_xval": 1.359375, "num_input_tokens_seen": 240033332, "step": 3624 }, { "epoch": 0.33930827912201056, "grad_norm": 20.2900390625, "learning_rate": 5e-05, "loss": 1.3114, "num_input_tokens_seen": 240099780, "step": 3625 }, { "epoch": 0.33930827912201056, "loss": 1.3428840637207031, "loss_ce": 0.008411416783928871, "loss_iou": 0.51953125, "loss_num": 0.058349609375, "loss_xval": 1.3359375, "num_input_tokens_seen": 240099780, "step": 3625 }, { "epoch": 0.3394018814059063, "grad_norm": 47.4752082824707, "learning_rate": 5e-05, "loss": 1.1964, "num_input_tokens_seen": 240165780, "step": 3626 }, { "epoch": 0.3394018814059063, "loss": 1.3032163381576538, "loss_ce": 0.005853088106960058, "loss_iou": 0.546875, "loss_num": 0.041015625, "loss_xval": 1.296875, "num_input_tokens_seen": 240165780, "step": 3626 }, { "epoch": 0.33949548368980204, "grad_norm": 40.83226013183594, "learning_rate": 5e-05, "loss": 1.4323, "num_input_tokens_seen": 240232156, "step": 3627 }, { "epoch": 0.33949548368980204, "loss": 1.4385390281677246, "loss_ce": 0.004945231601595879, "loss_iou": 0.61328125, "loss_num": 0.041748046875, "loss_xval": 1.4375, "num_input_tokens_seen": 240232156, "step": 3627 }, { "epoch": 0.33958908597369775, "grad_norm": 28.174959182739258, "learning_rate": 5e-05, "loss": 1.2689, "num_input_tokens_seen": 240298552, "step": 3628 }, { "epoch": 0.33958908597369775, "loss": 1.3345706462860107, "loss_ce": 0.008398830890655518, "loss_iou": 0.5859375, "loss_num": 0.03125, "loss_xval": 1.328125, "num_input_tokens_seen": 240298552, "step": 3628 }, { "epoch": 0.3396826882575935, "grad_norm": 27.069580078125, "learning_rate": 5e-05, "loss": 1.4573, "num_input_tokens_seen": 240364116, "step": 3629 }, { "epoch": 0.3396826882575935, "loss": 1.4937152862548828, "loss_ce": 0.002504334319382906, "loss_iou": 0.62109375, "loss_num": 0.050537109375, "loss_xval": 1.4921875, "num_input_tokens_seen": 240364116, "step": 3629 }, { "epoch": 0.3397762905414892, "grad_norm": 28.511459350585938, "learning_rate": 5e-05, "loss": 1.3544, "num_input_tokens_seen": 240431792, "step": 3630 }, { "epoch": 0.3397762905414892, "loss": 1.3059096336364746, "loss_ce": 0.004151838831603527, "loss_iou": 0.5234375, "loss_num": 0.051513671875, "loss_xval": 1.3046875, "num_input_tokens_seen": 240431792, "step": 3630 }, { "epoch": 0.33986989282538493, "grad_norm": 26.77628517150879, "learning_rate": 5e-05, "loss": 1.4932, "num_input_tokens_seen": 240498624, "step": 3631 }, { "epoch": 0.33986989282538493, "loss": 1.4641647338867188, "loss_ce": 0.007621804252266884, "loss_iou": 0.59375, "loss_num": 0.053466796875, "loss_xval": 1.453125, "num_input_tokens_seen": 240498624, "step": 3631 }, { "epoch": 0.33996349510928064, "grad_norm": 24.17523765563965, "learning_rate": 5e-05, "loss": 1.345, "num_input_tokens_seen": 240565364, "step": 3632 }, { "epoch": 0.33996349510928064, "loss": 1.3431411981582642, "loss_ce": 0.009156826883554459, "loss_iou": 0.578125, "loss_num": 0.0361328125, "loss_xval": 1.3359375, "num_input_tokens_seen": 240565364, "step": 3632 }, { "epoch": 0.3400570973931764, "grad_norm": 22.559850692749023, "learning_rate": 5e-05, "loss": 1.3459, "num_input_tokens_seen": 240631580, "step": 3633 }, { "epoch": 0.3400570973931764, "loss": 1.2266149520874023, "loss_ce": 0.007864905521273613, "loss_iou": 0.48046875, "loss_num": 0.05126953125, "loss_xval": 1.21875, "num_input_tokens_seen": 240631580, "step": 3633 }, { "epoch": 0.3401506996770721, "grad_norm": 37.38408660888672, "learning_rate": 5e-05, "loss": 1.2722, "num_input_tokens_seen": 240697760, "step": 3634 }, { "epoch": 0.3401506996770721, "loss": 1.2712650299072266, "loss_ce": 0.005640106741338968, "loss_iou": 0.53125, "loss_num": 0.041259765625, "loss_xval": 1.265625, "num_input_tokens_seen": 240697760, "step": 3634 }, { "epoch": 0.34024430196096783, "grad_norm": 27.79058265686035, "learning_rate": 5e-05, "loss": 1.3727, "num_input_tokens_seen": 240763544, "step": 3635 }, { "epoch": 0.34024430196096783, "loss": 1.2752653360366821, "loss_ce": 0.006405411288142204, "loss_iou": 0.5078125, "loss_num": 0.05029296875, "loss_xval": 1.265625, "num_input_tokens_seen": 240763544, "step": 3635 }, { "epoch": 0.3403379042448636, "grad_norm": 19.13265037536621, "learning_rate": 5e-05, "loss": 1.1719, "num_input_tokens_seen": 240829732, "step": 3636 }, { "epoch": 0.3403379042448636, "loss": 1.0187313556671143, "loss_ce": 0.007500950712710619, "loss_iou": 0.396484375, "loss_num": 0.04345703125, "loss_xval": 1.0078125, "num_input_tokens_seen": 240829732, "step": 3636 }, { "epoch": 0.3404315065287593, "grad_norm": 18.1207218170166, "learning_rate": 5e-05, "loss": 1.3044, "num_input_tokens_seen": 240895856, "step": 3637 }, { "epoch": 0.3404315065287593, "loss": 1.4831037521362305, "loss_ce": 0.008006195537745953, "loss_iou": 0.6015625, "loss_num": 0.0546875, "loss_xval": 1.4765625, "num_input_tokens_seen": 240895856, "step": 3637 }, { "epoch": 0.340525108812655, "grad_norm": 17.262310028076172, "learning_rate": 5e-05, "loss": 1.0922, "num_input_tokens_seen": 240961812, "step": 3638 }, { "epoch": 0.340525108812655, "loss": 0.9637893438339233, "loss_ce": 0.006147765088826418, "loss_iou": 0.408203125, "loss_num": 0.0283203125, "loss_xval": 0.95703125, "num_input_tokens_seen": 240961812, "step": 3638 }, { "epoch": 0.34061871109655073, "grad_norm": 22.514741897583008, "learning_rate": 5e-05, "loss": 1.2908, "num_input_tokens_seen": 241026360, "step": 3639 }, { "epoch": 0.34061871109655073, "loss": 1.1033295392990112, "loss_ce": 0.004422076512128115, "loss_iou": 0.38671875, "loss_num": 0.0654296875, "loss_xval": 1.1015625, "num_input_tokens_seen": 241026360, "step": 3639 }, { "epoch": 0.3407123133804465, "grad_norm": 30.53278350830078, "learning_rate": 5e-05, "loss": 1.2993, "num_input_tokens_seen": 241093012, "step": 3640 }, { "epoch": 0.3407123133804465, "loss": 1.4144235849380493, "loss_ce": 0.0042672958225011826, "loss_iou": 0.55078125, "loss_num": 0.06201171875, "loss_xval": 1.40625, "num_input_tokens_seen": 241093012, "step": 3640 }, { "epoch": 0.3408059156643422, "grad_norm": 92.73212432861328, "learning_rate": 5e-05, "loss": 1.2742, "num_input_tokens_seen": 241159044, "step": 3641 }, { "epoch": 0.3408059156643422, "loss": 1.4088337421417236, "loss_ce": 0.006978189572691917, "loss_iou": 0.5859375, "loss_num": 0.04541015625, "loss_xval": 1.3984375, "num_input_tokens_seen": 241159044, "step": 3641 }, { "epoch": 0.3408995179482379, "grad_norm": 46.380210876464844, "learning_rate": 5e-05, "loss": 1.4636, "num_input_tokens_seen": 241225988, "step": 3642 }, { "epoch": 0.3408995179482379, "loss": 1.8216707706451416, "loss_ce": 0.006241173017770052, "loss_iou": 0.734375, "loss_num": 0.068359375, "loss_xval": 1.8125, "num_input_tokens_seen": 241225988, "step": 3642 }, { "epoch": 0.3409931202321337, "grad_norm": 32.543819427490234, "learning_rate": 5e-05, "loss": 1.1491, "num_input_tokens_seen": 241292644, "step": 3643 }, { "epoch": 0.3409931202321337, "loss": 1.2192710638046265, "loss_ce": 0.006868735421448946, "loss_iou": 0.490234375, "loss_num": 0.0458984375, "loss_xval": 1.2109375, "num_input_tokens_seen": 241292644, "step": 3643 }, { "epoch": 0.3410867225160294, "grad_norm": 89.16029357910156, "learning_rate": 5e-05, "loss": 1.8464, "num_input_tokens_seen": 241359000, "step": 3644 }, { "epoch": 0.3410867225160294, "loss": 1.9885772466659546, "loss_ce": 0.004202256910502911, "loss_iou": 0.76171875, "loss_num": 0.091796875, "loss_xval": 1.984375, "num_input_tokens_seen": 241359000, "step": 3644 }, { "epoch": 0.3411803247999251, "grad_norm": 14.490792274475098, "learning_rate": 5e-05, "loss": 1.3464, "num_input_tokens_seen": 241425052, "step": 3645 }, { "epoch": 0.3411803247999251, "loss": 1.4264261722564697, "loss_ce": 0.009434008970856667, "loss_iou": 0.5546875, "loss_num": 0.062255859375, "loss_xval": 1.4140625, "num_input_tokens_seen": 241425052, "step": 3645 }, { "epoch": 0.34127392708382087, "grad_norm": 20.671415328979492, "learning_rate": 5e-05, "loss": 1.0687, "num_input_tokens_seen": 241490788, "step": 3646 }, { "epoch": 0.34127392708382087, "loss": 0.9522599577903748, "loss_ce": 0.0050554098561406136, "loss_iou": 0.376953125, "loss_num": 0.038330078125, "loss_xval": 0.9453125, "num_input_tokens_seen": 241490788, "step": 3646 }, { "epoch": 0.3413675293677166, "grad_norm": 31.99077606201172, "learning_rate": 5e-05, "loss": 1.3249, "num_input_tokens_seen": 241557120, "step": 3647 }, { "epoch": 0.3413675293677166, "loss": 1.1573506593704224, "loss_ce": 0.007936619222164154, "loss_iou": 0.4609375, "loss_num": 0.04541015625, "loss_xval": 1.1484375, "num_input_tokens_seen": 241557120, "step": 3647 }, { "epoch": 0.3414611316516123, "grad_norm": 41.82197570800781, "learning_rate": 5e-05, "loss": 1.5867, "num_input_tokens_seen": 241624184, "step": 3648 }, { "epoch": 0.3414611316516123, "loss": 1.692509651184082, "loss_ce": 0.005986269097775221, "loss_iou": 0.7421875, "loss_num": 0.041015625, "loss_xval": 1.6875, "num_input_tokens_seen": 241624184, "step": 3648 }, { "epoch": 0.341554733935508, "grad_norm": 35.49443435668945, "learning_rate": 5e-05, "loss": 1.6186, "num_input_tokens_seen": 241690640, "step": 3649 }, { "epoch": 0.341554733935508, "loss": 1.6188666820526123, "loss_ce": 0.01281813345849514, "loss_iou": 0.6953125, "loss_num": 0.0419921875, "loss_xval": 1.609375, "num_input_tokens_seen": 241690640, "step": 3649 }, { "epoch": 0.34164833621940377, "grad_norm": 13.038924217224121, "learning_rate": 5e-05, "loss": 1.4472, "num_input_tokens_seen": 241756420, "step": 3650 }, { "epoch": 0.34164833621940377, "loss": 1.6883447170257568, "loss_ce": 0.005727503448724747, "loss_iou": 0.671875, "loss_num": 0.06689453125, "loss_xval": 1.6796875, "num_input_tokens_seen": 241756420, "step": 3650 }, { "epoch": 0.3417419385032995, "grad_norm": 25.877485275268555, "learning_rate": 5e-05, "loss": 1.2876, "num_input_tokens_seen": 241822736, "step": 3651 }, { "epoch": 0.3417419385032995, "loss": 1.2723809480667114, "loss_ce": 0.0033379076048731804, "loss_iou": 0.52734375, "loss_num": 0.042236328125, "loss_xval": 1.265625, "num_input_tokens_seen": 241822736, "step": 3651 }, { "epoch": 0.3418355407871952, "grad_norm": 23.364559173583984, "learning_rate": 5e-05, "loss": 1.4992, "num_input_tokens_seen": 241889040, "step": 3652 }, { "epoch": 0.3418355407871952, "loss": 1.4105358123779297, "loss_ce": 0.007703776005655527, "loss_iou": 0.59375, "loss_num": 0.04345703125, "loss_xval": 1.40625, "num_input_tokens_seen": 241889040, "step": 3652 }, { "epoch": 0.34192914307109096, "grad_norm": 36.80274963378906, "learning_rate": 5e-05, "loss": 1.2887, "num_input_tokens_seen": 241954656, "step": 3653 }, { "epoch": 0.34192914307109096, "loss": 1.308194637298584, "loss_ce": 0.007413332350552082, "loss_iou": 0.51953125, "loss_num": 0.052490234375, "loss_xval": 1.296875, "num_input_tokens_seen": 241954656, "step": 3653 }, { "epoch": 0.34202274535498667, "grad_norm": 87.49969482421875, "learning_rate": 5e-05, "loss": 1.5953, "num_input_tokens_seen": 242020348, "step": 3654 }, { "epoch": 0.34202274535498667, "loss": 1.5683457851409912, "loss_ce": 0.004869316704571247, "loss_iou": 0.66015625, "loss_num": 0.049072265625, "loss_xval": 1.5625, "num_input_tokens_seen": 242020348, "step": 3654 }, { "epoch": 0.3421163476388824, "grad_norm": 25.59351921081543, "learning_rate": 5e-05, "loss": 1.2982, "num_input_tokens_seen": 242087588, "step": 3655 }, { "epoch": 0.3421163476388824, "loss": 1.184438943862915, "loss_ce": 0.004263220354914665, "loss_iou": 0.49609375, "loss_num": 0.037841796875, "loss_xval": 1.1796875, "num_input_tokens_seen": 242087588, "step": 3655 }, { "epoch": 0.3422099499227781, "grad_norm": 21.046737670898438, "learning_rate": 5e-05, "loss": 1.4828, "num_input_tokens_seen": 242153320, "step": 3656 }, { "epoch": 0.3422099499227781, "loss": 1.4882756471633911, "loss_ce": 0.0029240858275443316, "loss_iou": 0.5859375, "loss_num": 0.0625, "loss_xval": 1.484375, "num_input_tokens_seen": 242153320, "step": 3656 }, { "epoch": 0.34230355220667386, "grad_norm": 46.43707275390625, "learning_rate": 5e-05, "loss": 1.2706, "num_input_tokens_seen": 242219364, "step": 3657 }, { "epoch": 0.34230355220667386, "loss": 0.9916031360626221, "loss_ce": 0.004908815957605839, "loss_iou": 0.396484375, "loss_num": 0.0390625, "loss_xval": 0.98828125, "num_input_tokens_seen": 242219364, "step": 3657 }, { "epoch": 0.34239715449056957, "grad_norm": 27.1610107421875, "learning_rate": 5e-05, "loss": 1.3813, "num_input_tokens_seen": 242285964, "step": 3658 }, { "epoch": 0.34239715449056957, "loss": 1.4464139938354492, "loss_ce": 0.009402213618159294, "loss_iou": 0.61328125, "loss_num": 0.04248046875, "loss_xval": 1.4375, "num_input_tokens_seen": 242285964, "step": 3658 }, { "epoch": 0.3424907567744653, "grad_norm": 13.915948867797852, "learning_rate": 5e-05, "loss": 1.0621, "num_input_tokens_seen": 242352340, "step": 3659 }, { "epoch": 0.3424907567744653, "loss": 1.1372686624526978, "loss_ce": 0.005920969881117344, "loss_iou": 0.46875, "loss_num": 0.038818359375, "loss_xval": 1.1328125, "num_input_tokens_seen": 242352340, "step": 3659 }, { "epoch": 0.34258435905836104, "grad_norm": 18.808433532714844, "learning_rate": 5e-05, "loss": 1.4462, "num_input_tokens_seen": 242419068, "step": 3660 }, { "epoch": 0.34258435905836104, "loss": 1.2946178913116455, "loss_ce": 0.007508429698646069, "loss_iou": 0.5234375, "loss_num": 0.04833984375, "loss_xval": 1.2890625, "num_input_tokens_seen": 242419068, "step": 3660 }, { "epoch": 0.34267796134225675, "grad_norm": 29.82279396057129, "learning_rate": 5e-05, "loss": 1.2986, "num_input_tokens_seen": 242485832, "step": 3661 }, { "epoch": 0.34267796134225675, "loss": 1.252081036567688, "loss_ce": 0.005010711029171944, "loss_iou": 0.53515625, "loss_num": 0.035400390625, "loss_xval": 1.25, "num_input_tokens_seen": 242485832, "step": 3661 }, { "epoch": 0.34277156362615246, "grad_norm": 28.067873001098633, "learning_rate": 5e-05, "loss": 1.5593, "num_input_tokens_seen": 242552972, "step": 3662 }, { "epoch": 0.34277156362615246, "loss": 1.4852641820907593, "loss_ce": 0.0038188102189451456, "loss_iou": 0.6015625, "loss_num": 0.054931640625, "loss_xval": 1.484375, "num_input_tokens_seen": 242552972, "step": 3662 }, { "epoch": 0.34286516591004823, "grad_norm": 36.84526824951172, "learning_rate": 5e-05, "loss": 1.3071, "num_input_tokens_seen": 242618832, "step": 3663 }, { "epoch": 0.34286516591004823, "loss": 1.2323442697525024, "loss_ce": 0.00431685009971261, "loss_iou": 0.46875, "loss_num": 0.05859375, "loss_xval": 1.2265625, "num_input_tokens_seen": 242618832, "step": 3663 }, { "epoch": 0.34295876819394394, "grad_norm": 17.441896438598633, "learning_rate": 5e-05, "loss": 1.4414, "num_input_tokens_seen": 242684976, "step": 3664 }, { "epoch": 0.34295876819394394, "loss": 1.5163402557373047, "loss_ce": 0.0038890610449016094, "loss_iou": 0.609375, "loss_num": 0.05810546875, "loss_xval": 1.515625, "num_input_tokens_seen": 242684976, "step": 3664 }, { "epoch": 0.34305237047783965, "grad_norm": 28.150863647460938, "learning_rate": 5e-05, "loss": 1.3833, "num_input_tokens_seen": 242751128, "step": 3665 }, { "epoch": 0.34305237047783965, "loss": 1.174673318862915, "loss_ce": 0.008169415406882763, "loss_iou": 0.48828125, "loss_num": 0.038330078125, "loss_xval": 1.1640625, "num_input_tokens_seen": 242751128, "step": 3665 }, { "epoch": 0.34314597276173536, "grad_norm": 29.392072677612305, "learning_rate": 5e-05, "loss": 1.4848, "num_input_tokens_seen": 242817620, "step": 3666 }, { "epoch": 0.34314597276173536, "loss": 1.4709686040878296, "loss_ce": 0.003195198019966483, "loss_iou": 0.55859375, "loss_num": 0.0703125, "loss_xval": 1.46875, "num_input_tokens_seen": 242817620, "step": 3666 }, { "epoch": 0.34323957504563113, "grad_norm": 23.619131088256836, "learning_rate": 5e-05, "loss": 1.4531, "num_input_tokens_seen": 242883984, "step": 3667 }, { "epoch": 0.34323957504563113, "loss": 1.5173687934875488, "loss_ce": 0.007603058125823736, "loss_iou": 0.62109375, "loss_num": 0.053955078125, "loss_xval": 1.5078125, "num_input_tokens_seen": 242883984, "step": 3667 }, { "epoch": 0.34333317732952684, "grad_norm": 118.72444152832031, "learning_rate": 5e-05, "loss": 1.589, "num_input_tokens_seen": 242951052, "step": 3668 }, { "epoch": 0.34333317732952684, "loss": 1.6637331247329712, "loss_ce": 0.005529981106519699, "loss_iou": 0.65625, "loss_num": 0.06884765625, "loss_xval": 1.65625, "num_input_tokens_seen": 242951052, "step": 3668 }, { "epoch": 0.34342677961342255, "grad_norm": 63.13013458251953, "learning_rate": 5e-05, "loss": 1.5791, "num_input_tokens_seen": 243017768, "step": 3669 }, { "epoch": 0.34342677961342255, "loss": 1.6526031494140625, "loss_ce": 0.009048559702932835, "loss_iou": 0.69140625, "loss_num": 0.052001953125, "loss_xval": 1.640625, "num_input_tokens_seen": 243017768, "step": 3669 }, { "epoch": 0.3435203818973183, "grad_norm": 25.43216323852539, "learning_rate": 5e-05, "loss": 1.1632, "num_input_tokens_seen": 243083536, "step": 3670 }, { "epoch": 0.3435203818973183, "loss": 1.3266937732696533, "loss_ce": 0.004916415549814701, "loss_iou": 0.5234375, "loss_num": 0.0546875, "loss_xval": 1.3203125, "num_input_tokens_seen": 243083536, "step": 3670 }, { "epoch": 0.343613984181214, "grad_norm": 63.88376235961914, "learning_rate": 5e-05, "loss": 1.5546, "num_input_tokens_seen": 243149552, "step": 3671 }, { "epoch": 0.343613984181214, "loss": 1.7038636207580566, "loss_ce": 0.006598077714443207, "loss_iou": 0.703125, "loss_num": 0.05859375, "loss_xval": 1.6953125, "num_input_tokens_seen": 243149552, "step": 3671 }, { "epoch": 0.34370758646510974, "grad_norm": 30.116592407226562, "learning_rate": 5e-05, "loss": 1.3339, "num_input_tokens_seen": 243215392, "step": 3672 }, { "epoch": 0.34370758646510974, "loss": 1.22581148147583, "loss_ce": 0.007061444688588381, "loss_iou": 0.50390625, "loss_num": 0.041748046875, "loss_xval": 1.21875, "num_input_tokens_seen": 243215392, "step": 3672 }, { "epoch": 0.34380118874900545, "grad_norm": 25.52834129333496, "learning_rate": 5e-05, "loss": 1.2373, "num_input_tokens_seen": 243282692, "step": 3673 }, { "epoch": 0.34380118874900545, "loss": 1.2232282161712646, "loss_ce": 0.004966398235410452, "loss_iou": 0.494140625, "loss_num": 0.04638671875, "loss_xval": 1.21875, "num_input_tokens_seen": 243282692, "step": 3673 }, { "epoch": 0.3438947910329012, "grad_norm": 35.763919830322266, "learning_rate": 5e-05, "loss": 1.5312, "num_input_tokens_seen": 243349264, "step": 3674 }, { "epoch": 0.3438947910329012, "loss": 1.647485613822937, "loss_ce": 0.006860640831291676, "loss_iou": 0.65234375, "loss_num": 0.06689453125, "loss_xval": 1.640625, "num_input_tokens_seen": 243349264, "step": 3674 }, { "epoch": 0.3439883933167969, "grad_norm": 30.420204162597656, "learning_rate": 5e-05, "loss": 1.5644, "num_input_tokens_seen": 243415048, "step": 3675 }, { "epoch": 0.3439883933167969, "loss": 1.7645032405853271, "loss_ce": 0.010596971958875656, "loss_iou": 0.71484375, "loss_num": 0.06494140625, "loss_xval": 1.75, "num_input_tokens_seen": 243415048, "step": 3675 }, { "epoch": 0.34408199560069264, "grad_norm": 18.679962158203125, "learning_rate": 5e-05, "loss": 1.4205, "num_input_tokens_seen": 243480824, "step": 3676 }, { "epoch": 0.34408199560069264, "loss": 1.3647938966751099, "loss_ce": 0.005418871063739061, "loss_iou": 0.52734375, "loss_num": 0.060546875, "loss_xval": 1.359375, "num_input_tokens_seen": 243480824, "step": 3676 }, { "epoch": 0.3441755978845884, "grad_norm": 39.677101135253906, "learning_rate": 5e-05, "loss": 1.3917, "num_input_tokens_seen": 243547072, "step": 3677 }, { "epoch": 0.3441755978845884, "loss": 1.3339930772781372, "loss_ce": 0.005135707091540098, "loss_iou": 0.50390625, "loss_num": 0.0654296875, "loss_xval": 1.328125, "num_input_tokens_seen": 243547072, "step": 3677 }, { "epoch": 0.3442692001684841, "grad_norm": 32.127342224121094, "learning_rate": 5e-05, "loss": 1.7895, "num_input_tokens_seen": 243613868, "step": 3678 }, { "epoch": 0.3442692001684841, "loss": 1.6922013759613037, "loss_ce": 0.006654429715126753, "loss_iou": 0.703125, "loss_num": 0.055419921875, "loss_xval": 1.6875, "num_input_tokens_seen": 243613868, "step": 3678 }, { "epoch": 0.3443628024523798, "grad_norm": 18.287097930908203, "learning_rate": 5e-05, "loss": 1.4034, "num_input_tokens_seen": 243680388, "step": 3679 }, { "epoch": 0.3443628024523798, "loss": 1.375736951828003, "loss_ce": 0.0046431622467935085, "loss_iou": 0.498046875, "loss_num": 0.07470703125, "loss_xval": 1.375, "num_input_tokens_seen": 243680388, "step": 3679 }, { "epoch": 0.3444564047362756, "grad_norm": 11.223101615905762, "learning_rate": 5e-05, "loss": 1.3468, "num_input_tokens_seen": 243746560, "step": 3680 }, { "epoch": 0.3444564047362756, "loss": 1.4266433715820312, "loss_ce": 0.0037918402813374996, "loss_iou": 0.498046875, "loss_num": 0.0849609375, "loss_xval": 1.421875, "num_input_tokens_seen": 243746560, "step": 3680 }, { "epoch": 0.3445500070201713, "grad_norm": 146.6287384033203, "learning_rate": 5e-05, "loss": 1.3631, "num_input_tokens_seen": 243812084, "step": 3681 }, { "epoch": 0.3445500070201713, "loss": 1.281965732574463, "loss_ce": 0.0055985464714467525, "loss_iou": 0.515625, "loss_num": 0.049072265625, "loss_xval": 1.2734375, "num_input_tokens_seen": 243812084, "step": 3681 }, { "epoch": 0.344643609304067, "grad_norm": 25.403162002563477, "learning_rate": 5e-05, "loss": 1.3416, "num_input_tokens_seen": 243878352, "step": 3682 }, { "epoch": 0.344643609304067, "loss": 1.209255576133728, "loss_ce": 0.01052510179579258, "loss_iou": 0.490234375, "loss_num": 0.04345703125, "loss_xval": 1.1953125, "num_input_tokens_seen": 243878352, "step": 3682 }, { "epoch": 0.3447372115879627, "grad_norm": 26.909154891967773, "learning_rate": 5e-05, "loss": 1.1672, "num_input_tokens_seen": 243944912, "step": 3683 }, { "epoch": 0.3447372115879627, "loss": 1.17277193069458, "loss_ce": 0.004314985126256943, "loss_iou": 0.4765625, "loss_num": 0.04296875, "loss_xval": 1.171875, "num_input_tokens_seen": 243944912, "step": 3683 }, { "epoch": 0.3448308138718585, "grad_norm": 41.67969512939453, "learning_rate": 5e-05, "loss": 1.7167, "num_input_tokens_seen": 244010936, "step": 3684 }, { "epoch": 0.3448308138718585, "loss": 1.7872049808502197, "loss_ce": 0.004978567361831665, "loss_iou": 0.69921875, "loss_num": 0.0771484375, "loss_xval": 1.78125, "num_input_tokens_seen": 244010936, "step": 3684 }, { "epoch": 0.3449244161557542, "grad_norm": 60.78505325317383, "learning_rate": 5e-05, "loss": 1.4952, "num_input_tokens_seen": 244078052, "step": 3685 }, { "epoch": 0.3449244161557542, "loss": 1.5322611331939697, "loss_ce": 0.00491733755916357, "loss_iou": 0.640625, "loss_num": 0.04931640625, "loss_xval": 1.53125, "num_input_tokens_seen": 244078052, "step": 3685 }, { "epoch": 0.3450180184396499, "grad_norm": 15.89416217803955, "learning_rate": 5e-05, "loss": 1.5434, "num_input_tokens_seen": 244144356, "step": 3686 }, { "epoch": 0.3450180184396499, "loss": 1.550642490386963, "loss_ce": 0.004743984434753656, "loss_iou": 0.55859375, "loss_num": 0.0849609375, "loss_xval": 1.546875, "num_input_tokens_seen": 244144356, "step": 3686 }, { "epoch": 0.3451116207235457, "grad_norm": 40.959346771240234, "learning_rate": 5e-05, "loss": 1.3323, "num_input_tokens_seen": 244211072, "step": 3687 }, { "epoch": 0.3451116207235457, "loss": 1.285388469696045, "loss_ce": 0.0075564635917544365, "loss_iou": 0.5234375, "loss_num": 0.04638671875, "loss_xval": 1.28125, "num_input_tokens_seen": 244211072, "step": 3687 }, { "epoch": 0.3452052230074414, "grad_norm": 25.80829620361328, "learning_rate": 5e-05, "loss": 1.3496, "num_input_tokens_seen": 244277672, "step": 3688 }, { "epoch": 0.3452052230074414, "loss": 1.2949481010437012, "loss_ce": 0.0039324769750237465, "loss_iou": 0.515625, "loss_num": 0.051513671875, "loss_xval": 1.2890625, "num_input_tokens_seen": 244277672, "step": 3688 }, { "epoch": 0.3452988252913371, "grad_norm": 26.702003479003906, "learning_rate": 5e-05, "loss": 1.4056, "num_input_tokens_seen": 244343808, "step": 3689 }, { "epoch": 0.3452988252913371, "loss": 1.5517548322677612, "loss_ce": 0.005368147045373917, "loss_iou": 0.6171875, "loss_num": 0.06298828125, "loss_xval": 1.546875, "num_input_tokens_seen": 244343808, "step": 3689 }, { "epoch": 0.34539242757523286, "grad_norm": 36.517913818359375, "learning_rate": 5e-05, "loss": 1.7433, "num_input_tokens_seen": 244410436, "step": 3690 }, { "epoch": 0.34539242757523286, "loss": 1.7622140645980835, "loss_ce": 0.00635466817766428, "loss_iou": 0.7109375, "loss_num": 0.06689453125, "loss_xval": 1.7578125, "num_input_tokens_seen": 244410436, "step": 3690 }, { "epoch": 0.3454860298591286, "grad_norm": 22.5693302154541, "learning_rate": 5e-05, "loss": 1.7756, "num_input_tokens_seen": 244477220, "step": 3691 }, { "epoch": 0.3454860298591286, "loss": 1.552940845489502, "loss_ce": 0.005089297890663147, "loss_iou": 0.66015625, "loss_num": 0.044677734375, "loss_xval": 1.546875, "num_input_tokens_seen": 244477220, "step": 3691 }, { "epoch": 0.3455796321430243, "grad_norm": 48.82529067993164, "learning_rate": 5e-05, "loss": 1.3523, "num_input_tokens_seen": 244543112, "step": 3692 }, { "epoch": 0.3455796321430243, "loss": 1.4222763776779175, "loss_ce": 0.003819405334070325, "loss_iou": 0.55078125, "loss_num": 0.0625, "loss_xval": 1.421875, "num_input_tokens_seen": 244543112, "step": 3692 }, { "epoch": 0.34567323442692, "grad_norm": 20.175817489624023, "learning_rate": 5e-05, "loss": 1.0903, "num_input_tokens_seen": 244609400, "step": 3693 }, { "epoch": 0.34567323442692, "loss": 1.069154977798462, "loss_ce": 0.007143290247768164, "loss_iou": 0.482421875, "loss_num": 0.019287109375, "loss_xval": 1.0625, "num_input_tokens_seen": 244609400, "step": 3693 }, { "epoch": 0.34576683671081576, "grad_norm": 20.41504669189453, "learning_rate": 5e-05, "loss": 1.3561, "num_input_tokens_seen": 244675696, "step": 3694 }, { "epoch": 0.34576683671081576, "loss": 1.2821524143218994, "loss_ce": 0.003832231042906642, "loss_iou": 0.5078125, "loss_num": 0.0517578125, "loss_xval": 1.28125, "num_input_tokens_seen": 244675696, "step": 3694 }, { "epoch": 0.34586043899471147, "grad_norm": 46.832210540771484, "learning_rate": 5e-05, "loss": 1.5007, "num_input_tokens_seen": 244741120, "step": 3695 }, { "epoch": 0.34586043899471147, "loss": 1.5052576065063477, "loss_ce": 0.00867561437189579, "loss_iou": 0.486328125, "loss_num": 0.10546875, "loss_xval": 1.5, "num_input_tokens_seen": 244741120, "step": 3695 }, { "epoch": 0.3459540412786072, "grad_norm": 24.776247024536133, "learning_rate": 5e-05, "loss": 1.4978, "num_input_tokens_seen": 244807848, "step": 3696 }, { "epoch": 0.3459540412786072, "loss": 1.6521716117858887, "loss_ce": 0.005199096165597439, "loss_iou": 0.68359375, "loss_num": 0.055908203125, "loss_xval": 1.6484375, "num_input_tokens_seen": 244807848, "step": 3696 }, { "epoch": 0.34604764356250295, "grad_norm": 19.971397399902344, "learning_rate": 5e-05, "loss": 1.3413, "num_input_tokens_seen": 244875628, "step": 3697 }, { "epoch": 0.34604764356250295, "loss": 1.4090442657470703, "loss_ce": 0.007677052635699511, "loss_iou": 0.51953125, "loss_num": 0.07275390625, "loss_xval": 1.3984375, "num_input_tokens_seen": 244875628, "step": 3697 }, { "epoch": 0.34614124584639866, "grad_norm": 35.29273986816406, "learning_rate": 5e-05, "loss": 1.4335, "num_input_tokens_seen": 244941636, "step": 3698 }, { "epoch": 0.34614124584639866, "loss": 1.6018643379211426, "loss_ce": 0.005184710957109928, "loss_iou": 0.640625, "loss_num": 0.0625, "loss_xval": 1.59375, "num_input_tokens_seen": 244941636, "step": 3698 }, { "epoch": 0.34623484813029437, "grad_norm": 35.30159378051758, "learning_rate": 5e-05, "loss": 1.3965, "num_input_tokens_seen": 245007276, "step": 3699 }, { "epoch": 0.34623484813029437, "loss": 1.3094220161437988, "loss_ce": 0.005711059086024761, "loss_iou": 0.5390625, "loss_num": 0.044189453125, "loss_xval": 1.3046875, "num_input_tokens_seen": 245007276, "step": 3699 }, { "epoch": 0.3463284504141901, "grad_norm": 20.247718811035156, "learning_rate": 5e-05, "loss": 1.554, "num_input_tokens_seen": 245073048, "step": 3700 }, { "epoch": 0.3463284504141901, "loss": 1.7371816635131836, "loss_ce": 0.012083975598216057, "loss_iou": 0.6484375, "loss_num": 0.08544921875, "loss_xval": 1.7265625, "num_input_tokens_seen": 245073048, "step": 3700 }, { "epoch": 0.34642205269808585, "grad_norm": 22.586915969848633, "learning_rate": 5e-05, "loss": 1.4198, "num_input_tokens_seen": 245139508, "step": 3701 }, { "epoch": 0.34642205269808585, "loss": 1.3193607330322266, "loss_ce": 0.008325554430484772, "loss_iou": 0.546875, "loss_num": 0.043212890625, "loss_xval": 1.3125, "num_input_tokens_seen": 245139508, "step": 3701 }, { "epoch": 0.34651565498198156, "grad_norm": 44.17936325073242, "learning_rate": 5e-05, "loss": 1.4207, "num_input_tokens_seen": 245205184, "step": 3702 }, { "epoch": 0.34651565498198156, "loss": 1.4175338745117188, "loss_ce": 0.006401140242815018, "loss_iou": 0.5703125, "loss_num": 0.0546875, "loss_xval": 1.4140625, "num_input_tokens_seen": 245205184, "step": 3702 }, { "epoch": 0.34660925726587727, "grad_norm": 20.859495162963867, "learning_rate": 5e-05, "loss": 1.479, "num_input_tokens_seen": 245271516, "step": 3703 }, { "epoch": 0.34660925726587727, "loss": 1.3903142213821411, "loss_ce": 0.005548547953367233, "loss_iou": 0.59765625, "loss_num": 0.03759765625, "loss_xval": 1.3828125, "num_input_tokens_seen": 245271516, "step": 3703 }, { "epoch": 0.34670285954977303, "grad_norm": 10.4099702835083, "learning_rate": 5e-05, "loss": 1.1748, "num_input_tokens_seen": 245337784, "step": 3704 }, { "epoch": 0.34670285954977303, "loss": 1.2686253786087036, "loss_ce": 0.0030003655701875687, "loss_iou": 0.53125, "loss_num": 0.04052734375, "loss_xval": 1.265625, "num_input_tokens_seen": 245337784, "step": 3704 }, { "epoch": 0.34679646183366875, "grad_norm": 23.97836685180664, "learning_rate": 5e-05, "loss": 1.3845, "num_input_tokens_seen": 245404520, "step": 3705 }, { "epoch": 0.34679646183366875, "loss": 1.360370397567749, "loss_ce": 0.007342985365539789, "loss_iou": 0.5625, "loss_num": 0.04541015625, "loss_xval": 1.3515625, "num_input_tokens_seen": 245404520, "step": 3705 }, { "epoch": 0.34689006411756446, "grad_norm": 20.272201538085938, "learning_rate": 5e-05, "loss": 1.2688, "num_input_tokens_seen": 245471380, "step": 3706 }, { "epoch": 0.34689006411756446, "loss": 1.2851166725158691, "loss_ce": 0.0053314100950956345, "loss_iou": 0.5546875, "loss_num": 0.033447265625, "loss_xval": 1.28125, "num_input_tokens_seen": 245471380, "step": 3706 }, { "epoch": 0.3469836664014602, "grad_norm": 25.2825870513916, "learning_rate": 5e-05, "loss": 1.4306, "num_input_tokens_seen": 245536760, "step": 3707 }, { "epoch": 0.3469836664014602, "loss": 1.44158935546875, "loss_ce": 0.010925253853201866, "loss_iou": 0.58203125, "loss_num": 0.052978515625, "loss_xval": 1.4296875, "num_input_tokens_seen": 245536760, "step": 3707 }, { "epoch": 0.34707726868535593, "grad_norm": 27.773530960083008, "learning_rate": 5e-05, "loss": 1.3239, "num_input_tokens_seen": 245603424, "step": 3708 }, { "epoch": 0.34707726868535593, "loss": 1.2831923961639404, "loss_ce": 0.008778421208262444, "loss_iou": 0.53515625, "loss_num": 0.04150390625, "loss_xval": 1.2734375, "num_input_tokens_seen": 245603424, "step": 3708 }, { "epoch": 0.34717087096925164, "grad_norm": 26.971498489379883, "learning_rate": 5e-05, "loss": 1.584, "num_input_tokens_seen": 245669460, "step": 3709 }, { "epoch": 0.34717087096925164, "loss": 1.676065444946289, "loss_ce": 0.003702209796756506, "loss_iou": 0.65625, "loss_num": 0.0712890625, "loss_xval": 1.671875, "num_input_tokens_seen": 245669460, "step": 3709 }, { "epoch": 0.34726447325314735, "grad_norm": 21.093570709228516, "learning_rate": 5e-05, "loss": 1.1342, "num_input_tokens_seen": 245735736, "step": 3710 }, { "epoch": 0.34726447325314735, "loss": 1.240703821182251, "loss_ce": 0.0073054637759923935, "loss_iou": 0.490234375, "loss_num": 0.050537109375, "loss_xval": 1.234375, "num_input_tokens_seen": 245735736, "step": 3710 }, { "epoch": 0.3473580755370431, "grad_norm": 29.543262481689453, "learning_rate": 5e-05, "loss": 1.1328, "num_input_tokens_seen": 245802308, "step": 3711 }, { "epoch": 0.3473580755370431, "loss": 1.1575729846954346, "loss_ce": 0.005229260306805372, "loss_iou": 0.49609375, "loss_num": 0.03271484375, "loss_xval": 1.15625, "num_input_tokens_seen": 245802308, "step": 3711 }, { "epoch": 0.34745167782093883, "grad_norm": 27.989383697509766, "learning_rate": 5e-05, "loss": 1.5556, "num_input_tokens_seen": 245867520, "step": 3712 }, { "epoch": 0.34745167782093883, "loss": 1.6030399799346924, "loss_ce": 0.0053838156163692474, "loss_iou": 0.62109375, "loss_num": 0.0712890625, "loss_xval": 1.59375, "num_input_tokens_seen": 245867520, "step": 3712 }, { "epoch": 0.34754528010483454, "grad_norm": 38.358299255371094, "learning_rate": 5e-05, "loss": 1.1329, "num_input_tokens_seen": 245934080, "step": 3713 }, { "epoch": 0.34754528010483454, "loss": 1.0540615320205688, "loss_ce": 0.0018154431600123644, "loss_iou": 0.46875, "loss_num": 0.02294921875, "loss_xval": 1.0546875, "num_input_tokens_seen": 245934080, "step": 3713 }, { "epoch": 0.3476388823887303, "grad_norm": 30.39254379272461, "learning_rate": 5e-05, "loss": 1.3977, "num_input_tokens_seen": 246000392, "step": 3714 }, { "epoch": 0.3476388823887303, "loss": 1.2956597805023193, "loss_ce": 0.004644259810447693, "loss_iou": 0.53125, "loss_num": 0.04541015625, "loss_xval": 1.2890625, "num_input_tokens_seen": 246000392, "step": 3714 }, { "epoch": 0.347732484672626, "grad_norm": 21.19542121887207, "learning_rate": 5e-05, "loss": 1.3307, "num_input_tokens_seen": 246066356, "step": 3715 }, { "epoch": 0.347732484672626, "loss": 1.5889265537261963, "loss_ce": 0.0029890104196965694, "loss_iou": 0.60546875, "loss_num": 0.07470703125, "loss_xval": 1.5859375, "num_input_tokens_seen": 246066356, "step": 3715 }, { "epoch": 0.34782608695652173, "grad_norm": 18.027751922607422, "learning_rate": 5e-05, "loss": 1.3525, "num_input_tokens_seen": 246132344, "step": 3716 }, { "epoch": 0.34782608695652173, "loss": 1.3334925174713135, "loss_ce": 0.007808832451701164, "loss_iou": 0.54296875, "loss_num": 0.048828125, "loss_xval": 1.328125, "num_input_tokens_seen": 246132344, "step": 3716 }, { "epoch": 0.34791968924041744, "grad_norm": 54.89906311035156, "learning_rate": 5e-05, "loss": 1.3624, "num_input_tokens_seen": 246199552, "step": 3717 }, { "epoch": 0.34791968924041744, "loss": 1.5785040855407715, "loss_ce": 0.005750169046223164, "loss_iou": 0.62109375, "loss_num": 0.0654296875, "loss_xval": 1.5703125, "num_input_tokens_seen": 246199552, "step": 3717 }, { "epoch": 0.3480132915243132, "grad_norm": 30.902700424194336, "learning_rate": 5e-05, "loss": 1.8924, "num_input_tokens_seen": 246265040, "step": 3718 }, { "epoch": 0.3480132915243132, "loss": 1.7432301044464111, "loss_ce": 0.005925359204411507, "loss_iou": 0.703125, "loss_num": 0.0673828125, "loss_xval": 1.734375, "num_input_tokens_seen": 246265040, "step": 3718 }, { "epoch": 0.3481068938082089, "grad_norm": 40.07839584350586, "learning_rate": 5e-05, "loss": 1.2299, "num_input_tokens_seen": 246330648, "step": 3719 }, { "epoch": 0.3481068938082089, "loss": 1.0856637954711914, "loss_ce": 0.0046091387048363686, "loss_iou": 0.435546875, "loss_num": 0.041748046875, "loss_xval": 1.078125, "num_input_tokens_seen": 246330648, "step": 3719 }, { "epoch": 0.3482004960921046, "grad_norm": 24.28256607055664, "learning_rate": 5e-05, "loss": 1.5795, "num_input_tokens_seen": 246397028, "step": 3720 }, { "epoch": 0.3482004960921046, "loss": 1.5197551250457764, "loss_ce": 0.006571510806679726, "loss_iou": 0.62890625, "loss_num": 0.051513671875, "loss_xval": 1.515625, "num_input_tokens_seen": 246397028, "step": 3720 }, { "epoch": 0.3482940983760004, "grad_norm": 24.5905818939209, "learning_rate": 5e-05, "loss": 1.4159, "num_input_tokens_seen": 246463484, "step": 3721 }, { "epoch": 0.3482940983760004, "loss": 1.5609971284866333, "loss_ce": 0.0053330278024077415, "loss_iou": 0.64453125, "loss_num": 0.053466796875, "loss_xval": 1.5546875, "num_input_tokens_seen": 246463484, "step": 3721 }, { "epoch": 0.3483877006598961, "grad_norm": 22.278881072998047, "learning_rate": 5e-05, "loss": 1.3557, "num_input_tokens_seen": 246529304, "step": 3722 }, { "epoch": 0.3483877006598961, "loss": 1.4038203954696655, "loss_ce": 0.005382911302149296, "loss_iou": 0.56640625, "loss_num": 0.052978515625, "loss_xval": 1.3984375, "num_input_tokens_seen": 246529304, "step": 3722 }, { "epoch": 0.3484813029437918, "grad_norm": 62.42136001586914, "learning_rate": 5e-05, "loss": 1.4577, "num_input_tokens_seen": 246595196, "step": 3723 }, { "epoch": 0.3484813029437918, "loss": 1.490161657333374, "loss_ce": 0.006274977698922157, "loss_iou": 0.58203125, "loss_num": 0.0634765625, "loss_xval": 1.484375, "num_input_tokens_seen": 246595196, "step": 3723 }, { "epoch": 0.3485749052276876, "grad_norm": 21.269954681396484, "learning_rate": 5e-05, "loss": 1.2955, "num_input_tokens_seen": 246660448, "step": 3724 }, { "epoch": 0.3485749052276876, "loss": 1.2220447063446045, "loss_ce": 0.005858277902007103, "loss_iou": 0.47265625, "loss_num": 0.054443359375, "loss_xval": 1.21875, "num_input_tokens_seen": 246660448, "step": 3724 }, { "epoch": 0.3486685075115833, "grad_norm": 20.63177490234375, "learning_rate": 5e-05, "loss": 1.1556, "num_input_tokens_seen": 246726176, "step": 3725 }, { "epoch": 0.3486685075115833, "loss": 0.934677004814148, "loss_ce": 0.0059660375118255615, "loss_iou": 0.3984375, "loss_num": 0.026123046875, "loss_xval": 0.9296875, "num_input_tokens_seen": 246726176, "step": 3725 }, { "epoch": 0.348762109795479, "grad_norm": 19.049415588378906, "learning_rate": 5e-05, "loss": 1.298, "num_input_tokens_seen": 246792180, "step": 3726 }, { "epoch": 0.348762109795479, "loss": 1.1643699407577515, "loss_ce": 0.008119983598589897, "loss_iou": 0.44140625, "loss_num": 0.054931640625, "loss_xval": 1.15625, "num_input_tokens_seen": 246792180, "step": 3726 }, { "epoch": 0.3488557120793747, "grad_norm": 27.139068603515625, "learning_rate": 5e-05, "loss": 1.4329, "num_input_tokens_seen": 246857796, "step": 3727 }, { "epoch": 0.3488557120793747, "loss": 1.4684312343597412, "loss_ce": 0.004808175843209028, "loss_iou": 0.58984375, "loss_num": 0.057373046875, "loss_xval": 1.4609375, "num_input_tokens_seen": 246857796, "step": 3727 }, { "epoch": 0.3489493143632705, "grad_norm": 25.338150024414062, "learning_rate": 5e-05, "loss": 1.3457, "num_input_tokens_seen": 246924268, "step": 3728 }, { "epoch": 0.3489493143632705, "loss": 1.1883865594863892, "loss_ce": 0.006257642991840839, "loss_iou": 0.515625, "loss_num": 0.0303955078125, "loss_xval": 1.1796875, "num_input_tokens_seen": 246924268, "step": 3728 }, { "epoch": 0.3490429166471662, "grad_norm": 27.340499877929688, "learning_rate": 5e-05, "loss": 1.3044, "num_input_tokens_seen": 246991528, "step": 3729 }, { "epoch": 0.3490429166471662, "loss": 1.4734461307525635, "loss_ce": 0.008602448739111423, "loss_iou": 0.5859375, "loss_num": 0.057861328125, "loss_xval": 1.46875, "num_input_tokens_seen": 246991528, "step": 3729 }, { "epoch": 0.3491365189310619, "grad_norm": 27.09548568725586, "learning_rate": 5e-05, "loss": 1.3197, "num_input_tokens_seen": 247058764, "step": 3730 }, { "epoch": 0.3491365189310619, "loss": 1.2067992687225342, "loss_ce": 0.007092206738889217, "loss_iou": 0.51171875, "loss_num": 0.03515625, "loss_xval": 1.203125, "num_input_tokens_seen": 247058764, "step": 3730 }, { "epoch": 0.34923012121495767, "grad_norm": 24.08138084411621, "learning_rate": 5e-05, "loss": 1.399, "num_input_tokens_seen": 247125216, "step": 3731 }, { "epoch": 0.34923012121495767, "loss": 1.2107813358306885, "loss_ce": 0.00716799683868885, "loss_iou": 0.490234375, "loss_num": 0.04443359375, "loss_xval": 1.203125, "num_input_tokens_seen": 247125216, "step": 3731 }, { "epoch": 0.3493237234988534, "grad_norm": 27.801660537719727, "learning_rate": 5e-05, "loss": 1.3599, "num_input_tokens_seen": 247190348, "step": 3732 }, { "epoch": 0.3493237234988534, "loss": 1.091841220855713, "loss_ce": 0.006147854961454868, "loss_iou": 0.42578125, "loss_num": 0.046875, "loss_xval": 1.0859375, "num_input_tokens_seen": 247190348, "step": 3732 }, { "epoch": 0.3494173257827491, "grad_norm": 16.304279327392578, "learning_rate": 5e-05, "loss": 1.448, "num_input_tokens_seen": 247255968, "step": 3733 }, { "epoch": 0.3494173257827491, "loss": 1.4395084381103516, "loss_ce": 0.005426528863608837, "loss_iou": 0.56640625, "loss_num": 0.0595703125, "loss_xval": 1.4375, "num_input_tokens_seen": 247255968, "step": 3733 }, { "epoch": 0.3495109280666448, "grad_norm": 18.43020248413086, "learning_rate": 5e-05, "loss": 1.2068, "num_input_tokens_seen": 247322588, "step": 3734 }, { "epoch": 0.3495109280666448, "loss": 1.3476488590240479, "loss_ce": 0.006828451529145241, "loss_iou": 0.56640625, "loss_num": 0.0419921875, "loss_xval": 1.34375, "num_input_tokens_seen": 247322588, "step": 3734 }, { "epoch": 0.34960453035054057, "grad_norm": 20.443666458129883, "learning_rate": 5e-05, "loss": 1.2719, "num_input_tokens_seen": 247388704, "step": 3735 }, { "epoch": 0.34960453035054057, "loss": 0.9696588516235352, "loss_ce": 0.008721387013792992, "loss_iou": 0.390625, "loss_num": 0.035888671875, "loss_xval": 0.9609375, "num_input_tokens_seen": 247388704, "step": 3735 }, { "epoch": 0.3496981326344363, "grad_norm": 32.19856643676758, "learning_rate": 5e-05, "loss": 1.581, "num_input_tokens_seen": 247454628, "step": 3736 }, { "epoch": 0.3496981326344363, "loss": 1.8044781684875488, "loss_ce": 0.010532870888710022, "loss_iou": 0.6796875, "loss_num": 0.0859375, "loss_xval": 1.796875, "num_input_tokens_seen": 247454628, "step": 3736 }, { "epoch": 0.349791734918332, "grad_norm": 43.47623062133789, "learning_rate": 5e-05, "loss": 1.4253, "num_input_tokens_seen": 247520040, "step": 3737 }, { "epoch": 0.349791734918332, "loss": 1.3010234832763672, "loss_ce": 0.005125178024172783, "loss_iou": 0.53125, "loss_num": 0.046630859375, "loss_xval": 1.296875, "num_input_tokens_seen": 247520040, "step": 3737 }, { "epoch": 0.34988533720222775, "grad_norm": 26.866897583007812, "learning_rate": 5e-05, "loss": 1.4757, "num_input_tokens_seen": 247585524, "step": 3738 }, { "epoch": 0.34988533720222775, "loss": 1.554808497428894, "loss_ce": 0.003050694242119789, "loss_iou": 0.64453125, "loss_num": 0.05322265625, "loss_xval": 1.5546875, "num_input_tokens_seen": 247585524, "step": 3738 }, { "epoch": 0.34997893948612346, "grad_norm": 34.61675262451172, "learning_rate": 5e-05, "loss": 1.1528, "num_input_tokens_seen": 247652392, "step": 3739 }, { "epoch": 0.34997893948612346, "loss": 0.9877606630325317, "loss_ce": 0.0077802203595638275, "loss_iou": 0.400390625, "loss_num": 0.03564453125, "loss_xval": 0.98046875, "num_input_tokens_seen": 247652392, "step": 3739 }, { "epoch": 0.3500725417700192, "grad_norm": 25.95360565185547, "learning_rate": 5e-05, "loss": 1.3645, "num_input_tokens_seen": 247718332, "step": 3740 }, { "epoch": 0.3500725417700192, "loss": 1.4898865222930908, "loss_ce": 0.005511471536010504, "loss_iou": 0.59765625, "loss_num": 0.057373046875, "loss_xval": 1.484375, "num_input_tokens_seen": 247718332, "step": 3740 }, { "epoch": 0.35016614405391494, "grad_norm": 19.525766372680664, "learning_rate": 5e-05, "loss": 1.3563, "num_input_tokens_seen": 247785160, "step": 3741 }, { "epoch": 0.35016614405391494, "loss": 1.343273639678955, "loss_ce": 0.005382982082664967, "loss_iou": 0.515625, "loss_num": 0.06103515625, "loss_xval": 1.3359375, "num_input_tokens_seen": 247785160, "step": 3741 }, { "epoch": 0.35025974633781065, "grad_norm": 25.111024856567383, "learning_rate": 5e-05, "loss": 1.5062, "num_input_tokens_seen": 247851032, "step": 3742 }, { "epoch": 0.35025974633781065, "loss": 1.586619257926941, "loss_ce": 0.008494309149682522, "loss_iou": 0.63671875, "loss_num": 0.061279296875, "loss_xval": 1.578125, "num_input_tokens_seen": 247851032, "step": 3742 }, { "epoch": 0.35035334862170636, "grad_norm": 34.43784713745117, "learning_rate": 5e-05, "loss": 1.8012, "num_input_tokens_seen": 247917356, "step": 3743 }, { "epoch": 0.35035334862170636, "loss": 2.063164710998535, "loss_ce": 0.00945371575653553, "loss_iou": 0.79296875, "loss_num": 0.09375, "loss_xval": 2.046875, "num_input_tokens_seen": 247917356, "step": 3743 }, { "epoch": 0.3504469509056021, "grad_norm": 22.911392211914062, "learning_rate": 5e-05, "loss": 1.5925, "num_input_tokens_seen": 247984328, "step": 3744 }, { "epoch": 0.3504469509056021, "loss": 1.6110068559646606, "loss_ce": 0.006514659151434898, "loss_iou": 0.671875, "loss_num": 0.052490234375, "loss_xval": 1.6015625, "num_input_tokens_seen": 247984328, "step": 3744 }, { "epoch": 0.35054055318949784, "grad_norm": 15.441649436950684, "learning_rate": 5e-05, "loss": 1.3077, "num_input_tokens_seen": 248050976, "step": 3745 }, { "epoch": 0.35054055318949784, "loss": 1.418440818786621, "loss_ce": 0.004866563715040684, "loss_iou": 0.59375, "loss_num": 0.045654296875, "loss_xval": 1.4140625, "num_input_tokens_seen": 248050976, "step": 3745 }, { "epoch": 0.35063415547339355, "grad_norm": 29.87442398071289, "learning_rate": 5e-05, "loss": 1.3034, "num_input_tokens_seen": 248117104, "step": 3746 }, { "epoch": 0.35063415547339355, "loss": 1.0477105379104614, "loss_ce": 0.0037651462480425835, "loss_iou": 0.42578125, "loss_num": 0.03857421875, "loss_xval": 1.046875, "num_input_tokens_seen": 248117104, "step": 3746 }, { "epoch": 0.35072775775728926, "grad_norm": 69.46481323242188, "learning_rate": 5e-05, "loss": 1.3402, "num_input_tokens_seen": 248182724, "step": 3747 }, { "epoch": 0.35072775775728926, "loss": 1.389664888381958, "loss_ce": 0.002946228487417102, "loss_iou": 0.5546875, "loss_num": 0.055419921875, "loss_xval": 1.390625, "num_input_tokens_seen": 248182724, "step": 3747 }, { "epoch": 0.350821360041185, "grad_norm": 37.87159729003906, "learning_rate": 5e-05, "loss": 1.5612, "num_input_tokens_seen": 248249392, "step": 3748 }, { "epoch": 0.350821360041185, "loss": 1.309908390045166, "loss_ce": 0.007174026221036911, "loss_iou": 0.58984375, "loss_num": 0.025146484375, "loss_xval": 1.3046875, "num_input_tokens_seen": 248249392, "step": 3748 }, { "epoch": 0.35091496232508074, "grad_norm": 14.190786361694336, "learning_rate": 5e-05, "loss": 1.4599, "num_input_tokens_seen": 248316060, "step": 3749 }, { "epoch": 0.35091496232508074, "loss": 1.6260002851486206, "loss_ce": 0.005883119069039822, "loss_iou": 0.59375, "loss_num": 0.0859375, "loss_xval": 1.6171875, "num_input_tokens_seen": 248316060, "step": 3749 }, { "epoch": 0.35100856460897645, "grad_norm": 17.839017868041992, "learning_rate": 5e-05, "loss": 1.1132, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35100856460897645, "eval_seeclick_CIoU": 0.1675967127084732, "eval_seeclick_GIoU": 0.18281087279319763, "eval_seeclick_IoU": 0.2830906957387924, "eval_seeclick_MAE_all": 0.17341461777687073, "eval_seeclick_MAE_h": 0.09848669171333313, "eval_seeclick_MAE_w": 0.12923284247517586, "eval_seeclick_MAE_x_boxes": 0.2607213482260704, "eval_seeclick_MAE_y_boxes": 0.12332096695899963, "eval_seeclick_NUM_probability": 0.9999575316905975, "eval_seeclick_inside_bbox": 0.38750000298023224, "eval_seeclick_loss": 2.4699790477752686, "eval_seeclick_loss_ce": 0.013367105275392532, "eval_seeclick_loss_iou": 0.8355712890625, "eval_seeclick_loss_num": 0.1644439697265625, "eval_seeclick_loss_xval": 2.49267578125, "eval_seeclick_runtime": 68.9088, "eval_seeclick_samples_per_second": 0.682, "eval_seeclick_steps_per_second": 0.029, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35100856460897645, "eval_icons_CIoU": -0.1398204267024994, "eval_icons_GIoU": 0.004120431374758482, "eval_icons_IoU": 0.06718008033931255, "eval_icons_MAE_all": 0.21449340134859085, "eval_icons_MAE_h": 0.2233874425292015, "eval_icons_MAE_w": 0.19416968524456024, "eval_icons_MAE_x_boxes": 0.14719068259000778, "eval_icons_MAE_y_boxes": 0.09624434635043144, "eval_icons_NUM_probability": 0.9998857080936432, "eval_icons_inside_bbox": 0.1336805559694767, "eval_icons_loss": 3.072598934173584, "eval_icons_loss_ce": 0.00033260000054724514, "eval_icons_loss_iou": 0.988037109375, "eval_icons_loss_num": 0.214599609375, "eval_icons_loss_xval": 3.0498046875, "eval_icons_runtime": 70.9789, "eval_icons_samples_per_second": 0.704, "eval_icons_steps_per_second": 0.028, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35100856460897645, "eval_screenspot_CIoU": -0.01793273165822029, "eval_screenspot_GIoU": 0.0311531195572267, "eval_screenspot_IoU": 0.16112064321835837, "eval_screenspot_MAE_all": 0.21388815840085348, "eval_screenspot_MAE_h": 0.18546390533447266, "eval_screenspot_MAE_w": 0.19503758351008096, "eval_screenspot_MAE_x_boxes": 0.27813207109769184, "eval_screenspot_MAE_y_boxes": 0.10236301769812901, "eval_screenspot_NUM_probability": 0.9999507665634155, "eval_screenspot_inside_bbox": 0.2912500003973643, "eval_screenspot_loss": 3.0125794410705566, "eval_screenspot_loss_ce": 0.011606858111917973, "eval_screenspot_loss_iou": 0.98046875, "eval_screenspot_loss_num": 0.22271728515625, "eval_screenspot_loss_xval": 3.0732421875, "eval_screenspot_runtime": 119.8004, "eval_screenspot_samples_per_second": 0.743, "eval_screenspot_steps_per_second": 0.025, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35100856460897645, "eval_compot_CIoU": -0.10679537430405617, "eval_compot_GIoU": -0.010890580713748932, "eval_compot_IoU": 0.09891067445278168, "eval_compot_MAE_all": 0.2528786063194275, "eval_compot_MAE_h": 0.236269049346447, "eval_compot_MAE_w": 0.34164193272590637, "eval_compot_MAE_x_boxes": 0.14958462119102478, "eval_compot_MAE_y_boxes": 0.08694823086261749, "eval_compot_NUM_probability": 0.9998629093170166, "eval_compot_inside_bbox": 0.1892361119389534, "eval_compot_loss": 3.3435428142547607, "eval_compot_loss_ce": 0.008334077894687653, "eval_compot_loss_iou": 1.054931640625, "eval_compot_loss_num": 0.265899658203125, "eval_compot_loss_xval": 3.4365234375, "eval_compot_runtime": 72.0474, "eval_compot_samples_per_second": 0.694, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35100856460897645, "eval_custom_ui_MAE_all": 0.17373321950435638, "eval_custom_ui_MAE_x": 0.20427002012729645, "eval_custom_ui_MAE_y": 0.14319640398025513, "eval_custom_ui_NUM_probability": 0.9999723434448242, "eval_custom_ui_loss": 1.0378717184066772, "eval_custom_ui_loss_ce": 0.21781423687934875, "eval_custom_ui_loss_num": 0.170257568359375, "eval_custom_ui_loss_xval": 0.850830078125, "eval_custom_ui_runtime": 53.808, "eval_custom_ui_samples_per_second": 0.929, "eval_custom_ui_steps_per_second": 0.037, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35100856460897645, "loss": 1.0204535722732544, "loss_ce": 0.2328559309244156, "loss_iou": 0.0, "loss_num": 0.1572265625, "loss_xval": 0.7890625, "num_input_tokens_seen": 248381900, "step": 3750 }, { "epoch": 0.35110216689287216, "grad_norm": 28.215551376342773, "learning_rate": 5e-05, "loss": 1.3363, "num_input_tokens_seen": 248448948, "step": 3751 }, { "epoch": 0.35110216689287216, "loss": 1.2733601331710815, "loss_ce": 0.008223352022469044, "loss_iou": 0.48828125, "loss_num": 0.05810546875, "loss_xval": 1.265625, "num_input_tokens_seen": 248448948, "step": 3751 }, { "epoch": 0.3511957691767679, "grad_norm": 59.027870178222656, "learning_rate": 5e-05, "loss": 1.3765, "num_input_tokens_seen": 248516020, "step": 3752 }, { "epoch": 0.3511957691767679, "loss": 1.4210923910140991, "loss_ce": 0.008982954546809196, "loss_iou": 0.59765625, "loss_num": 0.04296875, "loss_xval": 1.4140625, "num_input_tokens_seen": 248516020, "step": 3752 }, { "epoch": 0.35128937146066364, "grad_norm": 60.99232482910156, "learning_rate": 5e-05, "loss": 1.542, "num_input_tokens_seen": 248581520, "step": 3753 }, { "epoch": 0.35128937146066364, "loss": 1.66731858253479, "loss_ce": 0.0037444327026605606, "loss_iou": 0.65625, "loss_num": 0.06982421875, "loss_xval": 1.6640625, "num_input_tokens_seen": 248581520, "step": 3753 }, { "epoch": 0.35138297374455935, "grad_norm": 30.35032844543457, "learning_rate": 5e-05, "loss": 1.4345, "num_input_tokens_seen": 248648016, "step": 3754 }, { "epoch": 0.35138297374455935, "loss": 1.4922391176223755, "loss_ce": 0.002981330268085003, "loss_iou": 0.56640625, "loss_num": 0.07177734375, "loss_xval": 1.4921875, "num_input_tokens_seen": 248648016, "step": 3754 }, { "epoch": 0.3514765760284551, "grad_norm": 38.929969787597656, "learning_rate": 5e-05, "loss": 1.414, "num_input_tokens_seen": 248714624, "step": 3755 }, { "epoch": 0.3514765760284551, "loss": 1.5615707635879517, "loss_ce": 0.010789508000016212, "loss_iou": 0.6171875, "loss_num": 0.06396484375, "loss_xval": 1.546875, "num_input_tokens_seen": 248714624, "step": 3755 }, { "epoch": 0.3515701783123508, "grad_norm": 21.145841598510742, "learning_rate": 5e-05, "loss": 1.6155, "num_input_tokens_seen": 248780872, "step": 3756 }, { "epoch": 0.3515701783123508, "loss": 1.6254315376281738, "loss_ce": 0.008244091644883156, "loss_iou": 0.68359375, "loss_num": 0.05029296875, "loss_xval": 1.6171875, "num_input_tokens_seen": 248780872, "step": 3756 }, { "epoch": 0.35166378059624653, "grad_norm": 18.661279678344727, "learning_rate": 5e-05, "loss": 1.3232, "num_input_tokens_seen": 248847868, "step": 3757 }, { "epoch": 0.35166378059624653, "loss": 1.2235629558563232, "loss_ce": 0.0033480715937912464, "loss_iou": 0.51171875, "loss_num": 0.03857421875, "loss_xval": 1.21875, "num_input_tokens_seen": 248847868, "step": 3757 }, { "epoch": 0.3517573828801423, "grad_norm": 25.271100997924805, "learning_rate": 5e-05, "loss": 1.2365, "num_input_tokens_seen": 248914544, "step": 3758 }, { "epoch": 0.3517573828801423, "loss": 1.090301513671875, "loss_ce": 0.004364030435681343, "loss_iou": 0.486328125, "loss_num": 0.0224609375, "loss_xval": 1.0859375, "num_input_tokens_seen": 248914544, "step": 3758 }, { "epoch": 0.351850985164038, "grad_norm": 24.748661041259766, "learning_rate": 5e-05, "loss": 1.3842, "num_input_tokens_seen": 248981016, "step": 3759 }, { "epoch": 0.351850985164038, "loss": 1.4900479316711426, "loss_ce": 0.006649418734014034, "loss_iou": 0.60546875, "loss_num": 0.055419921875, "loss_xval": 1.484375, "num_input_tokens_seen": 248981016, "step": 3759 }, { "epoch": 0.3519445874479337, "grad_norm": 22.86206817626953, "learning_rate": 5e-05, "loss": 1.3847, "num_input_tokens_seen": 249048088, "step": 3760 }, { "epoch": 0.3519445874479337, "loss": 1.5376489162445068, "loss_ce": 0.007375461980700493, "loss_iou": 0.6328125, "loss_num": 0.052978515625, "loss_xval": 1.53125, "num_input_tokens_seen": 249048088, "step": 3760 }, { "epoch": 0.35203818973182943, "grad_norm": 34.3881721496582, "learning_rate": 5e-05, "loss": 1.2414, "num_input_tokens_seen": 249113680, "step": 3761 }, { "epoch": 0.35203818973182943, "loss": 1.1507374048233032, "loss_ce": 0.008891724050045013, "loss_iou": 0.458984375, "loss_num": 0.044677734375, "loss_xval": 1.140625, "num_input_tokens_seen": 249113680, "step": 3761 }, { "epoch": 0.3521317920157252, "grad_norm": 30.287397384643555, "learning_rate": 5e-05, "loss": 1.6355, "num_input_tokens_seen": 249179744, "step": 3762 }, { "epoch": 0.3521317920157252, "loss": 1.8284342288970947, "loss_ce": 0.003238811856135726, "loss_iou": 0.73046875, "loss_num": 0.0732421875, "loss_xval": 1.828125, "num_input_tokens_seen": 249179744, "step": 3762 }, { "epoch": 0.3522253942996209, "grad_norm": 21.533756256103516, "learning_rate": 5e-05, "loss": 1.3172, "num_input_tokens_seen": 249246988, "step": 3763 }, { "epoch": 0.3522253942996209, "loss": 1.338017463684082, "loss_ce": 0.004033091012388468, "loss_iou": 0.54296875, "loss_num": 0.04931640625, "loss_xval": 1.3359375, "num_input_tokens_seen": 249246988, "step": 3763 }, { "epoch": 0.3523189965835166, "grad_norm": 28.634693145751953, "learning_rate": 5e-05, "loss": 1.2778, "num_input_tokens_seen": 249312740, "step": 3764 }, { "epoch": 0.3523189965835166, "loss": 1.2920770645141602, "loss_ce": 0.006432513706386089, "loss_iou": 0.45703125, "loss_num": 0.07373046875, "loss_xval": 1.2890625, "num_input_tokens_seen": 249312740, "step": 3764 }, { "epoch": 0.3524125988674124, "grad_norm": 34.45309066772461, "learning_rate": 5e-05, "loss": 1.4943, "num_input_tokens_seen": 249379832, "step": 3765 }, { "epoch": 0.3524125988674124, "loss": 1.5002496242523193, "loss_ce": 0.005132400430738926, "loss_iou": 0.640625, "loss_num": 0.04345703125, "loss_xval": 1.4921875, "num_input_tokens_seen": 249379832, "step": 3765 }, { "epoch": 0.3525062011513081, "grad_norm": 52.35409164428711, "learning_rate": 5e-05, "loss": 1.0973, "num_input_tokens_seen": 249445796, "step": 3766 }, { "epoch": 0.3525062011513081, "loss": 0.9985809326171875, "loss_ce": 0.004928619600832462, "loss_iou": 0.40625, "loss_num": 0.035888671875, "loss_xval": 0.9921875, "num_input_tokens_seen": 249445796, "step": 3766 }, { "epoch": 0.3525998034352038, "grad_norm": 65.1169204711914, "learning_rate": 5e-05, "loss": 1.261, "num_input_tokens_seen": 249511664, "step": 3767 }, { "epoch": 0.3525998034352038, "loss": 1.1969112157821655, "loss_ce": 0.00550497230142355, "loss_iou": 0.466796875, "loss_num": 0.052001953125, "loss_xval": 1.1875, "num_input_tokens_seen": 249511664, "step": 3767 }, { "epoch": 0.3526934057190996, "grad_norm": 31.02557945251465, "learning_rate": 5e-05, "loss": 1.4004, "num_input_tokens_seen": 249577608, "step": 3768 }, { "epoch": 0.3526934057190996, "loss": 1.3097755908966064, "loss_ce": 0.0036232187412679195, "loss_iou": 0.52734375, "loss_num": 0.0498046875, "loss_xval": 1.3046875, "num_input_tokens_seen": 249577608, "step": 3768 }, { "epoch": 0.3527870080029953, "grad_norm": 26.30963897705078, "learning_rate": 5e-05, "loss": 1.4664, "num_input_tokens_seen": 249642692, "step": 3769 }, { "epoch": 0.3527870080029953, "loss": 1.4610590934753418, "loss_ce": 0.008544469252228737, "loss_iou": 0.5625, "loss_num": 0.06494140625, "loss_xval": 1.453125, "num_input_tokens_seen": 249642692, "step": 3769 }, { "epoch": 0.352880610286891, "grad_norm": 12.026778221130371, "learning_rate": 5e-05, "loss": 1.1388, "num_input_tokens_seen": 249708692, "step": 3770 }, { "epoch": 0.352880610286891, "loss": 1.216174602508545, "loss_ce": 0.0038942997343838215, "loss_iou": 0.482421875, "loss_num": 0.048828125, "loss_xval": 1.2109375, "num_input_tokens_seen": 249708692, "step": 3770 }, { "epoch": 0.3529742125707867, "grad_norm": 20.33180046081543, "learning_rate": 5e-05, "loss": 1.1762, "num_input_tokens_seen": 249775016, "step": 3771 }, { "epoch": 0.3529742125707867, "loss": 1.1421606540679932, "loss_ce": 0.005075741559267044, "loss_iou": 0.478515625, "loss_num": 0.035888671875, "loss_xval": 1.140625, "num_input_tokens_seen": 249775016, "step": 3771 }, { "epoch": 0.35306781485468247, "grad_norm": 22.935237884521484, "learning_rate": 5e-05, "loss": 1.3296, "num_input_tokens_seen": 249841488, "step": 3772 }, { "epoch": 0.35306781485468247, "loss": 1.2075347900390625, "loss_ce": 0.004898104816675186, "loss_iou": 0.486328125, "loss_num": 0.045654296875, "loss_xval": 1.203125, "num_input_tokens_seen": 249841488, "step": 3772 }, { "epoch": 0.3531614171385782, "grad_norm": 29.02605438232422, "learning_rate": 5e-05, "loss": 1.355, "num_input_tokens_seen": 249908660, "step": 3773 }, { "epoch": 0.3531614171385782, "loss": 1.3004937171936035, "loss_ce": 0.0036187791265547276, "loss_iou": 0.515625, "loss_num": 0.052978515625, "loss_xval": 1.296875, "num_input_tokens_seen": 249908660, "step": 3773 }, { "epoch": 0.3532550194224739, "grad_norm": 38.25444030761719, "learning_rate": 5e-05, "loss": 1.466, "num_input_tokens_seen": 249973572, "step": 3774 }, { "epoch": 0.3532550194224739, "loss": 1.4684109687805176, "loss_ce": 0.008938336744904518, "loss_iou": 0.59375, "loss_num": 0.05517578125, "loss_xval": 1.4609375, "num_input_tokens_seen": 249973572, "step": 3774 }, { "epoch": 0.35334862170636966, "grad_norm": 21.02090835571289, "learning_rate": 5e-05, "loss": 1.456, "num_input_tokens_seen": 250039832, "step": 3775 }, { "epoch": 0.35334862170636966, "loss": 1.5842968225479126, "loss_ce": 0.011298801749944687, "loss_iou": 0.60546875, "loss_num": 0.072265625, "loss_xval": 1.5703125, "num_input_tokens_seen": 250039832, "step": 3775 }, { "epoch": 0.35344222399026537, "grad_norm": 15.860291481018066, "learning_rate": 5e-05, "loss": 1.3043, "num_input_tokens_seen": 250105992, "step": 3776 }, { "epoch": 0.35344222399026537, "loss": 1.246699571609497, "loss_ce": 0.005000336095690727, "loss_iou": 0.51953125, "loss_num": 0.04150390625, "loss_xval": 1.2421875, "num_input_tokens_seen": 250105992, "step": 3776 }, { "epoch": 0.3535358262741611, "grad_norm": 28.231550216674805, "learning_rate": 5e-05, "loss": 1.3316, "num_input_tokens_seen": 250172276, "step": 3777 }, { "epoch": 0.3535358262741611, "loss": 1.4352359771728516, "loss_ce": 0.004571851342916489, "loss_iou": 0.57421875, "loss_num": 0.056640625, "loss_xval": 1.4296875, "num_input_tokens_seen": 250172276, "step": 3777 }, { "epoch": 0.3536294285580568, "grad_norm": 20.316654205322266, "learning_rate": 5e-05, "loss": 1.1808, "num_input_tokens_seen": 250239324, "step": 3778 }, { "epoch": 0.3536294285580568, "loss": 1.1808514595031738, "loss_ce": 0.005314263980835676, "loss_iou": 0.482421875, "loss_num": 0.0419921875, "loss_xval": 1.171875, "num_input_tokens_seen": 250239324, "step": 3778 }, { "epoch": 0.35372303084195256, "grad_norm": 26.929428100585938, "learning_rate": 5e-05, "loss": 1.3109, "num_input_tokens_seen": 250305160, "step": 3779 }, { "epoch": 0.35372303084195256, "loss": 1.23021399974823, "loss_ce": 0.006092979572713375, "loss_iou": 0.5, "loss_num": 0.04443359375, "loss_xval": 1.2265625, "num_input_tokens_seen": 250305160, "step": 3779 }, { "epoch": 0.35381663312584827, "grad_norm": 39.06914138793945, "learning_rate": 5e-05, "loss": 1.4644, "num_input_tokens_seen": 250371812, "step": 3780 }, { "epoch": 0.35381663312584827, "loss": 1.6360867023468018, "loss_ce": 0.007668718695640564, "loss_iou": 0.65234375, "loss_num": 0.064453125, "loss_xval": 1.625, "num_input_tokens_seen": 250371812, "step": 3780 }, { "epoch": 0.353910235409744, "grad_norm": 42.50438690185547, "learning_rate": 5e-05, "loss": 1.5819, "num_input_tokens_seen": 250439176, "step": 3781 }, { "epoch": 0.353910235409744, "loss": 1.6286696195602417, "loss_ce": 0.007575837429612875, "loss_iou": 0.671875, "loss_num": 0.0556640625, "loss_xval": 1.625, "num_input_tokens_seen": 250439176, "step": 3781 }, { "epoch": 0.35400383769363974, "grad_norm": 140.99368286132812, "learning_rate": 5e-05, "loss": 1.1349, "num_input_tokens_seen": 250504520, "step": 3782 }, { "epoch": 0.35400383769363974, "loss": 1.1769592761993408, "loss_ce": 0.0036194026470184326, "loss_iou": 0.48046875, "loss_num": 0.042724609375, "loss_xval": 1.171875, "num_input_tokens_seen": 250504520, "step": 3782 }, { "epoch": 0.35409743997753546, "grad_norm": 23.02762794494629, "learning_rate": 5e-05, "loss": 1.5267, "num_input_tokens_seen": 250570640, "step": 3783 }, { "epoch": 0.35409743997753546, "loss": 1.5634300708770752, "loss_ce": 0.011184044182300568, "loss_iou": 0.58984375, "loss_num": 0.07421875, "loss_xval": 1.5546875, "num_input_tokens_seen": 250570640, "step": 3783 }, { "epoch": 0.35419104226143117, "grad_norm": 18.30438232421875, "learning_rate": 5e-05, "loss": 1.1609, "num_input_tokens_seen": 250635644, "step": 3784 }, { "epoch": 0.35419104226143117, "loss": 1.2793399095535278, "loss_ce": 0.004925830289721489, "loss_iou": 0.55078125, "loss_num": 0.03515625, "loss_xval": 1.2734375, "num_input_tokens_seen": 250635644, "step": 3784 }, { "epoch": 0.35428464454532693, "grad_norm": 30.881805419921875, "learning_rate": 5e-05, "loss": 1.3808, "num_input_tokens_seen": 250701120, "step": 3785 }, { "epoch": 0.35428464454532693, "loss": 1.1811391115188599, "loss_ce": 0.005846105050295591, "loss_iou": 0.48828125, "loss_num": 0.03955078125, "loss_xval": 1.171875, "num_input_tokens_seen": 250701120, "step": 3785 }, { "epoch": 0.35437824682922264, "grad_norm": 32.559993743896484, "learning_rate": 5e-05, "loss": 1.2272, "num_input_tokens_seen": 250767312, "step": 3786 }, { "epoch": 0.35437824682922264, "loss": 1.3315820693969727, "loss_ce": 0.0073633925057947636, "loss_iou": 0.5703125, "loss_num": 0.03662109375, "loss_xval": 1.328125, "num_input_tokens_seen": 250767312, "step": 3786 }, { "epoch": 0.35447184911311835, "grad_norm": 26.60196876525879, "learning_rate": 5e-05, "loss": 1.717, "num_input_tokens_seen": 250832468, "step": 3787 }, { "epoch": 0.35447184911311835, "loss": 1.5778484344482422, "loss_ce": 0.008512407541275024, "loss_iou": 0.58203125, "loss_num": 0.08154296875, "loss_xval": 1.5703125, "num_input_tokens_seen": 250832468, "step": 3787 }, { "epoch": 0.35456545139701406, "grad_norm": 22.825754165649414, "learning_rate": 5e-05, "loss": 1.2415, "num_input_tokens_seen": 250899164, "step": 3788 }, { "epoch": 0.35456545139701406, "loss": 1.3567886352539062, "loss_ce": 0.006202704273164272, "loss_iou": 0.546875, "loss_num": 0.052001953125, "loss_xval": 1.3515625, "num_input_tokens_seen": 250899164, "step": 3788 }, { "epoch": 0.35465905368090983, "grad_norm": 28.716413497924805, "learning_rate": 5e-05, "loss": 1.3179, "num_input_tokens_seen": 250964908, "step": 3789 }, { "epoch": 0.35465905368090983, "loss": 1.4335330724716187, "loss_ce": 0.004822130315005779, "loss_iou": 0.5234375, "loss_num": 0.076171875, "loss_xval": 1.4296875, "num_input_tokens_seen": 250964908, "step": 3789 }, { "epoch": 0.35475265596480554, "grad_norm": 35.66719055175781, "learning_rate": 5e-05, "loss": 1.7455, "num_input_tokens_seen": 251032036, "step": 3790 }, { "epoch": 0.35475265596480554, "loss": 1.7090213298797607, "loss_ce": 0.004431548062711954, "loss_iou": 0.63671875, "loss_num": 0.0859375, "loss_xval": 1.703125, "num_input_tokens_seen": 251032036, "step": 3790 }, { "epoch": 0.35484625824870125, "grad_norm": 60.46017074584961, "learning_rate": 5e-05, "loss": 1.7746, "num_input_tokens_seen": 251098572, "step": 3791 }, { "epoch": 0.35484625824870125, "loss": 2.071401357650757, "loss_ce": 0.006948351860046387, "loss_iou": 0.79296875, "loss_num": 0.0966796875, "loss_xval": 2.0625, "num_input_tokens_seen": 251098572, "step": 3791 }, { "epoch": 0.354939860532597, "grad_norm": 17.12310028076172, "learning_rate": 5e-05, "loss": 1.2025, "num_input_tokens_seen": 251163432, "step": 3792 }, { "epoch": 0.354939860532597, "loss": 1.31663978099823, "loss_ce": 0.008046085014939308, "loss_iou": 0.46484375, "loss_num": 0.0751953125, "loss_xval": 1.3125, "num_input_tokens_seen": 251163432, "step": 3792 }, { "epoch": 0.35503346281649273, "grad_norm": 87.58589172363281, "learning_rate": 5e-05, "loss": 1.3667, "num_input_tokens_seen": 251229820, "step": 3793 }, { "epoch": 0.35503346281649273, "loss": 1.3160040378570557, "loss_ce": 0.0039922627620399, "loss_iou": 0.55078125, "loss_num": 0.04150390625, "loss_xval": 1.3125, "num_input_tokens_seen": 251229820, "step": 3793 }, { "epoch": 0.35512706510038844, "grad_norm": 20.974403381347656, "learning_rate": 5e-05, "loss": 1.3826, "num_input_tokens_seen": 251295944, "step": 3794 }, { "epoch": 0.35512706510038844, "loss": 1.4396576881408691, "loss_ce": 0.006552317179739475, "loss_iou": 0.5625, "loss_num": 0.0625, "loss_xval": 1.4296875, "num_input_tokens_seen": 251295944, "step": 3794 }, { "epoch": 0.35522066738428415, "grad_norm": 25.305522918701172, "learning_rate": 5e-05, "loss": 1.295, "num_input_tokens_seen": 251361376, "step": 3795 }, { "epoch": 0.35522066738428415, "loss": 1.5007115602493286, "loss_ce": 0.005594349466264248, "loss_iou": 0.625, "loss_num": 0.04931640625, "loss_xval": 1.4921875, "num_input_tokens_seen": 251361376, "step": 3795 }, { "epoch": 0.3553142696681799, "grad_norm": 33.90884780883789, "learning_rate": 5e-05, "loss": 1.2415, "num_input_tokens_seen": 251426768, "step": 3796 }, { "epoch": 0.3553142696681799, "loss": 1.0307388305664062, "loss_ce": 0.0036392416805028915, "loss_iou": 0.408203125, "loss_num": 0.0419921875, "loss_xval": 1.0234375, "num_input_tokens_seen": 251426768, "step": 3796 }, { "epoch": 0.3554078719520756, "grad_norm": 21.46078109741211, "learning_rate": 5e-05, "loss": 1.6657, "num_input_tokens_seen": 251493444, "step": 3797 }, { "epoch": 0.3554078719520756, "loss": 1.8991409540176392, "loss_ce": 0.007539353333413601, "loss_iou": 0.7734375, "loss_num": 0.06982421875, "loss_xval": 1.890625, "num_input_tokens_seen": 251493444, "step": 3797 }, { "epoch": 0.35550147423597134, "grad_norm": 41.06492614746094, "learning_rate": 5e-05, "loss": 1.3286, "num_input_tokens_seen": 251559548, "step": 3798 }, { "epoch": 0.35550147423597134, "loss": 1.3790137767791748, "loss_ce": 0.009018702432513237, "loss_iou": 0.53125, "loss_num": 0.061279296875, "loss_xval": 1.3671875, "num_input_tokens_seen": 251559548, "step": 3798 }, { "epoch": 0.3555950765198671, "grad_norm": 18.932714462280273, "learning_rate": 5e-05, "loss": 1.2403, "num_input_tokens_seen": 251626720, "step": 3799 }, { "epoch": 0.3555950765198671, "loss": 1.4121257066726685, "loss_ce": 0.004899176768958569, "loss_iou": 0.5625, "loss_num": 0.057373046875, "loss_xval": 1.40625, "num_input_tokens_seen": 251626720, "step": 3799 }, { "epoch": 0.3556886788037628, "grad_norm": 28.769277572631836, "learning_rate": 5e-05, "loss": 1.4383, "num_input_tokens_seen": 251693384, "step": 3800 }, { "epoch": 0.3556886788037628, "loss": 1.4223759174346924, "loss_ce": 0.006360397674143314, "loss_iou": 0.6328125, "loss_num": 0.0299072265625, "loss_xval": 1.4140625, "num_input_tokens_seen": 251693384, "step": 3800 }, { "epoch": 0.3557822810876585, "grad_norm": 38.383880615234375, "learning_rate": 5e-05, "loss": 1.4703, "num_input_tokens_seen": 251759756, "step": 3801 }, { "epoch": 0.3557822810876585, "loss": 1.507293462753296, "loss_ce": 0.008514214307069778, "loss_iou": 0.59375, "loss_num": 0.0625, "loss_xval": 1.5, "num_input_tokens_seen": 251759756, "step": 3801 }, { "epoch": 0.3558758833715543, "grad_norm": 64.96527099609375, "learning_rate": 5e-05, "loss": 1.5875, "num_input_tokens_seen": 251826760, "step": 3802 }, { "epoch": 0.3558758833715543, "loss": 1.6287994384765625, "loss_ce": 0.005264241714030504, "loss_iou": 0.69140625, "loss_num": 0.048828125, "loss_xval": 1.625, "num_input_tokens_seen": 251826760, "step": 3802 }, { "epoch": 0.35596948565545, "grad_norm": 16.524049758911133, "learning_rate": 5e-05, "loss": 1.2465, "num_input_tokens_seen": 251893856, "step": 3803 }, { "epoch": 0.35596948565545, "loss": 1.3341472148895264, "loss_ce": 0.009928441606462002, "loss_iou": 0.5625, "loss_num": 0.04052734375, "loss_xval": 1.328125, "num_input_tokens_seen": 251893856, "step": 3803 }, { "epoch": 0.3560630879393457, "grad_norm": 28.4807186126709, "learning_rate": 5e-05, "loss": 1.4655, "num_input_tokens_seen": 251960148, "step": 3804 }, { "epoch": 0.3560630879393457, "loss": 1.4297295808792114, "loss_ce": 0.012249134480953217, "loss_iou": 0.53515625, "loss_num": 0.0693359375, "loss_xval": 1.4140625, "num_input_tokens_seen": 251960148, "step": 3804 }, { "epoch": 0.3561566902232414, "grad_norm": 42.3966064453125, "learning_rate": 5e-05, "loss": 1.5877, "num_input_tokens_seen": 252024984, "step": 3805 }, { "epoch": 0.3561566902232414, "loss": 1.5378053188323975, "loss_ce": 0.004602145403623581, "loss_iou": 0.61328125, "loss_num": 0.060546875, "loss_xval": 1.53125, "num_input_tokens_seen": 252024984, "step": 3805 }, { "epoch": 0.3562502925071372, "grad_norm": 22.70017433166504, "learning_rate": 5e-05, "loss": 1.7721, "num_input_tokens_seen": 252089500, "step": 3806 }, { "epoch": 0.3562502925071372, "loss": 1.7175476551055908, "loss_ce": 0.004656947683542967, "loss_iou": 0.7265625, "loss_num": 0.0517578125, "loss_xval": 1.7109375, "num_input_tokens_seen": 252089500, "step": 3806 }, { "epoch": 0.3563438947910329, "grad_norm": 19.666152954101562, "learning_rate": 5e-05, "loss": 1.1383, "num_input_tokens_seen": 252156364, "step": 3807 }, { "epoch": 0.3563438947910329, "loss": 1.2264556884765625, "loss_ce": 0.0042877038940787315, "loss_iou": 0.515625, "loss_num": 0.037841796875, "loss_xval": 1.21875, "num_input_tokens_seen": 252156364, "step": 3807 }, { "epoch": 0.3564374970749286, "grad_norm": 45.91120910644531, "learning_rate": 5e-05, "loss": 1.1452, "num_input_tokens_seen": 252222760, "step": 3808 }, { "epoch": 0.3564374970749286, "loss": 0.9723122715950012, "loss_ce": 0.006980276666581631, "loss_iou": 0.435546875, "loss_num": 0.0186767578125, "loss_xval": 0.96484375, "num_input_tokens_seen": 252222760, "step": 3808 }, { "epoch": 0.3565310993588244, "grad_norm": 24.510021209716797, "learning_rate": 5e-05, "loss": 1.5332, "num_input_tokens_seen": 252289256, "step": 3809 }, { "epoch": 0.3565310993588244, "loss": 1.5503010749816895, "loss_ce": 0.0034260577522218227, "loss_iou": 0.58203125, "loss_num": 0.07666015625, "loss_xval": 1.546875, "num_input_tokens_seen": 252289256, "step": 3809 }, { "epoch": 0.3566247016427201, "grad_norm": 21.58144187927246, "learning_rate": 5e-05, "loss": 1.2941, "num_input_tokens_seen": 252356080, "step": 3810 }, { "epoch": 0.3566247016427201, "loss": 1.3713481426239014, "loss_ce": 0.0036723411176353693, "loss_iou": 0.57421875, "loss_num": 0.04443359375, "loss_xval": 1.3671875, "num_input_tokens_seen": 252356080, "step": 3810 }, { "epoch": 0.3567183039266158, "grad_norm": 33.67427062988281, "learning_rate": 5e-05, "loss": 1.566, "num_input_tokens_seen": 252422348, "step": 3811 }, { "epoch": 0.3567183039266158, "loss": 1.4817874431610107, "loss_ce": 0.009131135419011116, "loss_iou": 0.54296875, "loss_num": 0.0771484375, "loss_xval": 1.46875, "num_input_tokens_seen": 252422348, "step": 3811 }, { "epoch": 0.3568119062105115, "grad_norm": 25.521656036376953, "learning_rate": 5e-05, "loss": 1.3677, "num_input_tokens_seen": 252488916, "step": 3812 }, { "epoch": 0.3568119062105115, "loss": 1.4877207279205322, "loss_ce": 0.0067636920139193535, "loss_iou": 0.61328125, "loss_num": 0.05126953125, "loss_xval": 1.484375, "num_input_tokens_seen": 252488916, "step": 3812 }, { "epoch": 0.3569055084944073, "grad_norm": 31.854557037353516, "learning_rate": 5e-05, "loss": 1.3124, "num_input_tokens_seen": 252556832, "step": 3813 }, { "epoch": 0.3569055084944073, "loss": 1.2551075220108032, "loss_ce": 0.0031543918885290623, "loss_iou": 0.54296875, "loss_num": 0.032470703125, "loss_xval": 1.25, "num_input_tokens_seen": 252556832, "step": 3813 }, { "epoch": 0.356999110778303, "grad_norm": 26.8541316986084, "learning_rate": 5e-05, "loss": 1.3146, "num_input_tokens_seen": 252623132, "step": 3814 }, { "epoch": 0.356999110778303, "loss": 1.3905422687530518, "loss_ce": 0.004800091963261366, "loss_iou": 0.55078125, "loss_num": 0.057373046875, "loss_xval": 1.3828125, "num_input_tokens_seen": 252623132, "step": 3814 }, { "epoch": 0.3570927130621987, "grad_norm": 42.99372100830078, "learning_rate": 5e-05, "loss": 1.4607, "num_input_tokens_seen": 252689232, "step": 3815 }, { "epoch": 0.3570927130621987, "loss": 1.4563833475112915, "loss_ce": 0.007408723700791597, "loss_iou": 0.58984375, "loss_num": 0.052978515625, "loss_xval": 1.4453125, "num_input_tokens_seen": 252689232, "step": 3815 }, { "epoch": 0.35718631534609446, "grad_norm": 35.9421272277832, "learning_rate": 5e-05, "loss": 1.2842, "num_input_tokens_seen": 252755568, "step": 3816 }, { "epoch": 0.35718631534609446, "loss": 1.2683870792388916, "loss_ce": 0.004226885735988617, "loss_iou": 0.5625, "loss_num": 0.0277099609375, "loss_xval": 1.265625, "num_input_tokens_seen": 252755568, "step": 3816 }, { "epoch": 0.3572799176299902, "grad_norm": 29.325708389282227, "learning_rate": 5e-05, "loss": 1.5757, "num_input_tokens_seen": 252821712, "step": 3817 }, { "epoch": 0.3572799176299902, "loss": 1.375547170639038, "loss_ce": 0.0049416664987802505, "loss_iou": 0.5234375, "loss_num": 0.06396484375, "loss_xval": 1.3671875, "num_input_tokens_seen": 252821712, "step": 3817 }, { "epoch": 0.3573735199138859, "grad_norm": 18.662654876708984, "learning_rate": 5e-05, "loss": 1.0818, "num_input_tokens_seen": 252887648, "step": 3818 }, { "epoch": 0.3573735199138859, "loss": 1.1648759841918945, "loss_ce": 0.004231431521475315, "loss_iou": 0.4765625, "loss_num": 0.04150390625, "loss_xval": 1.1640625, "num_input_tokens_seen": 252887648, "step": 3818 }, { "epoch": 0.35746712219778165, "grad_norm": 31.385486602783203, "learning_rate": 5e-05, "loss": 1.3327, "num_input_tokens_seen": 252953428, "step": 3819 }, { "epoch": 0.35746712219778165, "loss": 1.429532766342163, "loss_ce": 0.0027750488370656967, "loss_iou": 0.5546875, "loss_num": 0.0634765625, "loss_xval": 1.4296875, "num_input_tokens_seen": 252953428, "step": 3819 }, { "epoch": 0.35756072448167736, "grad_norm": 21.94999122619629, "learning_rate": 5e-05, "loss": 1.437, "num_input_tokens_seen": 253019704, "step": 3820 }, { "epoch": 0.35756072448167736, "loss": 1.5294301509857178, "loss_ce": 0.006480982061475515, "loss_iou": 0.671875, "loss_num": 0.035400390625, "loss_xval": 1.5234375, "num_input_tokens_seen": 253019704, "step": 3820 }, { "epoch": 0.35765432676557307, "grad_norm": 16.596799850463867, "learning_rate": 5e-05, "loss": 1.0904, "num_input_tokens_seen": 253085800, "step": 3821 }, { "epoch": 0.35765432676557307, "loss": 1.227621078491211, "loss_ce": 0.0035000182688236237, "loss_iou": 0.53125, "loss_num": 0.0322265625, "loss_xval": 1.2265625, "num_input_tokens_seen": 253085800, "step": 3821 }, { "epoch": 0.3577479290494688, "grad_norm": 23.880023956298828, "learning_rate": 5e-05, "loss": 1.5784, "num_input_tokens_seen": 253152092, "step": 3822 }, { "epoch": 0.3577479290494688, "loss": 1.4667890071868896, "loss_ce": 0.002433606656268239, "loss_iou": 0.58203125, "loss_num": 0.06005859375, "loss_xval": 1.4609375, "num_input_tokens_seen": 253152092, "step": 3822 }, { "epoch": 0.35784153133336455, "grad_norm": 26.221240997314453, "learning_rate": 5e-05, "loss": 1.2829, "num_input_tokens_seen": 253217216, "step": 3823 }, { "epoch": 0.35784153133336455, "loss": 1.206403136253357, "loss_ce": 0.005963684059679508, "loss_iou": 0.482421875, "loss_num": 0.047119140625, "loss_xval": 1.203125, "num_input_tokens_seen": 253217216, "step": 3823 }, { "epoch": 0.35793513361726026, "grad_norm": 27.62700843811035, "learning_rate": 5e-05, "loss": 1.5616, "num_input_tokens_seen": 253283996, "step": 3824 }, { "epoch": 0.35793513361726026, "loss": 1.5621179342269897, "loss_ce": 0.0045007579028606415, "loss_iou": 0.64453125, "loss_num": 0.054443359375, "loss_xval": 1.5546875, "num_input_tokens_seen": 253283996, "step": 3824 }, { "epoch": 0.35802873590115597, "grad_norm": 58.04174041748047, "learning_rate": 5e-05, "loss": 1.412, "num_input_tokens_seen": 253350792, "step": 3825 }, { "epoch": 0.35802873590115597, "loss": 1.4483146667480469, "loss_ce": 0.008373213931918144, "loss_iou": 0.58984375, "loss_num": 0.0517578125, "loss_xval": 1.4375, "num_input_tokens_seen": 253350792, "step": 3825 }, { "epoch": 0.35812233818505174, "grad_norm": 26.386741638183594, "learning_rate": 5e-05, "loss": 1.7399, "num_input_tokens_seen": 253417320, "step": 3826 }, { "epoch": 0.35812233818505174, "loss": 1.763947606086731, "loss_ce": 0.007111691869795322, "loss_iou": 0.703125, "loss_num": 0.0693359375, "loss_xval": 1.7578125, "num_input_tokens_seen": 253417320, "step": 3826 }, { "epoch": 0.35821594046894745, "grad_norm": 22.723888397216797, "learning_rate": 5e-05, "loss": 1.1559, "num_input_tokens_seen": 253482760, "step": 3827 }, { "epoch": 0.35821594046894745, "loss": 0.9857834577560425, "loss_ce": 0.005680882837623358, "loss_iou": 0.38671875, "loss_num": 0.041259765625, "loss_xval": 0.98046875, "num_input_tokens_seen": 253482760, "step": 3827 }, { "epoch": 0.35830954275284316, "grad_norm": 105.30680847167969, "learning_rate": 5e-05, "loss": 1.3322, "num_input_tokens_seen": 253549452, "step": 3828 }, { "epoch": 0.35830954275284316, "loss": 1.2688713073730469, "loss_ce": 0.006847410928457975, "loss_iou": 0.490234375, "loss_num": 0.056884765625, "loss_xval": 1.265625, "num_input_tokens_seen": 253549452, "step": 3828 }, { "epoch": 0.3584031450367389, "grad_norm": 29.074954986572266, "learning_rate": 5e-05, "loss": 1.3919, "num_input_tokens_seen": 253613672, "step": 3829 }, { "epoch": 0.3584031450367389, "loss": 1.4572618007659912, "loss_ce": 0.004136784002184868, "loss_iou": 0.58984375, "loss_num": 0.054443359375, "loss_xval": 1.453125, "num_input_tokens_seen": 253613672, "step": 3829 }, { "epoch": 0.35849674732063463, "grad_norm": 30.948087692260742, "learning_rate": 5e-05, "loss": 1.3421, "num_input_tokens_seen": 253679756, "step": 3830 }, { "epoch": 0.35849674732063463, "loss": 1.2343542575836182, "loss_ce": 0.004129580222070217, "loss_iou": 0.46484375, "loss_num": 0.06005859375, "loss_xval": 1.2265625, "num_input_tokens_seen": 253679756, "step": 3830 }, { "epoch": 0.35859034960453035, "grad_norm": 25.996593475341797, "learning_rate": 5e-05, "loss": 1.4863, "num_input_tokens_seen": 253746732, "step": 3831 }, { "epoch": 0.35859034960453035, "loss": 1.2896075248718262, "loss_ce": 0.0029864534735679626, "loss_iou": 0.56640625, "loss_num": 0.031494140625, "loss_xval": 1.2890625, "num_input_tokens_seen": 253746732, "step": 3831 }, { "epoch": 0.35868395188842606, "grad_norm": 50.55592346191406, "learning_rate": 5e-05, "loss": 1.2749, "num_input_tokens_seen": 253812632, "step": 3832 }, { "epoch": 0.35868395188842606, "loss": 1.3609986305236816, "loss_ce": 0.009436029940843582, "loss_iou": 0.5625, "loss_num": 0.044677734375, "loss_xval": 1.3515625, "num_input_tokens_seen": 253812632, "step": 3832 }, { "epoch": 0.3587775541723218, "grad_norm": 22.35114097595215, "learning_rate": 5e-05, "loss": 1.3814, "num_input_tokens_seen": 253879212, "step": 3833 }, { "epoch": 0.3587775541723218, "loss": 1.462552547454834, "loss_ce": 0.004300536587834358, "loss_iou": 0.5703125, "loss_num": 0.064453125, "loss_xval": 1.4609375, "num_input_tokens_seen": 253879212, "step": 3833 }, { "epoch": 0.35887115645621753, "grad_norm": 26.133445739746094, "learning_rate": 5e-05, "loss": 1.6415, "num_input_tokens_seen": 253945172, "step": 3834 }, { "epoch": 0.35887115645621753, "loss": 1.7571624517440796, "loss_ce": 0.007162506692111492, "loss_iou": 0.6796875, "loss_num": 0.078125, "loss_xval": 1.75, "num_input_tokens_seen": 253945172, "step": 3834 }, { "epoch": 0.35896475874011324, "grad_norm": 27.23897361755371, "learning_rate": 5e-05, "loss": 1.3649, "num_input_tokens_seen": 254011844, "step": 3835 }, { "epoch": 0.35896475874011324, "loss": 1.5204861164093018, "loss_ce": 0.006814256310462952, "loss_iou": 0.6171875, "loss_num": 0.056396484375, "loss_xval": 1.515625, "num_input_tokens_seen": 254011844, "step": 3835 }, { "epoch": 0.359058361024009, "grad_norm": 34.14655303955078, "learning_rate": 5e-05, "loss": 1.3203, "num_input_tokens_seen": 254077760, "step": 3836 }, { "epoch": 0.359058361024009, "loss": 1.3540546894073486, "loss_ce": 0.005421890877187252, "loss_iou": 0.5625, "loss_num": 0.045654296875, "loss_xval": 1.3515625, "num_input_tokens_seen": 254077760, "step": 3836 }, { "epoch": 0.3591519633079047, "grad_norm": 23.973108291625977, "learning_rate": 5e-05, "loss": 1.6667, "num_input_tokens_seen": 254144368, "step": 3837 }, { "epoch": 0.3591519633079047, "loss": 1.9346356391906738, "loss_ce": 0.0029950684402137995, "loss_iou": 0.7578125, "loss_num": 0.0830078125, "loss_xval": 1.9296875, "num_input_tokens_seen": 254144368, "step": 3837 }, { "epoch": 0.35924556559180043, "grad_norm": 14.81295108795166, "learning_rate": 5e-05, "loss": 1.5248, "num_input_tokens_seen": 254210724, "step": 3838 }, { "epoch": 0.35924556559180043, "loss": 1.3920135498046875, "loss_ce": 0.003829952096566558, "loss_iou": 0.5234375, "loss_num": 0.068359375, "loss_xval": 1.390625, "num_input_tokens_seen": 254210724, "step": 3838 }, { "epoch": 0.35933916787569614, "grad_norm": 228.7640838623047, "learning_rate": 5e-05, "loss": 1.4907, "num_input_tokens_seen": 254277704, "step": 3839 }, { "epoch": 0.35933916787569614, "loss": 1.335458755493164, "loss_ce": 0.01221649069339037, "loss_iou": 0.5078125, "loss_num": 0.061279296875, "loss_xval": 1.3203125, "num_input_tokens_seen": 254277704, "step": 3839 }, { "epoch": 0.3594327701595919, "grad_norm": 30.683616638183594, "learning_rate": 5e-05, "loss": 1.3883, "num_input_tokens_seen": 254343496, "step": 3840 }, { "epoch": 0.3594327701595919, "loss": 1.5232099294662476, "loss_ce": 0.005143512040376663, "loss_iou": 0.58984375, "loss_num": 0.06689453125, "loss_xval": 1.515625, "num_input_tokens_seen": 254343496, "step": 3840 }, { "epoch": 0.3595263724434876, "grad_norm": 62.56199264526367, "learning_rate": 5e-05, "loss": 1.6315, "num_input_tokens_seen": 254410352, "step": 3841 }, { "epoch": 0.3595263724434876, "loss": 1.6729618310928345, "loss_ce": 0.004016554448753595, "loss_iou": 0.6796875, "loss_num": 0.061279296875, "loss_xval": 1.671875, "num_input_tokens_seen": 254410352, "step": 3841 }, { "epoch": 0.35961997472738333, "grad_norm": 34.9542350769043, "learning_rate": 5e-05, "loss": 1.5272, "num_input_tokens_seen": 254476636, "step": 3842 }, { "epoch": 0.35961997472738333, "loss": 1.5351890325546265, "loss_ce": 0.007845314219594002, "loss_iou": 0.625, "loss_num": 0.054931640625, "loss_xval": 1.53125, "num_input_tokens_seen": 254476636, "step": 3842 }, { "epoch": 0.3597135770112791, "grad_norm": 30.81777000427246, "learning_rate": 5e-05, "loss": 1.437, "num_input_tokens_seen": 254541704, "step": 3843 }, { "epoch": 0.3597135770112791, "loss": 1.4094040393829346, "loss_ce": 0.012065219692885876, "loss_iou": 0.54296875, "loss_num": 0.0625, "loss_xval": 1.3984375, "num_input_tokens_seen": 254541704, "step": 3843 }, { "epoch": 0.3598071792951748, "grad_norm": 33.980743408203125, "learning_rate": 5e-05, "loss": 1.5379, "num_input_tokens_seen": 254607920, "step": 3844 }, { "epoch": 0.3598071792951748, "loss": 1.6039977073669434, "loss_ce": 0.0073180063627660275, "loss_iou": 0.671875, "loss_num": 0.050048828125, "loss_xval": 1.59375, "num_input_tokens_seen": 254607920, "step": 3844 }, { "epoch": 0.3599007815790705, "grad_norm": 25.925189971923828, "learning_rate": 5e-05, "loss": 1.3263, "num_input_tokens_seen": 254674336, "step": 3845 }, { "epoch": 0.3599007815790705, "loss": 1.3964755535125732, "loss_ce": 0.007803751155734062, "loss_iou": 0.5625, "loss_num": 0.05322265625, "loss_xval": 1.390625, "num_input_tokens_seen": 254674336, "step": 3845 }, { "epoch": 0.3599943838629663, "grad_norm": 41.20928955078125, "learning_rate": 5e-05, "loss": 1.6059, "num_input_tokens_seen": 254741228, "step": 3846 }, { "epoch": 0.3599943838629663, "loss": 1.5326600074768066, "loss_ce": 0.008245894685387611, "loss_iou": 0.6328125, "loss_num": 0.0517578125, "loss_xval": 1.5234375, "num_input_tokens_seen": 254741228, "step": 3846 }, { "epoch": 0.360087986146862, "grad_norm": 27.70813751220703, "learning_rate": 5e-05, "loss": 1.3411, "num_input_tokens_seen": 254807580, "step": 3847 }, { "epoch": 0.360087986146862, "loss": 1.4202702045440674, "loss_ce": 0.0042546410113573074, "loss_iou": 0.59765625, "loss_num": 0.0439453125, "loss_xval": 1.4140625, "num_input_tokens_seen": 254807580, "step": 3847 }, { "epoch": 0.3601815884307577, "grad_norm": 31.373220443725586, "learning_rate": 5e-05, "loss": 1.1782, "num_input_tokens_seen": 254873264, "step": 3848 }, { "epoch": 0.3601815884307577, "loss": 1.2204126119613647, "loss_ce": 0.008986882865428925, "loss_iou": 0.48828125, "loss_num": 0.04736328125, "loss_xval": 1.2109375, "num_input_tokens_seen": 254873264, "step": 3848 }, { "epoch": 0.3602751907146534, "grad_norm": 29.523340225219727, "learning_rate": 5e-05, "loss": 1.1972, "num_input_tokens_seen": 254939124, "step": 3849 }, { "epoch": 0.3602751907146534, "loss": 1.0339815616607666, "loss_ce": 0.003952270373702049, "loss_iou": 0.43359375, "loss_num": 0.0322265625, "loss_xval": 1.03125, "num_input_tokens_seen": 254939124, "step": 3849 }, { "epoch": 0.3603687929985492, "grad_norm": 35.41106414794922, "learning_rate": 5e-05, "loss": 1.3877, "num_input_tokens_seen": 255004984, "step": 3850 }, { "epoch": 0.3603687929985492, "loss": 1.5489107370376587, "loss_ce": 0.010824768804013729, "loss_iou": 0.6015625, "loss_num": 0.06640625, "loss_xval": 1.5390625, "num_input_tokens_seen": 255004984, "step": 3850 }, { "epoch": 0.3604623952824449, "grad_norm": 32.25589370727539, "learning_rate": 5e-05, "loss": 1.2475, "num_input_tokens_seen": 255072140, "step": 3851 }, { "epoch": 0.3604623952824449, "loss": 1.139512538909912, "loss_ce": 0.007676601409912109, "loss_iou": 0.46484375, "loss_num": 0.040283203125, "loss_xval": 1.1328125, "num_input_tokens_seen": 255072140, "step": 3851 }, { "epoch": 0.3605559975663406, "grad_norm": 40.44350051879883, "learning_rate": 5e-05, "loss": 1.3483, "num_input_tokens_seen": 255137500, "step": 3852 }, { "epoch": 0.3605559975663406, "loss": 1.35080885887146, "loss_ce": 0.003640816081315279, "loss_iou": 0.57421875, "loss_num": 0.03955078125, "loss_xval": 1.34375, "num_input_tokens_seen": 255137500, "step": 3852 }, { "epoch": 0.36064959985023637, "grad_norm": 26.932327270507812, "learning_rate": 5e-05, "loss": 1.4373, "num_input_tokens_seen": 255204320, "step": 3853 }, { "epoch": 0.36064959985023637, "loss": 1.6209089756011963, "loss_ce": 0.004698014352470636, "loss_iou": 0.640625, "loss_num": 0.06689453125, "loss_xval": 1.6171875, "num_input_tokens_seen": 255204320, "step": 3853 }, { "epoch": 0.3607432021341321, "grad_norm": 23.202775955200195, "learning_rate": 5e-05, "loss": 1.3226, "num_input_tokens_seen": 255270512, "step": 3854 }, { "epoch": 0.3607432021341321, "loss": 1.0814577341079712, "loss_ce": 0.004553454462438822, "loss_iou": 0.4140625, "loss_num": 0.04931640625, "loss_xval": 1.078125, "num_input_tokens_seen": 255270512, "step": 3854 }, { "epoch": 0.3608368044180278, "grad_norm": 41.50984573364258, "learning_rate": 5e-05, "loss": 1.2359, "num_input_tokens_seen": 255337016, "step": 3855 }, { "epoch": 0.3608368044180278, "loss": 1.0581815242767334, "loss_ce": 0.0034940862096846104, "loss_iou": 0.44921875, "loss_num": 0.0308837890625, "loss_xval": 1.0546875, "num_input_tokens_seen": 255337016, "step": 3855 }, { "epoch": 0.3609304067019235, "grad_norm": 24.14653778076172, "learning_rate": 5e-05, "loss": 1.6067, "num_input_tokens_seen": 255403040, "step": 3856 }, { "epoch": 0.3609304067019235, "loss": 1.564652681350708, "loss_ce": 0.004594052210450172, "loss_iou": 0.6328125, "loss_num": 0.058349609375, "loss_xval": 1.5625, "num_input_tokens_seen": 255403040, "step": 3856 }, { "epoch": 0.36102400898581927, "grad_norm": 26.012975692749023, "learning_rate": 5e-05, "loss": 1.2269, "num_input_tokens_seen": 255468232, "step": 3857 }, { "epoch": 0.36102400898581927, "loss": 1.012009620666504, "loss_ce": 0.006028190720826387, "loss_iou": 0.435546875, "loss_num": 0.02734375, "loss_xval": 1.0078125, "num_input_tokens_seen": 255468232, "step": 3857 }, { "epoch": 0.361117611269715, "grad_norm": 25.54911994934082, "learning_rate": 5e-05, "loss": 1.4046, "num_input_tokens_seen": 255533860, "step": 3858 }, { "epoch": 0.361117611269715, "loss": 1.2875566482543945, "loss_ce": 0.008748022839426994, "loss_iou": 0.51171875, "loss_num": 0.051513671875, "loss_xval": 1.28125, "num_input_tokens_seen": 255533860, "step": 3858 }, { "epoch": 0.3612112135536107, "grad_norm": 29.165939331054688, "learning_rate": 5e-05, "loss": 1.2991, "num_input_tokens_seen": 255599948, "step": 3859 }, { "epoch": 0.3612112135536107, "loss": 1.2389352321624756, "loss_ce": 0.014814192429184914, "loss_iou": 0.50390625, "loss_num": 0.042724609375, "loss_xval": 1.2265625, "num_input_tokens_seen": 255599948, "step": 3859 }, { "epoch": 0.36130481583750645, "grad_norm": 31.390108108520508, "learning_rate": 5e-05, "loss": 1.2778, "num_input_tokens_seen": 255667236, "step": 3860 }, { "epoch": 0.36130481583750645, "loss": 1.1914269924163818, "loss_ce": 0.0039269146509468555, "loss_iou": 0.53515625, "loss_num": 0.023193359375, "loss_xval": 1.1875, "num_input_tokens_seen": 255667236, "step": 3860 }, { "epoch": 0.36139841812140217, "grad_norm": 23.40657615661621, "learning_rate": 5e-05, "loss": 1.3771, "num_input_tokens_seen": 255734296, "step": 3861 }, { "epoch": 0.36139841812140217, "loss": 1.4665780067443848, "loss_ce": 0.009058399125933647, "loss_iou": 0.578125, "loss_num": 0.06103515625, "loss_xval": 1.4609375, "num_input_tokens_seen": 255734296, "step": 3861 }, { "epoch": 0.3614920204052979, "grad_norm": 12.270401954650879, "learning_rate": 5e-05, "loss": 1.1437, "num_input_tokens_seen": 255800104, "step": 3862 }, { "epoch": 0.3614920204052979, "loss": 1.3889422416687012, "loss_ce": 0.006617933511734009, "loss_iou": 0.54296875, "loss_num": 0.0595703125, "loss_xval": 1.3828125, "num_input_tokens_seen": 255800104, "step": 3862 }, { "epoch": 0.36158562268919364, "grad_norm": 16.934629440307617, "learning_rate": 5e-05, "loss": 1.1744, "num_input_tokens_seen": 255866968, "step": 3863 }, { "epoch": 0.36158562268919364, "loss": 1.3468637466430664, "loss_ce": 0.005066880490630865, "loss_iou": 0.53125, "loss_num": 0.05517578125, "loss_xval": 1.34375, "num_input_tokens_seen": 255866968, "step": 3863 }, { "epoch": 0.36167922497308935, "grad_norm": 25.67149543762207, "learning_rate": 5e-05, "loss": 1.36, "num_input_tokens_seen": 255932840, "step": 3864 }, { "epoch": 0.36167922497308935, "loss": 1.3157559633255005, "loss_ce": 0.009115353226661682, "loss_iou": 0.51171875, "loss_num": 0.055908203125, "loss_xval": 1.3046875, "num_input_tokens_seen": 255932840, "step": 3864 }, { "epoch": 0.36177282725698506, "grad_norm": 20.666156768798828, "learning_rate": 5e-05, "loss": 1.1782, "num_input_tokens_seen": 255999628, "step": 3865 }, { "epoch": 0.36177282725698506, "loss": 1.127396583557129, "loss_ce": 0.008744290098547935, "loss_iou": 0.46875, "loss_num": 0.035888671875, "loss_xval": 1.1171875, "num_input_tokens_seen": 255999628, "step": 3865 }, { "epoch": 0.3618664295408808, "grad_norm": 16.260723114013672, "learning_rate": 5e-05, "loss": 1.3397, "num_input_tokens_seen": 256066584, "step": 3866 }, { "epoch": 0.3618664295408808, "loss": 1.4446632862091064, "loss_ce": 0.007651643827557564, "loss_iou": 0.5703125, "loss_num": 0.059814453125, "loss_xval": 1.4375, "num_input_tokens_seen": 256066584, "step": 3866 }, { "epoch": 0.36196003182477654, "grad_norm": 18.712108612060547, "learning_rate": 5e-05, "loss": 1.3027, "num_input_tokens_seen": 256132528, "step": 3867 }, { "epoch": 0.36196003182477654, "loss": 1.4516030550003052, "loss_ce": 0.003849088679999113, "loss_iou": 0.58984375, "loss_num": 0.05322265625, "loss_xval": 1.4453125, "num_input_tokens_seen": 256132528, "step": 3867 }, { "epoch": 0.36205363410867225, "grad_norm": 25.798871994018555, "learning_rate": 5e-05, "loss": 1.2465, "num_input_tokens_seen": 256199148, "step": 3868 }, { "epoch": 0.36205363410867225, "loss": 1.1591289043426514, "loss_ce": 0.006785160396248102, "loss_iou": 0.5, "loss_num": 0.0296630859375, "loss_xval": 1.15625, "num_input_tokens_seen": 256199148, "step": 3868 }, { "epoch": 0.36214723639256796, "grad_norm": 57.71001434326172, "learning_rate": 5e-05, "loss": 1.5598, "num_input_tokens_seen": 256265288, "step": 3869 }, { "epoch": 0.36214723639256796, "loss": 1.2412936687469482, "loss_ce": 0.006430388428270817, "loss_iou": 0.5390625, "loss_num": 0.03125, "loss_xval": 1.234375, "num_input_tokens_seen": 256265288, "step": 3869 }, { "epoch": 0.36224083867646373, "grad_norm": 21.811450958251953, "learning_rate": 5e-05, "loss": 1.4648, "num_input_tokens_seen": 256332536, "step": 3870 }, { "epoch": 0.36224083867646373, "loss": 1.3369035720825195, "loss_ce": 0.011708246544003487, "loss_iou": 0.53515625, "loss_num": 0.05078125, "loss_xval": 1.328125, "num_input_tokens_seen": 256332536, "step": 3870 }, { "epoch": 0.36233444096035944, "grad_norm": 25.570297241210938, "learning_rate": 5e-05, "loss": 1.3582, "num_input_tokens_seen": 256398532, "step": 3871 }, { "epoch": 0.36233444096035944, "loss": 1.2315714359283447, "loss_ce": 0.01013591792434454, "loss_iou": 0.490234375, "loss_num": 0.04833984375, "loss_xval": 1.21875, "num_input_tokens_seen": 256398532, "step": 3871 }, { "epoch": 0.36242804324425515, "grad_norm": 49.26054382324219, "learning_rate": 5e-05, "loss": 1.2921, "num_input_tokens_seen": 256465032, "step": 3872 }, { "epoch": 0.36242804324425515, "loss": 1.2738597393035889, "loss_ce": 0.0033519864082336426, "loss_iou": 0.53515625, "loss_num": 0.04052734375, "loss_xval": 1.2734375, "num_input_tokens_seen": 256465032, "step": 3872 }, { "epoch": 0.36252164552815086, "grad_norm": 18.672164916992188, "learning_rate": 5e-05, "loss": 1.6078, "num_input_tokens_seen": 256531388, "step": 3873 }, { "epoch": 0.36252164552815086, "loss": 1.5311250686645508, "loss_ce": 0.005734493024647236, "loss_iou": 0.62890625, "loss_num": 0.05322265625, "loss_xval": 1.5234375, "num_input_tokens_seen": 256531388, "step": 3873 }, { "epoch": 0.3626152478120466, "grad_norm": 22.86305809020996, "learning_rate": 5e-05, "loss": 1.1378, "num_input_tokens_seen": 256597248, "step": 3874 }, { "epoch": 0.3626152478120466, "loss": 1.235906720161438, "loss_ce": 0.00641454104334116, "loss_iou": 0.50390625, "loss_num": 0.044921875, "loss_xval": 1.2265625, "num_input_tokens_seen": 256597248, "step": 3874 }, { "epoch": 0.36270885009594234, "grad_norm": 26.60591697692871, "learning_rate": 5e-05, "loss": 1.2531, "num_input_tokens_seen": 256663520, "step": 3875 }, { "epoch": 0.36270885009594234, "loss": 1.0908284187316895, "loss_ce": 0.0034260787069797516, "loss_iou": 0.42578125, "loss_num": 0.04736328125, "loss_xval": 1.0859375, "num_input_tokens_seen": 256663520, "step": 3875 }, { "epoch": 0.36280245237983805, "grad_norm": 24.526897430419922, "learning_rate": 5e-05, "loss": 1.243, "num_input_tokens_seen": 256731112, "step": 3876 }, { "epoch": 0.36280245237983805, "loss": 1.449812650680542, "loss_ce": 0.005476716905832291, "loss_iou": 0.60546875, "loss_num": 0.04638671875, "loss_xval": 1.4453125, "num_input_tokens_seen": 256731112, "step": 3876 }, { "epoch": 0.3628960546637338, "grad_norm": 36.438655853271484, "learning_rate": 5e-05, "loss": 1.4344, "num_input_tokens_seen": 256796960, "step": 3877 }, { "epoch": 0.3628960546637338, "loss": 1.41978919506073, "loss_ce": 0.005238380283117294, "loss_iou": 0.55859375, "loss_num": 0.059326171875, "loss_xval": 1.4140625, "num_input_tokens_seen": 256796960, "step": 3877 }, { "epoch": 0.3629896569476295, "grad_norm": 22.20621109008789, "learning_rate": 5e-05, "loss": 1.7951, "num_input_tokens_seen": 256861344, "step": 3878 }, { "epoch": 0.3629896569476295, "loss": 1.8474003076553345, "loss_ce": 0.009021367877721786, "loss_iou": 0.7265625, "loss_num": 0.07666015625, "loss_xval": 1.8359375, "num_input_tokens_seen": 256861344, "step": 3878 }, { "epoch": 0.36308325923152523, "grad_norm": 18.223386764526367, "learning_rate": 5e-05, "loss": 1.2966, "num_input_tokens_seen": 256928064, "step": 3879 }, { "epoch": 0.36308325923152523, "loss": 1.348849892616272, "loss_ce": 0.004123359452933073, "loss_iou": 0.546875, "loss_num": 0.050048828125, "loss_xval": 1.34375, "num_input_tokens_seen": 256928064, "step": 3879 }, { "epoch": 0.363176861515421, "grad_norm": 24.631162643432617, "learning_rate": 5e-05, "loss": 1.3253, "num_input_tokens_seen": 256993304, "step": 3880 }, { "epoch": 0.363176861515421, "loss": 1.3371837139129639, "loss_ce": 0.007105658762156963, "loss_iou": 0.546875, "loss_num": 0.0478515625, "loss_xval": 1.328125, "num_input_tokens_seen": 256993304, "step": 3880 }, { "epoch": 0.3632704637993167, "grad_norm": 156.11648559570312, "learning_rate": 5e-05, "loss": 1.6287, "num_input_tokens_seen": 257059136, "step": 3881 }, { "epoch": 0.3632704637993167, "loss": 1.4368083477020264, "loss_ce": 0.004191184416413307, "loss_iou": 0.61328125, "loss_num": 0.041015625, "loss_xval": 1.4296875, "num_input_tokens_seen": 257059136, "step": 3881 }, { "epoch": 0.3633640660832124, "grad_norm": 33.807151794433594, "learning_rate": 5e-05, "loss": 1.3767, "num_input_tokens_seen": 257126408, "step": 3882 }, { "epoch": 0.3633640660832124, "loss": 1.403756856918335, "loss_ce": 0.003366295015439391, "loss_iou": 0.62109375, "loss_num": 0.031982421875, "loss_xval": 1.3984375, "num_input_tokens_seen": 257126408, "step": 3882 }, { "epoch": 0.36345766836710813, "grad_norm": 29.032533645629883, "learning_rate": 5e-05, "loss": 1.188, "num_input_tokens_seen": 257193128, "step": 3883 }, { "epoch": 0.36345766836710813, "loss": 1.0745892524719238, "loss_ce": 0.007694760337471962, "loss_iou": 0.421875, "loss_num": 0.044677734375, "loss_xval": 1.0703125, "num_input_tokens_seen": 257193128, "step": 3883 }, { "epoch": 0.3635512706510039, "grad_norm": 30.084447860717773, "learning_rate": 5e-05, "loss": 1.3535, "num_input_tokens_seen": 257260136, "step": 3884 }, { "epoch": 0.3635512706510039, "loss": 1.5438709259033203, "loss_ce": 0.00773820374161005, "loss_iou": 0.609375, "loss_num": 0.0634765625, "loss_xval": 1.5390625, "num_input_tokens_seen": 257260136, "step": 3884 }, { "epoch": 0.3636448729348996, "grad_norm": 28.8676700592041, "learning_rate": 5e-05, "loss": 1.4723, "num_input_tokens_seen": 257326596, "step": 3885 }, { "epoch": 0.3636448729348996, "loss": 1.5852420330047607, "loss_ce": 0.007117072120308876, "loss_iou": 0.63671875, "loss_num": 0.061279296875, "loss_xval": 1.578125, "num_input_tokens_seen": 257326596, "step": 3885 }, { "epoch": 0.3637384752187953, "grad_norm": 214.13739013671875, "learning_rate": 5e-05, "loss": 1.3138, "num_input_tokens_seen": 257392704, "step": 3886 }, { "epoch": 0.3637384752187953, "loss": 1.3630067110061646, "loss_ce": 0.007049663923680782, "loss_iou": 0.5, "loss_num": 0.0712890625, "loss_xval": 1.359375, "num_input_tokens_seen": 257392704, "step": 3886 }, { "epoch": 0.3638320775026911, "grad_norm": 32.66344451904297, "learning_rate": 5e-05, "loss": 1.4655, "num_input_tokens_seen": 257459588, "step": 3887 }, { "epoch": 0.3638320775026911, "loss": 1.50942063331604, "loss_ce": 0.007467404939234257, "loss_iou": 0.60546875, "loss_num": 0.0576171875, "loss_xval": 1.5, "num_input_tokens_seen": 257459588, "step": 3887 }, { "epoch": 0.3639256797865868, "grad_norm": 28.34696388244629, "learning_rate": 5e-05, "loss": 1.5062, "num_input_tokens_seen": 257525372, "step": 3888 }, { "epoch": 0.3639256797865868, "loss": 1.5711674690246582, "loss_ce": 0.005737648345530033, "loss_iou": 0.60546875, "loss_num": 0.0712890625, "loss_xval": 1.5625, "num_input_tokens_seen": 257525372, "step": 3888 }, { "epoch": 0.3640192820704825, "grad_norm": 13.845455169677734, "learning_rate": 5e-05, "loss": 1.1894, "num_input_tokens_seen": 257590204, "step": 3889 }, { "epoch": 0.3640192820704825, "loss": 0.9258078336715698, "loss_ce": 0.0051535069942474365, "loss_iou": 0.294921875, "loss_num": 0.06640625, "loss_xval": 0.921875, "num_input_tokens_seen": 257590204, "step": 3889 }, { "epoch": 0.3641128843543783, "grad_norm": 32.85861587524414, "learning_rate": 5e-05, "loss": 1.1474, "num_input_tokens_seen": 257655680, "step": 3890 }, { "epoch": 0.3641128843543783, "loss": 1.2459192276000977, "loss_ce": 0.008156735450029373, "loss_iou": 0.490234375, "loss_num": 0.05126953125, "loss_xval": 1.234375, "num_input_tokens_seen": 257655680, "step": 3890 }, { "epoch": 0.364206486638274, "grad_norm": 40.10943603515625, "learning_rate": 5e-05, "loss": 1.4291, "num_input_tokens_seen": 257722332, "step": 3891 }, { "epoch": 0.364206486638274, "loss": 1.4412177801132202, "loss_ce": 0.01153024472296238, "loss_iou": 0.56640625, "loss_num": 0.059326171875, "loss_xval": 1.4296875, "num_input_tokens_seen": 257722332, "step": 3891 }, { "epoch": 0.3643000889221697, "grad_norm": 35.25660705566406, "learning_rate": 5e-05, "loss": 1.5353, "num_input_tokens_seen": 257789960, "step": 3892 }, { "epoch": 0.3643000889221697, "loss": 1.5436816215515137, "loss_ce": 0.0026659530121833086, "loss_iou": 0.62890625, "loss_num": 0.05712890625, "loss_xval": 1.5390625, "num_input_tokens_seen": 257789960, "step": 3892 }, { "epoch": 0.3643936912060654, "grad_norm": 24.885868072509766, "learning_rate": 5e-05, "loss": 1.2971, "num_input_tokens_seen": 257856628, "step": 3893 }, { "epoch": 0.3643936912060654, "loss": 1.2822798490524292, "loss_ce": 0.006706084590405226, "loss_iou": 0.53125, "loss_num": 0.04296875, "loss_xval": 1.2734375, "num_input_tokens_seen": 257856628, "step": 3893 }, { "epoch": 0.3644872934899612, "grad_norm": 23.392274856567383, "learning_rate": 5e-05, "loss": 1.2484, "num_input_tokens_seen": 257922620, "step": 3894 }, { "epoch": 0.3644872934899612, "loss": 1.484807014465332, "loss_ce": 0.005803174804896116, "loss_iou": 0.578125, "loss_num": 0.064453125, "loss_xval": 1.4765625, "num_input_tokens_seen": 257922620, "step": 3894 }, { "epoch": 0.3645808957738569, "grad_norm": 35.21171188354492, "learning_rate": 5e-05, "loss": 1.584, "num_input_tokens_seen": 257987672, "step": 3895 }, { "epoch": 0.3645808957738569, "loss": 1.5609768629074097, "loss_ce": 0.0038479208014905453, "loss_iou": 0.609375, "loss_num": 0.068359375, "loss_xval": 1.5546875, "num_input_tokens_seen": 257987672, "step": 3895 }, { "epoch": 0.3646744980577526, "grad_norm": 41.75724411010742, "learning_rate": 5e-05, "loss": 1.3437, "num_input_tokens_seen": 258053940, "step": 3896 }, { "epoch": 0.3646744980577526, "loss": 1.4538421630859375, "loss_ce": 0.015548745170235634, "loss_iou": 0.5234375, "loss_num": 0.078125, "loss_xval": 1.4375, "num_input_tokens_seen": 258053940, "step": 3896 }, { "epoch": 0.36476810034164836, "grad_norm": 25.527973175048828, "learning_rate": 5e-05, "loss": 1.2668, "num_input_tokens_seen": 258120848, "step": 3897 }, { "epoch": 0.36476810034164836, "loss": 1.237147569656372, "loss_ce": 0.003260722616687417, "loss_iou": 0.5, "loss_num": 0.046875, "loss_xval": 1.234375, "num_input_tokens_seen": 258120848, "step": 3897 }, { "epoch": 0.36486170262554407, "grad_norm": 29.957181930541992, "learning_rate": 5e-05, "loss": 1.4001, "num_input_tokens_seen": 258186440, "step": 3898 }, { "epoch": 0.36486170262554407, "loss": 1.521573781967163, "loss_ce": 0.0039955200627446175, "loss_iou": 0.61328125, "loss_num": 0.0576171875, "loss_xval": 1.515625, "num_input_tokens_seen": 258186440, "step": 3898 }, { "epoch": 0.3649553049094398, "grad_norm": 17.0150146484375, "learning_rate": 5e-05, "loss": 1.2839, "num_input_tokens_seen": 258253036, "step": 3899 }, { "epoch": 0.3649553049094398, "loss": 1.1094952821731567, "loss_ce": 0.005003111902624369, "loss_iou": 0.423828125, "loss_num": 0.051025390625, "loss_xval": 1.1015625, "num_input_tokens_seen": 258253036, "step": 3899 }, { "epoch": 0.3650489071933355, "grad_norm": 28.997066497802734, "learning_rate": 5e-05, "loss": 1.4532, "num_input_tokens_seen": 258318316, "step": 3900 }, { "epoch": 0.3650489071933355, "loss": 1.4568918943405151, "loss_ce": 0.008649641647934914, "loss_iou": 0.55078125, "loss_num": 0.0693359375, "loss_xval": 1.4453125, "num_input_tokens_seen": 258318316, "step": 3900 }, { "epoch": 0.36514250947723126, "grad_norm": 33.862464904785156, "learning_rate": 5e-05, "loss": 1.3942, "num_input_tokens_seen": 258384308, "step": 3901 }, { "epoch": 0.36514250947723126, "loss": 1.2920153141021729, "loss_ce": 0.0034410918597131968, "loss_iou": 0.498046875, "loss_num": 0.058349609375, "loss_xval": 1.2890625, "num_input_tokens_seen": 258384308, "step": 3901 }, { "epoch": 0.36523611176112697, "grad_norm": 16.475595474243164, "learning_rate": 5e-05, "loss": 1.326, "num_input_tokens_seen": 258449096, "step": 3902 }, { "epoch": 0.36523611176112697, "loss": 1.1843986511230469, "loss_ce": 0.0037345124874264, "loss_iou": 0.421875, "loss_num": 0.0673828125, "loss_xval": 1.1796875, "num_input_tokens_seen": 258449096, "step": 3902 }, { "epoch": 0.3653297140450227, "grad_norm": 10.042855262756348, "learning_rate": 5e-05, "loss": 1.1307, "num_input_tokens_seen": 258513448, "step": 3903 }, { "epoch": 0.3653297140450227, "loss": 1.134523868560791, "loss_ce": 0.007326656952500343, "loss_iou": 0.458984375, "loss_num": 0.0419921875, "loss_xval": 1.125, "num_input_tokens_seen": 258513448, "step": 3903 }, { "epoch": 0.36542331632891845, "grad_norm": 29.135862350463867, "learning_rate": 5e-05, "loss": 1.2811, "num_input_tokens_seen": 258580004, "step": 3904 }, { "epoch": 0.36542331632891845, "loss": 1.1003613471984863, "loss_ce": 0.005207505542784929, "loss_iou": 0.443359375, "loss_num": 0.0419921875, "loss_xval": 1.09375, "num_input_tokens_seen": 258580004, "step": 3904 }, { "epoch": 0.36551691861281416, "grad_norm": 21.72974395751953, "learning_rate": 5e-05, "loss": 1.6188, "num_input_tokens_seen": 258644024, "step": 3905 }, { "epoch": 0.36551691861281416, "loss": 1.5372267961502075, "loss_ce": 0.00792991928756237, "loss_iou": 0.65234375, "loss_num": 0.044189453125, "loss_xval": 1.53125, "num_input_tokens_seen": 258644024, "step": 3905 }, { "epoch": 0.36561052089670987, "grad_norm": 18.686946868896484, "learning_rate": 5e-05, "loss": 1.446, "num_input_tokens_seen": 258709872, "step": 3906 }, { "epoch": 0.36561052089670987, "loss": 1.4138548374176025, "loss_ce": 0.0032103601843118668, "loss_iou": 0.5703125, "loss_num": 0.05322265625, "loss_xval": 1.4140625, "num_input_tokens_seen": 258709872, "step": 3906 }, { "epoch": 0.36570412318060563, "grad_norm": 25.584678649902344, "learning_rate": 5e-05, "loss": 1.349, "num_input_tokens_seen": 258776216, "step": 3907 }, { "epoch": 0.36570412318060563, "loss": 1.3176180124282837, "loss_ce": 0.005606233142316341, "loss_iou": 0.5234375, "loss_num": 0.052490234375, "loss_xval": 1.3125, "num_input_tokens_seen": 258776216, "step": 3907 }, { "epoch": 0.36579772546450134, "grad_norm": 40.10686492919922, "learning_rate": 5e-05, "loss": 1.3434, "num_input_tokens_seen": 258842412, "step": 3908 }, { "epoch": 0.36579772546450134, "loss": 1.4787346124649048, "loss_ce": 0.0036369068548083305, "loss_iou": 0.5859375, "loss_num": 0.059814453125, "loss_xval": 1.4765625, "num_input_tokens_seen": 258842412, "step": 3908 }, { "epoch": 0.36589132774839705, "grad_norm": 24.690710067749023, "learning_rate": 5e-05, "loss": 1.4947, "num_input_tokens_seen": 258909164, "step": 3909 }, { "epoch": 0.36589132774839705, "loss": 1.6424697637557983, "loss_ce": 0.005750999320298433, "loss_iou": 0.625, "loss_num": 0.07666015625, "loss_xval": 1.640625, "num_input_tokens_seen": 258909164, "step": 3909 }, { "epoch": 0.36598493003229277, "grad_norm": 16.426416397094727, "learning_rate": 5e-05, "loss": 1.2203, "num_input_tokens_seen": 258975068, "step": 3910 }, { "epoch": 0.36598493003229277, "loss": 1.1377646923065186, "loss_ce": 0.005806789733469486, "loss_iou": 0.435546875, "loss_num": 0.052490234375, "loss_xval": 1.1328125, "num_input_tokens_seen": 258975068, "step": 3910 }, { "epoch": 0.36607853231618853, "grad_norm": 19.121076583862305, "learning_rate": 5e-05, "loss": 1.0438, "num_input_tokens_seen": 259041128, "step": 3911 }, { "epoch": 0.36607853231618853, "loss": 1.300186276435852, "loss_ce": 0.0033112491946667433, "loss_iou": 0.55078125, "loss_num": 0.03955078125, "loss_xval": 1.296875, "num_input_tokens_seen": 259041128, "step": 3911 }, { "epoch": 0.36617213460008424, "grad_norm": 19.31698989868164, "learning_rate": 5e-05, "loss": 1.4742, "num_input_tokens_seen": 259107748, "step": 3912 }, { "epoch": 0.36617213460008424, "loss": 1.4676434993743896, "loss_ce": 0.014518586918711662, "loss_iou": 0.546875, "loss_num": 0.07177734375, "loss_xval": 1.453125, "num_input_tokens_seen": 259107748, "step": 3912 }, { "epoch": 0.36626573688397995, "grad_norm": 25.95240592956543, "learning_rate": 5e-05, "loss": 1.3572, "num_input_tokens_seen": 259175184, "step": 3913 }, { "epoch": 0.36626573688397995, "loss": 1.4779455661773682, "loss_ce": 0.0033362722024321556, "loss_iou": 0.6484375, "loss_num": 0.035888671875, "loss_xval": 1.4765625, "num_input_tokens_seen": 259175184, "step": 3913 }, { "epoch": 0.3663593391678757, "grad_norm": 22.275863647460938, "learning_rate": 5e-05, "loss": 1.3261, "num_input_tokens_seen": 259240856, "step": 3914 }, { "epoch": 0.3663593391678757, "loss": 1.2703871726989746, "loss_ce": 0.008668376132845879, "loss_iou": 0.498046875, "loss_num": 0.0537109375, "loss_xval": 1.265625, "num_input_tokens_seen": 259240856, "step": 3914 }, { "epoch": 0.36645294145177143, "grad_norm": 28.913101196289062, "learning_rate": 5e-05, "loss": 1.2511, "num_input_tokens_seen": 259307000, "step": 3915 }, { "epoch": 0.36645294145177143, "loss": 1.1929950714111328, "loss_ce": 0.009401226416230202, "loss_iou": 0.49609375, "loss_num": 0.038330078125, "loss_xval": 1.1875, "num_input_tokens_seen": 259307000, "step": 3915 }, { "epoch": 0.36654654373566714, "grad_norm": 22.642305374145508, "learning_rate": 5e-05, "loss": 1.4163, "num_input_tokens_seen": 259374700, "step": 3916 }, { "epoch": 0.36654654373566714, "loss": 1.435065507888794, "loss_ce": 0.006354633718729019, "loss_iou": 0.57421875, "loss_num": 0.05615234375, "loss_xval": 1.4296875, "num_input_tokens_seen": 259374700, "step": 3916 }, { "epoch": 0.36664014601956285, "grad_norm": 30.0216064453125, "learning_rate": 5e-05, "loss": 1.2547, "num_input_tokens_seen": 259441668, "step": 3917 }, { "epoch": 0.36664014601956285, "loss": 1.1637648344039917, "loss_ce": 0.004096841439604759, "loss_iou": 0.486328125, "loss_num": 0.037109375, "loss_xval": 1.15625, "num_input_tokens_seen": 259441668, "step": 3917 }, { "epoch": 0.3667337483034586, "grad_norm": 25.223445892333984, "learning_rate": 5e-05, "loss": 1.4944, "num_input_tokens_seen": 259507604, "step": 3918 }, { "epoch": 0.3667337483034586, "loss": 1.3250019550323486, "loss_ce": 0.0022480469197034836, "loss_iou": 0.58984375, "loss_num": 0.029052734375, "loss_xval": 1.3203125, "num_input_tokens_seen": 259507604, "step": 3918 }, { "epoch": 0.36682735058735433, "grad_norm": 14.87253475189209, "learning_rate": 5e-05, "loss": 1.1651, "num_input_tokens_seen": 259573948, "step": 3919 }, { "epoch": 0.36682735058735433, "loss": 1.2377943992614746, "loss_ce": 0.0034194160252809525, "loss_iou": 0.498046875, "loss_num": 0.047607421875, "loss_xval": 1.234375, "num_input_tokens_seen": 259573948, "step": 3919 }, { "epoch": 0.36692095287125004, "grad_norm": 15.379536628723145, "learning_rate": 5e-05, "loss": 1.2461, "num_input_tokens_seen": 259639676, "step": 3920 }, { "epoch": 0.36692095287125004, "loss": 1.3444726467132568, "loss_ce": 0.0031641186214983463, "loss_iou": 0.54296875, "loss_num": 0.05029296875, "loss_xval": 1.34375, "num_input_tokens_seen": 259639676, "step": 3920 }, { "epoch": 0.3670145551551458, "grad_norm": 24.076215744018555, "learning_rate": 5e-05, "loss": 1.262, "num_input_tokens_seen": 259705404, "step": 3921 }, { "epoch": 0.3670145551551458, "loss": 1.3675591945648193, "loss_ce": 0.00427787471562624, "loss_iou": 0.515625, "loss_num": 0.06640625, "loss_xval": 1.359375, "num_input_tokens_seen": 259705404, "step": 3921 }, { "epoch": 0.3671081574390415, "grad_norm": 25.591129302978516, "learning_rate": 5e-05, "loss": 1.3342, "num_input_tokens_seen": 259771784, "step": 3922 }, { "epoch": 0.3671081574390415, "loss": 1.3885796070098877, "loss_ce": 0.006743676960468292, "loss_iou": 0.5625, "loss_num": 0.05126953125, "loss_xval": 1.3828125, "num_input_tokens_seen": 259771784, "step": 3922 }, { "epoch": 0.3672017597229372, "grad_norm": 42.1247444152832, "learning_rate": 5e-05, "loss": 1.4726, "num_input_tokens_seen": 259838688, "step": 3923 }, { "epoch": 0.3672017597229372, "loss": 1.4543354511260986, "loss_ce": 0.0038959342055022717, "loss_iou": 0.59765625, "loss_num": 0.051025390625, "loss_xval": 1.453125, "num_input_tokens_seen": 259838688, "step": 3923 }, { "epoch": 0.367295362006833, "grad_norm": 25.162288665771484, "learning_rate": 5e-05, "loss": 1.4083, "num_input_tokens_seen": 259904864, "step": 3924 }, { "epoch": 0.367295362006833, "loss": 1.4487223625183105, "loss_ce": 0.0043863835744559765, "loss_iou": 0.609375, "loss_num": 0.046142578125, "loss_xval": 1.4453125, "num_input_tokens_seen": 259904864, "step": 3924 }, { "epoch": 0.3673889642907287, "grad_norm": 46.03004455566406, "learning_rate": 5e-05, "loss": 1.1118, "num_input_tokens_seen": 259971360, "step": 3925 }, { "epoch": 0.3673889642907287, "loss": 1.2532507181167603, "loss_ce": 0.00813352596014738, "loss_iou": 0.53125, "loss_num": 0.036376953125, "loss_xval": 1.2421875, "num_input_tokens_seen": 259971360, "step": 3925 }, { "epoch": 0.3674825665746244, "grad_norm": 29.814109802246094, "learning_rate": 5e-05, "loss": 1.4138, "num_input_tokens_seen": 260036788, "step": 3926 }, { "epoch": 0.3674825665746244, "loss": 1.447306752204895, "loss_ce": 0.009318475611507893, "loss_iou": 0.515625, "loss_num": 0.0810546875, "loss_xval": 1.4375, "num_input_tokens_seen": 260036788, "step": 3926 }, { "epoch": 0.3675761688585201, "grad_norm": 39.045223236083984, "learning_rate": 5e-05, "loss": 1.1283, "num_input_tokens_seen": 260103620, "step": 3927 }, { "epoch": 0.3675761688585201, "loss": 1.1129131317138672, "loss_ce": 0.0030498034320771694, "loss_iou": 0.48828125, "loss_num": 0.0267333984375, "loss_xval": 1.109375, "num_input_tokens_seen": 260103620, "step": 3927 }, { "epoch": 0.3676697711424159, "grad_norm": 26.382869720458984, "learning_rate": 5e-05, "loss": 1.5512, "num_input_tokens_seen": 260170292, "step": 3928 }, { "epoch": 0.3676697711424159, "loss": 1.6524462699890137, "loss_ce": 0.0049853757955133915, "loss_iou": 0.671875, "loss_num": 0.06103515625, "loss_xval": 1.6484375, "num_input_tokens_seen": 260170292, "step": 3928 }, { "epoch": 0.3677633734263116, "grad_norm": 16.269216537475586, "learning_rate": 5e-05, "loss": 1.1346, "num_input_tokens_seen": 260236180, "step": 3929 }, { "epoch": 0.3677633734263116, "loss": 0.9752902984619141, "loss_ce": 0.004343067295849323, "loss_iou": 0.359375, "loss_num": 0.05078125, "loss_xval": 0.97265625, "num_input_tokens_seen": 260236180, "step": 3929 }, { "epoch": 0.3678569757102073, "grad_norm": 32.700626373291016, "learning_rate": 5e-05, "loss": 1.3044, "num_input_tokens_seen": 260301688, "step": 3930 }, { "epoch": 0.3678569757102073, "loss": 1.1879197359085083, "loss_ce": 0.002861207351088524, "loss_iou": 0.466796875, "loss_num": 0.050048828125, "loss_xval": 1.1875, "num_input_tokens_seen": 260301688, "step": 3930 }, { "epoch": 0.3679505779941031, "grad_norm": 109.71320343017578, "learning_rate": 5e-05, "loss": 1.3784, "num_input_tokens_seen": 260368680, "step": 3931 }, { "epoch": 0.3679505779941031, "loss": 1.4181301593780518, "loss_ce": 0.006509024649858475, "loss_iou": 0.578125, "loss_num": 0.0517578125, "loss_xval": 1.4140625, "num_input_tokens_seen": 260368680, "step": 3931 }, { "epoch": 0.3680441802779988, "grad_norm": 25.38262939453125, "learning_rate": 5e-05, "loss": 1.3802, "num_input_tokens_seen": 260435616, "step": 3932 }, { "epoch": 0.3680441802779988, "loss": 1.368844747543335, "loss_ce": 0.006540108472108841, "loss_iou": 0.55078125, "loss_num": 0.052978515625, "loss_xval": 1.359375, "num_input_tokens_seen": 260435616, "step": 3932 }, { "epoch": 0.3681377825618945, "grad_norm": 21.631601333618164, "learning_rate": 5e-05, "loss": 1.4052, "num_input_tokens_seen": 260501660, "step": 3933 }, { "epoch": 0.3681377825618945, "loss": 1.1894268989562988, "loss_ce": 0.005344804376363754, "loss_iou": 0.48828125, "loss_num": 0.04150390625, "loss_xval": 1.1875, "num_input_tokens_seen": 260501660, "step": 3933 }, { "epoch": 0.3682313848457902, "grad_norm": 28.03546142578125, "learning_rate": 5e-05, "loss": 1.158, "num_input_tokens_seen": 260568136, "step": 3934 }, { "epoch": 0.3682313848457902, "loss": 1.268270492553711, "loss_ce": 0.006307664327323437, "loss_iou": 0.5, "loss_num": 0.05224609375, "loss_xval": 1.265625, "num_input_tokens_seen": 260568136, "step": 3934 }, { "epoch": 0.368324987129686, "grad_norm": 54.29316711425781, "learning_rate": 5e-05, "loss": 1.4128, "num_input_tokens_seen": 260634212, "step": 3935 }, { "epoch": 0.368324987129686, "loss": 1.3329976797103882, "loss_ce": 0.014150056056678295, "loss_iou": 0.54296875, "loss_num": 0.047119140625, "loss_xval": 1.3203125, "num_input_tokens_seen": 260634212, "step": 3935 }, { "epoch": 0.3684185894135817, "grad_norm": 35.369625091552734, "learning_rate": 5e-05, "loss": 1.4614, "num_input_tokens_seen": 260700212, "step": 3936 }, { "epoch": 0.3684185894135817, "loss": 1.37261962890625, "loss_ce": 0.013732925057411194, "loss_iou": 0.57421875, "loss_num": 0.041748046875, "loss_xval": 1.359375, "num_input_tokens_seen": 260700212, "step": 3936 }, { "epoch": 0.3685121916974774, "grad_norm": 32.84611892700195, "learning_rate": 5e-05, "loss": 1.5433, "num_input_tokens_seen": 260765612, "step": 3937 }, { "epoch": 0.3685121916974774, "loss": 1.389090895652771, "loss_ce": 0.005301805678755045, "loss_iou": 0.515625, "loss_num": 0.0712890625, "loss_xval": 1.3828125, "num_input_tokens_seen": 260765612, "step": 3937 }, { "epoch": 0.36860579398137316, "grad_norm": 36.469139099121094, "learning_rate": 5e-05, "loss": 1.1915, "num_input_tokens_seen": 260832048, "step": 3938 }, { "epoch": 0.36860579398137316, "loss": 1.1461762189865112, "loss_ce": 0.005551266483962536, "loss_iou": 0.474609375, "loss_num": 0.037841796875, "loss_xval": 1.140625, "num_input_tokens_seen": 260832048, "step": 3938 }, { "epoch": 0.3686993962652689, "grad_norm": 25.36920738220215, "learning_rate": 5e-05, "loss": 1.4403, "num_input_tokens_seen": 260897948, "step": 3939 }, { "epoch": 0.3686993962652689, "loss": 1.6238946914672852, "loss_ce": 0.0037775335367769003, "loss_iou": 0.64453125, "loss_num": 0.06591796875, "loss_xval": 1.6171875, "num_input_tokens_seen": 260897948, "step": 3939 }, { "epoch": 0.3687929985491646, "grad_norm": 18.976686477661133, "learning_rate": 5e-05, "loss": 1.1858, "num_input_tokens_seen": 260963644, "step": 3940 }, { "epoch": 0.3687929985491646, "loss": 1.3617618083953857, "loss_ce": 0.003363432828336954, "loss_iou": 0.53125, "loss_num": 0.060302734375, "loss_xval": 1.359375, "num_input_tokens_seen": 260963644, "step": 3940 }, { "epoch": 0.36888660083306035, "grad_norm": 208.07705688476562, "learning_rate": 5e-05, "loss": 1.4929, "num_input_tokens_seen": 261028620, "step": 3941 }, { "epoch": 0.36888660083306035, "loss": 1.8821766376495361, "loss_ce": 0.009129858575761318, "loss_iou": 0.71484375, "loss_num": 0.087890625, "loss_xval": 1.875, "num_input_tokens_seen": 261028620, "step": 3941 }, { "epoch": 0.36898020311695606, "grad_norm": 23.991506576538086, "learning_rate": 5e-05, "loss": 1.3835, "num_input_tokens_seen": 261095508, "step": 3942 }, { "epoch": 0.36898020311695606, "loss": 1.3920793533325195, "loss_ce": 0.006825482007116079, "loss_iou": 0.52734375, "loss_num": 0.06591796875, "loss_xval": 1.3828125, "num_input_tokens_seen": 261095508, "step": 3942 }, { "epoch": 0.3690738054008518, "grad_norm": 22.969532012939453, "learning_rate": 5e-05, "loss": 1.539, "num_input_tokens_seen": 261161432, "step": 3943 }, { "epoch": 0.3690738054008518, "loss": 1.3845458030700684, "loss_ce": 0.0041747367940843105, "loss_iou": 0.58984375, "loss_num": 0.039306640625, "loss_xval": 1.3828125, "num_input_tokens_seen": 261161432, "step": 3943 }, { "epoch": 0.3691674076847475, "grad_norm": 21.72538185119629, "learning_rate": 5e-05, "loss": 1.1539, "num_input_tokens_seen": 261228076, "step": 3944 }, { "epoch": 0.3691674076847475, "loss": 1.1648333072662354, "loss_ce": 0.005409496836364269, "loss_iou": 0.5, "loss_num": 0.0322265625, "loss_xval": 1.15625, "num_input_tokens_seen": 261228076, "step": 3944 }, { "epoch": 0.36926100996864325, "grad_norm": 22.333606719970703, "learning_rate": 5e-05, "loss": 1.4191, "num_input_tokens_seen": 261292968, "step": 3945 }, { "epoch": 0.36926100996864325, "loss": 1.445363998413086, "loss_ce": 0.0061549958772957325, "loss_iou": 0.59375, "loss_num": 0.05029296875, "loss_xval": 1.4375, "num_input_tokens_seen": 261292968, "step": 3945 }, { "epoch": 0.36935461225253896, "grad_norm": 38.30065155029297, "learning_rate": 5e-05, "loss": 1.3555, "num_input_tokens_seen": 261359140, "step": 3946 }, { "epoch": 0.36935461225253896, "loss": 1.4409079551696777, "loss_ce": 0.006337637081742287, "loss_iou": 0.58203125, "loss_num": 0.053955078125, "loss_xval": 1.4375, "num_input_tokens_seen": 261359140, "step": 3946 }, { "epoch": 0.36944821453643467, "grad_norm": 24.800479888916016, "learning_rate": 5e-05, "loss": 1.5593, "num_input_tokens_seen": 261425624, "step": 3947 }, { "epoch": 0.36944821453643467, "loss": 1.5257058143615723, "loss_ce": 0.006174529902637005, "loss_iou": 0.609375, "loss_num": 0.059326171875, "loss_xval": 1.515625, "num_input_tokens_seen": 261425624, "step": 3947 }, { "epoch": 0.36954181682033044, "grad_norm": 19.440298080444336, "learning_rate": 5e-05, "loss": 1.4042, "num_input_tokens_seen": 261492212, "step": 3948 }, { "epoch": 0.36954181682033044, "loss": 1.3907049894332886, "loss_ce": 0.007160104811191559, "loss_iou": 0.52734375, "loss_num": 0.06494140625, "loss_xval": 1.3828125, "num_input_tokens_seen": 261492212, "step": 3948 }, { "epoch": 0.36963541910422615, "grad_norm": 37.813629150390625, "learning_rate": 5e-05, "loss": 1.3949, "num_input_tokens_seen": 261559560, "step": 3949 }, { "epoch": 0.36963541910422615, "loss": 1.3426899909973145, "loss_ce": 0.0033344775438308716, "loss_iou": 0.546875, "loss_num": 0.048583984375, "loss_xval": 1.3359375, "num_input_tokens_seen": 261559560, "step": 3949 }, { "epoch": 0.36972902138812186, "grad_norm": 25.93182373046875, "learning_rate": 5e-05, "loss": 1.5264, "num_input_tokens_seen": 261626104, "step": 3950 }, { "epoch": 0.36972902138812186, "loss": 1.5038950443267822, "loss_ce": 0.006824803072959185, "loss_iou": 0.6171875, "loss_num": 0.052978515625, "loss_xval": 1.5, "num_input_tokens_seen": 261626104, "step": 3950 }, { "epoch": 0.36982262367201757, "grad_norm": 11.437479019165039, "learning_rate": 5e-05, "loss": 1.0759, "num_input_tokens_seen": 261691932, "step": 3951 }, { "epoch": 0.36982262367201757, "loss": 0.9897888898849487, "loss_ce": 0.007489139214158058, "loss_iou": 0.40625, "loss_num": 0.033447265625, "loss_xval": 0.98046875, "num_input_tokens_seen": 261691932, "step": 3951 }, { "epoch": 0.36991622595591334, "grad_norm": 25.98269271850586, "learning_rate": 5e-05, "loss": 1.4724, "num_input_tokens_seen": 261758472, "step": 3952 }, { "epoch": 0.36991622595591334, "loss": 1.594347596168518, "loss_ce": 0.005480426829308271, "loss_iou": 0.625, "loss_num": 0.0673828125, "loss_xval": 1.5859375, "num_input_tokens_seen": 261758472, "step": 3952 }, { "epoch": 0.37000982823980905, "grad_norm": 35.442134857177734, "learning_rate": 5e-05, "loss": 1.1802, "num_input_tokens_seen": 261826040, "step": 3953 }, { "epoch": 0.37000982823980905, "loss": 1.3480236530303955, "loss_ce": 0.007691656239330769, "loss_iou": 0.55859375, "loss_num": 0.044189453125, "loss_xval": 1.34375, "num_input_tokens_seen": 261826040, "step": 3953 }, { "epoch": 0.37010343052370476, "grad_norm": 19.181516647338867, "learning_rate": 5e-05, "loss": 1.3268, "num_input_tokens_seen": 261892000, "step": 3954 }, { "epoch": 0.37010343052370476, "loss": 1.2413418292999268, "loss_ce": 0.004525455180555582, "loss_iou": 0.5078125, "loss_num": 0.043701171875, "loss_xval": 1.234375, "num_input_tokens_seen": 261892000, "step": 3954 }, { "epoch": 0.3701970328076005, "grad_norm": 51.0396614074707, "learning_rate": 5e-05, "loss": 1.2316, "num_input_tokens_seen": 261958300, "step": 3955 }, { "epoch": 0.3701970328076005, "loss": 1.1863930225372314, "loss_ce": 0.004019945859909058, "loss_iou": 0.423828125, "loss_num": 0.0673828125, "loss_xval": 1.1796875, "num_input_tokens_seen": 261958300, "step": 3955 }, { "epoch": 0.37029063509149623, "grad_norm": 31.92195701599121, "learning_rate": 5e-05, "loss": 1.1183, "num_input_tokens_seen": 262024588, "step": 3956 }, { "epoch": 0.37029063509149623, "loss": 0.9922685623168945, "loss_ce": 0.0041093844920396805, "loss_iou": 0.41015625, "loss_num": 0.033447265625, "loss_xval": 0.98828125, "num_input_tokens_seen": 262024588, "step": 3956 }, { "epoch": 0.37038423737539194, "grad_norm": 43.49807357788086, "learning_rate": 5e-05, "loss": 1.1621, "num_input_tokens_seen": 262090060, "step": 3957 }, { "epoch": 0.37038423737539194, "loss": 1.233185052871704, "loss_ce": 0.007599052041769028, "loss_iou": 0.486328125, "loss_num": 0.051025390625, "loss_xval": 1.2265625, "num_input_tokens_seen": 262090060, "step": 3957 }, { "epoch": 0.3704778396592877, "grad_norm": 36.92182540893555, "learning_rate": 5e-05, "loss": 1.4606, "num_input_tokens_seen": 262157232, "step": 3958 }, { "epoch": 0.3704778396592877, "loss": 1.4280641078948975, "loss_ce": 0.006189106963574886, "loss_iou": 0.546875, "loss_num": 0.06494140625, "loss_xval": 1.421875, "num_input_tokens_seen": 262157232, "step": 3958 }, { "epoch": 0.3705714419431834, "grad_norm": 20.42865753173828, "learning_rate": 5e-05, "loss": 1.5222, "num_input_tokens_seen": 262224816, "step": 3959 }, { "epoch": 0.3705714419431834, "loss": 1.3759833574295044, "loss_ce": 0.004889609292149544, "loss_iou": 0.578125, "loss_num": 0.043212890625, "loss_xval": 1.375, "num_input_tokens_seen": 262224816, "step": 3959 }, { "epoch": 0.37066504422707913, "grad_norm": 22.276254653930664, "learning_rate": 5e-05, "loss": 1.4538, "num_input_tokens_seen": 262290680, "step": 3960 }, { "epoch": 0.37066504422707913, "loss": 1.602579116821289, "loss_ce": 0.007364316843450069, "loss_iou": 0.62890625, "loss_num": 0.06787109375, "loss_xval": 1.59375, "num_input_tokens_seen": 262290680, "step": 3960 }, { "epoch": 0.37075864651097484, "grad_norm": 45.19758224487305, "learning_rate": 5e-05, "loss": 1.6121, "num_input_tokens_seen": 262357400, "step": 3961 }, { "epoch": 0.37075864651097484, "loss": 1.6432125568389893, "loss_ce": 0.006005595438182354, "loss_iou": 0.65234375, "loss_num": 0.06689453125, "loss_xval": 1.640625, "num_input_tokens_seen": 262357400, "step": 3961 }, { "epoch": 0.3708522487948706, "grad_norm": 33.26939392089844, "learning_rate": 5e-05, "loss": 1.5507, "num_input_tokens_seen": 262423420, "step": 3962 }, { "epoch": 0.3708522487948706, "loss": 1.7359198331832886, "loss_ce": 0.006427627522498369, "loss_iou": 0.7265625, "loss_num": 0.055419921875, "loss_xval": 1.7265625, "num_input_tokens_seen": 262423420, "step": 3962 }, { "epoch": 0.3709458510787663, "grad_norm": 23.176490783691406, "learning_rate": 5e-05, "loss": 1.464, "num_input_tokens_seen": 262489300, "step": 3963 }, { "epoch": 0.3709458510787663, "loss": 1.648054599761963, "loss_ce": 0.008406110107898712, "loss_iou": 0.6796875, "loss_num": 0.056396484375, "loss_xval": 1.640625, "num_input_tokens_seen": 262489300, "step": 3963 }, { "epoch": 0.37103945336266203, "grad_norm": 29.751508712768555, "learning_rate": 5e-05, "loss": 1.4418, "num_input_tokens_seen": 262556360, "step": 3964 }, { "epoch": 0.37103945336266203, "loss": 1.2887781858444214, "loss_ce": 0.0055750226601958275, "loss_iou": 0.51953125, "loss_num": 0.049560546875, "loss_xval": 1.28125, "num_input_tokens_seen": 262556360, "step": 3964 }, { "epoch": 0.3711330556465578, "grad_norm": 22.3258113861084, "learning_rate": 5e-05, "loss": 1.1992, "num_input_tokens_seen": 262622520, "step": 3965 }, { "epoch": 0.3711330556465578, "loss": 0.9840934872627258, "loss_ce": 0.0028922846540808678, "loss_iou": 0.4296875, "loss_num": 0.0244140625, "loss_xval": 0.98046875, "num_input_tokens_seen": 262622520, "step": 3965 }, { "epoch": 0.3712266579304535, "grad_norm": 35.426761627197266, "learning_rate": 5e-05, "loss": 1.2399, "num_input_tokens_seen": 262687972, "step": 3966 }, { "epoch": 0.3712266579304535, "loss": 1.333057165145874, "loss_ce": 0.005908791907131672, "loss_iou": 0.5, "loss_num": 0.06494140625, "loss_xval": 1.328125, "num_input_tokens_seen": 262687972, "step": 3966 }, { "epoch": 0.3713202602143492, "grad_norm": 33.05916213989258, "learning_rate": 5e-05, "loss": 1.5817, "num_input_tokens_seen": 262754496, "step": 3967 }, { "epoch": 0.3713202602143492, "loss": 1.429577112197876, "loss_ce": 0.006237310823053122, "loss_iou": 0.59765625, "loss_num": 0.046142578125, "loss_xval": 1.421875, "num_input_tokens_seen": 262754496, "step": 3967 }, { "epoch": 0.371413862498245, "grad_norm": 30.960010528564453, "learning_rate": 5e-05, "loss": 1.3217, "num_input_tokens_seen": 262821360, "step": 3968 }, { "epoch": 0.371413862498245, "loss": 1.3256900310516357, "loss_ce": 0.003912715706974268, "loss_iou": 0.5625, "loss_num": 0.039794921875, "loss_xval": 1.3203125, "num_input_tokens_seen": 262821360, "step": 3968 }, { "epoch": 0.3715074647821407, "grad_norm": 24.339763641357422, "learning_rate": 5e-05, "loss": 1.3748, "num_input_tokens_seen": 262888432, "step": 3969 }, { "epoch": 0.3715074647821407, "loss": 1.5881636142730713, "loss_ce": 0.006132287904620171, "loss_iou": 0.625, "loss_num": 0.0673828125, "loss_xval": 1.578125, "num_input_tokens_seen": 262888432, "step": 3969 }, { "epoch": 0.3716010670660364, "grad_norm": 15.922314643859863, "learning_rate": 5e-05, "loss": 1.3201, "num_input_tokens_seen": 262953548, "step": 3970 }, { "epoch": 0.3716010670660364, "loss": 1.4539998769760132, "loss_ce": 0.05067954212427139, "loss_iou": 0.5078125, "loss_num": 0.07763671875, "loss_xval": 1.40625, "num_input_tokens_seen": 262953548, "step": 3970 }, { "epoch": 0.3716946693499321, "grad_norm": 33.7575569152832, "learning_rate": 5e-05, "loss": 1.0576, "num_input_tokens_seen": 263019548, "step": 3971 }, { "epoch": 0.3716946693499321, "loss": 1.1322962045669556, "loss_ce": 0.005831355229020119, "loss_iou": 0.4453125, "loss_num": 0.046630859375, "loss_xval": 1.125, "num_input_tokens_seen": 263019548, "step": 3971 }, { "epoch": 0.3717882716338279, "grad_norm": 26.21764373779297, "learning_rate": 5e-05, "loss": 1.2614, "num_input_tokens_seen": 263086704, "step": 3972 }, { "epoch": 0.3717882716338279, "loss": 1.081892728805542, "loss_ce": 0.0047443886287510395, "loss_iou": 0.46875, "loss_num": 0.0277099609375, "loss_xval": 1.078125, "num_input_tokens_seen": 263086704, "step": 3972 }, { "epoch": 0.3718818739177236, "grad_norm": 52.61083984375, "learning_rate": 5e-05, "loss": 1.4271, "num_input_tokens_seen": 263154148, "step": 3973 }, { "epoch": 0.3718818739177236, "loss": 1.5574123859405518, "loss_ce": 0.004678058438003063, "loss_iou": 0.6484375, "loss_num": 0.05126953125, "loss_xval": 1.5546875, "num_input_tokens_seen": 263154148, "step": 3973 }, { "epoch": 0.3719754762016193, "grad_norm": 23.409099578857422, "learning_rate": 5e-05, "loss": 1.3342, "num_input_tokens_seen": 263220920, "step": 3974 }, { "epoch": 0.3719754762016193, "loss": 1.3142833709716797, "loss_ce": 0.004713157191872597, "loss_iou": 0.578125, "loss_num": 0.0303955078125, "loss_xval": 1.3125, "num_input_tokens_seen": 263220920, "step": 3974 }, { "epoch": 0.37206907848551507, "grad_norm": 31.830984115600586, "learning_rate": 5e-05, "loss": 1.4793, "num_input_tokens_seen": 263286796, "step": 3975 }, { "epoch": 0.37206907848551507, "loss": 1.300828218460083, "loss_ce": 0.010789182037115097, "loss_iou": 0.5234375, "loss_num": 0.048583984375, "loss_xval": 1.2890625, "num_input_tokens_seen": 263286796, "step": 3975 }, { "epoch": 0.3721626807694108, "grad_norm": 43.15821838378906, "learning_rate": 5e-05, "loss": 1.1752, "num_input_tokens_seen": 263353516, "step": 3976 }, { "epoch": 0.3721626807694108, "loss": 1.0715612173080444, "loss_ce": 0.003201832063496113, "loss_iou": 0.45703125, "loss_num": 0.0308837890625, "loss_xval": 1.0703125, "num_input_tokens_seen": 263353516, "step": 3976 }, { "epoch": 0.3722562830533065, "grad_norm": 29.856653213500977, "learning_rate": 5e-05, "loss": 1.1555, "num_input_tokens_seen": 263419444, "step": 3977 }, { "epoch": 0.3722562830533065, "loss": 1.1790707111358643, "loss_ce": 0.003777771722525358, "loss_iou": 0.5078125, "loss_num": 0.031982421875, "loss_xval": 1.171875, "num_input_tokens_seen": 263419444, "step": 3977 }, { "epoch": 0.3723498853372022, "grad_norm": 22.400190353393555, "learning_rate": 5e-05, "loss": 1.5132, "num_input_tokens_seen": 263485440, "step": 3978 }, { "epoch": 0.3723498853372022, "loss": 1.6392052173614502, "loss_ce": 0.006392636336386204, "loss_iou": 0.66796875, "loss_num": 0.059814453125, "loss_xval": 1.6328125, "num_input_tokens_seen": 263485440, "step": 3978 }, { "epoch": 0.37244348762109797, "grad_norm": 22.372915267944336, "learning_rate": 5e-05, "loss": 1.3614, "num_input_tokens_seen": 263551080, "step": 3979 }, { "epoch": 0.37244348762109797, "loss": 1.2551847696304321, "loss_ce": 0.007137857377529144, "loss_iou": 0.484375, "loss_num": 0.055908203125, "loss_xval": 1.25, "num_input_tokens_seen": 263551080, "step": 3979 }, { "epoch": 0.3725370899049937, "grad_norm": 27.176448822021484, "learning_rate": 5e-05, "loss": 1.3504, "num_input_tokens_seen": 263617276, "step": 3980 }, { "epoch": 0.3725370899049937, "loss": 1.3879384994506836, "loss_ce": 0.006102666258811951, "loss_iou": 0.57421875, "loss_num": 0.04638671875, "loss_xval": 1.3828125, "num_input_tokens_seen": 263617276, "step": 3980 }, { "epoch": 0.3726306921888894, "grad_norm": 23.19794464111328, "learning_rate": 5e-05, "loss": 1.1345, "num_input_tokens_seen": 263683032, "step": 3981 }, { "epoch": 0.3726306921888894, "loss": 1.2938721179962158, "loss_ce": 0.002856516744941473, "loss_iou": 0.5078125, "loss_num": 0.055419921875, "loss_xval": 1.2890625, "num_input_tokens_seen": 263683032, "step": 3981 }, { "epoch": 0.37272429447278516, "grad_norm": 33.36949157714844, "learning_rate": 5e-05, "loss": 1.1853, "num_input_tokens_seen": 263748408, "step": 3982 }, { "epoch": 0.37272429447278516, "loss": 1.0041017532348633, "loss_ce": 0.0072755636647343636, "loss_iou": 0.3984375, "loss_num": 0.03955078125, "loss_xval": 0.99609375, "num_input_tokens_seen": 263748408, "step": 3982 }, { "epoch": 0.37281789675668087, "grad_norm": 23.538049697875977, "learning_rate": 5e-05, "loss": 1.3909, "num_input_tokens_seen": 263815224, "step": 3983 }, { "epoch": 0.37281789675668087, "loss": 1.483205795288086, "loss_ce": 0.004690231755375862, "loss_iou": 0.61328125, "loss_num": 0.05078125, "loss_xval": 1.4765625, "num_input_tokens_seen": 263815224, "step": 3983 }, { "epoch": 0.3729114990405766, "grad_norm": 21.951091766357422, "learning_rate": 5e-05, "loss": 1.3645, "num_input_tokens_seen": 263881316, "step": 3984 }, { "epoch": 0.3729114990405766, "loss": 1.4610261917114258, "loss_ce": 0.005948091857135296, "loss_iou": 0.55859375, "loss_num": 0.06689453125, "loss_xval": 1.453125, "num_input_tokens_seen": 263881316, "step": 3984 }, { "epoch": 0.37300510132447234, "grad_norm": 38.7070198059082, "learning_rate": 5e-05, "loss": 1.206, "num_input_tokens_seen": 263946392, "step": 3985 }, { "epoch": 0.37300510132447234, "loss": 1.2883522510528564, "loss_ce": 0.0018533116672188044, "loss_iou": 0.52734375, "loss_num": 0.04638671875, "loss_xval": 1.2890625, "num_input_tokens_seen": 263946392, "step": 3985 }, { "epoch": 0.37309870360836805, "grad_norm": 18.664636611938477, "learning_rate": 5e-05, "loss": 1.2316, "num_input_tokens_seen": 264011620, "step": 3986 }, { "epoch": 0.37309870360836805, "loss": 1.0689376592636108, "loss_ce": 0.008879079483449459, "loss_iou": 0.40234375, "loss_num": 0.05126953125, "loss_xval": 1.0625, "num_input_tokens_seen": 264011620, "step": 3986 }, { "epoch": 0.37319230589226376, "grad_norm": 19.976411819458008, "learning_rate": 5e-05, "loss": 1.0078, "num_input_tokens_seen": 264077216, "step": 3987 }, { "epoch": 0.37319230589226376, "loss": 0.9493486881256104, "loss_ce": 0.004524484742432833, "loss_iou": 0.40234375, "loss_num": 0.0283203125, "loss_xval": 0.9453125, "num_input_tokens_seen": 264077216, "step": 3987 }, { "epoch": 0.3732859081761595, "grad_norm": 41.811859130859375, "learning_rate": 5e-05, "loss": 1.2472, "num_input_tokens_seen": 264143472, "step": 3988 }, { "epoch": 0.3732859081761595, "loss": 1.242361307144165, "loss_ce": 0.0045683011412620544, "loss_iou": 0.49609375, "loss_num": 0.048828125, "loss_xval": 1.234375, "num_input_tokens_seen": 264143472, "step": 3988 }, { "epoch": 0.37337951046005524, "grad_norm": 15.546930313110352, "learning_rate": 5e-05, "loss": 1.1988, "num_input_tokens_seen": 264209472, "step": 3989 }, { "epoch": 0.37337951046005524, "loss": 1.1625902652740479, "loss_ce": 0.004204002674669027, "loss_iou": 0.478515625, "loss_num": 0.04052734375, "loss_xval": 1.15625, "num_input_tokens_seen": 264209472, "step": 3989 }, { "epoch": 0.37347311274395095, "grad_norm": 20.363605499267578, "learning_rate": 5e-05, "loss": 1.2522, "num_input_tokens_seen": 264275612, "step": 3990 }, { "epoch": 0.37347311274395095, "loss": 1.4210174083709717, "loss_ce": 0.010372873395681381, "loss_iou": 0.5546875, "loss_num": 0.060302734375, "loss_xval": 1.4140625, "num_input_tokens_seen": 264275612, "step": 3990 }, { "epoch": 0.37356671502784666, "grad_norm": 22.40890884399414, "learning_rate": 5e-05, "loss": 1.3464, "num_input_tokens_seen": 264342076, "step": 3991 }, { "epoch": 0.37356671502784666, "loss": 1.2793970108032227, "loss_ce": 0.009865716099739075, "loss_iou": 0.50390625, "loss_num": 0.052734375, "loss_xval": 1.265625, "num_input_tokens_seen": 264342076, "step": 3991 }, { "epoch": 0.37366031731174243, "grad_norm": 40.94968032836914, "learning_rate": 5e-05, "loss": 1.2437, "num_input_tokens_seen": 264408764, "step": 3992 }, { "epoch": 0.37366031731174243, "loss": 1.3521018028259277, "loss_ce": 0.004445582628250122, "loss_iou": 0.56640625, "loss_num": 0.04345703125, "loss_xval": 1.34375, "num_input_tokens_seen": 264408764, "step": 3992 }, { "epoch": 0.37375391959563814, "grad_norm": 28.47822380065918, "learning_rate": 5e-05, "loss": 1.7147, "num_input_tokens_seen": 264474700, "step": 3993 }, { "epoch": 0.37375391959563814, "loss": 1.8727054595947266, "loss_ce": 0.004541396629065275, "loss_iou": 0.74609375, "loss_num": 0.0751953125, "loss_xval": 1.8671875, "num_input_tokens_seen": 264474700, "step": 3993 }, { "epoch": 0.37384752187953385, "grad_norm": 15.673739433288574, "learning_rate": 5e-05, "loss": 1.272, "num_input_tokens_seen": 264539576, "step": 3994 }, { "epoch": 0.37384752187953385, "loss": 1.1188582181930542, "loss_ce": 0.006797630339860916, "loss_iou": 0.447265625, "loss_num": 0.043701171875, "loss_xval": 1.109375, "num_input_tokens_seen": 264539576, "step": 3994 }, { "epoch": 0.37394112416342956, "grad_norm": 30.9637451171875, "learning_rate": 5e-05, "loss": 1.3586, "num_input_tokens_seen": 264605320, "step": 3995 }, { "epoch": 0.37394112416342956, "loss": 1.4542756080627441, "loss_ce": 0.004080276004970074, "loss_iou": 0.52734375, "loss_num": 0.07958984375, "loss_xval": 1.453125, "num_input_tokens_seen": 264605320, "step": 3995 }, { "epoch": 0.3740347264473253, "grad_norm": 22.062971115112305, "learning_rate": 5e-05, "loss": 1.4988, "num_input_tokens_seen": 264672384, "step": 3996 }, { "epoch": 0.3740347264473253, "loss": 1.524160623550415, "loss_ce": 0.0051176357083022594, "loss_iou": 0.6171875, "loss_num": 0.057373046875, "loss_xval": 1.515625, "num_input_tokens_seen": 264672384, "step": 3996 }, { "epoch": 0.37412832873122104, "grad_norm": 82.78164672851562, "learning_rate": 5e-05, "loss": 1.3324, "num_input_tokens_seen": 264737884, "step": 3997 }, { "epoch": 0.37412832873122104, "loss": 1.3954143524169922, "loss_ce": 0.00527762807905674, "loss_iou": 0.515625, "loss_num": 0.0712890625, "loss_xval": 1.390625, "num_input_tokens_seen": 264737884, "step": 3997 }, { "epoch": 0.37422193101511675, "grad_norm": 25.055173873901367, "learning_rate": 5e-05, "loss": 1.132, "num_input_tokens_seen": 264804800, "step": 3998 }, { "epoch": 0.37422193101511675, "loss": 1.2982065677642822, "loss_ce": 0.004749562591314316, "loss_iou": 0.5546875, "loss_num": 0.0361328125, "loss_xval": 1.296875, "num_input_tokens_seen": 264804800, "step": 3998 }, { "epoch": 0.3743155332990125, "grad_norm": 36.58376693725586, "learning_rate": 5e-05, "loss": 1.3056, "num_input_tokens_seen": 264869108, "step": 3999 }, { "epoch": 0.3743155332990125, "loss": 1.1527501344680786, "loss_ce": 0.005533342249691486, "loss_iou": 0.52734375, "loss_num": 0.018798828125, "loss_xval": 1.1484375, "num_input_tokens_seen": 264869108, "step": 3999 }, { "epoch": 0.3744091355829082, "grad_norm": 54.213768005371094, "learning_rate": 5e-05, "loss": 1.8184, "num_input_tokens_seen": 264936296, "step": 4000 }, { "epoch": 0.3744091355829082, "eval_seeclick_CIoU": 0.15379706770181656, "eval_seeclick_GIoU": 0.1632080227136612, "eval_seeclick_IoU": 0.2842213287949562, "eval_seeclick_MAE_all": 0.17822610586881638, "eval_seeclick_MAE_h": 0.11929627135396004, "eval_seeclick_MAE_w": 0.11855470016598701, "eval_seeclick_MAE_x_boxes": 0.25859377533197403, "eval_seeclick_MAE_y_boxes": 0.1552606150507927, "eval_seeclick_NUM_probability": 0.9998007714748383, "eval_seeclick_inside_bbox": 0.3895833343267441, "eval_seeclick_loss": 2.629326581954956, "eval_seeclick_loss_ce": 0.014735812786966562, "eval_seeclick_loss_iou": 0.890380859375, "eval_seeclick_loss_num": 0.170318603515625, "eval_seeclick_loss_xval": 2.63330078125, "eval_seeclick_runtime": 72.9953, "eval_seeclick_samples_per_second": 0.644, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 264936296, "step": 4000 }, { "epoch": 0.3744091355829082, "eval_icons_CIoU": -0.09496079757809639, "eval_icons_GIoU": 0.035429807379841805, "eval_icons_IoU": 0.1063428670167923, "eval_icons_MAE_all": 0.20099008828401566, "eval_icons_MAE_h": 0.18222493678331375, "eval_icons_MAE_w": 0.1513308808207512, "eval_icons_MAE_x_boxes": 0.16003187745809555, "eval_icons_MAE_y_boxes": 0.127197178080678, "eval_icons_NUM_probability": 0.9998990595340729, "eval_icons_inside_bbox": 0.1649305559694767, "eval_icons_loss": 2.8375203609466553, "eval_icons_loss_ce": 2.2454639292845968e-05, "eval_icons_loss_iou": 0.950927734375, "eval_icons_loss_num": 0.198516845703125, "eval_icons_loss_xval": 2.89208984375, "eval_icons_runtime": 73.6202, "eval_icons_samples_per_second": 0.679, "eval_icons_steps_per_second": 0.027, "num_input_tokens_seen": 264936296, "step": 4000 }, { "epoch": 0.3744091355829082, "eval_screenspot_CIoU": -0.019918086628119152, "eval_screenspot_GIoU": 0.013222339873512587, "eval_screenspot_IoU": 0.15667323768138885, "eval_screenspot_MAE_all": 0.21852064629395804, "eval_screenspot_MAE_h": 0.1598268449306488, "eval_screenspot_MAE_w": 0.18928087254365286, "eval_screenspot_MAE_x_boxes": 0.29260844985644024, "eval_screenspot_MAE_y_boxes": 0.14141333103179932, "eval_screenspot_NUM_probability": 0.9999316533406576, "eval_screenspot_inside_bbox": 0.3254166642824809, "eval_screenspot_loss": 3.0932860374450684, "eval_screenspot_loss_ce": 0.010087936495741209, "eval_screenspot_loss_iou": 0.9951171875, "eval_screenspot_loss_num": 0.22828165690104166, "eval_screenspot_loss_xval": 3.1318359375, "eval_screenspot_runtime": 126.8978, "eval_screenspot_samples_per_second": 0.701, "eval_screenspot_steps_per_second": 0.024, "num_input_tokens_seen": 264936296, "step": 4000 }, { "epoch": 0.3744091355829082, "eval_compot_CIoU": -0.0741785280406475, "eval_compot_GIoU": 0.020535959862172604, "eval_compot_IoU": 0.1116841621696949, "eval_compot_MAE_all": 0.2555025890469551, "eval_compot_MAE_h": 0.18729552626609802, "eval_compot_MAE_w": 0.2644929438829422, "eval_compot_MAE_x_boxes": 0.21654167771339417, "eval_compot_MAE_y_boxes": 0.12299535050988197, "eval_compot_NUM_probability": 0.9999019503593445, "eval_compot_inside_bbox": 0.1892361119389534, "eval_compot_loss": 3.307427406311035, "eval_compot_loss_ce": 0.00394441606476903, "eval_compot_loss_iou": 1.0068359375, "eval_compot_loss_num": 0.2715911865234375, "eval_compot_loss_xval": 3.3720703125, "eval_compot_runtime": 70.3304, "eval_compot_samples_per_second": 0.711, "eval_compot_steps_per_second": 0.028, "num_input_tokens_seen": 264936296, "step": 4000 }, { "epoch": 0.3744091355829082, "eval_custom_ui_MAE_all": 0.147480309009552, "eval_custom_ui_MAE_x": 0.15427882224321365, "eval_custom_ui_MAE_y": 0.14068179205060005, "eval_custom_ui_NUM_probability": 0.9999688565731049, "eval_custom_ui_loss": 0.8656248450279236, "eval_custom_ui_loss_ce": 0.1458643600344658, "eval_custom_ui_loss_num": 0.1512298583984375, "eval_custom_ui_loss_xval": 0.756591796875, "eval_custom_ui_runtime": 55.7567, "eval_custom_ui_samples_per_second": 0.897, "eval_custom_ui_steps_per_second": 0.036, "num_input_tokens_seen": 264936296, "step": 4000 } ], "logging_steps": 1.0, "max_steps": 10683, "num_input_tokens_seen": 264936296, "num_train_epochs": 1, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.2325338094847918e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }