diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,82393 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7234775947912677, + "eval_steps": 250, + "global_step": 4500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00038299502106472615, + "grad_norm": 58.36933438464531, + "learning_rate": 5e-06, + "loss": 0.327, + "num_input_tokens_seen": 173192, + "step": 1 + }, + { + "epoch": 0.00038299502106472615, + "loss": 0.3826669752597809, + "loss_ce": 0.00034278002567589283, + "loss_iou": 1.0884798765182495, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 173192, + "step": 1 + }, + { + "epoch": 0.0007659900421294523, + "grad_norm": 421.0828358936704, + "learning_rate": 5e-06, + "loss": 37.2345, + "num_input_tokens_seen": 346432, + "step": 2 + }, + { + "epoch": 0.0007659900421294523, + "loss": 37.343849182128906, + "loss_ce": 9.83613426797092e-05, + "loss_iou": 1.5366404056549072, + "loss_num": 37.25, + "loss_xval": 37.25, + "num_input_tokens_seen": 346432, + "step": 2 + }, + { + "epoch": 0.0011489850631941786, + "grad_norm": 246.44322210329707, + "learning_rate": 5e-06, + "loss": 14.7033, + "num_input_tokens_seen": 519784, + "step": 3 + }, + { + "epoch": 0.0011489850631941786, + "loss": 14.8048095703125, + "loss_ce": 0.00012145288201281801, + "loss_iou": 12.648301124572754, + "loss_num": 14.8125, + "loss_xval": 14.8125, + "num_input_tokens_seen": 519784, + "step": 3 + }, + { + "epoch": 0.0015319800842589046, + "grad_norm": 294.36104428006075, + "learning_rate": 5e-06, + "loss": 23.9066, + "num_input_tokens_seen": 692808, + "step": 4 + }, + { + "epoch": 0.0015319800842589046, + "loss": 23.937803268432617, + "loss_ce": 0.00030406011501327157, + "loss_iou": 1.5096111297607422, + "loss_num": 24.0, + "loss_xval": 24.0, + "num_input_tokens_seen": 692808, + "step": 4 + }, + { + "epoch": 0.0019149751053236309, + "grad_norm": 202.47763715173787, + "learning_rate": 5e-06, + "loss": 10.7347, + "num_input_tokens_seen": 865640, + "step": 5 + }, + { + "epoch": 0.0019149751053236309, + "eval_websight_new_CIoU": -0.01316814310848713, + "eval_websight_new_GIoU": 0.03720420226454735, + "eval_websight_new_IoU": 0.03720420226454735, + "eval_websight_new_MAE_all": 1.3634408712387085, + "eval_websight_new_MAE_h": 2.1734297275543213, + "eval_websight_new_MAE_w": 2.1817232370376587, + "eval_websight_new_MAE_x": 0.6177258789539337, + "eval_websight_new_MAE_y": 0.4808841943740845, + "eval_websight_new_NUM_probability": 0.9870634377002716, + "eval_websight_new_inside_bbox": 0.0, + "eval_websight_new_loss": 14.947561264038086, + "eval_websight_new_loss_ce": 6.12214753346052e-05, + "eval_websight_new_loss_iou": 1.5935487747192383, + "eval_websight_new_loss_num": 14.93359375, + "eval_websight_new_loss_xval": 14.93359375, + "eval_websight_new_runtime": 62.2711, + "eval_websight_new_samples_per_second": 0.803, + "eval_websight_new_steps_per_second": 0.032, + "num_input_tokens_seen": 865640, + "step": 5 + }, + { + "epoch": 0.0019149751053236309, + "eval_seeclick_CIoU": -0.019112106412649155, + "eval_seeclick_GIoU": 0.04438253119587898, + "eval_seeclick_IoU": 0.044382523745298386, + "eval_seeclick_MAE_all": 1.3827024102210999, + "eval_seeclick_MAE_h": 2.2133243083953857, + "eval_seeclick_MAE_w": 2.1218671798706055, + "eval_seeclick_MAE_x": 0.6495624482631683, + "eval_seeclick_MAE_y": 0.5460554659366608, + "eval_seeclick_NUM_probability": 0.9863913357257843, + "eval_seeclick_inside_bbox": 0.07465277798473835, + "eval_seeclick_loss": 14.71789836883545, + "eval_seeclick_loss_ce": 0.0003928400110453367, + "eval_seeclick_loss_iou": 1.6963666677474976, + "eval_seeclick_loss_num": 14.65625, + "eval_seeclick_loss_xval": 14.65625, + "eval_seeclick_runtime": 84.2849, + "eval_seeclick_samples_per_second": 0.593, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 865640, + "step": 5 + }, + { + "epoch": 0.0019149751053236309, + "eval_icons_CIoU": -0.009676441550254822, + "eval_icons_GIoU": 0.03315560147166252, + "eval_icons_IoU": 0.03315560333430767, + "eval_icons_MAE_all": 1.353821575641632, + "eval_icons_MAE_h": 2.2173551321029663, + "eval_icons_MAE_w": 2.198903441429138, + "eval_icons_MAE_x": 0.49197526276111603, + "eval_icons_MAE_y": 0.5070522427558899, + "eval_icons_NUM_probability": 0.9866805374622345, + "eval_icons_inside_bbox": 0.0711805559694767, + "eval_icons_loss": 15.255043983459473, + "eval_icons_loss_ce": 4.263626760803163e-05, + "eval_icons_loss_iou": 1.6046354174613953, + "eval_icons_loss_num": 15.28515625, + "eval_icons_loss_xval": 15.28515625, + "eval_icons_runtime": 82.1924, + "eval_icons_samples_per_second": 0.608, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 865640, + "step": 5 + }, + { + "epoch": 0.0019149751053236309, + "loss": 15.343791961669922, + "loss_ce": 4.252552753314376e-05, + "loss_iou": 1.606989860534668, + "loss_num": 15.375, + "loss_xval": 15.375, + "num_input_tokens_seen": 865640, + "step": 5 + }, + { + "epoch": 0.002297970126388357, + "grad_norm": 188.94230579429873, + "learning_rate": 5e-06, + "loss": 14.5862, + "num_input_tokens_seen": 1038128, + "step": 6 + }, + { + "epoch": 0.002297970126388357, + "loss": 14.539365768432617, + "loss_ce": 0.00030374148627743125, + "loss_iou": 1.5851291418075562, + "loss_num": 14.5625, + "loss_xval": 14.5625, + "num_input_tokens_seen": 1038128, + "step": 6 + }, + { + "epoch": 0.002680965147453083, + "grad_norm": 168.09641651439844, + "learning_rate": 5e-06, + "loss": 8.2737, + "num_input_tokens_seen": 1210752, + "step": 7 + }, + { + "epoch": 0.002680965147453083, + "loss": 8.371365547180176, + "loss_ce": 0.00027169095119461417, + "loss_iou": 81.91307067871094, + "loss_num": 8.375, + "loss_xval": 8.375, + "num_input_tokens_seen": 1210752, + "step": 7 + }, + { + "epoch": 0.003063960168517809, + "grad_norm": 140.32501556176152, + "learning_rate": 5e-06, + "loss": 11.1605, + "num_input_tokens_seen": 1383168, + "step": 8 + }, + { + "epoch": 0.003063960168517809, + "loss": 11.00023078918457, + "loss_ce": 0.00023003615206107497, + "loss_iou": 1.6515984535217285, + "loss_num": 11.0, + "loss_xval": 11.0, + "num_input_tokens_seen": 1383168, + "step": 8 + }, + { + "epoch": 0.0034469551895825352, + "grad_norm": 129.22621630179205, + "learning_rate": 5e-06, + "loss": 6.6702, + "num_input_tokens_seen": 1556192, + "step": 9 + }, + { + "epoch": 0.0034469551895825352, + "loss": 6.598052978515625, + "loss_ce": 0.000396652176277712, + "loss_iou": 58.22084045410156, + "loss_num": 6.59375, + "loss_xval": 6.59375, + "num_input_tokens_seen": 1556192, + "step": 9 + }, + { + "epoch": 0.0038299502106472617, + "grad_norm": 143.99423695445878, + "learning_rate": 5e-06, + "loss": 12.387, + "num_input_tokens_seen": 1729120, + "step": 10 + }, + { + "epoch": 0.0038299502106472617, + "loss": 12.289302825927734, + "loss_ce": 0.00023921528190840036, + "loss_iou": 1.606602668762207, + "loss_num": 12.3125, + "loss_xval": 12.3125, + "num_input_tokens_seen": 1729120, + "step": 10 + }, + { + "epoch": 0.004212945231711987, + "grad_norm": 108.40687466795202, + "learning_rate": 5e-06, + "loss": 5.6898, + "num_input_tokens_seen": 1901856, + "step": 11 + }, + { + "epoch": 0.004212945231711987, + "loss": 5.812960147857666, + "loss_ce": 0.0004599654348567128, + "loss_iou": 259.53076171875, + "loss_num": 5.8125, + "loss_xval": 5.8125, + "num_input_tokens_seen": 1901856, + "step": 11 + }, + { + "epoch": 0.004595940252776714, + "grad_norm": 131.46512738561623, + "learning_rate": 5e-06, + "loss": 12.4495, + "num_input_tokens_seen": 2074432, + "step": 12 + }, + { + "epoch": 0.004595940252776714, + "loss": 12.453392028808594, + "loss_ce": 0.00026683963369578123, + "loss_iou": 1.5887775421142578, + "loss_num": 12.4375, + "loss_xval": 12.4375, + "num_input_tokens_seen": 2074432, + "step": 12 + }, + { + "epoch": 0.00497893527384144, + "grad_norm": 101.21069895303762, + "learning_rate": 5e-06, + "loss": 5.1644, + "num_input_tokens_seen": 2247256, + "step": 13 + }, + { + "epoch": 0.00497893527384144, + "loss": 5.449373245239258, + "loss_ce": 0.00015416568203363568, + "loss_iou": 17.10838508605957, + "loss_num": 5.4375, + "loss_xval": 5.4375, + "num_input_tokens_seen": 2247256, + "step": 13 + }, + { + "epoch": 0.005361930294906166, + "grad_norm": 109.310493071197, + "learning_rate": 5e-06, + "loss": 11.2854, + "num_input_tokens_seen": 2420488, + "step": 14 + }, + { + "epoch": 0.005361930294906166, + "loss": 11.437763214111328, + "loss_ce": 0.00026362822973169386, + "loss_iou": 1.66493821144104, + "loss_num": 11.4375, + "loss_xval": 11.4375, + "num_input_tokens_seen": 2420488, + "step": 14 + }, + { + "epoch": 0.005744925315970892, + "grad_norm": 94.34650368787317, + "learning_rate": 5e-06, + "loss": 4.4436, + "num_input_tokens_seen": 2593328, + "step": 15 + }, + { + "epoch": 0.005744925315970892, + "loss": 4.543169021606445, + "loss_ce": 0.00020011767628602684, + "loss_iou": 6.623126029968262, + "loss_num": 4.53125, + "loss_xval": 4.53125, + "num_input_tokens_seen": 2593328, + "step": 15 + }, + { + "epoch": 0.006127920337035618, + "grad_norm": 100.17025206843569, + "learning_rate": 5e-06, + "loss": 10.3714, + "num_input_tokens_seen": 2765696, + "step": 16 + }, + { + "epoch": 0.006127920337035618, + "loss": 10.398763656616211, + "loss_ce": 0.0003265046398155391, + "loss_iou": 1.6537723541259766, + "loss_num": 10.375, + "loss_xval": 10.375, + "num_input_tokens_seen": 2765696, + "step": 16 + }, + { + "epoch": 0.0065109153581003444, + "grad_norm": 92.14093040650044, + "learning_rate": 5e-06, + "loss": 4.4866, + "num_input_tokens_seen": 2938840, + "step": 17 + }, + { + "epoch": 0.0065109153581003444, + "loss": 4.25418758392334, + "loss_ce": 0.00028116017347201705, + "loss_iou": 8.455916404724121, + "loss_num": 4.25, + "loss_xval": 4.25, + "num_input_tokens_seen": 2938840, + "step": 17 + }, + { + "epoch": 0.0068939103791650705, + "grad_norm": 83.76771747909497, + "learning_rate": 5e-06, + "loss": 9.6722, + "num_input_tokens_seen": 3111656, + "step": 18 + }, + { + "epoch": 0.0068939103791650705, + "loss": 9.726861953735352, + "loss_ce": 0.00029987277230247855, + "loss_iou": 1.6638808250427246, + "loss_num": 9.75, + "loss_xval": 9.75, + "num_input_tokens_seen": 3111656, + "step": 18 + }, + { + "epoch": 0.007276905400229797, + "grad_norm": 85.59127019381263, + "learning_rate": 5e-06, + "loss": 4.392, + "num_input_tokens_seen": 3284496, + "step": 19 + }, + { + "epoch": 0.007276905400229797, + "loss": 4.572667121887207, + "loss_ce": 0.0004015713930130005, + "loss_iou": 23.15438461303711, + "loss_num": 4.5625, + "loss_xval": 4.5625, + "num_input_tokens_seen": 3284496, + "step": 19 + }, + { + "epoch": 0.0076599004212945234, + "grad_norm": 78.4184602471564, + "learning_rate": 5e-06, + "loss": 7.9458, + "num_input_tokens_seen": 3457816, + "step": 20 + }, + { + "epoch": 0.0076599004212945234, + "loss": 7.8091044425964355, + "loss_ce": 0.0005108587210997939, + "loss_iou": 1.90053391456604, + "loss_num": 7.8125, + "loss_xval": 7.8125, + "num_input_tokens_seen": 3457816, + "step": 20 + }, + { + "epoch": 0.00804289544235925, + "grad_norm": 88.46632333266166, + "learning_rate": 5e-06, + "loss": 3.1119, + "num_input_tokens_seen": 3631000, + "step": 21 + }, + { + "epoch": 0.00804289544235925, + "loss": 2.914778232574463, + "loss_ce": 0.000715868838597089, + "loss_iou": 20.56275177001953, + "loss_num": 2.90625, + "loss_xval": 2.90625, + "num_input_tokens_seen": 3631000, + "step": 21 + }, + { + "epoch": 0.008425890463423975, + "grad_norm": 73.58581565716081, + "learning_rate": 5e-06, + "loss": 6.5807, + "num_input_tokens_seen": 3803952, + "step": 22 + }, + { + "epoch": 0.008425890463423975, + "loss": 6.504635810852051, + "loss_ce": 0.0007294907118193805, + "loss_iou": 1.8562748432159424, + "loss_num": 6.5, + "loss_xval": 6.5, + "num_input_tokens_seen": 3803952, + "step": 22 + }, + { + "epoch": 0.008808885484488702, + "grad_norm": 40.484161003235144, + "learning_rate": 5e-06, + "loss": 2.4782, + "num_input_tokens_seen": 3976616, + "step": 23 + }, + { + "epoch": 0.008808885484488702, + "loss": 2.4908900260925293, + "loss_ce": 0.0006555955624207854, + "loss_iou": 37.24055099487305, + "loss_num": 2.484375, + "loss_xval": 2.484375, + "num_input_tokens_seen": 3976616, + "step": 23 + }, + { + "epoch": 0.009191880505553428, + "grad_norm": 97.22230887683381, + "learning_rate": 5e-06, + "loss": 2.0994, + "num_input_tokens_seen": 4149200, + "step": 24 + }, + { + "epoch": 0.009191880505553428, + "loss": 2.0976505279541016, + "loss_ce": 0.000971070141531527, + "loss_iou": 13.649656295776367, + "loss_num": 2.09375, + "loss_xval": 2.09375, + "num_input_tokens_seen": 4149200, + "step": 24 + }, + { + "epoch": 0.009574875526618154, + "grad_norm": 154.48061606660323, + "learning_rate": 5e-06, + "loss": 7.5555, + "num_input_tokens_seen": 4321712, + "step": 25 + }, + { + "epoch": 0.009574875526618154, + "loss": 7.571127891540527, + "loss_ce": 0.0008151677320711315, + "loss_iou": 13.631052017211914, + "loss_num": 7.5625, + "loss_xval": 7.5625, + "num_input_tokens_seen": 4321712, + "step": 25 + }, + { + "epoch": 0.00995787054768288, + "grad_norm": 29.090531216489648, + "learning_rate": 5e-06, + "loss": 1.6815, + "num_input_tokens_seen": 4494408, + "step": 26 + }, + { + "epoch": 0.00995787054768288, + "loss": 1.7185226678848267, + "loss_ce": 0.0007492044242098927, + "loss_iou": 4.346151828765869, + "loss_num": 1.71875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 4494408, + "step": 26 + }, + { + "epoch": 0.010340865568747606, + "grad_norm": 100.30542578014845, + "learning_rate": 5e-06, + "loss": 1.7095, + "num_input_tokens_seen": 4667032, + "step": 27 + }, + { + "epoch": 0.010340865568747606, + "loss": 1.786179780960083, + "loss_ce": 0.0010234997607767582, + "loss_iou": 1.342555284500122, + "loss_num": 1.78125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 4667032, + "step": 27 + }, + { + "epoch": 0.010723860589812333, + "grad_norm": 123.5520679984825, + "learning_rate": 5e-06, + "loss": 5.1258, + "num_input_tokens_seen": 4839960, + "step": 28 + }, + { + "epoch": 0.010723860589812333, + "loss": 5.078887939453125, + "loss_ce": 0.0007630588952451944, + "loss_iou": 4.0871148109436035, + "loss_num": 5.0625, + "loss_xval": 5.0625, + "num_input_tokens_seen": 4839960, + "step": 28 + }, + { + "epoch": 0.011106855610877058, + "grad_norm": 75.49799926725528, + "learning_rate": 5e-06, + "loss": 2.2301, + "num_input_tokens_seen": 5012824, + "step": 29 + }, + { + "epoch": 0.011106855610877058, + "loss": 2.1779701709747314, + "loss_ce": 0.0012123838532716036, + "loss_iou": 6.067341327667236, + "loss_num": 2.171875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 5012824, + "step": 29 + }, + { + "epoch": 0.011489850631941785, + "grad_norm": 69.37472894493497, + "learning_rate": 5e-06, + "loss": 4.0042, + "num_input_tokens_seen": 5186096, + "step": 30 + }, + { + "epoch": 0.011489850631941785, + "loss": 4.1497392654418945, + "loss_ce": 0.001301644602790475, + "loss_iou": 4.804612636566162, + "loss_num": 4.15625, + "loss_xval": 4.15625, + "num_input_tokens_seen": 5186096, + "step": 30 + }, + { + "epoch": 0.011872845653006512, + "grad_norm": 70.67664919604134, + "learning_rate": 5e-06, + "loss": 1.655, + "num_input_tokens_seen": 5358576, + "step": 31 + }, + { + "epoch": 0.011872845653006512, + "loss": 1.7472056150436401, + "loss_ce": 0.0011118631809949875, + "loss_iou": 2.0371646881103516, + "loss_num": 1.75, + "loss_xval": 1.75, + "num_input_tokens_seen": 5358576, + "step": 31 + }, + { + "epoch": 0.012255840674071237, + "grad_norm": 123.2471229403419, + "learning_rate": 5e-06, + "loss": 4.8823, + "num_input_tokens_seen": 5531816, + "step": 32 + }, + { + "epoch": 0.012255840674071237, + "loss": 4.876434326171875, + "loss_ce": 0.001434716279618442, + "loss_iou": 3.3636035919189453, + "loss_num": 4.875, + "loss_xval": 4.875, + "num_input_tokens_seen": 5531816, + "step": 32 + }, + { + "epoch": 0.012638835695135964, + "grad_norm": 103.57924655765518, + "learning_rate": 5e-06, + "loss": 2.1105, + "num_input_tokens_seen": 5704840, + "step": 33 + }, + { + "epoch": 0.012638835695135964, + "loss": 2.044581651687622, + "loss_ce": 0.0016129963332787156, + "loss_iou": 2.3663644790649414, + "loss_num": 2.046875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 5704840, + "step": 33 + }, + { + "epoch": 0.013021830716200689, + "grad_norm": 103.70727825235932, + "learning_rate": 5e-06, + "loss": 4.4179, + "num_input_tokens_seen": 5877712, + "step": 34 + }, + { + "epoch": 0.013021830716200689, + "loss": 4.494317054748535, + "loss_ce": 0.002129969885572791, + "loss_iou": 3.775545358657837, + "loss_num": 4.5, + "loss_xval": 4.5, + "num_input_tokens_seen": 5877712, + "step": 34 + }, + { + "epoch": 0.013404825737265416, + "grad_norm": 47.36531865531552, + "learning_rate": 5e-06, + "loss": 1.1801, + "num_input_tokens_seen": 6050960, + "step": 35 + }, + { + "epoch": 0.013404825737265416, + "loss": 1.2508718967437744, + "loss_ce": 0.0023367758840322495, + "loss_iou": 2.011030673980713, + "loss_num": 1.25, + "loss_xval": 1.25, + "num_input_tokens_seen": 6050960, + "step": 35 + }, + { + "epoch": 0.013787820758330141, + "grad_norm": 96.79092463858534, + "learning_rate": 5e-06, + "loss": 2.1503, + "num_input_tokens_seen": 6224208, + "step": 36 + }, + { + "epoch": 0.013787820758330141, + "loss": 2.1378912925720215, + "loss_ce": 0.002148999134078622, + "loss_iou": 8.005640029907227, + "loss_num": 2.140625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 6224208, + "step": 36 + }, + { + "epoch": 0.014170815779394868, + "grad_norm": 104.65517919895153, + "learning_rate": 5e-06, + "loss": 2.4288, + "num_input_tokens_seen": 6397048, + "step": 37 + }, + { + "epoch": 0.014170815779394868, + "loss": 2.458104133605957, + "loss_ce": 0.0030261282809078693, + "loss_iou": 2.239044189453125, + "loss_num": 2.453125, + "loss_xval": 2.453125, + "num_input_tokens_seen": 6397048, + "step": 37 + }, + { + "epoch": 0.014553810800459595, + "grad_norm": 45.91550086531722, + "learning_rate": 5e-06, + "loss": 1.9428, + "num_input_tokens_seen": 6570120, + "step": 38 + }, + { + "epoch": 0.014553810800459595, + "loss": 2.0247607231140137, + "loss_ce": 0.003276278730481863, + "loss_iou": 11.768243789672852, + "loss_num": 2.015625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 6570120, + "step": 38 + }, + { + "epoch": 0.01493680582152432, + "grad_norm": 78.02027171698998, + "learning_rate": 5e-06, + "loss": 0.5863, + "num_input_tokens_seen": 6742992, + "step": 39 + }, + { + "epoch": 0.01493680582152432, + "loss": 0.5959486961364746, + "loss_ce": 0.0031753242947161198, + "loss_iou": 1.0671956539154053, + "loss_num": 0.59375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 6742992, + "step": 39 + }, + { + "epoch": 0.015319800842589047, + "grad_norm": 108.02693357884827, + "learning_rate": 5e-06, + "loss": 2.9515, + "num_input_tokens_seen": 6915768, + "step": 40 + }, + { + "epoch": 0.015319800842589047, + "loss": 2.9932217597961426, + "loss_ce": 0.002987401094287634, + "loss_iou": 22.81262969970703, + "loss_num": 2.984375, + "loss_xval": 2.984375, + "num_input_tokens_seen": 6915768, + "step": 40 + }, + { + "epoch": 0.015702795863653772, + "grad_norm": 72.35437301636028, + "learning_rate": 5e-06, + "loss": 0.9581, + "num_input_tokens_seen": 7084968, + "step": 41 + }, + { + "epoch": 0.015702795863653772, + "loss": 1.0195496082305908, + "loss_ce": 0.0044127958826720715, + "loss_iou": 1.2581861019134521, + "loss_num": 1.015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 7084968, + "step": 41 + }, + { + "epoch": 0.0160857908847185, + "grad_norm": 57.05189470945179, + "learning_rate": 5e-06, + "loss": 2.1534, + "num_input_tokens_seen": 7258080, + "step": 42 + }, + { + "epoch": 0.0160857908847185, + "loss": 2.1609060764312744, + "loss_ce": 0.004656002391129732, + "loss_iou": 25.787906646728516, + "loss_num": 2.15625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 7258080, + "step": 42 + }, + { + "epoch": 0.016468785905783226, + "grad_norm": 28.17430307668298, + "learning_rate": 5e-06, + "loss": 0.6822, + "num_input_tokens_seen": 7430792, + "step": 43 + }, + { + "epoch": 0.016468785905783226, + "loss": 0.6581391096115112, + "loss_ce": 0.003842195961624384, + "loss_iou": 1.172561764717102, + "loss_num": 0.65625, + "loss_xval": 0.65625, + "num_input_tokens_seen": 7430792, + "step": 43 + }, + { + "epoch": 0.01685178092684795, + "grad_norm": 61.68393713568411, + "learning_rate": 5e-06, + "loss": 1.0347, + "num_input_tokens_seen": 7603568, + "step": 44 + }, + { + "epoch": 0.01685178092684795, + "loss": 0.9895001649856567, + "loss_ce": 0.00414864718914032, + "loss_iou": 1.0747069120407104, + "loss_num": 0.984375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 7603568, + "step": 44 + }, + { + "epoch": 0.017234775947912676, + "grad_norm": 32.228713424789944, + "learning_rate": 5e-06, + "loss": 0.8874, + "num_input_tokens_seen": 7772568, + "step": 45 + }, + { + "epoch": 0.017234775947912676, + "loss": 0.9120867252349854, + "loss_ce": 0.00437189731746912, + "loss_iou": 1.1424987316131592, + "loss_num": 0.90625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 7772568, + "step": 45 + }, + { + "epoch": 0.017617770968977403, + "grad_norm": 67.19238474935851, + "learning_rate": 5e-06, + "loss": 0.6135, + "num_input_tokens_seen": 7945216, + "step": 46 + }, + { + "epoch": 0.017617770968977403, + "loss": 0.6822792887687683, + "loss_ce": 0.0050332145765423775, + "loss_iou": 1.542587161064148, + "loss_num": 0.67578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 7945216, + "step": 46 + }, + { + "epoch": 0.01800076599004213, + "grad_norm": 98.77020921873604, + "learning_rate": 5e-06, + "loss": 1.7709, + "num_input_tokens_seen": 8118400, + "step": 47 + }, + { + "epoch": 0.01800076599004213, + "loss": 1.759229063987732, + "loss_ce": 0.004346287809312344, + "loss_iou": 2.3266377449035645, + "loss_num": 1.7578125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 8118400, + "step": 47 + }, + { + "epoch": 0.018383761011106857, + "grad_norm": 63.114520354559936, + "learning_rate": 5e-06, + "loss": 0.9873, + "num_input_tokens_seen": 8291176, + "step": 48 + }, + { + "epoch": 0.018383761011106857, + "loss": 1.0272853374481201, + "loss_ce": 0.004336033947765827, + "loss_iou": 1.1968190670013428, + "loss_num": 1.0234375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 8291176, + "step": 48 + }, + { + "epoch": 0.01876675603217158, + "grad_norm": 44.02810373754691, + "learning_rate": 5e-06, + "loss": 1.1976, + "num_input_tokens_seen": 8463632, + "step": 49 + }, + { + "epoch": 0.01876675603217158, + "loss": 1.1249773502349854, + "loss_ce": 0.0048601580783724785, + "loss_iou": 1.3406559228897095, + "loss_num": 1.1171875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 8463632, + "step": 49 + }, + { + "epoch": 0.019149751053236307, + "grad_norm": 84.624531596747, + "learning_rate": 5e-06, + "loss": 0.9335, + "num_input_tokens_seen": 8636576, + "step": 50 + }, + { + "epoch": 0.019149751053236307, + "eval_websight_new_CIoU": 0.43197502195835114, + "eval_websight_new_GIoU": 0.4391936808824539, + "eval_websight_new_IoU": 0.4392305761575699, + "eval_websight_new_MAE_all": 0.13614656776189804, + "eval_websight_new_MAE_h": 0.23954762518405914, + "eval_websight_new_MAE_w": 0.2133445292711258, + "eval_websight_new_MAE_x": 0.04393843561410904, + "eval_websight_new_MAE_y": 0.047755710780620575, + "eval_websight_new_NUM_probability": 0.14433185011148453, + "eval_websight_new_inside_bbox": 0.8020833432674408, + "eval_websight_new_loss": 1.6407192945480347, + "eval_websight_new_loss_ce": 0.009637001436203718, + "eval_websight_new_loss_iou": 1.931229829788208, + "eval_websight_new_loss_num": 1.6201171875, + "eval_websight_new_loss_xval": 1.6201171875, + "eval_websight_new_runtime": 55.8378, + "eval_websight_new_samples_per_second": 0.895, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 8636576, + "step": 50 + }, + { + "epoch": 0.019149751053236307, + "eval_seeclick_CIoU": 0.31005920469760895, + "eval_seeclick_GIoU": 0.31324100494384766, + "eval_seeclick_IoU": 0.3503878265619278, + "eval_seeclick_MAE_all": 0.1857384666800499, + "eval_seeclick_MAE_h": 0.27291879057884216, + "eval_seeclick_MAE_w": 0.22510244697332382, + "eval_seeclick_MAE_x": 0.13958367705345154, + "eval_seeclick_MAE_y": 0.10534897819161415, + "eval_seeclick_NUM_probability": 0.21946162730455399, + "eval_seeclick_inside_bbox": 0.5590277910232544, + "eval_seeclick_loss": 1.8699127435684204, + "eval_seeclick_loss_ce": 0.006399712758138776, + "eval_seeclick_loss_iou": 4.592440366744995, + "eval_seeclick_loss_num": 1.8291015625, + "eval_seeclick_loss_xval": 1.8291015625, + "eval_seeclick_runtime": 82.084, + "eval_seeclick_samples_per_second": 0.609, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 8636576, + "step": 50 + }, + { + "epoch": 0.019149751053236307, + "eval_icons_CIoU": 0.36255019903182983, + "eval_icons_GIoU": 0.36736391484737396, + "eval_icons_IoU": 0.3720673471689224, + "eval_icons_MAE_all": 0.15631137788295746, + "eval_icons_MAE_h": 0.251584492623806, + "eval_icons_MAE_w": 0.2644573599100113, + "eval_icons_MAE_x": 0.05817447975277901, + "eval_icons_MAE_y": 0.05102917365729809, + "eval_icons_NUM_probability": 0.09473497048020363, + "eval_icons_inside_bbox": 0.8506944477558136, + "eval_icons_loss": 1.907942771911621, + "eval_icons_loss_ce": 0.007402913179248571, + "eval_icons_loss_iou": 1.7391288876533508, + "eval_icons_loss_num": 1.89208984375, + "eval_icons_loss_xval": 1.89208984375, + "eval_icons_runtime": 82.3134, + "eval_icons_samples_per_second": 0.607, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 8636576, + "step": 50 + }, + { + "epoch": 0.019149751053236307, + "loss": 1.9059419631958008, + "loss_ce": 0.007504510693252087, + "loss_iou": 1.7757078409194946, + "loss_num": 1.8984375, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 8636576, + "step": 50 + }, + { + "epoch": 0.019532746074301034, + "grad_norm": 72.6783448644803, + "learning_rate": 5e-06, + "loss": 1.7384, + "num_input_tokens_seen": 8809256, + "step": 51 + }, + { + "epoch": 0.019532746074301034, + "loss": 1.7620909214019775, + "loss_ce": 0.0052549513056874275, + "loss_iou": 3.7303404808044434, + "loss_num": 1.7578125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 8809256, + "step": 51 + }, + { + "epoch": 0.01991574109536576, + "grad_norm": 27.706596397207054, + "learning_rate": 5e-06, + "loss": 0.891, + "num_input_tokens_seen": 8982032, + "step": 52 + }, + { + "epoch": 0.01991574109536576, + "loss": 0.8567217588424683, + "loss_ce": 0.006135815754532814, + "loss_iou": 1.1876529455184937, + "loss_num": 0.8515625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 8982032, + "step": 52 + }, + { + "epoch": 0.020298736116430488, + "grad_norm": 43.56380441147401, + "learning_rate": 5e-06, + "loss": 0.4726, + "num_input_tokens_seen": 9155144, + "step": 53 + }, + { + "epoch": 0.020298736116430488, + "loss": 0.4914742410182953, + "loss_ce": 0.005878560245037079, + "loss_iou": 1.143690586090088, + "loss_num": 0.486328125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 9155144, + "step": 53 + }, + { + "epoch": 0.02068173113749521, + "grad_norm": 28.591352396787148, + "learning_rate": 5e-06, + "loss": 0.8557, + "num_input_tokens_seen": 9327944, + "step": 54 + }, + { + "epoch": 0.02068173113749521, + "loss": 0.8597661256790161, + "loss_ce": 0.005273905117064714, + "loss_iou": 1.217780590057373, + "loss_num": 0.85546875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 9327944, + "step": 54 + }, + { + "epoch": 0.02106472615855994, + "grad_norm": 46.95323992873654, + "learning_rate": 5e-06, + "loss": 0.5126, + "num_input_tokens_seen": 9500872, + "step": 55 + }, + { + "epoch": 0.02106472615855994, + "loss": 0.5463019609451294, + "loss_ce": 0.005774635821580887, + "loss_iou": 1.072084665298462, + "loss_num": 0.5390625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 9500872, + "step": 55 + }, + { + "epoch": 0.021447721179624665, + "grad_norm": 38.03301004910845, + "learning_rate": 5e-06, + "loss": 0.8326, + "num_input_tokens_seen": 9673920, + "step": 56 + }, + { + "epoch": 0.021447721179624665, + "loss": 0.7999447584152222, + "loss_ce": 0.005022889003157616, + "loss_iou": 1.042433261871338, + "loss_num": 0.796875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 9673920, + "step": 56 + }, + { + "epoch": 0.021830716200689392, + "grad_norm": 16.554509285804894, + "learning_rate": 5e-06, + "loss": 0.4558, + "num_input_tokens_seen": 9846568, + "step": 57 + }, + { + "epoch": 0.021830716200689392, + "loss": 0.426127552986145, + "loss_ce": 0.006205694749951363, + "loss_iou": 1.0183396339416504, + "loss_num": 0.419921875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 9846568, + "step": 57 + }, + { + "epoch": 0.022213711221754116, + "grad_norm": 31.72178541360483, + "learning_rate": 5e-06, + "loss": 0.3621, + "num_input_tokens_seen": 10019416, + "step": 58 + }, + { + "epoch": 0.022213711221754116, + "loss": 0.35577136278152466, + "loss_ce": 0.0067723277024924755, + "loss_iou": 1.0083670616149902, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 10019416, + "step": 58 + }, + { + "epoch": 0.022596706242818843, + "grad_norm": 37.92132274146497, + "learning_rate": 5e-06, + "loss": 0.663, + "num_input_tokens_seen": 10192152, + "step": 59 + }, + { + "epoch": 0.022596706242818843, + "loss": 0.6462411880493164, + "loss_ce": 0.0053720735013484955, + "loss_iou": 1.8834075927734375, + "loss_num": 0.640625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 10192152, + "step": 59 + }, + { + "epoch": 0.02297970126388357, + "grad_norm": 42.481798631476686, + "learning_rate": 5e-06, + "loss": 0.3958, + "num_input_tokens_seen": 10365416, + "step": 60 + }, + { + "epoch": 0.02297970126388357, + "loss": 0.375872403383255, + "loss_ce": 0.006731784902513027, + "loss_iou": 1.0175365209579468, + "loss_num": 0.369140625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 10365416, + "step": 60 + }, + { + "epoch": 0.023362696284948296, + "grad_norm": 54.60705120241134, + "learning_rate": 5e-06, + "loss": 0.8181, + "num_input_tokens_seen": 10534848, + "step": 61 + }, + { + "epoch": 0.023362696284948296, + "loss": 0.8863776922225952, + "loss_ce": 0.005518313962966204, + "loss_iou": 1.1352825164794922, + "loss_num": 0.8828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 10534848, + "step": 61 + }, + { + "epoch": 0.023745691306013023, + "grad_norm": 35.219975611996276, + "learning_rate": 5e-06, + "loss": 0.4362, + "num_input_tokens_seen": 10707600, + "step": 62 + }, + { + "epoch": 0.023745691306013023, + "loss": 0.43948858976364136, + "loss_ce": 0.006383126601576805, + "loss_iou": 1.1062934398651123, + "loss_num": 0.43359375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 10707600, + "step": 62 + }, + { + "epoch": 0.024128686327077747, + "grad_norm": 56.84713250392648, + "learning_rate": 5e-06, + "loss": 0.5531, + "num_input_tokens_seen": 10881128, + "step": 63 + }, + { + "epoch": 0.024128686327077747, + "loss": 0.5669519901275635, + "loss_ce": 0.006405100226402283, + "loss_iou": 1.3195397853851318, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 10881128, + "step": 63 + }, + { + "epoch": 0.024511681348142474, + "grad_norm": 69.52194454977185, + "learning_rate": 5e-06, + "loss": 0.8904, + "num_input_tokens_seen": 11053920, + "step": 64 + }, + { + "epoch": 0.024511681348142474, + "loss": 0.9384351968765259, + "loss_ce": 0.0067945728078484535, + "loss_iou": 1.0879559516906738, + "loss_num": 0.9296875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 11053920, + "step": 64 + }, + { + "epoch": 0.0248946763692072, + "grad_norm": 40.79322177764759, + "learning_rate": 5e-06, + "loss": 0.6829, + "num_input_tokens_seen": 11226752, + "step": 65 + }, + { + "epoch": 0.0248946763692072, + "loss": 0.7247698307037354, + "loss_ce": 0.006019807420670986, + "loss_iou": 1.2506904602050781, + "loss_num": 0.71875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 11226752, + "step": 65 + }, + { + "epoch": 0.025277671390271927, + "grad_norm": 47.654912684020516, + "learning_rate": 5e-06, + "loss": 0.487, + "num_input_tokens_seen": 11399488, + "step": 66 + }, + { + "epoch": 0.025277671390271927, + "loss": 0.5268010497093201, + "loss_ce": 0.006781519390642643, + "loss_iou": 1.1212661266326904, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 11399488, + "step": 66 + }, + { + "epoch": 0.025660666411336654, + "grad_norm": 54.30692560515609, + "learning_rate": 5e-06, + "loss": 0.7733, + "num_input_tokens_seen": 11572368, + "step": 67 + }, + { + "epoch": 0.025660666411336654, + "loss": 0.7901598811149597, + "loss_ce": 0.006468463689088821, + "loss_iou": 1.510016918182373, + "loss_num": 0.78515625, + "loss_xval": 0.78515625, + "num_input_tokens_seen": 11572368, + "step": 67 + }, + { + "epoch": 0.026043661432401378, + "grad_norm": 44.986881254476216, + "learning_rate": 5e-06, + "loss": 0.4963, + "num_input_tokens_seen": 11745096, + "step": 68 + }, + { + "epoch": 0.026043661432401378, + "loss": 0.48728063702583313, + "loss_ce": 0.00632362999022007, + "loss_iou": 1.1487774848937988, + "loss_num": 0.48046875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 11745096, + "step": 68 + }, + { + "epoch": 0.026426656453466105, + "grad_norm": 38.949899945039, + "learning_rate": 5e-06, + "loss": 0.6862, + "num_input_tokens_seen": 11917968, + "step": 69 + }, + { + "epoch": 0.026426656453466105, + "loss": 0.6249829530715942, + "loss_ce": 0.006330636329948902, + "loss_iou": 1.0569710731506348, + "loss_num": 0.6171875, + "loss_xval": 0.6171875, + "num_input_tokens_seen": 11917968, + "step": 69 + }, + { + "epoch": 0.02680965147453083, + "grad_norm": 43.54601105668646, + "learning_rate": 5e-06, + "loss": 0.3444, + "num_input_tokens_seen": 12090616, + "step": 70 + }, + { + "epoch": 0.02680965147453083, + "loss": 0.3571290969848633, + "loss_ce": 0.006543142721056938, + "loss_iou": 1.0208394527435303, + "loss_num": 0.3515625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 12090616, + "step": 70 + }, + { + "epoch": 0.02719264649559556, + "grad_norm": 50.61616085983298, + "learning_rate": 5e-06, + "loss": 0.6366, + "num_input_tokens_seen": 12263536, + "step": 71 + }, + { + "epoch": 0.02719264649559556, + "loss": 0.6586880683898926, + "loss_ce": 0.006832567509263754, + "loss_iou": 1.185067892074585, + "loss_num": 0.65234375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 12263536, + "step": 71 + }, + { + "epoch": 0.027575641516660282, + "grad_norm": 45.08389744664967, + "learning_rate": 5e-06, + "loss": 0.4513, + "num_input_tokens_seen": 12436576, + "step": 72 + }, + { + "epoch": 0.027575641516660282, + "loss": 0.45050758123397827, + "loss_ce": 0.00739235058426857, + "loss_iou": 1.0219526290893555, + "loss_num": 0.443359375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 12436576, + "step": 72 + }, + { + "epoch": 0.02795863653772501, + "grad_norm": 33.203652387586985, + "learning_rate": 5e-06, + "loss": 0.6655, + "num_input_tokens_seen": 12609096, + "step": 73 + }, + { + "epoch": 0.02795863653772501, + "loss": 0.6817867755889893, + "loss_ce": 0.006493816152215004, + "loss_iou": 1.0247434377670288, + "loss_num": 0.67578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 12609096, + "step": 73 + }, + { + "epoch": 0.028341631558789736, + "grad_norm": 33.33240414801773, + "learning_rate": 5e-06, + "loss": 0.3952, + "num_input_tokens_seen": 12781776, + "step": 74 + }, + { + "epoch": 0.028341631558789736, + "loss": 0.40079963207244873, + "loss_ce": 0.00700080581009388, + "loss_iou": 1.2275466918945312, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 12781776, + "step": 74 + }, + { + "epoch": 0.028724626579854463, + "grad_norm": 57.60823019996819, + "learning_rate": 5e-06, + "loss": 0.5417, + "num_input_tokens_seen": 12954968, + "step": 75 + }, + { + "epoch": 0.028724626579854463, + "loss": 0.5834289193153381, + "loss_ce": 0.006524628959596157, + "loss_iou": 1.1341683864593506, + "loss_num": 0.578125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 12954968, + "step": 75 + }, + { + "epoch": 0.02910762160091919, + "grad_norm": 56.37315781796848, + "learning_rate": 5e-06, + "loss": 0.6461, + "num_input_tokens_seen": 13127536, + "step": 76 + }, + { + "epoch": 0.02910762160091919, + "loss": 0.6031838059425354, + "loss_ce": 0.005771694239228964, + "loss_iou": 0.763561487197876, + "loss_num": 0.59765625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 13127536, + "step": 76 + }, + { + "epoch": 0.029490616621983913, + "grad_norm": 33.02286060044843, + "learning_rate": 5e-06, + "loss": 0.498, + "num_input_tokens_seen": 13300688, + "step": 77 + }, + { + "epoch": 0.029490616621983913, + "loss": 0.511459231376648, + "loss_ce": 0.007064702454954386, + "loss_iou": 1.1034919023513794, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 13300688, + "step": 77 + }, + { + "epoch": 0.02987361164304864, + "grad_norm": 50.1946807362945, + "learning_rate": 5e-06, + "loss": 0.5003, + "num_input_tokens_seen": 13470424, + "step": 78 + }, + { + "epoch": 0.02987361164304864, + "loss": 0.4736681580543518, + "loss_ce": 0.0068712737411260605, + "loss_iou": 1.0265188217163086, + "loss_num": 0.466796875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 13470424, + "step": 78 + }, + { + "epoch": 0.030256606664113367, + "grad_norm": 44.010831517031136, + "learning_rate": 5e-06, + "loss": 0.4536, + "num_input_tokens_seen": 13643424, + "step": 79 + }, + { + "epoch": 0.030256606664113367, + "loss": 0.44002223014831543, + "loss_ce": 0.005940227769315243, + "loss_iou": 0.7701394557952881, + "loss_num": 0.43359375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 13643424, + "step": 79 + }, + { + "epoch": 0.030639601685178094, + "grad_norm": 44.464194440871175, + "learning_rate": 5e-06, + "loss": 0.522, + "num_input_tokens_seen": 13816072, + "step": 80 + }, + { + "epoch": 0.030639601685178094, + "loss": 0.42485499382019043, + "loss_ce": 0.006397980265319347, + "loss_iou": 1.0180633068084717, + "loss_num": 0.41796875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 13816072, + "step": 80 + }, + { + "epoch": 0.031022596706242817, + "grad_norm": 45.20686847170137, + "learning_rate": 5e-06, + "loss": 0.561, + "num_input_tokens_seen": 13988800, + "step": 81 + }, + { + "epoch": 0.031022596706242817, + "loss": 0.5155953168869019, + "loss_ce": 0.007050366140902042, + "loss_iou": 1.033956527709961, + "loss_num": 0.5078125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 13988800, + "step": 81 + }, + { + "epoch": 0.031405591727307544, + "grad_norm": 40.388364285379396, + "learning_rate": 5e-06, + "loss": 0.4487, + "num_input_tokens_seen": 14162104, + "step": 82 + }, + { + "epoch": 0.031405591727307544, + "loss": 0.44451308250427246, + "loss_ce": 0.007013088092207909, + "loss_iou": 1.0524094104766846, + "loss_num": 0.4375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 14162104, + "step": 82 + }, + { + "epoch": 0.031788586748372274, + "grad_norm": 29.735627600750053, + "learning_rate": 5e-06, + "loss": 0.5666, + "num_input_tokens_seen": 14334784, + "step": 83 + }, + { + "epoch": 0.031788586748372274, + "loss": 0.5328165888786316, + "loss_ce": 0.006693531759083271, + "loss_iou": 1.1166609525680542, + "loss_num": 0.52734375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 14334784, + "step": 83 + }, + { + "epoch": 0.032171581769437, + "grad_norm": 27.722040585539666, + "learning_rate": 5e-06, + "loss": 0.3232, + "num_input_tokens_seen": 14507224, + "step": 84 + }, + { + "epoch": 0.032171581769437, + "loss": 0.3890770971775055, + "loss_ce": 0.006874939426779747, + "loss_iou": 1.0110466480255127, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 14507224, + "step": 84 + }, + { + "epoch": 0.03255457679050172, + "grad_norm": 31.390461757254684, + "learning_rate": 5e-06, + "loss": 0.5257, + "num_input_tokens_seen": 14680288, + "step": 85 + }, + { + "epoch": 0.03255457679050172, + "loss": 0.5557568073272705, + "loss_ce": 0.006440396420657635, + "loss_iou": 1.0433627367019653, + "loss_num": 0.55078125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 14680288, + "step": 85 + }, + { + "epoch": 0.03293757181156645, + "grad_norm": 32.839011842359895, + "learning_rate": 5e-06, + "loss": 0.4123, + "num_input_tokens_seen": 14853464, + "step": 86 + }, + { + "epoch": 0.03293757181156645, + "loss": 0.3949050307273865, + "loss_ce": 0.006965578068047762, + "loss_iou": 1.0332398414611816, + "loss_num": 0.388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 14853464, + "step": 86 + }, + { + "epoch": 0.033320566832631175, + "grad_norm": 25.639189178603175, + "learning_rate": 5e-06, + "loss": 0.5734, + "num_input_tokens_seen": 15026264, + "step": 87 + }, + { + "epoch": 0.033320566832631175, + "loss": 0.6040345430374146, + "loss_ce": 0.006378264632076025, + "loss_iou": 10.921671867370605, + "loss_num": 0.59765625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 15026264, + "step": 87 + }, + { + "epoch": 0.0337035618536959, + "grad_norm": 37.876886363808794, + "learning_rate": 5e-06, + "loss": 0.3964, + "num_input_tokens_seen": 15198920, + "step": 88 + }, + { + "epoch": 0.0337035618536959, + "loss": 0.4036218225955963, + "loss_ce": 0.00603879801928997, + "loss_iou": 1.032512903213501, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 15198920, + "step": 88 + }, + { + "epoch": 0.03408655687476063, + "grad_norm": 22.333466483174806, + "learning_rate": 5e-06, + "loss": 0.4302, + "num_input_tokens_seen": 15371928, + "step": 89 + }, + { + "epoch": 0.03408655687476063, + "loss": 0.460678368806839, + "loss_ce": 0.006088521331548691, + "loss_iou": 1.0937793254852295, + "loss_num": 0.455078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 15371928, + "step": 89 + }, + { + "epoch": 0.03446955189582535, + "grad_norm": 29.069531495758998, + "learning_rate": 5e-06, + "loss": 0.3827, + "num_input_tokens_seen": 15544760, + "step": 90 + }, + { + "epoch": 0.03446955189582535, + "loss": 0.35321101546287537, + "loss_ce": 0.0066533940844237804, + "loss_iou": 1.0216233730316162, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 15544760, + "step": 90 + }, + { + "epoch": 0.03485254691689008, + "grad_norm": 26.540816273996406, + "learning_rate": 5e-06, + "loss": 0.4596, + "num_input_tokens_seen": 15717512, + "step": 91 + }, + { + "epoch": 0.03485254691689008, + "loss": 0.4705299139022827, + "loss_ce": 0.006174429785460234, + "loss_iou": 1.0322036743164062, + "loss_num": 0.46484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 15717512, + "step": 91 + }, + { + "epoch": 0.035235541937954806, + "grad_norm": 43.30091509778155, + "learning_rate": 5e-06, + "loss": 0.3605, + "num_input_tokens_seen": 15890272, + "step": 92 + }, + { + "epoch": 0.035235541937954806, + "loss": 0.3640331029891968, + "loss_ce": 0.00648917630314827, + "loss_iou": 1.5203404426574707, + "loss_num": 0.357421875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 15890272, + "step": 92 + }, + { + "epoch": 0.03561853695901953, + "grad_norm": 26.875492455183462, + "learning_rate": 5e-06, + "loss": 0.5214, + "num_input_tokens_seen": 16063008, + "step": 93 + }, + { + "epoch": 0.03561853695901953, + "loss": 0.5096790194511414, + "loss_ce": 0.006016903556883335, + "loss_iou": 1.1511123180389404, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 16063008, + "step": 93 + }, + { + "epoch": 0.03600153198008426, + "grad_norm": 29.866848393811175, + "learning_rate": 5e-06, + "loss": 0.3029, + "num_input_tokens_seen": 16235992, + "step": 94 + }, + { + "epoch": 0.03600153198008426, + "loss": 0.30797234177589417, + "loss_ce": 0.006702814716845751, + "loss_iou": 1.0129989385604858, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 16235992, + "step": 94 + }, + { + "epoch": 0.036384527001148984, + "grad_norm": 28.283781291947857, + "learning_rate": 5e-06, + "loss": 0.5238, + "num_input_tokens_seen": 16408664, + "step": 95 + }, + { + "epoch": 0.036384527001148984, + "loss": 0.5198010206222534, + "loss_ce": 0.006861596368253231, + "loss_iou": 1.0395960807800293, + "loss_num": 0.51171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 16408664, + "step": 95 + }, + { + "epoch": 0.036767522022213714, + "grad_norm": 41.26521151645431, + "learning_rate": 5e-06, + "loss": 0.333, + "num_input_tokens_seen": 16581656, + "step": 96 + }, + { + "epoch": 0.036767522022213714, + "loss": 0.3221352696418762, + "loss_ce": 0.006339376792311668, + "loss_iou": 1.0053198337554932, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 16581656, + "step": 96 + }, + { + "epoch": 0.03715051704327844, + "grad_norm": 44.74181037480452, + "learning_rate": 5e-06, + "loss": 0.5314, + "num_input_tokens_seen": 16754648, + "step": 97 + }, + { + "epoch": 0.03715051704327844, + "loss": 0.5276448726654053, + "loss_ce": 0.006160501390695572, + "loss_iou": 1.0349318981170654, + "loss_num": 0.5234375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 16754648, + "step": 97 + }, + { + "epoch": 0.03753351206434316, + "grad_norm": 36.167358019916335, + "learning_rate": 5e-06, + "loss": 0.4675, + "num_input_tokens_seen": 16927656, + "step": 98 + }, + { + "epoch": 0.03753351206434316, + "loss": 0.45619115233421326, + "loss_ce": 0.0064841341227293015, + "loss_iou": 1.2112081050872803, + "loss_num": 0.44921875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 16927656, + "step": 98 + }, + { + "epoch": 0.03791650708540789, + "grad_norm": 37.21054003809216, + "learning_rate": 5e-06, + "loss": 0.3829, + "num_input_tokens_seen": 17100912, + "step": 99 + }, + { + "epoch": 0.03791650708540789, + "loss": 0.36574530601501465, + "loss_ce": 0.006126187741756439, + "loss_iou": 1.0228235721588135, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 17100912, + "step": 99 + }, + { + "epoch": 0.038299502106472615, + "grad_norm": 56.04909535425301, + "learning_rate": 5e-06, + "loss": 0.4844, + "num_input_tokens_seen": 17273920, + "step": 100 + }, + { + "epoch": 0.038299502106472615, + "loss": 0.5044032335281372, + "loss_ce": 0.0068446556106209755, + "loss_iou": 1.1203261613845825, + "loss_num": 0.498046875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 17273920, + "step": 100 + }, + { + "epoch": 0.038682497127537345, + "grad_norm": 35.64530879543614, + "learning_rate": 5e-06, + "loss": 0.5197, + "num_input_tokens_seen": 17446584, + "step": 101 + }, + { + "epoch": 0.038682497127537345, + "loss": 0.5517237186431885, + "loss_ce": 0.0068018026649951935, + "loss_iou": 1.022824764251709, + "loss_num": 0.546875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 17446584, + "step": 101 + }, + { + "epoch": 0.03906549214860207, + "grad_norm": 27.756838380117056, + "learning_rate": 5e-06, + "loss": 0.3293, + "num_input_tokens_seen": 17619448, + "step": 102 + }, + { + "epoch": 0.03906549214860207, + "loss": 0.30716967582702637, + "loss_ce": 0.006388426758348942, + "loss_iou": 1.035182237625122, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 17619448, + "step": 102 + }, + { + "epoch": 0.03944848716966679, + "grad_norm": 32.46338554186638, + "learning_rate": 5e-06, + "loss": 0.4103, + "num_input_tokens_seen": 17791928, + "step": 103 + }, + { + "epoch": 0.03944848716966679, + "loss": 0.4780769944190979, + "loss_ce": 0.0061531588435173035, + "loss_iou": 1.180977463722229, + "loss_num": 0.47265625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 17791928, + "step": 103 + }, + { + "epoch": 0.03983148219073152, + "grad_norm": 35.52457985288199, + "learning_rate": 5e-06, + "loss": 0.4137, + "num_input_tokens_seen": 17965016, + "step": 104 + }, + { + "epoch": 0.03983148219073152, + "loss": 0.3601241707801819, + "loss_ce": 0.0063643730245530605, + "loss_iou": 1.0742692947387695, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 17965016, + "step": 104 + }, + { + "epoch": 0.040214477211796246, + "grad_norm": 41.866249068434854, + "learning_rate": 5e-06, + "loss": 0.4377, + "num_input_tokens_seen": 18137736, + "step": 105 + }, + { + "epoch": 0.040214477211796246, + "loss": 0.4701138734817505, + "loss_ce": 0.006490812171250582, + "loss_iou": 1.0544739961624146, + "loss_num": 0.462890625, + "loss_xval": 0.462890625, + "num_input_tokens_seen": 18137736, + "step": 105 + }, + { + "epoch": 0.040597472232860976, + "grad_norm": 30.01917950301748, + "learning_rate": 5e-06, + "loss": 0.3308, + "num_input_tokens_seen": 18310744, + "step": 106 + }, + { + "epoch": 0.040597472232860976, + "loss": 0.31964176893234253, + "loss_ce": 0.006531395949423313, + "loss_iou": 1.0224277973175049, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 18310744, + "step": 106 + }, + { + "epoch": 0.0409804672539257, + "grad_norm": 36.06766890362076, + "learning_rate": 5e-06, + "loss": 0.4414, + "num_input_tokens_seen": 18483568, + "step": 107 + }, + { + "epoch": 0.0409804672539257, + "loss": 0.3927014470100403, + "loss_ce": 0.006715108640491962, + "loss_iou": 1.047964334487915, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 18483568, + "step": 107 + }, + { + "epoch": 0.04136346227499042, + "grad_norm": 42.79337571439956, + "learning_rate": 5e-06, + "loss": 0.4251, + "num_input_tokens_seen": 18656480, + "step": 108 + }, + { + "epoch": 0.04136346227499042, + "loss": 0.4319663345813751, + "loss_ce": 0.006429227534681559, + "loss_iou": 1.017852783203125, + "loss_num": 0.42578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 18656480, + "step": 108 + }, + { + "epoch": 0.04174645729605515, + "grad_norm": 47.21985875144009, + "learning_rate": 5e-06, + "loss": 0.5227, + "num_input_tokens_seen": 18829808, + "step": 109 + }, + { + "epoch": 0.04174645729605515, + "loss": 0.5298129320144653, + "loss_ce": 0.006863709073513746, + "loss_iou": 1.0822196006774902, + "loss_num": 0.5234375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 18829808, + "step": 109 + }, + { + "epoch": 0.04212945231711988, + "grad_norm": 30.911890471985224, + "learning_rate": 5e-06, + "loss": 0.3904, + "num_input_tokens_seen": 19002360, + "step": 110 + }, + { + "epoch": 0.04212945231711988, + "loss": 0.4123959243297577, + "loss_ce": 0.007000410463660955, + "loss_iou": 1.0159223079681396, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 19002360, + "step": 110 + }, + { + "epoch": 0.0425124473381846, + "grad_norm": 27.433482377568076, + "learning_rate": 5e-06, + "loss": 0.6606, + "num_input_tokens_seen": 19175216, + "step": 111 + }, + { + "epoch": 0.0425124473381846, + "loss": 0.6263796091079712, + "loss_ce": 0.006750749424099922, + "loss_iou": 1.340945839881897, + "loss_num": 0.62109375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 19175216, + "step": 111 + }, + { + "epoch": 0.04289544235924933, + "grad_norm": 15.005011675758164, + "learning_rate": 5e-06, + "loss": 0.3459, + "num_input_tokens_seen": 19347720, + "step": 112 + }, + { + "epoch": 0.04289544235924933, + "loss": 0.4036482274532318, + "loss_ce": 0.006675555370748043, + "loss_iou": 1.0429775714874268, + "loss_num": 0.396484375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 19347720, + "step": 112 + }, + { + "epoch": 0.043278437380314054, + "grad_norm": 28.057970281066805, + "learning_rate": 5e-06, + "loss": 0.397, + "num_input_tokens_seen": 19520544, + "step": 113 + }, + { + "epoch": 0.043278437380314054, + "loss": 0.45218348503112793, + "loss_ce": 0.006504817865788937, + "loss_iou": 1.1148324012756348, + "loss_num": 0.4453125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 19520544, + "step": 113 + }, + { + "epoch": 0.043661432401378784, + "grad_norm": 36.79099990985957, + "learning_rate": 5e-06, + "loss": 0.5451, + "num_input_tokens_seen": 19693208, + "step": 114 + }, + { + "epoch": 0.043661432401378784, + "loss": 0.5250085592269897, + "loss_ce": 0.0064538768492639065, + "loss_iou": 1.0836858749389648, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 19693208, + "step": 114 + }, + { + "epoch": 0.04404442742244351, + "grad_norm": 32.01939345082849, + "learning_rate": 5e-06, + "loss": 0.4893, + "num_input_tokens_seen": 19865688, + "step": 115 + }, + { + "epoch": 0.04404442742244351, + "loss": 0.5263857841491699, + "loss_ce": 0.006610426120460033, + "loss_iou": 7.912219047546387, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 19865688, + "step": 115 + }, + { + "epoch": 0.04442742244350823, + "grad_norm": 21.26505252168516, + "learning_rate": 5e-06, + "loss": 0.4792, + "num_input_tokens_seen": 20038768, + "step": 116 + }, + { + "epoch": 0.04442742244350823, + "loss": 0.4978628158569336, + "loss_ce": 0.006407726556062698, + "loss_iou": 0.9237784147262573, + "loss_num": 0.4921875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 20038768, + "step": 116 + }, + { + "epoch": 0.04481041746457296, + "grad_norm": 31.743950455792852, + "learning_rate": 5e-06, + "loss": 0.2938, + "num_input_tokens_seen": 20211744, + "step": 117 + }, + { + "epoch": 0.04481041746457296, + "loss": 0.3334852159023285, + "loss_ce": 0.007313337177038193, + "loss_iou": 1.0548374652862549, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 20211744, + "step": 117 + }, + { + "epoch": 0.045193412485637685, + "grad_norm": 37.95325305399651, + "learning_rate": 5e-06, + "loss": 0.5847, + "num_input_tokens_seen": 20384600, + "step": 118 + }, + { + "epoch": 0.045193412485637685, + "loss": 0.6075345277786255, + "loss_ce": 0.006216216832399368, + "loss_iou": 1.069112777709961, + "loss_num": 0.6015625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 20384600, + "step": 118 + }, + { + "epoch": 0.045576407506702415, + "grad_norm": 27.727475727388736, + "learning_rate": 5e-06, + "loss": 0.3302, + "num_input_tokens_seen": 20557288, + "step": 119 + }, + { + "epoch": 0.045576407506702415, + "loss": 0.37002646923065186, + "loss_ce": 0.006379032041877508, + "loss_iou": 1.0891635417938232, + "loss_num": 0.36328125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 20557288, + "step": 119 + }, + { + "epoch": 0.04595940252776714, + "grad_norm": 29.26227934580431, + "learning_rate": 5e-06, + "loss": 0.56, + "num_input_tokens_seen": 20730232, + "step": 120 + }, + { + "epoch": 0.04595940252776714, + "loss": 0.5652130842208862, + "loss_ce": 0.006131095811724663, + "loss_iou": 1.1232898235321045, + "loss_num": 0.55859375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 20730232, + "step": 120 + }, + { + "epoch": 0.04634239754883186, + "grad_norm": 38.70357615083572, + "learning_rate": 5e-06, + "loss": 0.3099, + "num_input_tokens_seen": 20903112, + "step": 121 + }, + { + "epoch": 0.04634239754883186, + "loss": 0.3055613338947296, + "loss_ce": 0.006244930438697338, + "loss_iou": 1.0352020263671875, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 20903112, + "step": 121 + }, + { + "epoch": 0.04672539256989659, + "grad_norm": 38.84593131483085, + "learning_rate": 5e-06, + "loss": 0.5919, + "num_input_tokens_seen": 21076384, + "step": 122 + }, + { + "epoch": 0.04672539256989659, + "loss": 0.5931558609008789, + "loss_ce": 0.0062418365851044655, + "loss_iou": 1.0618486404418945, + "loss_num": 0.5859375, + "loss_xval": 0.5859375, + "num_input_tokens_seen": 21076384, + "step": 122 + }, + { + "epoch": 0.047108387590961316, + "grad_norm": 23.731658503817986, + "learning_rate": 5e-06, + "loss": 0.3136, + "num_input_tokens_seen": 21249376, + "step": 123 + }, + { + "epoch": 0.047108387590961316, + "loss": 0.3049842119216919, + "loss_ce": 0.006278182379901409, + "loss_iou": 0.8924912810325623, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 21249376, + "step": 123 + }, + { + "epoch": 0.04749138261202605, + "grad_norm": 23.92192871406492, + "learning_rate": 5e-06, + "loss": 0.5108, + "num_input_tokens_seen": 21422104, + "step": 124 + }, + { + "epoch": 0.04749138261202605, + "loss": 0.5700915455818176, + "loss_ce": 0.006126689724624157, + "loss_iou": 1.1228581666946411, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 21422104, + "step": 124 + }, + { + "epoch": 0.04787437763309077, + "grad_norm": 32.0527686315411, + "learning_rate": 5e-06, + "loss": 0.4048, + "num_input_tokens_seen": 21595152, + "step": 125 + }, + { + "epoch": 0.04787437763309077, + "loss": 0.35190096497535706, + "loss_ce": 0.006564033217728138, + "loss_iou": 1.0184372663497925, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 21595152, + "step": 125 + }, + { + "epoch": 0.04825737265415549, + "grad_norm": 32.19051576104468, + "learning_rate": 5e-06, + "loss": 0.5357, + "num_input_tokens_seen": 21767960, + "step": 126 + }, + { + "epoch": 0.04825737265415549, + "loss": 0.5383220314979553, + "loss_ce": 0.005851339548826218, + "loss_iou": 1.0118060111999512, + "loss_num": 0.53125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 21767960, + "step": 126 + }, + { + "epoch": 0.048640367675220224, + "grad_norm": 30.852062165351455, + "learning_rate": 5e-06, + "loss": 0.3996, + "num_input_tokens_seen": 21940920, + "step": 127 + }, + { + "epoch": 0.048640367675220224, + "loss": 0.3716692328453064, + "loss_ce": 0.006434849463403225, + "loss_iou": 1.009103536605835, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 21940920, + "step": 127 + }, + { + "epoch": 0.04902336269628495, + "grad_norm": 25.083165301601046, + "learning_rate": 5e-06, + "loss": 0.5802, + "num_input_tokens_seen": 22113824, + "step": 128 + }, + { + "epoch": 0.04902336269628495, + "loss": 0.6301028728485107, + "loss_ce": 0.006323575042188168, + "loss_iou": 1.0315409898757935, + "loss_num": 0.625, + "loss_xval": 0.625, + "num_input_tokens_seen": 22113824, + "step": 128 + }, + { + "epoch": 0.04940635771734968, + "grad_norm": 32.175327935154606, + "learning_rate": 5e-06, + "loss": 0.294, + "num_input_tokens_seen": 22286616, + "step": 129 + }, + { + "epoch": 0.04940635771734968, + "loss": 0.28241562843322754, + "loss_ce": 0.006292589008808136, + "loss_iou": 1.0409519672393799, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 22286616, + "step": 129 + }, + { + "epoch": 0.0497893527384144, + "grad_norm": 28.477041376569616, + "learning_rate": 5e-06, + "loss": 0.5893, + "num_input_tokens_seen": 22459464, + "step": 130 + }, + { + "epoch": 0.0497893527384144, + "loss": 0.6028316020965576, + "loss_ce": 0.006151881068944931, + "loss_iou": 2.0542666912078857, + "loss_num": 0.59765625, + "loss_xval": 0.59765625, + "num_input_tokens_seen": 22459464, + "step": 130 + }, + { + "epoch": 0.050172347759479125, + "grad_norm": 16.26457165947007, + "learning_rate": 5e-06, + "loss": 0.311, + "num_input_tokens_seen": 22632328, + "step": 131 + }, + { + "epoch": 0.050172347759479125, + "loss": 0.3509281873703003, + "loss_ce": 0.005835396703332663, + "loss_iou": 1.0183525085449219, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 22632328, + "step": 131 + }, + { + "epoch": 0.050555342780543855, + "grad_norm": 14.366608602593727, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 22805792, + "step": 132 + }, + { + "epoch": 0.050555342780543855, + "loss": 0.20429584383964539, + "loss_ce": 0.006541921757161617, + "loss_iou": 1.0044993162155151, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 22805792, + "step": 132 + }, + { + "epoch": 0.05093833780160858, + "grad_norm": 26.165813380894868, + "learning_rate": 5e-06, + "loss": 0.3177, + "num_input_tokens_seen": 22978888, + "step": 133 + }, + { + "epoch": 0.05093833780160858, + "loss": 0.3332655429840088, + "loss_ce": 0.006727441679686308, + "loss_iou": 1.070448637008667, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 22978888, + "step": 133 + }, + { + "epoch": 0.05132133282267331, + "grad_norm": 26.62556524899254, + "learning_rate": 5e-06, + "loss": 0.3302, + "num_input_tokens_seen": 23151696, + "step": 134 + }, + { + "epoch": 0.05132133282267331, + "loss": 0.35435616970062256, + "loss_ce": 0.006455780938267708, + "loss_iou": 1.1026301383972168, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 23151696, + "step": 134 + }, + { + "epoch": 0.05170432784373803, + "grad_norm": 46.30831954660043, + "learning_rate": 5e-06, + "loss": 0.4124, + "num_input_tokens_seen": 23324576, + "step": 135 + }, + { + "epoch": 0.05170432784373803, + "loss": 0.4030225872993469, + "loss_ce": 0.006294081453233957, + "loss_iou": 1.019537091255188, + "loss_num": 0.396484375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 23324576, + "step": 135 + }, + { + "epoch": 0.052087322864802756, + "grad_norm": 48.41750133971601, + "learning_rate": 5e-06, + "loss": 0.6584, + "num_input_tokens_seen": 23497536, + "step": 136 + }, + { + "epoch": 0.052087322864802756, + "loss": 0.6342858076095581, + "loss_ce": 0.0063560702838003635, + "loss_iou": 1.5107569694519043, + "loss_num": 0.62890625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 23497536, + "step": 136 + }, + { + "epoch": 0.052470317885867486, + "grad_norm": 28.673374952503366, + "learning_rate": 5e-06, + "loss": 0.5077, + "num_input_tokens_seen": 23670144, + "step": 137 + }, + { + "epoch": 0.052470317885867486, + "loss": 0.4749900698661804, + "loss_ce": 0.006972530856728554, + "loss_iou": 1.152420997619629, + "loss_num": 0.46875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 23670144, + "step": 137 + }, + { + "epoch": 0.05285331290693221, + "grad_norm": 27.475418106466517, + "learning_rate": 5e-06, + "loss": 0.716, + "num_input_tokens_seen": 23843328, + "step": 138 + }, + { + "epoch": 0.05285331290693221, + "loss": 0.7018737196922302, + "loss_ce": 0.006072905845940113, + "loss_iou": 1.1133317947387695, + "loss_num": 0.6953125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 23843328, + "step": 138 + }, + { + "epoch": 0.05323630792799693, + "grad_norm": 37.77224401074182, + "learning_rate": 5e-06, + "loss": 0.4237, + "num_input_tokens_seen": 24016376, + "step": 139 + }, + { + "epoch": 0.05323630792799693, + "loss": 0.41523200273513794, + "loss_ce": 0.006296470295637846, + "loss_iou": 1.0390156507492065, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 24016376, + "step": 139 + }, + { + "epoch": 0.05361930294906166, + "grad_norm": 62.031647505417695, + "learning_rate": 5e-06, + "loss": 0.9266, + "num_input_tokens_seen": 24189160, + "step": 140 + }, + { + "epoch": 0.05361930294906166, + "loss": 0.9466855525970459, + "loss_ce": 0.005767581053078175, + "loss_iou": 1.0992369651794434, + "loss_num": 0.94140625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 24189160, + "step": 140 + }, + { + "epoch": 0.05400229797012639, + "grad_norm": 45.57677053576892, + "learning_rate": 5e-06, + "loss": 0.6621, + "num_input_tokens_seen": 24362424, + "step": 141 + }, + { + "epoch": 0.05400229797012639, + "loss": 0.6456308364868164, + "loss_ce": 0.0064706951379776, + "loss_iou": 1.0844511985778809, + "loss_num": 0.640625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 24362424, + "step": 141 + }, + { + "epoch": 0.05438529299119112, + "grad_norm": 33.198264048981606, + "learning_rate": 5e-06, + "loss": 1.1533, + "num_input_tokens_seen": 24535224, + "step": 142 + }, + { + "epoch": 0.05438529299119112, + "loss": 1.1889057159423828, + "loss_ce": 0.0062885405495762825, + "loss_iou": 1.2113800048828125, + "loss_num": 1.1796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 24535224, + "step": 142 + }, + { + "epoch": 0.05476828801225584, + "grad_norm": 23.691244793430347, + "learning_rate": 5e-06, + "loss": 0.5377, + "num_input_tokens_seen": 24708200, + "step": 143 + }, + { + "epoch": 0.05476828801225584, + "loss": 0.4705287516117096, + "loss_ce": 0.006417406257241964, + "loss_iou": 5.568227767944336, + "loss_num": 0.46484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 24708200, + "step": 143 + }, + { + "epoch": 0.055151283033320564, + "grad_norm": 72.36887055732298, + "learning_rate": 5e-06, + "loss": 0.9508, + "num_input_tokens_seen": 24881232, + "step": 144 + }, + { + "epoch": 0.055151283033320564, + "loss": 0.9282786846160889, + "loss_ce": 0.006403679493814707, + "loss_iou": 1.1952100992202759, + "loss_num": 0.921875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 24881232, + "step": 144 + }, + { + "epoch": 0.055534278054385294, + "grad_norm": 57.73536944055364, + "learning_rate": 5e-06, + "loss": 1.1748, + "num_input_tokens_seen": 25054168, + "step": 145 + }, + { + "epoch": 0.055534278054385294, + "loss": 1.1655254364013672, + "loss_ce": 0.006834015715867281, + "loss_iou": 1.104722023010254, + "loss_num": 1.15625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 25054168, + "step": 145 + }, + { + "epoch": 0.05591727307545002, + "grad_norm": 47.047430292687814, + "learning_rate": 5e-06, + "loss": 1.2262, + "num_input_tokens_seen": 25223328, + "step": 146 + }, + { + "epoch": 0.05591727307545002, + "loss": 1.195068120956421, + "loss_ce": 0.006591492332518101, + "loss_iou": 1.2141139507293701, + "loss_num": 1.1875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 25223328, + "step": 146 + }, + { + "epoch": 0.05630026809651475, + "grad_norm": 62.718817823310836, + "learning_rate": 5e-06, + "loss": 0.8621, + "num_input_tokens_seen": 25396024, + "step": 147 + }, + { + "epoch": 0.05630026809651475, + "loss": 0.8082544803619385, + "loss_ce": 0.006984918378293514, + "loss_iou": 1.0968055725097656, + "loss_num": 0.80078125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 25396024, + "step": 147 + }, + { + "epoch": 0.05668326311757947, + "grad_norm": 63.201855291575825, + "learning_rate": 5e-06, + "loss": 1.3091, + "num_input_tokens_seen": 25569216, + "step": 148 + }, + { + "epoch": 0.05668326311757947, + "loss": 1.2961779832839966, + "loss_ce": 0.006138939410448074, + "loss_iou": 1.3613967895507812, + "loss_num": 1.2890625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 25569216, + "step": 148 + }, + { + "epoch": 0.057066258138644195, + "grad_norm": 42.81570850698476, + "learning_rate": 5e-06, + "loss": 0.6737, + "num_input_tokens_seen": 25742280, + "step": 149 + }, + { + "epoch": 0.057066258138644195, + "loss": 0.6677260994911194, + "loss_ce": 0.006593281868845224, + "loss_iou": 1.3066319227218628, + "loss_num": 0.66015625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 25742280, + "step": 149 + }, + { + "epoch": 0.057449253159708925, + "grad_norm": 54.721571851084676, + "learning_rate": 5e-06, + "loss": 1.0214, + "num_input_tokens_seen": 25915544, + "step": 150 + }, + { + "epoch": 0.057449253159708925, + "loss": 0.9635263681411743, + "loss_ce": 0.006983435247093439, + "loss_iou": 1.1073896884918213, + "loss_num": 0.95703125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 25915544, + "step": 150 + }, + { + "epoch": 0.05783224818077365, + "grad_norm": 52.011697716773476, + "learning_rate": 5e-06, + "loss": 0.7022, + "num_input_tokens_seen": 26088520, + "step": 151 + }, + { + "epoch": 0.05783224818077365, + "loss": 0.7164946794509888, + "loss_ce": 0.007510306313633919, + "loss_iou": 1.124539852142334, + "loss_num": 0.7109375, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 26088520, + "step": 151 + }, + { + "epoch": 0.05821524320183838, + "grad_norm": 49.124407306881245, + "learning_rate": 5e-06, + "loss": 1.022, + "num_input_tokens_seen": 26261656, + "step": 152 + }, + { + "epoch": 0.05821524320183838, + "loss": 1.027024269104004, + "loss_ce": 0.006516510155051947, + "loss_iou": 1.1156485080718994, + "loss_num": 1.0234375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 26261656, + "step": 152 + }, + { + "epoch": 0.0585982382229031, + "grad_norm": 20.596648525427405, + "learning_rate": 5e-06, + "loss": 0.476, + "num_input_tokens_seen": 26434768, + "step": 153 + }, + { + "epoch": 0.0585982382229031, + "loss": 0.4455060362815857, + "loss_ce": 0.007273610215634108, + "loss_iou": 1.0470380783081055, + "loss_num": 0.4375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 26434768, + "step": 153 + }, + { + "epoch": 0.058981233243967826, + "grad_norm": 38.74292928899851, + "learning_rate": 5e-06, + "loss": 0.4473, + "num_input_tokens_seen": 26607624, + "step": 154 + }, + { + "epoch": 0.058981233243967826, + "loss": 0.4658415615558624, + "loss_ce": 0.0063688987866044044, + "loss_iou": 1.0424962043762207, + "loss_num": 0.458984375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 26607624, + "step": 154 + }, + { + "epoch": 0.059364228265032556, + "grad_norm": 44.82288396062709, + "learning_rate": 5e-06, + "loss": 0.6104, + "num_input_tokens_seen": 26780384, + "step": 155 + }, + { + "epoch": 0.059364228265032556, + "loss": 0.6404857635498047, + "loss_ce": 0.006940814666450024, + "loss_iou": 1.0927469730377197, + "loss_num": 0.6328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 26780384, + "step": 155 + }, + { + "epoch": 0.05974722328609728, + "grad_norm": 49.17972131845761, + "learning_rate": 5e-06, + "loss": 0.6254, + "num_input_tokens_seen": 26953456, + "step": 156 + }, + { + "epoch": 0.05974722328609728, + "loss": 0.5728522539138794, + "loss_ce": 0.007178430445492268, + "loss_iou": 1.025742530822754, + "loss_num": 0.56640625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 26953456, + "step": 156 + }, + { + "epoch": 0.06013021830716201, + "grad_norm": 40.51980029856512, + "learning_rate": 5e-06, + "loss": 0.6819, + "num_input_tokens_seen": 27126304, + "step": 157 + }, + { + "epoch": 0.06013021830716201, + "loss": 0.7225454449653625, + "loss_ce": 0.007701745256781578, + "loss_iou": 1.2270827293395996, + "loss_num": 0.71484375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 27126304, + "step": 157 + }, + { + "epoch": 0.060513213328226734, + "grad_norm": 54.900204917719726, + "learning_rate": 5e-06, + "loss": 0.6266, + "num_input_tokens_seen": 27299432, + "step": 158 + }, + { + "epoch": 0.060513213328226734, + "loss": 0.670357346534729, + "loss_ce": 0.007271396461874247, + "loss_iou": 1.1041251420974731, + "loss_num": 0.6640625, + "loss_xval": 0.6640625, + "num_input_tokens_seen": 27299432, + "step": 158 + }, + { + "epoch": 0.06089620834929146, + "grad_norm": 62.96238710810382, + "learning_rate": 5e-06, + "loss": 1.0806, + "num_input_tokens_seen": 27472512, + "step": 159 + }, + { + "epoch": 0.06089620834929146, + "loss": 1.0890766382217407, + "loss_ce": 0.00753366481512785, + "loss_iou": 1.5399413108825684, + "loss_num": 1.078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 27472512, + "step": 159 + }, + { + "epoch": 0.06127920337035619, + "grad_norm": 28.509577655399855, + "learning_rate": 5e-06, + "loss": 0.3219, + "num_input_tokens_seen": 27645800, + "step": 160 + }, + { + "epoch": 0.06127920337035619, + "loss": 0.3256051242351532, + "loss_ce": 0.007978152483701706, + "loss_iou": 1.0282046794891357, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 27645800, + "step": 160 + }, + { + "epoch": 0.06166219839142091, + "grad_norm": 36.40162438649297, + "learning_rate": 5e-06, + "loss": 0.5308, + "num_input_tokens_seen": 27819040, + "step": 161 + }, + { + "epoch": 0.06166219839142091, + "loss": 0.5527220964431763, + "loss_ce": 0.007800169289112091, + "loss_iou": 1.076228141784668, + "loss_num": 0.546875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 27819040, + "step": 161 + }, + { + "epoch": 0.062045193412485634, + "grad_norm": 43.79767513197773, + "learning_rate": 5e-06, + "loss": 0.5429, + "num_input_tokens_seen": 27991984, + "step": 162 + }, + { + "epoch": 0.062045193412485634, + "loss": 0.5679165124893188, + "loss_ce": 0.0073696100153028965, + "loss_iou": 1.1133733987808228, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 27991984, + "step": 162 + }, + { + "epoch": 0.062428188433550365, + "grad_norm": 43.332650487487484, + "learning_rate": 5e-06, + "loss": 0.5952, + "num_input_tokens_seen": 28164728, + "step": 163 + }, + { + "epoch": 0.062428188433550365, + "loss": 0.5726608633995056, + "loss_ce": 0.007475370075553656, + "loss_iou": 1.0769777297973633, + "loss_num": 0.56640625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 28164728, + "step": 163 + }, + { + "epoch": 0.06281118345461509, + "grad_norm": 44.61240035938271, + "learning_rate": 5e-06, + "loss": 0.5017, + "num_input_tokens_seen": 28337712, + "step": 164 + }, + { + "epoch": 0.06281118345461509, + "loss": 0.5077217817306519, + "loss_ce": 0.007721826899796724, + "loss_iou": 1.0379432439804077, + "loss_num": 0.5, + "loss_xval": 0.5, + "num_input_tokens_seen": 28337712, + "step": 164 + }, + { + "epoch": 0.06319417847567982, + "grad_norm": 40.832653551206405, + "learning_rate": 5e-06, + "loss": 0.749, + "num_input_tokens_seen": 28510480, + "step": 165 + }, + { + "epoch": 0.06319417847567982, + "loss": 0.6996516585350037, + "loss_ce": 0.007512982003390789, + "loss_iou": 1.1332221031188965, + "loss_num": 0.69140625, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 28510480, + "step": 165 + }, + { + "epoch": 0.06357717349674455, + "grad_norm": 22.462002530260524, + "learning_rate": 5e-06, + "loss": 0.3961, + "num_input_tokens_seen": 28683168, + "step": 166 + }, + { + "epoch": 0.06357717349674455, + "loss": 0.4529934823513031, + "loss_ce": 0.007314770948141813, + "loss_iou": 1.0337318181991577, + "loss_num": 0.4453125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 28683168, + "step": 166 + }, + { + "epoch": 0.06396016851780927, + "grad_norm": 47.560377666933995, + "learning_rate": 5e-06, + "loss": 0.4786, + "num_input_tokens_seen": 28856272, + "step": 167 + }, + { + "epoch": 0.06396016851780927, + "loss": 0.42942649126052856, + "loss_ce": 0.007795642130076885, + "loss_iou": 1.039899468421936, + "loss_num": 0.421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 28856272, + "step": 167 + }, + { + "epoch": 0.064343163538874, + "grad_norm": 38.71805316720224, + "learning_rate": 5e-06, + "loss": 0.5277, + "num_input_tokens_seen": 29028808, + "step": 168 + }, + { + "epoch": 0.064343163538874, + "loss": 0.5406422019004822, + "loss_ce": 0.007683200761675835, + "loss_iou": 1.0520868301391602, + "loss_num": 0.53125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 29028808, + "step": 168 + }, + { + "epoch": 0.06472615855993873, + "grad_norm": 34.09177727602261, + "learning_rate": 5e-06, + "loss": 0.6291, + "num_input_tokens_seen": 29201496, + "step": 169 + }, + { + "epoch": 0.06472615855993873, + "loss": 0.6294732093811035, + "loss_ce": 0.00691462866961956, + "loss_iou": 1.0863300561904907, + "loss_num": 0.62109375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 29201496, + "step": 169 + }, + { + "epoch": 0.06510915358100344, + "grad_norm": 30.894532213255346, + "learning_rate": 5e-06, + "loss": 0.3206, + "num_input_tokens_seen": 29374440, + "step": 170 + }, + { + "epoch": 0.06510915358100344, + "loss": 0.31897273659706116, + "loss_ce": 0.007571367546916008, + "loss_iou": 1.0226755142211914, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 29374440, + "step": 170 + }, + { + "epoch": 0.06549214860206817, + "grad_norm": 35.87379981893007, + "learning_rate": 5e-06, + "loss": 0.5715, + "num_input_tokens_seen": 29547296, + "step": 171 + }, + { + "epoch": 0.06549214860206817, + "loss": 0.5430014729499817, + "loss_ce": 0.007356944959610701, + "loss_iou": 1.0217440128326416, + "loss_num": 0.53515625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 29547296, + "step": 171 + }, + { + "epoch": 0.0658751436231329, + "grad_norm": 40.63341353480755, + "learning_rate": 5e-06, + "loss": 0.5034, + "num_input_tokens_seen": 29720576, + "step": 172 + }, + { + "epoch": 0.0658751436231329, + "loss": 0.4879244863986969, + "loss_ce": 0.007944025099277496, + "loss_iou": 1.0626220703125, + "loss_num": 0.48046875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 29720576, + "step": 172 + }, + { + "epoch": 0.06625813864419762, + "grad_norm": 33.30447626176966, + "learning_rate": 5e-06, + "loss": 0.572, + "num_input_tokens_seen": 29893584, + "step": 173 + }, + { + "epoch": 0.06625813864419762, + "loss": 0.5977349877357483, + "loss_ce": 0.00837952271103859, + "loss_iou": 1.091104507446289, + "loss_num": 0.58984375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 29893584, + "step": 173 + }, + { + "epoch": 0.06664113366526235, + "grad_norm": 27.137745305887503, + "learning_rate": 5e-06, + "loss": 0.3584, + "num_input_tokens_seen": 30066968, + "step": 174 + }, + { + "epoch": 0.06664113366526235, + "loss": 0.3292595148086548, + "loss_ce": 0.007970421575009823, + "loss_iou": 1.8459727764129639, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 30066968, + "step": 174 + }, + { + "epoch": 0.06702412868632708, + "grad_norm": 20.19896643616232, + "learning_rate": 5e-06, + "loss": 0.4606, + "num_input_tokens_seen": 30239680, + "step": 175 + }, + { + "epoch": 0.06702412868632708, + "loss": 0.5244916677474976, + "loss_ce": 0.008622505702078342, + "loss_iou": 1.0415037870407104, + "loss_num": 0.515625, + "loss_xval": 0.515625, + "num_input_tokens_seen": 30239680, + "step": 175 + }, + { + "epoch": 0.0674071237073918, + "grad_norm": 15.519149833363251, + "learning_rate": 5e-06, + "loss": 0.3395, + "num_input_tokens_seen": 30412768, + "step": 176 + }, + { + "epoch": 0.0674071237073918, + "loss": 0.3429456353187561, + "loss_ce": 0.008228832855820656, + "loss_iou": 2.97749662399292, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 30412768, + "step": 176 + }, + { + "epoch": 0.06779011872845653, + "grad_norm": 21.20051326178833, + "learning_rate": 5e-06, + "loss": 0.3479, + "num_input_tokens_seen": 30585968, + "step": 177 + }, + { + "epoch": 0.06779011872845653, + "loss": 0.3102911710739136, + "loss_ce": 0.007556804455816746, + "loss_iou": 1.0246970653533936, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 30585968, + "step": 177 + }, + { + "epoch": 0.06817311374952126, + "grad_norm": 33.047666010284104, + "learning_rate": 5e-06, + "loss": 0.6411, + "num_input_tokens_seen": 30758712, + "step": 178 + }, + { + "epoch": 0.06817311374952126, + "loss": 0.6166718006134033, + "loss_ce": 0.007785105612128973, + "loss_iou": 1.0879333019256592, + "loss_num": 0.609375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 30758712, + "step": 178 + }, + { + "epoch": 0.06855610877058599, + "grad_norm": 23.013083233642437, + "learning_rate": 5e-06, + "loss": 0.2356, + "num_input_tokens_seen": 30931520, + "step": 179 + }, + { + "epoch": 0.06855610877058599, + "loss": 0.24693651497364044, + "loss_ce": 0.007678701542317867, + "loss_iou": 1.0095577239990234, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 30931520, + "step": 179 + }, + { + "epoch": 0.0689391037916507, + "grad_norm": 32.276607454957585, + "learning_rate": 5e-06, + "loss": 0.57, + "num_input_tokens_seen": 31104128, + "step": 180 + }, + { + "epoch": 0.0689391037916507, + "loss": 0.5437986850738525, + "loss_ce": 0.0076659126207232475, + "loss_iou": 1.1042509078979492, + "loss_num": 0.53515625, + "loss_xval": 0.53515625, + "num_input_tokens_seen": 31104128, + "step": 180 + }, + { + "epoch": 0.06932209881271544, + "grad_norm": 30.696336614046267, + "learning_rate": 5e-06, + "loss": 0.4571, + "num_input_tokens_seen": 31277088, + "step": 181 + }, + { + "epoch": 0.06932209881271544, + "loss": 0.520167350769043, + "loss_ce": 0.008448591455817223, + "loss_iou": 1.1707911491394043, + "loss_num": 0.51171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 31277088, + "step": 181 + }, + { + "epoch": 0.06970509383378017, + "grad_norm": 21.549556583354015, + "learning_rate": 5e-06, + "loss": 0.5931, + "num_input_tokens_seen": 31450440, + "step": 182 + }, + { + "epoch": 0.06970509383378017, + "loss": 0.5975214242935181, + "loss_ce": 0.00816600676625967, + "loss_iou": 1.0781558752059937, + "loss_num": 0.58984375, + "loss_xval": 0.58984375, + "num_input_tokens_seen": 31450440, + "step": 182 + }, + { + "epoch": 0.07008808885484488, + "grad_norm": 32.2803987366505, + "learning_rate": 5e-06, + "loss": 0.3026, + "num_input_tokens_seen": 31623288, + "step": 183 + }, + { + "epoch": 0.07008808885484488, + "loss": 0.2824394106864929, + "loss_ce": 0.008147402666509151, + "loss_iou": 1.078061580657959, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 31623288, + "step": 183 + }, + { + "epoch": 0.07047108387590961, + "grad_norm": 29.490819955260243, + "learning_rate": 5e-06, + "loss": 0.5545, + "num_input_tokens_seen": 31796328, + "step": 184 + }, + { + "epoch": 0.07047108387590961, + "loss": 0.5676002502441406, + "loss_ce": 0.007053417153656483, + "loss_iou": 1.0296332836151123, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 31796328, + "step": 184 + }, + { + "epoch": 0.07085407889697434, + "grad_norm": 27.546106552010087, + "learning_rate": 5e-06, + "loss": 0.2937, + "num_input_tokens_seen": 31968760, + "step": 185 + }, + { + "epoch": 0.07085407889697434, + "loss": 0.29248303174972534, + "loss_ce": 0.007448871619999409, + "loss_iou": 1.0106669664382935, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 31968760, + "step": 185 + }, + { + "epoch": 0.07123707391803906, + "grad_norm": 29.262020890055478, + "learning_rate": 5e-06, + "loss": 0.4562, + "num_input_tokens_seen": 32141472, + "step": 186 + }, + { + "epoch": 0.07123707391803906, + "loss": 0.4467780590057373, + "loss_ce": 0.007324915379285812, + "loss_iou": 1.034911870956421, + "loss_num": 0.439453125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 32141472, + "step": 186 + }, + { + "epoch": 0.07162006893910379, + "grad_norm": 30.348694777589845, + "learning_rate": 5e-06, + "loss": 0.3995, + "num_input_tokens_seen": 32314280, + "step": 187 + }, + { + "epoch": 0.07162006893910379, + "loss": 0.40412604808807373, + "loss_ce": 0.008618230000138283, + "loss_iou": 4.503968238830566, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 32314280, + "step": 187 + }, + { + "epoch": 0.07200306396016852, + "grad_norm": 19.337885486132954, + "learning_rate": 5e-06, + "loss": 0.4688, + "num_input_tokens_seen": 32487032, + "step": 188 + }, + { + "epoch": 0.07200306396016852, + "loss": 0.43590885400772095, + "loss_ce": 0.007442049216479063, + "loss_iou": 1.0248808860778809, + "loss_num": 0.427734375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 32487032, + "step": 188 + }, + { + "epoch": 0.07238605898123325, + "grad_norm": 17.980072416669966, + "learning_rate": 5e-06, + "loss": 0.2825, + "num_input_tokens_seen": 32660208, + "step": 189 + }, + { + "epoch": 0.07238605898123325, + "loss": 0.26397398114204407, + "loss_ce": 0.008480816148221493, + "loss_iou": 1.0260534286499023, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 32660208, + "step": 189 + }, + { + "epoch": 0.07276905400229797, + "grad_norm": 23.282616528478933, + "learning_rate": 5e-06, + "loss": 0.4014, + "num_input_tokens_seen": 32833144, + "step": 190 + }, + { + "epoch": 0.07276905400229797, + "loss": 0.38087987899780273, + "loss_ce": 0.007832997478544712, + "loss_iou": 1.0457580089569092, + "loss_num": 0.373046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 32833144, + "step": 190 + }, + { + "epoch": 0.0731520490233627, + "grad_norm": 36.93242731966085, + "learning_rate": 5e-06, + "loss": 0.4199, + "num_input_tokens_seen": 33006520, + "step": 191 + }, + { + "epoch": 0.0731520490233627, + "loss": 0.46541857719421387, + "loss_ce": 0.007899067364633083, + "loss_iou": 1.0965670347213745, + "loss_num": 0.45703125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 33006520, + "step": 191 + }, + { + "epoch": 0.07353504404442743, + "grad_norm": 75.6533228884653, + "learning_rate": 5e-06, + "loss": 0.4512, + "num_input_tokens_seen": 33179088, + "step": 192 + }, + { + "epoch": 0.07353504404442743, + "loss": 0.4501715898513794, + "loss_ce": 0.007056348491460085, + "loss_iou": 1.0614676475524902, + "loss_num": 0.443359375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 33179088, + "step": 192 + }, + { + "epoch": 0.07391803906549214, + "grad_norm": 25.310524245622176, + "learning_rate": 5e-06, + "loss": 0.3013, + "num_input_tokens_seen": 33351976, + "step": 193 + }, + { + "epoch": 0.07391803906549214, + "loss": 0.3208417594432831, + "loss_ce": 0.007609330117702484, + "loss_iou": 1.0761098861694336, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 33351976, + "step": 193 + }, + { + "epoch": 0.07430103408655687, + "grad_norm": 25.649629507305033, + "learning_rate": 5e-06, + "loss": 0.4841, + "num_input_tokens_seen": 33525168, + "step": 194 + }, + { + "epoch": 0.07430103408655687, + "loss": 0.5254085063934326, + "loss_ce": 0.007342115510255098, + "loss_iou": 1.126143455505371, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 33525168, + "step": 194 + }, + { + "epoch": 0.0746840291076216, + "grad_norm": 29.68138193030563, + "learning_rate": 5e-06, + "loss": 0.3128, + "num_input_tokens_seen": 33698232, + "step": 195 + }, + { + "epoch": 0.0746840291076216, + "loss": 0.3225231170654297, + "loss_ce": 0.007825830951333046, + "loss_iou": 1.0991780757904053, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 33698232, + "step": 195 + }, + { + "epoch": 0.07506702412868632, + "grad_norm": 37.14601624704469, + "learning_rate": 5e-06, + "loss": 0.4957, + "num_input_tokens_seen": 33867800, + "step": 196 + }, + { + "epoch": 0.07506702412868632, + "loss": 0.4901979863643646, + "loss_ce": 0.007776104845106602, + "loss_iou": 1.0547126531600952, + "loss_num": 0.482421875, + "loss_xval": 0.482421875, + "num_input_tokens_seen": 33867800, + "step": 196 + }, + { + "epoch": 0.07545001914975105, + "grad_norm": 38.09017228687531, + "learning_rate": 5e-06, + "loss": 0.4282, + "num_input_tokens_seen": 34041192, + "step": 197 + }, + { + "epoch": 0.07545001914975105, + "loss": 0.4419156312942505, + "loss_ce": 0.008321871981024742, + "loss_iou": 1.1644515991210938, + "loss_num": 0.43359375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 34041192, + "step": 197 + }, + { + "epoch": 0.07583301417081578, + "grad_norm": 29.58474207677991, + "learning_rate": 5e-06, + "loss": 0.5765, + "num_input_tokens_seen": 34214016, + "step": 198 + }, + { + "epoch": 0.07583301417081578, + "loss": 0.5875325798988342, + "loss_ce": 0.00818687118589878, + "loss_iou": 1.0438282489776611, + "loss_num": 0.578125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 34214016, + "step": 198 + }, + { + "epoch": 0.0762160091918805, + "grad_norm": 24.19820119095497, + "learning_rate": 5e-06, + "loss": 0.2809, + "num_input_tokens_seen": 34387072, + "step": 199 + }, + { + "epoch": 0.0762160091918805, + "loss": 0.2809727191925049, + "loss_ce": 0.007901410572230816, + "loss_iou": 1.0663940906524658, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 34387072, + "step": 199 + }, + { + "epoch": 0.07659900421294523, + "grad_norm": 49.97462063305508, + "learning_rate": 5e-06, + "loss": 0.3485, + "num_input_tokens_seen": 34559736, + "step": 200 + }, + { + "epoch": 0.07659900421294523, + "loss": 0.35517212748527527, + "loss_ce": 0.007760013919323683, + "loss_iou": 1.0157616138458252, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 34559736, + "step": 200 + }, + { + "epoch": 0.07698199923400996, + "grad_norm": 43.16095857177117, + "learning_rate": 5e-06, + "loss": 0.6135, + "num_input_tokens_seen": 34732608, + "step": 201 + }, + { + "epoch": 0.07698199923400996, + "loss": 0.5544754266738892, + "loss_ce": 0.007112160325050354, + "loss_iou": 1.0241763591766357, + "loss_num": 0.546875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 34732608, + "step": 201 + }, + { + "epoch": 0.07736499425507469, + "grad_norm": 20.95385839326255, + "learning_rate": 5e-06, + "loss": 0.4632, + "num_input_tokens_seen": 34905488, + "step": 202 + }, + { + "epoch": 0.07736499425507469, + "loss": 0.48185425996780396, + "loss_ce": 0.007977311499416828, + "loss_iou": 1.038071632385254, + "loss_num": 0.474609375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 34905488, + "step": 202 + }, + { + "epoch": 0.0777479892761394, + "grad_norm": 56.181913423999944, + "learning_rate": 5e-06, + "loss": 0.6183, + "num_input_tokens_seen": 35078584, + "step": 203 + }, + { + "epoch": 0.0777479892761394, + "loss": 0.56095290184021, + "loss_ce": 0.00846262089908123, + "loss_iou": 1.0347716808319092, + "loss_num": 0.55078125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 35078584, + "step": 203 + }, + { + "epoch": 0.07813098429720414, + "grad_norm": 56.88320874646355, + "learning_rate": 5e-06, + "loss": 0.892, + "num_input_tokens_seen": 35251872, + "step": 204 + }, + { + "epoch": 0.07813098429720414, + "loss": 0.9481920003890991, + "loss_ce": 0.0077622984535992146, + "loss_iou": 1.1150736808776855, + "loss_num": 0.94140625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 35251872, + "step": 204 + }, + { + "epoch": 0.07851397931826887, + "grad_norm": 47.13745838175376, + "learning_rate": 5e-06, + "loss": 0.5414, + "num_input_tokens_seen": 35424672, + "step": 205 + }, + { + "epoch": 0.07851397931826887, + "loss": 0.5352663397789001, + "loss_ce": 0.008166728541254997, + "loss_iou": 1.032301425933838, + "loss_num": 0.52734375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 35424672, + "step": 205 + }, + { + "epoch": 0.07889697433933358, + "grad_norm": 32.202532364665835, + "learning_rate": 5e-06, + "loss": 0.9978, + "num_input_tokens_seen": 35597600, + "step": 206 + }, + { + "epoch": 0.07889697433933358, + "loss": 1.0033421516418457, + "loss_ce": 0.008225006982684135, + "loss_iou": 1.0893646478652954, + "loss_num": 0.99609375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 35597600, + "step": 206 + }, + { + "epoch": 0.07927996936039831, + "grad_norm": 27.851259730721388, + "learning_rate": 5e-06, + "loss": 0.5275, + "num_input_tokens_seen": 35770688, + "step": 207 + }, + { + "epoch": 0.07927996936039831, + "loss": 0.505529522895813, + "loss_ce": 0.007726819254457951, + "loss_iou": 1.0436320304870605, + "loss_num": 0.498046875, + "loss_xval": 0.498046875, + "num_input_tokens_seen": 35770688, + "step": 207 + }, + { + "epoch": 0.07966296438146304, + "grad_norm": 67.75949609941692, + "learning_rate": 5e-06, + "loss": 0.7863, + "num_input_tokens_seen": 35943440, + "step": 208 + }, + { + "epoch": 0.07966296438146304, + "loss": 0.7841857671737671, + "loss_ce": 0.0073302993550896645, + "loss_iou": 1.0888667106628418, + "loss_num": 0.77734375, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 35943440, + "step": 208 + }, + { + "epoch": 0.08004595940252776, + "grad_norm": 58.05924815659133, + "learning_rate": 5e-06, + "loss": 0.9796, + "num_input_tokens_seen": 36116504, + "step": 209 + }, + { + "epoch": 0.08004595940252776, + "loss": 0.9908746480941772, + "loss_ce": 0.00845281220972538, + "loss_iou": 1.0735149383544922, + "loss_num": 0.984375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 36116504, + "step": 209 + }, + { + "epoch": 0.08042895442359249, + "grad_norm": 27.12379421689299, + "learning_rate": 5e-06, + "loss": 0.7469, + "num_input_tokens_seen": 36289384, + "step": 210 + }, + { + "epoch": 0.08042895442359249, + "loss": 0.7505313158035278, + "loss_ce": 0.008343802765011787, + "loss_iou": 1.218379020690918, + "loss_num": 0.7421875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 36289384, + "step": 210 + }, + { + "epoch": 0.08081194944465722, + "grad_norm": 45.08058931446742, + "learning_rate": 5e-06, + "loss": 0.6479, + "num_input_tokens_seen": 36462232, + "step": 211 + }, + { + "epoch": 0.08081194944465722, + "loss": 0.6172372102737427, + "loss_ce": 0.00835050456225872, + "loss_iou": 1.0919554233551025, + "loss_num": 0.609375, + "loss_xval": 0.609375, + "num_input_tokens_seen": 36462232, + "step": 211 + }, + { + "epoch": 0.08119494446572195, + "grad_norm": 68.53234771774173, + "learning_rate": 5e-06, + "loss": 1.0, + "num_input_tokens_seen": 36634992, + "step": 212 + }, + { + "epoch": 0.08119494446572195, + "loss": 0.9534499645233154, + "loss_ce": 0.008137499913573265, + "loss_iou": 1.1350417137145996, + "loss_num": 0.9453125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 36634992, + "step": 212 + }, + { + "epoch": 0.08157793948678667, + "grad_norm": 60.45343021219607, + "learning_rate": 5e-06, + "loss": 0.8913, + "num_input_tokens_seen": 36807480, + "step": 213 + }, + { + "epoch": 0.08157793948678667, + "loss": 0.8964667320251465, + "loss_ce": 0.008771365508437157, + "loss_iou": 1.0690150260925293, + "loss_num": 0.88671875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 36807480, + "step": 213 + }, + { + "epoch": 0.0819609345078514, + "grad_norm": 31.560015909623697, + "learning_rate": 5e-06, + "loss": 1.0086, + "num_input_tokens_seen": 36980544, + "step": 214 + }, + { + "epoch": 0.0819609345078514, + "loss": 0.9310482144355774, + "loss_ce": 0.00868492852896452, + "loss_iou": 1.2417666912078857, + "loss_num": 0.921875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 36980544, + "step": 214 + }, + { + "epoch": 0.08234392952891613, + "grad_norm": 52.97214741803255, + "learning_rate": 5e-06, + "loss": 0.6637, + "num_input_tokens_seen": 37153688, + "step": 215 + }, + { + "epoch": 0.08234392952891613, + "loss": 0.6983016729354858, + "loss_ce": 0.009336812421679497, + "loss_iou": 1.079323172569275, + "loss_num": 0.6875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 37153688, + "step": 215 + }, + { + "epoch": 0.08272692454998085, + "grad_norm": 60.17389726110234, + "learning_rate": 5e-06, + "loss": 0.843, + "num_input_tokens_seen": 37323184, + "step": 216 + }, + { + "epoch": 0.08272692454998085, + "loss": 0.8084574937820435, + "loss_ce": 0.00816454365849495, + "loss_iou": 1.069022297859192, + "loss_num": 0.80078125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 37323184, + "step": 216 + }, + { + "epoch": 0.08310991957104558, + "grad_norm": 57.111171054147256, + "learning_rate": 5e-06, + "loss": 0.5962, + "num_input_tokens_seen": 37495968, + "step": 217 + }, + { + "epoch": 0.08310991957104558, + "loss": 0.6317141056060791, + "loss_ce": 0.009155533276498318, + "loss_iou": 1.0423400402069092, + "loss_num": 0.62109375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 37495968, + "step": 217 + }, + { + "epoch": 0.0834929145921103, + "grad_norm": 50.41203884024478, + "learning_rate": 5e-06, + "loss": 1.0768, + "num_input_tokens_seen": 37669000, + "step": 218 + }, + { + "epoch": 0.0834929145921103, + "loss": 1.0677589178085327, + "loss_ce": 0.009653490036725998, + "loss_iou": 1.2282007932662964, + "loss_num": 1.0546875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 37669000, + "step": 218 + }, + { + "epoch": 0.08387590961317502, + "grad_norm": 43.272735637508, + "learning_rate": 5e-06, + "loss": 0.443, + "num_input_tokens_seen": 37842168, + "step": 219 + }, + { + "epoch": 0.08387590961317502, + "loss": 0.46490222215652466, + "loss_ce": 0.009335814043879509, + "loss_iou": 1.046978235244751, + "loss_num": 0.455078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 37842168, + "step": 219 + }, + { + "epoch": 0.08425890463423975, + "grad_norm": 52.78682273165674, + "learning_rate": 5e-06, + "loss": 0.9541, + "num_input_tokens_seen": 38015080, + "step": 220 + }, + { + "epoch": 0.08425890463423975, + "loss": 0.9495786428451538, + "loss_ce": 0.008660627529025078, + "loss_iou": 1.1405905485153198, + "loss_num": 0.94140625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 38015080, + "step": 220 + }, + { + "epoch": 0.08464189965530448, + "grad_norm": 47.76023451785587, + "learning_rate": 5e-06, + "loss": 0.6282, + "num_input_tokens_seen": 38188104, + "step": 221 + }, + { + "epoch": 0.08464189965530448, + "loss": 0.6390358209609985, + "loss_ce": 0.009885435923933983, + "loss_iou": 1.0816030502319336, + "loss_num": 0.62890625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 38188104, + "step": 221 + }, + { + "epoch": 0.0850248946763692, + "grad_norm": 39.6378992368006, + "learning_rate": 5e-06, + "loss": 0.8432, + "num_input_tokens_seen": 38361440, + "step": 222 + }, + { + "epoch": 0.0850248946763692, + "loss": 0.8081187009811401, + "loss_ce": 0.009778868407011032, + "loss_iou": 1.0645579099655151, + "loss_num": 0.796875, + "loss_xval": 0.796875, + "num_input_tokens_seen": 38361440, + "step": 222 + }, + { + "epoch": 0.08540788969743393, + "grad_norm": 27.74761331621742, + "learning_rate": 5e-06, + "loss": 0.4048, + "num_input_tokens_seen": 38534360, + "step": 223 + }, + { + "epoch": 0.08540788969743393, + "loss": 0.42580628395080566, + "loss_ce": 0.00881408154964447, + "loss_iou": 1.0770916938781738, + "loss_num": 0.41796875, + "loss_xval": 0.41796875, + "num_input_tokens_seen": 38534360, + "step": 223 + }, + { + "epoch": 0.08579088471849866, + "grad_norm": 48.754843653913895, + "learning_rate": 5e-06, + "loss": 0.5333, + "num_input_tokens_seen": 38707240, + "step": 224 + }, + { + "epoch": 0.08579088471849866, + "loss": 0.520818829536438, + "loss_ce": 0.009588375687599182, + "loss_iou": 1.0615181922912598, + "loss_num": 0.51171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 38707240, + "step": 224 + }, + { + "epoch": 0.08617387973956339, + "grad_norm": 35.67101145239377, + "learning_rate": 5e-06, + "loss": 0.4983, + "num_input_tokens_seen": 38879976, + "step": 225 + }, + { + "epoch": 0.08617387973956339, + "loss": 0.5113679766654968, + "loss_ce": 0.009414846077561378, + "loss_iou": 1.0937516689300537, + "loss_num": 0.5, + "loss_xval": 0.5, + "num_input_tokens_seen": 38879976, + "step": 225 + }, + { + "epoch": 0.08655687476062811, + "grad_norm": 37.86353245054712, + "learning_rate": 5e-06, + "loss": 0.6152, + "num_input_tokens_seen": 39052584, + "step": 226 + }, + { + "epoch": 0.08655687476062811, + "loss": 0.5857442021369934, + "loss_ce": 0.01054888591170311, + "loss_iou": 1.019634485244751, + "loss_num": 0.57421875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 39052584, + "step": 226 + }, + { + "epoch": 0.08693986978169284, + "grad_norm": 41.492479478682746, + "learning_rate": 5e-06, + "loss": 0.6529, + "num_input_tokens_seen": 39225600, + "step": 227 + }, + { + "epoch": 0.08693986978169284, + "loss": 0.6377010941505432, + "loss_ce": 0.010015567764639854, + "loss_iou": 1.1252610683441162, + "loss_num": 0.62890625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 39225600, + "step": 227 + }, + { + "epoch": 0.08732286480275757, + "grad_norm": 36.925861584920455, + "learning_rate": 5e-06, + "loss": 0.6026, + "num_input_tokens_seen": 39398648, + "step": 228 + }, + { + "epoch": 0.08732286480275757, + "loss": 0.6346437335014343, + "loss_ce": 0.009643702767789364, + "loss_iou": 0.9308451414108276, + "loss_num": 0.625, + "loss_xval": 0.625, + "num_input_tokens_seen": 39398648, + "step": 228 + }, + { + "epoch": 0.08770585982382229, + "grad_norm": 33.41380847989801, + "learning_rate": 5e-06, + "loss": 0.331, + "num_input_tokens_seen": 39571560, + "step": 229 + }, + { + "epoch": 0.08770585982382229, + "loss": 0.31973588466644287, + "loss_ce": 0.010531795211136341, + "loss_iou": 1.2544775009155273, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 39571560, + "step": 229 + }, + { + "epoch": 0.08808885484488702, + "grad_norm": 37.27004984198461, + "learning_rate": 5e-06, + "loss": 0.689, + "num_input_tokens_seen": 39741216, + "step": 230 + }, + { + "epoch": 0.08808885484488702, + "loss": 0.6989870071411133, + "loss_ce": 0.010510453954339027, + "loss_iou": 1.0752999782562256, + "loss_num": 0.6875, + "loss_xval": 0.6875, + "num_input_tokens_seen": 39741216, + "step": 230 + }, + { + "epoch": 0.08847184986595175, + "grad_norm": 34.237033316216014, + "learning_rate": 5e-06, + "loss": 0.3351, + "num_input_tokens_seen": 39914552, + "step": 231 + }, + { + "epoch": 0.08847184986595175, + "loss": 0.2838922142982483, + "loss_ce": 0.010820926167070866, + "loss_iou": 1.011523723602295, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 39914552, + "step": 231 + }, + { + "epoch": 0.08885484488701646, + "grad_norm": 28.558703603816248, + "learning_rate": 5e-06, + "loss": 0.6381, + "num_input_tokens_seen": 40087440, + "step": 232 + }, + { + "epoch": 0.08885484488701646, + "loss": 0.6444008946418762, + "loss_ce": 0.010123548097908497, + "loss_iou": 1.0606837272644043, + "loss_num": 0.6328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 40087440, + "step": 232 + }, + { + "epoch": 0.08923783990808119, + "grad_norm": 20.711593050355276, + "learning_rate": 5e-06, + "loss": 0.2953, + "num_input_tokens_seen": 40260120, + "step": 233 + }, + { + "epoch": 0.08923783990808119, + "loss": 0.26176732778549194, + "loss_ce": 0.009936293587088585, + "loss_iou": 0.9967314004898071, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 40260120, + "step": 233 + }, + { + "epoch": 0.08962083492914592, + "grad_norm": 39.27223566087036, + "learning_rate": 5e-06, + "loss": 0.4499, + "num_input_tokens_seen": 40433352, + "step": 234 + }, + { + "epoch": 0.08962083492914592, + "loss": 0.37641066312789917, + "loss_ce": 0.009711459279060364, + "loss_iou": 1.0314005613327026, + "loss_num": 0.3671875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 40433352, + "step": 234 + }, + { + "epoch": 0.09000382995021065, + "grad_norm": 44.82076244879522, + "learning_rate": 5e-06, + "loss": 0.5356, + "num_input_tokens_seen": 40606288, + "step": 235 + }, + { + "epoch": 0.09000382995021065, + "loss": 0.514398992061615, + "loss_ce": 0.00951617956161499, + "loss_iou": 1.0371146202087402, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 40606288, + "step": 235 + }, + { + "epoch": 0.09038682497127537, + "grad_norm": 23.730780034423823, + "learning_rate": 5e-06, + "loss": 0.5958, + "num_input_tokens_seen": 40779264, + "step": 236 + }, + { + "epoch": 0.09038682497127537, + "loss": 0.6494039297103882, + "loss_ce": 0.009755544364452362, + "loss_iou": 1.2160544395446777, + "loss_num": 0.640625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 40779264, + "step": 236 + }, + { + "epoch": 0.0907698199923401, + "grad_norm": 39.65756158381604, + "learning_rate": 5e-06, + "loss": 0.3562, + "num_input_tokens_seen": 40948744, + "step": 237 + }, + { + "epoch": 0.0907698199923401, + "loss": 0.3717811405658722, + "loss_ce": 0.010453008115291595, + "loss_iou": 1.011744737625122, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 40948744, + "step": 237 + }, + { + "epoch": 0.09115281501340483, + "grad_norm": 33.31493995502488, + "learning_rate": 5e-06, + "loss": 0.4441, + "num_input_tokens_seen": 41121688, + "step": 238 + }, + { + "epoch": 0.09115281501340483, + "loss": 0.47466912865638733, + "loss_ce": 0.009825348854064941, + "loss_iou": 1.0839557647705078, + "loss_num": 0.46484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 41121688, + "step": 238 + }, + { + "epoch": 0.09153581003446955, + "grad_norm": 39.187070010958436, + "learning_rate": 5e-06, + "loss": 0.4312, + "num_input_tokens_seen": 41294488, + "step": 239 + }, + { + "epoch": 0.09153581003446955, + "loss": 0.4678073227405548, + "loss_ce": 0.009311222471296787, + "loss_iou": 1.0262682437896729, + "loss_num": 0.458984375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 41294488, + "step": 239 + }, + { + "epoch": 0.09191880505553428, + "grad_norm": 30.268551043091506, + "learning_rate": 5e-06, + "loss": 0.5959, + "num_input_tokens_seen": 41467400, + "step": 240 + }, + { + "epoch": 0.09191880505553428, + "loss": 0.6431682109832764, + "loss_ce": 0.010111584328114986, + "loss_iou": 1.150076150894165, + "loss_num": 0.6328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 41467400, + "step": 240 + }, + { + "epoch": 0.09230180007659901, + "grad_norm": 29.354480728493197, + "learning_rate": 5e-06, + "loss": 0.352, + "num_input_tokens_seen": 41640488, + "step": 241 + }, + { + "epoch": 0.09230180007659901, + "loss": 0.3974408805370331, + "loss_ce": 0.009989721700549126, + "loss_iou": 1.3911032676696777, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 41640488, + "step": 241 + }, + { + "epoch": 0.09268479509766372, + "grad_norm": 57.10524842136358, + "learning_rate": 5e-06, + "loss": 0.6226, + "num_input_tokens_seen": 41813624, + "step": 242 + }, + { + "epoch": 0.09268479509766372, + "loss": 0.6125884056091309, + "loss_ce": 0.010049371980130672, + "loss_iou": 1.063937783241272, + "loss_num": 0.6015625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 41813624, + "step": 242 + }, + { + "epoch": 0.09306779011872846, + "grad_norm": 45.84410776062516, + "learning_rate": 5e-06, + "loss": 0.5885, + "num_input_tokens_seen": 41986624, + "step": 243 + }, + { + "epoch": 0.09306779011872846, + "loss": 0.5837477445602417, + "loss_ce": 0.00977311935275793, + "loss_iou": 1.0533053874969482, + "loss_num": 0.57421875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 41986624, + "step": 243 + }, + { + "epoch": 0.09345078513979319, + "grad_norm": 32.262416599010756, + "learning_rate": 5e-06, + "loss": 0.6973, + "num_input_tokens_seen": 42159784, + "step": 244 + }, + { + "epoch": 0.09345078513979319, + "loss": 0.7056179642677307, + "loss_ce": 0.010305466130375862, + "loss_iou": 1.114302396774292, + "loss_num": 0.6953125, + "loss_xval": 0.6953125, + "num_input_tokens_seen": 42159784, + "step": 244 + }, + { + "epoch": 0.0938337801608579, + "grad_norm": 48.773859092039544, + "learning_rate": 5e-06, + "loss": 0.6242, + "num_input_tokens_seen": 42332736, + "step": 245 + }, + { + "epoch": 0.0938337801608579, + "loss": 0.668730616569519, + "loss_ce": 0.009306754916906357, + "loss_iou": 1.2523534297943115, + "loss_num": 0.66015625, + "loss_xval": 0.66015625, + "num_input_tokens_seen": 42332736, + "step": 245 + }, + { + "epoch": 0.09421677518192263, + "grad_norm": 60.37995512607071, + "learning_rate": 5e-06, + "loss": 0.738, + "num_input_tokens_seen": 42505896, + "step": 246 + }, + { + "epoch": 0.09421677518192263, + "loss": 0.7233730554580688, + "loss_ce": 0.00950586423277855, + "loss_iou": 1.0678024291992188, + "loss_num": 0.71484375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 42505896, + "step": 246 + }, + { + "epoch": 0.09459977020298736, + "grad_norm": 34.59265204812187, + "learning_rate": 5e-06, + "loss": 0.4118, + "num_input_tokens_seen": 42678720, + "step": 247 + }, + { + "epoch": 0.09459977020298736, + "loss": 0.4069308936595917, + "loss_ce": 0.009836145676672459, + "loss_iou": 1.0240042209625244, + "loss_num": 0.396484375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 42678720, + "step": 247 + }, + { + "epoch": 0.0949827652240521, + "grad_norm": 25.429298668066338, + "learning_rate": 5e-06, + "loss": 0.5488, + "num_input_tokens_seen": 42851568, + "step": 248 + }, + { + "epoch": 0.0949827652240521, + "loss": 0.5835607051849365, + "loss_ce": 0.010806784965097904, + "loss_iou": 1.0707110166549683, + "loss_num": 0.57421875, + "loss_xval": 0.57421875, + "num_input_tokens_seen": 42851568, + "step": 248 + }, + { + "epoch": 0.09536576024511681, + "grad_norm": 33.49257412615864, + "learning_rate": 5e-06, + "loss": 0.5205, + "num_input_tokens_seen": 43024512, + "step": 249 + }, + { + "epoch": 0.09536576024511681, + "loss": 0.5233336687088013, + "loss_ce": 0.01063838042318821, + "loss_iou": 1.0241611003875732, + "loss_num": 0.51171875, + "loss_xval": 0.51171875, + "num_input_tokens_seen": 43024512, + "step": 249 + }, + { + "epoch": 0.09574875526618154, + "grad_norm": 35.51917126089131, + "learning_rate": 5e-06, + "loss": 0.7009, + "num_input_tokens_seen": 43193736, + "step": 250 + }, + { + "epoch": 0.09574875526618154, + "eval_websight_new_CIoU": 0.7970602214336395, + "eval_websight_new_GIoU": 0.7953665852546692, + "eval_websight_new_IoU": 0.7986245453357697, + "eval_websight_new_MAE_all": 0.01940658688545227, + "eval_websight_new_MAE_h": 0.02148013934493065, + "eval_websight_new_MAE_w": 0.023069137707352638, + "eval_websight_new_MAE_x": 0.014143784064799547, + "eval_websight_new_MAE_y": 0.01893328595906496, + "eval_websight_new_NUM_probability": 0.012394892051815987, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.24325791001319885, + "eval_websight_new_loss_ce": 0.02025469858199358, + "eval_websight_new_loss_iou": 1.0012649893760681, + "eval_websight_new_loss_num": 0.2210693359375, + "eval_websight_new_loss_xval": 0.2210693359375, + "eval_websight_new_runtime": 59.1202, + "eval_websight_new_samples_per_second": 0.846, + "eval_websight_new_steps_per_second": 0.034, + "num_input_tokens_seen": 43193736, + "step": 250 + }, + { + "epoch": 0.09574875526618154, + "eval_seeclick_CIoU": 0.4883647561073303, + "eval_seeclick_GIoU": 0.48301491141319275, + "eval_seeclick_IoU": 0.5227425396442413, + "eval_seeclick_MAE_all": 0.08011750504374504, + "eval_seeclick_MAE_h": 0.07117656245827675, + "eval_seeclick_MAE_w": 0.09652542322874069, + "eval_seeclick_MAE_x": 0.08502181246876717, + "eval_seeclick_MAE_y": 0.067746227607131, + "eval_seeclick_NUM_probability": 0.019341954961419106, + "eval_seeclick_inside_bbox": 0.8541666567325592, + "eval_seeclick_loss": 0.6910960674285889, + "eval_seeclick_loss_ce": 0.014814576599746943, + "eval_seeclick_loss_iou": 1.142272412776947, + "eval_seeclick_loss_num": 0.6513671875, + "eval_seeclick_loss_xval": 0.6513671875, + "eval_seeclick_runtime": 82.318, + "eval_seeclick_samples_per_second": 0.607, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 43193736, + "step": 250 + }, + { + "epoch": 0.09574875526618154, + "eval_icons_CIoU": 0.7694402039051056, + "eval_icons_GIoU": 0.7592164576053619, + "eval_icons_IoU": 0.7789261639118195, + "eval_icons_MAE_all": 0.029270297847688198, + "eval_icons_MAE_h": 0.03659070935100317, + "eval_icons_MAE_w": 0.02758670412003994, + "eval_icons_MAE_x": 0.024439156521111727, + "eval_icons_MAE_y": 0.02846463117748499, + "eval_icons_NUM_probability": 0.008785805199295282, + "eval_icons_inside_bbox": 0.9722222089767456, + "eval_icons_loss": 0.24321779608726501, + "eval_icons_loss_ce": 0.014401643071323633, + "eval_icons_loss_iou": 1.022590696811676, + "eval_icons_loss_num": 0.21490478515625, + "eval_icons_loss_xval": 0.21490478515625, + "eval_icons_runtime": 82.7673, + "eval_icons_samples_per_second": 0.604, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 43193736, + "step": 250 + }, + { + "epoch": 0.09574875526618154, + "loss": 0.2483743131160736, + "loss_ce": 0.014487605541944504, + "loss_iou": 1.037798285484314, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 43193736, + "step": 250 + }, + { + "epoch": 0.09613175028724627, + "grad_norm": 20.25484850817279, + "learning_rate": 5e-06, + "loss": 0.2744, + "num_input_tokens_seen": 43366592, + "step": 251 + }, + { + "epoch": 0.09613175028724627, + "loss": 0.2883433699607849, + "loss_ce": 0.01063341461122036, + "loss_iou": 1.0252397060394287, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 43366592, + "step": 251 + }, + { + "epoch": 0.09651474530831099, + "grad_norm": 25.324768920980308, + "learning_rate": 5e-06, + "loss": 0.6112, + "num_input_tokens_seen": 43539632, + "step": 252 + }, + { + "epoch": 0.09651474530831099, + "loss": 0.6387765407562256, + "loss_ce": 0.009626125916838646, + "loss_iou": 1.0179805755615234, + "loss_num": 0.62890625, + "loss_xval": 0.62890625, + "num_input_tokens_seen": 43539632, + "step": 252 + }, + { + "epoch": 0.09689774032937572, + "grad_norm": 25.420029732622826, + "learning_rate": 5e-06, + "loss": 0.3988, + "num_input_tokens_seen": 43712048, + "step": 253 + }, + { + "epoch": 0.09689774032937572, + "loss": 0.45570528507232666, + "loss_ce": 0.008927949704229832, + "loss_iou": 1.9686532020568848, + "loss_num": 0.447265625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 43712048, + "step": 253 + }, + { + "epoch": 0.09728073535044045, + "grad_norm": 47.35429105933384, + "learning_rate": 5e-06, + "loss": 0.5065, + "num_input_tokens_seen": 43885328, + "step": 254 + }, + { + "epoch": 0.09728073535044045, + "loss": 0.4618152976036072, + "loss_ce": 0.010887575335800648, + "loss_iou": 1.2346810102462769, + "loss_num": 0.451171875, + "loss_xval": 0.451171875, + "num_input_tokens_seen": 43885328, + "step": 254 + }, + { + "epoch": 0.09766373037150516, + "grad_norm": 32.21036374102037, + "learning_rate": 5e-06, + "loss": 0.3966, + "num_input_tokens_seen": 44058120, + "step": 255 + }, + { + "epoch": 0.09766373037150516, + "loss": 0.3424299359321594, + "loss_ce": 0.009666264057159424, + "loss_iou": 1.009257197380066, + "loss_num": 0.33203125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 44058120, + "step": 255 + }, + { + "epoch": 0.0980467253925699, + "grad_norm": 31.43279394338899, + "learning_rate": 5e-06, + "loss": 0.5264, + "num_input_tokens_seen": 44231032, + "step": 256 + }, + { + "epoch": 0.0980467253925699, + "loss": 0.5027552843093872, + "loss_ce": 0.010323646478354931, + "loss_iou": 1.0422286987304688, + "loss_num": 0.4921875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 44231032, + "step": 256 + }, + { + "epoch": 0.09842972041363462, + "grad_norm": 40.30321116368787, + "learning_rate": 5e-06, + "loss": 0.4123, + "num_input_tokens_seen": 44404192, + "step": 257 + }, + { + "epoch": 0.09842972041363462, + "loss": 0.40247195959091187, + "loss_ce": 0.010382117703557014, + "loss_iou": 1.0118200778961182, + "loss_num": 0.392578125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 44404192, + "step": 257 + }, + { + "epoch": 0.09881271543469936, + "grad_norm": 34.974454284072436, + "learning_rate": 5e-06, + "loss": 0.5083, + "num_input_tokens_seen": 44573512, + "step": 258 + }, + { + "epoch": 0.09881271543469936, + "loss": 0.5065228343009949, + "loss_ce": 0.011161506175994873, + "loss_iou": 1.0581029653549194, + "loss_num": 0.49609375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 44573512, + "step": 258 + }, + { + "epoch": 0.09919571045576407, + "grad_norm": 19.965723552087525, + "learning_rate": 5e-06, + "loss": 0.2637, + "num_input_tokens_seen": 44746088, + "step": 259 + }, + { + "epoch": 0.09919571045576407, + "loss": 0.28278666734695435, + "loss_ce": 0.010081607848405838, + "loss_iou": 1.0371840000152588, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 44746088, + "step": 259 + }, + { + "epoch": 0.0995787054768288, + "grad_norm": 22.547722992093767, + "learning_rate": 5e-06, + "loss": 0.3887, + "num_input_tokens_seen": 44919376, + "step": 260 + }, + { + "epoch": 0.0995787054768288, + "loss": 0.42721009254455566, + "loss_ce": 0.010828271508216858, + "loss_iou": 1.0751303434371948, + "loss_num": 0.416015625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 44919376, + "step": 260 + }, + { + "epoch": 0.09996170049789353, + "grad_norm": 38.337449044979444, + "learning_rate": 5e-06, + "loss": 0.5143, + "num_input_tokens_seen": 45092656, + "step": 261 + }, + { + "epoch": 0.09996170049789353, + "loss": 0.5790233016014099, + "loss_ce": 0.010175647214055061, + "loss_iou": 1.0254803895950317, + "loss_num": 0.5703125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 45092656, + "step": 261 + }, + { + "epoch": 0.10034469551895825, + "grad_norm": 21.79110441345305, + "learning_rate": 5e-06, + "loss": 0.4789, + "num_input_tokens_seen": 45265336, + "step": 262 + }, + { + "epoch": 0.10034469551895825, + "loss": 0.49044251441955566, + "loss_ce": 0.009973737411201, + "loss_iou": 1.088423252105713, + "loss_num": 0.48046875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 45265336, + "step": 262 + }, + { + "epoch": 0.10072769054002298, + "grad_norm": 30.89290505138009, + "learning_rate": 5e-06, + "loss": 0.3411, + "num_input_tokens_seen": 45438600, + "step": 263 + }, + { + "epoch": 0.10072769054002298, + "loss": 0.37563595175743103, + "loss_ce": 0.010523635894060135, + "loss_iou": 1.0144340991973877, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 45438600, + "step": 263 + }, + { + "epoch": 0.10111068556108771, + "grad_norm": 23.257658751783733, + "learning_rate": 5e-06, + "loss": 0.5822, + "num_input_tokens_seen": 45608112, + "step": 264 + }, + { + "epoch": 0.10111068556108771, + "loss": 0.5752701163291931, + "loss_ce": 0.010328728705644608, + "loss_iou": 1.0628750324249268, + "loss_num": 0.56640625, + "loss_xval": 0.56640625, + "num_input_tokens_seen": 45608112, + "step": 264 + }, + { + "epoch": 0.10149368058215243, + "grad_norm": 23.542558977591938, + "learning_rate": 5e-06, + "loss": 0.3597, + "num_input_tokens_seen": 45781272, + "step": 265 + }, + { + "epoch": 0.10149368058215243, + "loss": 0.3570312261581421, + "loss_ce": 0.01059566717594862, + "loss_iou": 1.1102811098098755, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 45781272, + "step": 265 + }, + { + "epoch": 0.10187667560321716, + "grad_norm": 32.735593085781545, + "learning_rate": 5e-06, + "loss": 0.5042, + "num_input_tokens_seen": 45954352, + "step": 266 + }, + { + "epoch": 0.10187667560321716, + "loss": 0.4968581795692444, + "loss_ce": 0.010774178430438042, + "loss_iou": 1.0254132747650146, + "loss_num": 0.486328125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 45954352, + "step": 266 + }, + { + "epoch": 0.10225967062428189, + "grad_norm": 20.80385360127902, + "learning_rate": 5e-06, + "loss": 0.362, + "num_input_tokens_seen": 46127568, + "step": 267 + }, + { + "epoch": 0.10225967062428189, + "loss": 0.3527145981788635, + "loss_ce": 0.01116187870502472, + "loss_iou": 1.0225780010223389, + "loss_num": 0.341796875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 46127568, + "step": 267 + }, + { + "epoch": 0.10264266564534662, + "grad_norm": 17.243438931835026, + "learning_rate": 5e-06, + "loss": 0.457, + "num_input_tokens_seen": 46300576, + "step": 268 + }, + { + "epoch": 0.10264266564534662, + "loss": 0.4650697708129883, + "loss_ce": 0.010724080726504326, + "loss_iou": 1.0136557817459106, + "loss_num": 0.455078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 46300576, + "step": 268 + }, + { + "epoch": 0.10302566066641133, + "grad_norm": 46.74543932677949, + "learning_rate": 5e-06, + "loss": 0.3367, + "num_input_tokens_seen": 46474072, + "step": 269 + }, + { + "epoch": 0.10302566066641133, + "loss": 0.33559098839759827, + "loss_ce": 0.01149429939687252, + "loss_iou": 1.0826479196548462, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 46474072, + "step": 269 + }, + { + "epoch": 0.10340865568747606, + "grad_norm": 38.67504807042694, + "learning_rate": 5e-06, + "loss": 0.4949, + "num_input_tokens_seen": 46647136, + "step": 270 + }, + { + "epoch": 0.10340865568747606, + "loss": 0.5154224634170532, + "loss_ce": 0.011027935892343521, + "loss_iou": 1.0698435306549072, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 46647136, + "step": 270 + }, + { + "epoch": 0.1037916507085408, + "grad_norm": 27.550225883706, + "learning_rate": 5e-06, + "loss": 0.3874, + "num_input_tokens_seen": 46819680, + "step": 271 + }, + { + "epoch": 0.1037916507085408, + "loss": 0.37122344970703125, + "loss_ce": 0.010627737268805504, + "loss_iou": 1.0211730003356934, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 46819680, + "step": 271 + }, + { + "epoch": 0.10417464572960551, + "grad_norm": 22.806761619302367, + "learning_rate": 5e-06, + "loss": 0.5148, + "num_input_tokens_seen": 46992712, + "step": 272 + }, + { + "epoch": 0.10417464572960551, + "loss": 0.4890601634979248, + "loss_ce": 0.010056260973215103, + "loss_iou": 1.1803604364395142, + "loss_num": 0.478515625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 46992712, + "step": 272 + }, + { + "epoch": 0.10455764075067024, + "grad_norm": 15.847794001537517, + "learning_rate": 5e-06, + "loss": 0.254, + "num_input_tokens_seen": 47165808, + "step": 273 + }, + { + "epoch": 0.10455764075067024, + "loss": 0.2679173946380615, + "loss_ce": 0.010898372158408165, + "loss_iou": 1.0206106901168823, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 47165808, + "step": 273 + }, + { + "epoch": 0.10494063577173497, + "grad_norm": 34.31450912159891, + "learning_rate": 5e-06, + "loss": 0.3414, + "num_input_tokens_seen": 47339232, + "step": 274 + }, + { + "epoch": 0.10494063577173497, + "loss": 0.32526785135269165, + "loss_ce": 0.010570560581982136, + "loss_iou": 1.0116055011749268, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 47339232, + "step": 274 + }, + { + "epoch": 0.10532363079279969, + "grad_norm": 27.716014130758875, + "learning_rate": 5e-06, + "loss": 0.4536, + "num_input_tokens_seen": 47511976, + "step": 275 + }, + { + "epoch": 0.10532363079279969, + "loss": 0.4193720519542694, + "loss_ce": 0.009704088792204857, + "loss_iou": 0.9634692668914795, + "loss_num": 0.41015625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 47511976, + "step": 275 + }, + { + "epoch": 0.10570662581386442, + "grad_norm": 17.7829828252544, + "learning_rate": 5e-06, + "loss": 0.3866, + "num_input_tokens_seen": 47685064, + "step": 276 + }, + { + "epoch": 0.10570662581386442, + "loss": 0.41627562046051025, + "loss_ce": 0.011002162471413612, + "loss_iou": 1.0582951307296753, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 47685064, + "step": 276 + }, + { + "epoch": 0.10608962083492915, + "grad_norm": 31.24090989532103, + "learning_rate": 5e-06, + "loss": 0.3073, + "num_input_tokens_seen": 47858312, + "step": 277 + }, + { + "epoch": 0.10608962083492915, + "loss": 0.34194135665893555, + "loss_ce": 0.010398398153483868, + "loss_iou": 1.0338774919509888, + "loss_num": 0.33203125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 47858312, + "step": 277 + }, + { + "epoch": 0.10647261585599387, + "grad_norm": 32.0296279715356, + "learning_rate": 5e-06, + "loss": 0.5668, + "num_input_tokens_seen": 48031616, + "step": 278 + }, + { + "epoch": 0.10647261585599387, + "loss": 0.5294106006622314, + "loss_ce": 0.010367615148425102, + "loss_iou": 1.145504355430603, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 48031616, + "step": 278 + }, + { + "epoch": 0.1068556108770586, + "grad_norm": 21.181429801625686, + "learning_rate": 5e-06, + "loss": 0.3589, + "num_input_tokens_seen": 48204776, + "step": 279 + }, + { + "epoch": 0.1068556108770586, + "loss": 0.2801125645637512, + "loss_ce": 0.010459225624799728, + "loss_iou": 1.0051308870315552, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 48204776, + "step": 279 + }, + { + "epoch": 0.10723860589812333, + "grad_norm": 19.094716754371948, + "learning_rate": 5e-06, + "loss": 0.4459, + "num_input_tokens_seen": 48377704, + "step": 280 + }, + { + "epoch": 0.10723860589812333, + "loss": 0.43079090118408203, + "loss_ce": 0.01038073468953371, + "loss_iou": 1.0597786903381348, + "loss_num": 0.419921875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 48377704, + "step": 280 + }, + { + "epoch": 0.10762160091918806, + "grad_norm": 23.839418127132326, + "learning_rate": 5e-06, + "loss": 0.3668, + "num_input_tokens_seen": 48550864, + "step": 281 + }, + { + "epoch": 0.10762160091918806, + "loss": 0.45468515157699585, + "loss_ce": 0.009494691155850887, + "loss_iou": 1.0974698066711426, + "loss_num": 0.4453125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 48550864, + "step": 281 + }, + { + "epoch": 0.10800459594025277, + "grad_norm": 29.281611849796462, + "learning_rate": 5e-06, + "loss": 0.4213, + "num_input_tokens_seen": 48724008, + "step": 282 + }, + { + "epoch": 0.10800459594025277, + "loss": 0.38292914628982544, + "loss_ce": 0.010126392357051373, + "loss_iou": 1.0047979354858398, + "loss_num": 0.373046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 48724008, + "step": 282 + }, + { + "epoch": 0.1083875909613175, + "grad_norm": 22.852496320719037, + "learning_rate": 5e-06, + "loss": 0.3968, + "num_input_tokens_seen": 48897088, + "step": 283 + }, + { + "epoch": 0.1083875909613175, + "loss": 0.36059004068374634, + "loss_ce": 0.01049237884581089, + "loss_iou": 1.095686912536621, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 48897088, + "step": 283 + }, + { + "epoch": 0.10877058598238223, + "grad_norm": 16.194662237848114, + "learning_rate": 5e-06, + "loss": 0.4194, + "num_input_tokens_seen": 49070160, + "step": 284 + }, + { + "epoch": 0.10877058598238223, + "loss": 0.4404478669166565, + "loss_ce": 0.010516216047108173, + "loss_iou": 1.0644468069076538, + "loss_num": 0.4296875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 49070160, + "step": 284 + }, + { + "epoch": 0.10915358100344695, + "grad_norm": 27.283997636832314, + "learning_rate": 5e-06, + "loss": 0.2844, + "num_input_tokens_seen": 49243192, + "step": 285 + }, + { + "epoch": 0.10915358100344695, + "loss": 0.23761063814163208, + "loss_ce": 0.010437790304422379, + "loss_iou": 1.0049580335617065, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 49243192, + "step": 285 + }, + { + "epoch": 0.10953657602451168, + "grad_norm": 38.47563103402911, + "learning_rate": 5e-06, + "loss": 0.5385, + "num_input_tokens_seen": 49416184, + "step": 286 + }, + { + "epoch": 0.10953657602451168, + "loss": 0.5168796181678772, + "loss_ce": 0.011264389380812645, + "loss_iou": 1.1069316864013672, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 49416184, + "step": 286 + }, + { + "epoch": 0.10991957104557641, + "grad_norm": 28.808207745099345, + "learning_rate": 5e-06, + "loss": 0.2741, + "num_input_tokens_seen": 49589080, + "step": 287 + }, + { + "epoch": 0.10991957104557641, + "loss": 0.2880805730819702, + "loss_ce": 0.01098096463829279, + "loss_iou": 1.044050693511963, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 49589080, + "step": 287 + }, + { + "epoch": 0.11030256606664113, + "grad_norm": 32.05739369285175, + "learning_rate": 5e-06, + "loss": 0.3938, + "num_input_tokens_seen": 49761760, + "step": 288 + }, + { + "epoch": 0.11030256606664113, + "loss": 0.41695815324783325, + "loss_ce": 0.010586072690784931, + "loss_iou": 1.3855371475219727, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 49761760, + "step": 288 + }, + { + "epoch": 0.11068556108770586, + "grad_norm": 50.29793791048213, + "learning_rate": 5e-06, + "loss": 0.583, + "num_input_tokens_seen": 49934704, + "step": 289 + }, + { + "epoch": 0.11068556108770586, + "loss": 0.561424970626831, + "loss_ce": 0.01015543844550848, + "loss_iou": 1.0180261135101318, + "loss_num": 0.55078125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 49934704, + "step": 289 + }, + { + "epoch": 0.11106855610877059, + "grad_norm": 35.66832463769736, + "learning_rate": 5e-06, + "loss": 0.5456, + "num_input_tokens_seen": 50107712, + "step": 290 + }, + { + "epoch": 0.11106855610877059, + "loss": 0.5598129630088806, + "loss_ce": 0.01049656979739666, + "loss_iou": 1.013216257095337, + "loss_num": 0.55078125, + "loss_xval": 0.55078125, + "num_input_tokens_seen": 50107712, + "step": 290 + }, + { + "epoch": 0.11145155112983532, + "grad_norm": 31.568232222493787, + "learning_rate": 5e-06, + "loss": 0.4637, + "num_input_tokens_seen": 50280752, + "step": 291 + }, + { + "epoch": 0.11145155112983532, + "loss": 0.4419333338737488, + "loss_ce": 0.01102512702345848, + "loss_iou": 1.2086838483810425, + "loss_num": 0.431640625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 50280752, + "step": 291 + }, + { + "epoch": 0.11183454615090004, + "grad_norm": 32.23048022560672, + "learning_rate": 5e-06, + "loss": 0.5149, + "num_input_tokens_seen": 50453728, + "step": 292 + }, + { + "epoch": 0.11183454615090004, + "loss": 0.4906352162361145, + "loss_ce": 0.010654768906533718, + "loss_iou": 1.09377121925354, + "loss_num": 0.48046875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 50453728, + "step": 292 + }, + { + "epoch": 0.11221754117196477, + "grad_norm": 38.09973773117454, + "learning_rate": 5e-06, + "loss": 0.492, + "num_input_tokens_seen": 50626616, + "step": 293 + }, + { + "epoch": 0.11221754117196477, + "loss": 0.43555042147636414, + "loss_ce": 0.010257437825202942, + "loss_iou": 1.026540756225586, + "loss_num": 0.42578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 50626616, + "step": 293 + }, + { + "epoch": 0.1126005361930295, + "grad_norm": 39.24787901065983, + "learning_rate": 5e-06, + "loss": 0.6363, + "num_input_tokens_seen": 50799848, + "step": 294 + }, + { + "epoch": 0.1126005361930295, + "loss": 0.6617828607559204, + "loss_ce": 0.011392205953598022, + "loss_iou": 1.0512713193893433, + "loss_num": 0.6484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 50799848, + "step": 294 + }, + { + "epoch": 0.11298353121409421, + "grad_norm": 32.90943652131842, + "learning_rate": 5e-06, + "loss": 0.6314, + "num_input_tokens_seen": 50973248, + "step": 295 + }, + { + "epoch": 0.11298353121409421, + "loss": 0.6341913938522339, + "loss_ce": 0.011144520714879036, + "loss_iou": 1.3047211170196533, + "loss_num": 0.625, + "loss_xval": 0.625, + "num_input_tokens_seen": 50973248, + "step": 295 + }, + { + "epoch": 0.11336652623515894, + "grad_norm": 38.52268484842589, + "learning_rate": 5e-06, + "loss": 0.9833, + "num_input_tokens_seen": 51146048, + "step": 296 + }, + { + "epoch": 0.11336652623515894, + "loss": 0.9849939346313477, + "loss_ce": 0.010384575463831425, + "loss_iou": 1.2159556150436401, + "loss_num": 0.9765625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 51146048, + "step": 296 + }, + { + "epoch": 0.11374952125622367, + "grad_norm": 57.08290135274048, + "learning_rate": 5e-06, + "loss": 0.541, + "num_input_tokens_seen": 51318912, + "step": 297 + }, + { + "epoch": 0.11374952125622367, + "loss": 0.5805840492248535, + "loss_ce": 0.010027414187788963, + "loss_iou": 1.2200486660003662, + "loss_num": 0.5703125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 51318912, + "step": 297 + }, + { + "epoch": 0.11413251627728839, + "grad_norm": 72.8746542496279, + "learning_rate": 5e-06, + "loss": 1.3715, + "num_input_tokens_seen": 51491568, + "step": 298 + }, + { + "epoch": 0.11413251627728839, + "loss": 1.379122018814087, + "loss_ce": 0.010957980528473854, + "loss_iou": 1.378398060798645, + "loss_num": 1.3671875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 51491568, + "step": 298 + }, + { + "epoch": 0.11451551129835312, + "grad_norm": 25.52973147801263, + "learning_rate": 5e-06, + "loss": 0.5142, + "num_input_tokens_seen": 51664720, + "step": 299 + }, + { + "epoch": 0.11451551129835312, + "loss": 0.5176573395729065, + "loss_ce": 0.01228625699877739, + "loss_iou": 2.6109676361083984, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 51664720, + "step": 299 + }, + { + "epoch": 0.11489850631941785, + "grad_norm": 25.631635843937243, + "learning_rate": 5e-06, + "loss": 0.3688, + "num_input_tokens_seen": 51837584, + "step": 300 + }, + { + "epoch": 0.11489850631941785, + "loss": 0.3400018811225891, + "loss_ce": 0.012365150265395641, + "loss_iou": 1.0193140506744385, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 51837584, + "step": 300 + }, + { + "epoch": 0.11528150134048257, + "grad_norm": 36.27654091995768, + "learning_rate": 5e-06, + "loss": 0.7293, + "num_input_tokens_seen": 52010008, + "step": 301 + }, + { + "epoch": 0.11528150134048257, + "loss": 0.7323391437530518, + "loss_ce": 0.009682867676019669, + "loss_iou": 0.9314297437667847, + "loss_num": 0.72265625, + "loss_xval": 0.72265625, + "num_input_tokens_seen": 52010008, + "step": 301 + }, + { + "epoch": 0.1156644963615473, + "grad_norm": 35.419766480705015, + "learning_rate": 5e-06, + "loss": 0.4071, + "num_input_tokens_seen": 52182672, + "step": 302 + }, + { + "epoch": 0.1156644963615473, + "loss": 0.42031729221343994, + "loss_ce": 0.012114165350794792, + "loss_iou": 1.031744360923767, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 52182672, + "step": 302 + }, + { + "epoch": 0.11604749138261203, + "grad_norm": 42.847599300870044, + "learning_rate": 5e-06, + "loss": 0.6895, + "num_input_tokens_seen": 52355624, + "step": 303 + }, + { + "epoch": 0.11604749138261203, + "loss": 0.6960222721099854, + "loss_ce": 0.011696080677211285, + "loss_iou": 1.0434455871582031, + "loss_num": 0.68359375, + "loss_xval": 0.68359375, + "num_input_tokens_seen": 52355624, + "step": 303 + }, + { + "epoch": 0.11643048640367676, + "grad_norm": 32.86704853238209, + "learning_rate": 5e-06, + "loss": 0.4558, + "num_input_tokens_seen": 52528528, + "step": 304 + }, + { + "epoch": 0.11643048640367676, + "loss": 0.45993906259536743, + "loss_ce": 0.010964442044496536, + "loss_iou": 1.0512936115264893, + "loss_num": 0.44921875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 52528528, + "step": 304 + }, + { + "epoch": 0.11681348142474147, + "grad_norm": 40.050753637117815, + "learning_rate": 5e-06, + "loss": 0.4655, + "num_input_tokens_seen": 52701512, + "step": 305 + }, + { + "epoch": 0.11681348142474147, + "loss": 0.4385191798210144, + "loss_ce": 0.01078478991985321, + "loss_iou": 1.0094962120056152, + "loss_num": 0.427734375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 52701512, + "step": 305 + }, + { + "epoch": 0.1171964764458062, + "grad_norm": 54.80718049230888, + "learning_rate": 5e-06, + "loss": 0.9926, + "num_input_tokens_seen": 52874496, + "step": 306 + }, + { + "epoch": 0.1171964764458062, + "loss": 0.9117997884750366, + "loss_ce": 0.011897400952875614, + "loss_iou": 1.0914658308029175, + "loss_num": 0.8984375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 52874496, + "step": 306 + }, + { + "epoch": 0.11757947146687094, + "grad_norm": 29.68185097214178, + "learning_rate": 5e-06, + "loss": 0.6096, + "num_input_tokens_seen": 53047616, + "step": 307 + }, + { + "epoch": 0.11757947146687094, + "loss": 0.6523078680038452, + "loss_ce": 0.01217116229236126, + "loss_iou": 1.153963327407837, + "loss_num": 0.640625, + "loss_xval": 0.640625, + "num_input_tokens_seen": 53047616, + "step": 307 + }, + { + "epoch": 0.11796246648793565, + "grad_norm": 38.749918455185316, + "learning_rate": 5e-06, + "loss": 0.8524, + "num_input_tokens_seen": 53220816, + "step": 308 + }, + { + "epoch": 0.11796246648793565, + "loss": 0.8711147308349609, + "loss_ce": 0.01222802884876728, + "loss_iou": 1.238293170928955, + "loss_num": 0.859375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 53220816, + "step": 308 + }, + { + "epoch": 0.11834546150900038, + "grad_norm": 56.841035253055814, + "learning_rate": 5e-06, + "loss": 0.6204, + "num_input_tokens_seen": 53393744, + "step": 309 + }, + { + "epoch": 0.11834546150900038, + "loss": 0.6435734629631042, + "loss_ce": 0.01100508775562048, + "loss_iou": 1.0825989246368408, + "loss_num": 0.6328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 53393744, + "step": 309 + }, + { + "epoch": 0.11872845653006511, + "grad_norm": 65.23315581624193, + "learning_rate": 5e-06, + "loss": 1.0426, + "num_input_tokens_seen": 53566608, + "step": 310 + }, + { + "epoch": 0.11872845653006511, + "loss": 1.0427701473236084, + "loss_ce": 0.011520168744027615, + "loss_iou": 1.2222198247909546, + "loss_num": 1.03125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 53566608, + "step": 310 + }, + { + "epoch": 0.11911145155112983, + "grad_norm": 25.316963856011057, + "learning_rate": 5e-06, + "loss": 0.5238, + "num_input_tokens_seen": 53739936, + "step": 311 + }, + { + "epoch": 0.11911145155112983, + "loss": 0.5922715067863464, + "loss_ce": 0.013414058834314346, + "loss_iou": 1.1291487216949463, + "loss_num": 0.578125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 53739936, + "step": 311 + }, + { + "epoch": 0.11949444657219456, + "grad_norm": 33.11489631674243, + "learning_rate": 5e-06, + "loss": 0.7113, + "num_input_tokens_seen": 53912920, + "step": 312 + }, + { + "epoch": 0.11949444657219456, + "loss": 0.6654304265975952, + "loss_ce": 0.013086654245853424, + "loss_iou": 1.065284252166748, + "loss_num": 0.65234375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 53912920, + "step": 312 + }, + { + "epoch": 0.11987744159325929, + "grad_norm": 34.16303791635373, + "learning_rate": 5e-06, + "loss": 0.5034, + "num_input_tokens_seen": 54085432, + "step": 313 + }, + { + "epoch": 0.11987744159325929, + "loss": 0.36735761165618896, + "loss_ce": 0.011644711717963219, + "loss_iou": 1.026916265487671, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 54085432, + "step": 313 + }, + { + "epoch": 0.12026043661432402, + "grad_norm": 38.22994920245021, + "learning_rate": 5e-06, + "loss": 0.7024, + "num_input_tokens_seen": 54258280, + "step": 314 + }, + { + "epoch": 0.12026043661432402, + "loss": 0.7669047117233276, + "loss_ce": 0.01251023355871439, + "loss_iou": 1.106337547302246, + "loss_num": 0.75390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 54258280, + "step": 314 + }, + { + "epoch": 0.12064343163538874, + "grad_norm": 42.09697413056026, + "learning_rate": 5e-06, + "loss": 0.3733, + "num_input_tokens_seen": 54431280, + "step": 315 + }, + { + "epoch": 0.12064343163538874, + "loss": 0.35330480337142944, + "loss_ce": 0.01333899050951004, + "loss_iou": 1.0186564922332764, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 54431280, + "step": 315 + }, + { + "epoch": 0.12102642665645347, + "grad_norm": 35.74366625552186, + "learning_rate": 5e-06, + "loss": 0.6181, + "num_input_tokens_seen": 54604368, + "step": 316 + }, + { + "epoch": 0.12102642665645347, + "loss": 0.6472422480583191, + "loss_ce": 0.013453170657157898, + "loss_iou": 1.0799098014831543, + "loss_num": 0.6328125, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 54604368, + "step": 316 + }, + { + "epoch": 0.1214094216775182, + "grad_norm": 11.527198722935454, + "learning_rate": 5e-06, + "loss": 0.3938, + "num_input_tokens_seen": 54777456, + "step": 317 + }, + { + "epoch": 0.1214094216775182, + "loss": 0.36960890889167786, + "loss_ce": 0.012919455766677856, + "loss_iou": 1.0854665040969849, + "loss_num": 0.357421875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 54777456, + "step": 317 + }, + { + "epoch": 0.12179241669858291, + "grad_norm": 24.293199292377544, + "learning_rate": 5e-06, + "loss": 0.3295, + "num_input_tokens_seen": 54950048, + "step": 318 + }, + { + "epoch": 0.12179241669858291, + "loss": 0.35775184631347656, + "loss_ce": 0.011560434475541115, + "loss_iou": 1.0050764083862305, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 54950048, + "step": 318 + }, + { + "epoch": 0.12217541171964764, + "grad_norm": 33.30097884131126, + "learning_rate": 5e-06, + "loss": 0.4818, + "num_input_tokens_seen": 55123176, + "step": 319 + }, + { + "epoch": 0.12217541171964764, + "loss": 0.4854978322982788, + "loss_ce": 0.012841572985053062, + "loss_iou": 1.0677175521850586, + "loss_num": 0.47265625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 55123176, + "step": 319 + }, + { + "epoch": 0.12255840674071238, + "grad_norm": 30.662633429604817, + "learning_rate": 5e-06, + "loss": 0.4641, + "num_input_tokens_seen": 55295904, + "step": 320 + }, + { + "epoch": 0.12255840674071238, + "loss": 0.46251463890075684, + "loss_ce": 0.012563494965434074, + "loss_iou": 1.0785880088806152, + "loss_num": 0.44921875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 55295904, + "step": 320 + }, + { + "epoch": 0.12294140176177709, + "grad_norm": 36.52319132677881, + "learning_rate": 5e-06, + "loss": 0.4746, + "num_input_tokens_seen": 55468688, + "step": 321 + }, + { + "epoch": 0.12294140176177709, + "loss": 0.38861754536628723, + "loss_ce": 0.012885136529803276, + "loss_iou": 1.047745943069458, + "loss_num": 0.375, + "loss_xval": 0.375, + "num_input_tokens_seen": 55468688, + "step": 321 + }, + { + "epoch": 0.12332439678284182, + "grad_norm": 30.7933132232677, + "learning_rate": 5e-06, + "loss": 0.4801, + "num_input_tokens_seen": 55641392, + "step": 322 + }, + { + "epoch": 0.12332439678284182, + "loss": 0.42514312267303467, + "loss_ce": 0.014010298997163773, + "loss_iou": 1.0376410484313965, + "loss_num": 0.41015625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 55641392, + "step": 322 + }, + { + "epoch": 0.12370739180390655, + "grad_norm": 29.101293969380325, + "learning_rate": 5e-06, + "loss": 0.3619, + "num_input_tokens_seen": 55814200, + "step": 323 + }, + { + "epoch": 0.12370739180390655, + "loss": 0.40042075514793396, + "loss_ce": 0.012481309473514557, + "loss_iou": 1.045759677886963, + "loss_num": 0.388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 55814200, + "step": 323 + }, + { + "epoch": 0.12409038682497127, + "grad_norm": 32.08435616179997, + "learning_rate": 5e-06, + "loss": 0.4483, + "num_input_tokens_seen": 55987296, + "step": 324 + }, + { + "epoch": 0.12409038682497127, + "loss": 0.4775657653808594, + "loss_ce": 0.013454440981149673, + "loss_iou": 1.0528295040130615, + "loss_num": 0.46484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 55987296, + "step": 324 + }, + { + "epoch": 0.124473381846036, + "grad_norm": 28.387154844066295, + "learning_rate": 5e-06, + "loss": 0.409, + "num_input_tokens_seen": 56159808, + "step": 325 + }, + { + "epoch": 0.124473381846036, + "loss": 0.43876785039901733, + "loss_ce": 0.012864558026194572, + "loss_iou": 1.0930651426315308, + "loss_num": 0.42578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 56159808, + "step": 325 + }, + { + "epoch": 0.12485637686710073, + "grad_norm": 33.37057560894794, + "learning_rate": 5e-06, + "loss": 0.4507, + "num_input_tokens_seen": 56333032, + "step": 326 + }, + { + "epoch": 0.12485637686710073, + "loss": 0.4985562860965729, + "loss_ce": 0.012716436758637428, + "loss_iou": 1.147887110710144, + "loss_num": 0.486328125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 56333032, + "step": 326 + }, + { + "epoch": 0.12523937188816545, + "grad_norm": 37.32139130798763, + "learning_rate": 5e-06, + "loss": 0.4723, + "num_input_tokens_seen": 56502904, + "step": 327 + }, + { + "epoch": 0.12523937188816545, + "loss": 0.44586414098739624, + "loss_ce": 0.012514510191977024, + "loss_iou": 1.1612491607666016, + "loss_num": 0.43359375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 56502904, + "step": 327 + }, + { + "epoch": 0.12562236690923018, + "grad_norm": 28.902907172422736, + "learning_rate": 5e-06, + "loss": 0.4802, + "num_input_tokens_seen": 56675760, + "step": 328 + }, + { + "epoch": 0.12562236690923018, + "loss": 0.4507526159286499, + "loss_ce": 0.0130084790289402, + "loss_iou": 1.0351426601409912, + "loss_num": 0.4375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 56675760, + "step": 328 + }, + { + "epoch": 0.1260053619302949, + "grad_norm": 38.32465303461408, + "learning_rate": 5e-06, + "loss": 0.4264, + "num_input_tokens_seen": 56849296, + "step": 329 + }, + { + "epoch": 0.1260053619302949, + "loss": 0.4553164541721344, + "loss_ce": 0.01342192105948925, + "loss_iou": 1.3379547595977783, + "loss_num": 0.44140625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 56849296, + "step": 329 + }, + { + "epoch": 0.12638835695135964, + "grad_norm": 31.373504744313774, + "learning_rate": 5e-06, + "loss": 0.5351, + "num_input_tokens_seen": 57022456, + "step": 330 + }, + { + "epoch": 0.12638835695135964, + "loss": 0.5396110415458679, + "loss_ce": 0.013732131570577621, + "loss_iou": 1.042524814605713, + "loss_num": 0.52734375, + "loss_xval": 0.52734375, + "num_input_tokens_seen": 57022456, + "step": 330 + }, + { + "epoch": 0.12677135197242437, + "grad_norm": 25.24014458006925, + "learning_rate": 5e-06, + "loss": 0.3363, + "num_input_tokens_seen": 57195656, + "step": 331 + }, + { + "epoch": 0.12677135197242437, + "loss": 0.30840808153152466, + "loss_ce": 0.013608288019895554, + "loss_iou": 1.026963710784912, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 57195656, + "step": 331 + }, + { + "epoch": 0.1271543469934891, + "grad_norm": 20.587109923649756, + "learning_rate": 5e-06, + "loss": 0.5719, + "num_input_tokens_seen": 57368784, + "step": 332 + }, + { + "epoch": 0.1271543469934891, + "loss": 0.5962228775024414, + "loss_ce": 0.014435779303312302, + "loss_iou": 1.1006231307983398, + "loss_num": 0.58203125, + "loss_xval": 0.58203125, + "num_input_tokens_seen": 57368784, + "step": 332 + }, + { + "epoch": 0.1275373420145538, + "grad_norm": 19.42338701583887, + "learning_rate": 5e-06, + "loss": 0.3134, + "num_input_tokens_seen": 57541640, + "step": 333 + }, + { + "epoch": 0.1275373420145538, + "loss": 0.3170633018016815, + "loss_ce": 0.013718578033149242, + "loss_iou": 1.0257070064544678, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 57541640, + "step": 333 + }, + { + "epoch": 0.12792033703561853, + "grad_norm": 23.602551955156134, + "learning_rate": 5e-06, + "loss": 0.4829, + "num_input_tokens_seen": 57714872, + "step": 334 + }, + { + "epoch": 0.12792033703561853, + "loss": 0.5177361369132996, + "loss_ce": 0.01260918378829956, + "loss_iou": 1.1138770580291748, + "loss_num": 0.50390625, + "loss_xval": 0.50390625, + "num_input_tokens_seen": 57714872, + "step": 334 + }, + { + "epoch": 0.12830333205668326, + "grad_norm": 28.002566112166686, + "learning_rate": 5e-06, + "loss": 0.3301, + "num_input_tokens_seen": 57888000, + "step": 335 + }, + { + "epoch": 0.12830333205668326, + "loss": 0.3402954936027527, + "loss_ce": 0.012658771127462387, + "loss_iou": 1.0171738862991333, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 57888000, + "step": 335 + }, + { + "epoch": 0.128686327077748, + "grad_norm": 29.871569630960213, + "learning_rate": 5e-06, + "loss": 0.4392, + "num_input_tokens_seen": 58060760, + "step": 336 + }, + { + "epoch": 0.128686327077748, + "loss": 0.4566802382469177, + "loss_ce": 0.012344276532530785, + "loss_iou": 1.161993384361267, + "loss_num": 0.4453125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 58060760, + "step": 336 + }, + { + "epoch": 0.12906932209881272, + "grad_norm": 18.46477895053161, + "learning_rate": 5e-06, + "loss": 0.2781, + "num_input_tokens_seen": 58233368, + "step": 337 + }, + { + "epoch": 0.12906932209881272, + "loss": 0.2804095149040222, + "loss_ce": 0.012953441590070724, + "loss_iou": 1.0590258836746216, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 58233368, + "step": 337 + }, + { + "epoch": 0.12945231711987745, + "grad_norm": 19.423082442865574, + "learning_rate": 5e-06, + "loss": 0.3513, + "num_input_tokens_seen": 58405992, + "step": 338 + }, + { + "epoch": 0.12945231711987745, + "loss": 0.3404031991958618, + "loss_ce": 0.01227821595966816, + "loss_iou": 1.0197341442108154, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 58405992, + "step": 338 + }, + { + "epoch": 0.12983531214094216, + "grad_norm": 28.033606565177212, + "learning_rate": 5e-06, + "loss": 0.388, + "num_input_tokens_seen": 58578624, + "step": 339 + }, + { + "epoch": 0.12983531214094216, + "loss": 0.4279400408267975, + "loss_ce": 0.01265685074031353, + "loss_iou": 1.0694093704223633, + "loss_num": 0.416015625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 58578624, + "step": 339 + }, + { + "epoch": 0.13021830716200689, + "grad_norm": 36.54563621057312, + "learning_rate": 5e-06, + "loss": 0.5259, + "num_input_tokens_seen": 58751488, + "step": 340 + }, + { + "epoch": 0.13021830716200689, + "loss": 0.5527739524841309, + "loss_ce": 0.01371149905025959, + "loss_iou": 1.3441195487976074, + "loss_num": 0.5390625, + "loss_xval": 0.5390625, + "num_input_tokens_seen": 58751488, + "step": 340 + }, + { + "epoch": 0.13060130218307162, + "grad_norm": 25.999930582103996, + "learning_rate": 5e-06, + "loss": 0.3373, + "num_input_tokens_seen": 58924544, + "step": 341 + }, + { + "epoch": 0.13060130218307162, + "loss": 0.33388644456863403, + "loss_ce": 0.012719469144940376, + "loss_iou": 1.0462379455566406, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 58924544, + "step": 341 + }, + { + "epoch": 0.13098429720413635, + "grad_norm": 20.40995864655311, + "learning_rate": 5e-06, + "loss": 0.4059, + "num_input_tokens_seen": 59097784, + "step": 342 + }, + { + "epoch": 0.13098429720413635, + "loss": 0.3902062177658081, + "loss_ce": 0.012032410129904747, + "loss_iou": 1.0180613994598389, + "loss_num": 0.37890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 59097784, + "step": 342 + }, + { + "epoch": 0.13136729222520108, + "grad_norm": 30.970832407923083, + "learning_rate": 5e-06, + "loss": 0.3628, + "num_input_tokens_seen": 59270568, + "step": 343 + }, + { + "epoch": 0.13136729222520108, + "loss": 0.3716558814048767, + "loss_ce": 0.012402951717376709, + "loss_iou": 1.0250247716903687, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 59270568, + "step": 343 + }, + { + "epoch": 0.1317502872462658, + "grad_norm": 33.1759437003962, + "learning_rate": 5e-06, + "loss": 0.5395, + "num_input_tokens_seen": 59443696, + "step": 344 + }, + { + "epoch": 0.1317502872462658, + "loss": 0.49835091829299927, + "loss_ce": 0.012022789567708969, + "loss_iou": 1.127155065536499, + "loss_num": 0.486328125, + "loss_xval": 0.486328125, + "num_input_tokens_seen": 59443696, + "step": 344 + }, + { + "epoch": 0.13213328226733054, + "grad_norm": 21.268179837572763, + "learning_rate": 5e-06, + "loss": 0.375, + "num_input_tokens_seen": 59616664, + "step": 345 + }, + { + "epoch": 0.13213328226733054, + "loss": 0.41703686118125916, + "loss_ce": 0.013228273950517178, + "loss_iou": 1.1385784149169922, + "loss_num": 0.404296875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 59616664, + "step": 345 + }, + { + "epoch": 0.13251627728839524, + "grad_norm": 22.353032834658464, + "learning_rate": 5e-06, + "loss": 0.3995, + "num_input_tokens_seen": 59789536, + "step": 346 + }, + { + "epoch": 0.13251627728839524, + "loss": 0.44128888845443726, + "loss_ce": 0.013066226616501808, + "loss_iou": 1.0135618448257446, + "loss_num": 0.427734375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 59789536, + "step": 346 + }, + { + "epoch": 0.13289927230945997, + "grad_norm": 31.340023846265844, + "learning_rate": 5e-06, + "loss": 0.3996, + "num_input_tokens_seen": 59962072, + "step": 347 + }, + { + "epoch": 0.13289927230945997, + "loss": 0.36322999000549316, + "loss_ce": 0.013132363557815552, + "loss_iou": 1.021392822265625, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 59962072, + "step": 347 + }, + { + "epoch": 0.1332822673305247, + "grad_norm": 30.760240558756742, + "learning_rate": 5e-06, + "loss": 0.3693, + "num_input_tokens_seen": 60134888, + "step": 348 + }, + { + "epoch": 0.1332822673305247, + "loss": 0.3806111812591553, + "loss_ce": 0.013179522007703781, + "loss_iou": 1.0316166877746582, + "loss_num": 0.3671875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 60134888, + "step": 348 + }, + { + "epoch": 0.13366526235158943, + "grad_norm": 27.02673377461334, + "learning_rate": 5e-06, + "loss": 0.3376, + "num_input_tokens_seen": 60307536, + "step": 349 + }, + { + "epoch": 0.13366526235158943, + "loss": 0.36044585704803467, + "loss_ce": 0.01364409551024437, + "loss_iou": 1.0658987760543823, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 60307536, + "step": 349 + }, + { + "epoch": 0.13404825737265416, + "grad_norm": 19.969380481440243, + "learning_rate": 5e-06, + "loss": 0.4478, + "num_input_tokens_seen": 60480200, + "step": 350 + }, + { + "epoch": 0.13404825737265416, + "loss": 0.4545081853866577, + "loss_ce": 0.012613639235496521, + "loss_iou": 1.1202318668365479, + "loss_num": 0.44140625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 60480200, + "step": 350 + }, + { + "epoch": 0.1344312523937189, + "grad_norm": 23.64571380270694, + "learning_rate": 5e-06, + "loss": 0.2641, + "num_input_tokens_seen": 60652832, + "step": 351 + }, + { + "epoch": 0.1344312523937189, + "loss": 0.24726730585098267, + "loss_ce": 0.012281963601708412, + "loss_iou": 1.019364595413208, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 60652832, + "step": 351 + }, + { + "epoch": 0.1348142474147836, + "grad_norm": 37.0483407213676, + "learning_rate": 5e-06, + "loss": 0.4657, + "num_input_tokens_seen": 60825920, + "step": 352 + }, + { + "epoch": 0.1348142474147836, + "loss": 0.4851919114589691, + "loss_ce": 0.011559093371033669, + "loss_iou": 1.0131995677947998, + "loss_num": 0.47265625, + "loss_xval": 0.47265625, + "num_input_tokens_seen": 60825920, + "step": 352 + }, + { + "epoch": 0.13519724243584832, + "grad_norm": 27.351549801074697, + "learning_rate": 5e-06, + "loss": 0.3466, + "num_input_tokens_seen": 60998592, + "step": 353 + }, + { + "epoch": 0.13519724243584832, + "loss": 0.2995198667049408, + "loss_ce": 0.013387059792876244, + "loss_iou": 1.0160716772079468, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 60998592, + "step": 353 + }, + { + "epoch": 0.13558023745691306, + "grad_norm": 19.41353116033888, + "learning_rate": 5e-06, + "loss": 0.5005, + "num_input_tokens_seen": 61171408, + "step": 354 + }, + { + "epoch": 0.13558023745691306, + "loss": 0.5307155251502991, + "loss_ce": 0.01289329119026661, + "loss_iou": 1.0501670837402344, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 61171408, + "step": 354 + }, + { + "epoch": 0.13596323247797779, + "grad_norm": 17.48712667532762, + "learning_rate": 5e-06, + "loss": 0.27, + "num_input_tokens_seen": 61344744, + "step": 355 + }, + { + "epoch": 0.13596323247797779, + "loss": 0.30249089002609253, + "loss_ce": 0.013184243813157082, + "loss_iou": 1.1163172721862793, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 61344744, + "step": 355 + }, + { + "epoch": 0.13634622749904252, + "grad_norm": 23.233388365718046, + "learning_rate": 5e-06, + "loss": 0.3519, + "num_input_tokens_seen": 61517936, + "step": 356 + }, + { + "epoch": 0.13634622749904252, + "loss": 0.3717007339000702, + "loss_ce": 0.012569872662425041, + "loss_iou": 1.3388407230377197, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 61517936, + "step": 356 + }, + { + "epoch": 0.13672922252010725, + "grad_norm": 26.758695720823354, + "learning_rate": 5e-06, + "loss": 0.3853, + "num_input_tokens_seen": 61691008, + "step": 357 + }, + { + "epoch": 0.13672922252010725, + "loss": 0.35257813334465027, + "loss_ce": 0.01236817054450512, + "loss_iou": 1.1086204051971436, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 61691008, + "step": 357 + }, + { + "epoch": 0.13711221754117198, + "grad_norm": 28.263344707617506, + "learning_rate": 5e-06, + "loss": 0.3578, + "num_input_tokens_seen": 61864416, + "step": 358 + }, + { + "epoch": 0.13711221754117198, + "loss": 0.3794240653514862, + "loss_ce": 0.013213129714131355, + "loss_iou": 1.0335272550582886, + "loss_num": 0.3671875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 61864416, + "step": 358 + }, + { + "epoch": 0.13749521256223668, + "grad_norm": 40.90973744330012, + "learning_rate": 5e-06, + "loss": 0.3265, + "num_input_tokens_seen": 62037032, + "step": 359 + }, + { + "epoch": 0.13749521256223668, + "loss": 0.3287930190563202, + "loss_ce": 0.013119187206029892, + "loss_iou": 1.017606258392334, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 62037032, + "step": 359 + }, + { + "epoch": 0.1378782075833014, + "grad_norm": 40.920368087423206, + "learning_rate": 5e-06, + "loss": 0.4415, + "num_input_tokens_seen": 62210144, + "step": 360 + }, + { + "epoch": 0.1378782075833014, + "loss": 0.3989262580871582, + "loss_ce": 0.012207522056996822, + "loss_iou": 1.0507839918136597, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 62210144, + "step": 360 + }, + { + "epoch": 0.13826120260436614, + "grad_norm": 21.84843688634946, + "learning_rate": 5e-06, + "loss": 0.3067, + "num_input_tokens_seen": 62383016, + "step": 361 + }, + { + "epoch": 0.13826120260436614, + "loss": 0.3030824065208435, + "loss_ce": 0.012921245768666267, + "loss_iou": 1.1078020334243774, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 62383016, + "step": 361 + }, + { + "epoch": 0.13864419762543087, + "grad_norm": 25.303836611757855, + "learning_rate": 5e-06, + "loss": 0.3307, + "num_input_tokens_seen": 62555992, + "step": 362 + }, + { + "epoch": 0.13864419762543087, + "loss": 0.3620717227458954, + "loss_ce": 0.013927199877798557, + "loss_iou": 1.0925281047821045, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 62555992, + "step": 362 + }, + { + "epoch": 0.1390271926464956, + "grad_norm": 38.079841138035974, + "learning_rate": 5e-06, + "loss": 0.3423, + "num_input_tokens_seen": 62729008, + "step": 363 + }, + { + "epoch": 0.1390271926464956, + "loss": 0.34251147508621216, + "loss_ce": 0.01243335846811533, + "loss_iou": 1.069159746170044, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 62729008, + "step": 363 + }, + { + "epoch": 0.13941018766756033, + "grad_norm": 38.19344770186388, + "learning_rate": 5e-06, + "loss": 0.438, + "num_input_tokens_seen": 62902368, + "step": 364 + }, + { + "epoch": 0.13941018766756033, + "loss": 0.4055544137954712, + "loss_ce": 0.012976281344890594, + "loss_iou": 1.0455716848373413, + "loss_num": 0.392578125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 62902368, + "step": 364 + }, + { + "epoch": 0.13979318268862506, + "grad_norm": 25.96910003641756, + "learning_rate": 5e-06, + "loss": 0.3534, + "num_input_tokens_seen": 63075368, + "step": 365 + }, + { + "epoch": 0.13979318268862506, + "loss": 0.3414488434791565, + "loss_ce": 0.011614874936640263, + "loss_iou": 1.0137832164764404, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 63075368, + "step": 365 + }, + { + "epoch": 0.14017617770968976, + "grad_norm": 46.3796791167195, + "learning_rate": 5e-06, + "loss": 0.5287, + "num_input_tokens_seen": 63248064, + "step": 366 + }, + { + "epoch": 0.14017617770968976, + "loss": 0.5414717793464661, + "loss_ce": 0.011930737644433975, + "loss_iou": 1.0576518774032593, + "loss_num": 0.53125, + "loss_xval": 0.53125, + "num_input_tokens_seen": 63248064, + "step": 366 + }, + { + "epoch": 0.1405591727307545, + "grad_norm": 49.121315858050615, + "learning_rate": 5e-06, + "loss": 0.433, + "num_input_tokens_seen": 63420776, + "step": 367 + }, + { + "epoch": 0.1405591727307545, + "loss": 0.4409557580947876, + "loss_ce": 0.01175654400140047, + "loss_iou": 1.0536702871322632, + "loss_num": 0.4296875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 63420776, + "step": 367 + }, + { + "epoch": 0.14094216775181923, + "grad_norm": 64.61330303892356, + "learning_rate": 5e-06, + "loss": 0.8656, + "num_input_tokens_seen": 63590328, + "step": 368 + }, + { + "epoch": 0.14094216775181923, + "loss": 0.7870058417320251, + "loss_ce": 0.012591780163347721, + "loss_iou": 1.0539166927337646, + "loss_num": 0.7734375, + "loss_xval": 0.7734375, + "num_input_tokens_seen": 63590328, + "step": 368 + }, + { + "epoch": 0.14132516277288396, + "grad_norm": 24.734888518615882, + "learning_rate": 5e-06, + "loss": 0.4936, + "num_input_tokens_seen": 63763272, + "step": 369 + }, + { + "epoch": 0.14132516277288396, + "loss": 0.4711846113204956, + "loss_ce": 0.013420945033431053, + "loss_iou": 1.0639902353286743, + "loss_num": 0.45703125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 63763272, + "step": 369 + }, + { + "epoch": 0.14170815779394869, + "grad_norm": 27.347220635004692, + "learning_rate": 5e-06, + "loss": 0.6949, + "num_input_tokens_seen": 63936464, + "step": 370 + }, + { + "epoch": 0.14170815779394869, + "loss": 0.7150565385818481, + "loss_ce": 0.014861243776977062, + "loss_iou": 1.0729635953903198, + "loss_num": 0.69921875, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 63936464, + "step": 370 + }, + { + "epoch": 0.14209115281501342, + "grad_norm": 51.38802471251687, + "learning_rate": 5e-06, + "loss": 0.6282, + "num_input_tokens_seen": 64109728, + "step": 371 + }, + { + "epoch": 0.14209115281501342, + "loss": 0.5930242538452148, + "loss_ce": 0.013922711834311485, + "loss_iou": 1.049805760383606, + "loss_num": 0.578125, + "loss_xval": 0.578125, + "num_input_tokens_seen": 64109728, + "step": 371 + }, + { + "epoch": 0.14247414783607812, + "grad_norm": 60.02099082142038, + "learning_rate": 5e-06, + "loss": 1.0242, + "num_input_tokens_seen": 64282720, + "step": 372 + }, + { + "epoch": 0.14247414783607812, + "loss": 1.0294208526611328, + "loss_ce": 0.01281927339732647, + "loss_iou": 1.1568076610565186, + "loss_num": 1.015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 64282720, + "step": 372 + }, + { + "epoch": 0.14285714285714285, + "grad_norm": 28.16591489985602, + "learning_rate": 5e-06, + "loss": 0.6113, + "num_input_tokens_seen": 64455304, + "step": 373 + }, + { + "epoch": 0.14285714285714285, + "loss": 0.5786975622177124, + "loss_ce": 0.014488564804196358, + "loss_iou": 1.6266827583312988, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 64455304, + "step": 373 + }, + { + "epoch": 0.14324013787820758, + "grad_norm": 41.33892512644171, + "learning_rate": 5e-06, + "loss": 0.9286, + "num_input_tokens_seen": 64628736, + "step": 374 + }, + { + "epoch": 0.14324013787820758, + "loss": 0.9207422733306885, + "loss_ce": 0.014492250978946686, + "loss_iou": 1.0976927280426025, + "loss_num": 0.90625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 64628736, + "step": 374 + }, + { + "epoch": 0.1436231328992723, + "grad_norm": 59.9188461455712, + "learning_rate": 5e-06, + "loss": 0.6966, + "num_input_tokens_seen": 64801456, + "step": 375 + }, + { + "epoch": 0.1436231328992723, + "loss": 0.6877446174621582, + "loss_ce": 0.012939905747771263, + "loss_iou": 1.0618836879730225, + "loss_num": 0.67578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 64801456, + "step": 375 + }, + { + "epoch": 0.14400612792033704, + "grad_norm": 61.689073573426874, + "learning_rate": 5e-06, + "loss": 0.9012, + "num_input_tokens_seen": 64974208, + "step": 376 + }, + { + "epoch": 0.14400612792033704, + "loss": 0.9289538860321045, + "loss_ce": 0.012938244268298149, + "loss_iou": 1.1032987833023071, + "loss_num": 0.9140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 64974208, + "step": 376 + }, + { + "epoch": 0.14438912294140177, + "grad_norm": 19.84003180347874, + "learning_rate": 5e-06, + "loss": 0.5017, + "num_input_tokens_seen": 65147280, + "step": 377 + }, + { + "epoch": 0.14438912294140177, + "loss": 0.524929404258728, + "loss_ce": 0.01614035852253437, + "loss_iou": 1.0543954372406006, + "loss_num": 0.5078125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 65147280, + "step": 377 + }, + { + "epoch": 0.1447721179624665, + "grad_norm": 24.906736164638446, + "learning_rate": 5e-06, + "loss": 0.671, + "num_input_tokens_seen": 65320232, + "step": 378 + }, + { + "epoch": 0.1447721179624665, + "loss": 0.6904242634773254, + "loss_ce": 0.015131345950067043, + "loss_iou": 1.0514883995056152, + "loss_num": 0.67578125, + "loss_xval": 0.67578125, + "num_input_tokens_seen": 65320232, + "step": 378 + }, + { + "epoch": 0.1451551129835312, + "grad_norm": 41.04443374679105, + "learning_rate": 5e-06, + "loss": 0.431, + "num_input_tokens_seen": 65493048, + "step": 379 + }, + { + "epoch": 0.1451551129835312, + "loss": 0.4022664427757263, + "loss_ce": 0.014082863926887512, + "loss_iou": 1.0451061725616455, + "loss_num": 0.388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 65493048, + "step": 379 + }, + { + "epoch": 0.14553810800459593, + "grad_norm": 50.32946176470696, + "learning_rate": 5e-06, + "loss": 0.8086, + "num_input_tokens_seen": 65665960, + "step": 380 + }, + { + "epoch": 0.14553810800459593, + "loss": 0.8396444916725159, + "loss_ce": 0.014937466010451317, + "loss_iou": 2.551274061203003, + "loss_num": 0.82421875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 65665960, + "step": 380 + }, + { + "epoch": 0.14592110302566066, + "grad_norm": 25.037409993565685, + "learning_rate": 5e-06, + "loss": 0.3744, + "num_input_tokens_seen": 65838928, + "step": 381 + }, + { + "epoch": 0.14592110302566066, + "loss": 0.378633975982666, + "loss_ce": 0.01535273902118206, + "loss_iou": 1.0505462884902954, + "loss_num": 0.36328125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 65838928, + "step": 381 + }, + { + "epoch": 0.1463040980467254, + "grad_norm": 31.282118782535907, + "learning_rate": 5e-06, + "loss": 0.5373, + "num_input_tokens_seen": 66011632, + "step": 382 + }, + { + "epoch": 0.1463040980467254, + "loss": 0.5232303142547607, + "loss_ce": 0.013464685529470444, + "loss_iou": 1.0495527982711792, + "loss_num": 0.5078125, + "loss_xval": 0.5078125, + "num_input_tokens_seen": 66011632, + "step": 382 + }, + { + "epoch": 0.14668709306779013, + "grad_norm": 27.643199490450233, + "learning_rate": 5e-06, + "loss": 0.4729, + "num_input_tokens_seen": 66184064, + "step": 383 + }, + { + "epoch": 0.14668709306779013, + "loss": 0.4467092454433441, + "loss_ce": 0.013481696136295795, + "loss_iou": 1.053185224533081, + "loss_num": 0.43359375, + "loss_xval": 0.43359375, + "num_input_tokens_seen": 66184064, + "step": 383 + }, + { + "epoch": 0.14707008808885486, + "grad_norm": 31.827409372569036, + "learning_rate": 5e-06, + "loss": 0.464, + "num_input_tokens_seen": 66356768, + "step": 384 + }, + { + "epoch": 0.14707008808885486, + "loss": 0.4203503429889679, + "loss_ce": 0.01434449665248394, + "loss_iou": 1.0519211292266846, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 66356768, + "step": 384 + }, + { + "epoch": 0.14745308310991956, + "grad_norm": 34.496761129495205, + "learning_rate": 5e-06, + "loss": 0.4934, + "num_input_tokens_seen": 66529864, + "step": 385 + }, + { + "epoch": 0.14745308310991956, + "loss": 0.4703870415687561, + "loss_ce": 0.01433233730494976, + "loss_iou": 1.0265611410140991, + "loss_num": 0.45703125, + "loss_xval": 0.45703125, + "num_input_tokens_seen": 66529864, + "step": 385 + }, + { + "epoch": 0.1478360781309843, + "grad_norm": 26.041557954952363, + "learning_rate": 5e-06, + "loss": 0.4909, + "num_input_tokens_seen": 66702944, + "step": 386 + }, + { + "epoch": 0.1478360781309843, + "loss": 0.4845116138458252, + "loss_ce": 0.01673818938434124, + "loss_iou": 1.0745141506195068, + "loss_num": 0.46875, + "loss_xval": 0.46875, + "num_input_tokens_seen": 66702944, + "step": 386 + }, + { + "epoch": 0.14821907315204902, + "grad_norm": 32.47151434777721, + "learning_rate": 5e-06, + "loss": 0.3861, + "num_input_tokens_seen": 66876104, + "step": 387 + }, + { + "epoch": 0.14821907315204902, + "loss": 0.3625410497188568, + "loss_ce": 0.016105500981211662, + "loss_iou": 1.0794681310653687, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 66876104, + "step": 387 + }, + { + "epoch": 0.14860206817311375, + "grad_norm": 29.411348605919954, + "learning_rate": 5e-06, + "loss": 0.4896, + "num_input_tokens_seen": 67049216, + "step": 388 + }, + { + "epoch": 0.14860206817311375, + "loss": 0.4619877338409424, + "loss_ce": 0.014722118154168129, + "loss_iou": 1.6182482242584229, + "loss_num": 0.447265625, + "loss_xval": 0.447265625, + "num_input_tokens_seen": 67049216, + "step": 388 + }, + { + "epoch": 0.14898506319417848, + "grad_norm": 12.758152150747003, + "learning_rate": 5e-06, + "loss": 0.3211, + "num_input_tokens_seen": 67222088, + "step": 389 + }, + { + "epoch": 0.14898506319417848, + "loss": 0.36468395590782166, + "loss_ce": 0.013975952751934528, + "loss_iou": 1.072611689567566, + "loss_num": 0.3515625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 67222088, + "step": 389 + }, + { + "epoch": 0.1493680582152432, + "grad_norm": 33.42467835445704, + "learning_rate": 5e-06, + "loss": 0.2915, + "num_input_tokens_seen": 67394984, + "step": 390 + }, + { + "epoch": 0.1493680582152432, + "loss": 0.3005879819393158, + "loss_ce": 0.013722749426960945, + "loss_iou": 1.1104605197906494, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 67394984, + "step": 390 + }, + { + "epoch": 0.14975105323630794, + "grad_norm": 34.052127278713876, + "learning_rate": 5e-06, + "loss": 0.3981, + "num_input_tokens_seen": 67567776, + "step": 391 + }, + { + "epoch": 0.14975105323630794, + "loss": 0.42281055450439453, + "loss_ce": 0.013630880042910576, + "loss_iou": 1.039305329322815, + "loss_num": 0.41015625, + "loss_xval": 0.41015625, + "num_input_tokens_seen": 67567776, + "step": 391 + }, + { + "epoch": 0.15013404825737264, + "grad_norm": 24.50332959564291, + "learning_rate": 5e-06, + "loss": 0.3559, + "num_input_tokens_seen": 67740832, + "step": 392 + }, + { + "epoch": 0.15013404825737264, + "loss": 0.3538506031036377, + "loss_ce": 0.01486134435981512, + "loss_iou": 1.0434025526046753, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 67740832, + "step": 392 + }, + { + "epoch": 0.15051704327843737, + "grad_norm": 31.62897829173563, + "learning_rate": 5e-06, + "loss": 0.3742, + "num_input_tokens_seen": 67913800, + "step": 393 + }, + { + "epoch": 0.15051704327843737, + "loss": 0.37665513157844543, + "loss_ce": 0.01666979305446148, + "loss_iou": 1.0603320598602295, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 67913800, + "step": 393 + }, + { + "epoch": 0.1509000382995021, + "grad_norm": 18.01387751534475, + "learning_rate": 5e-06, + "loss": 0.4846, + "num_input_tokens_seen": 68086800, + "step": 394 + }, + { + "epoch": 0.1509000382995021, + "loss": 0.4789716601371765, + "loss_ce": 0.013883786275982857, + "loss_iou": 1.030623435974121, + "loss_num": 0.46484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 68086800, + "step": 394 + }, + { + "epoch": 0.15128303332056683, + "grad_norm": 22.891881277396507, + "learning_rate": 5e-06, + "loss": 0.269, + "num_input_tokens_seen": 68259872, + "step": 395 + }, + { + "epoch": 0.15128303332056683, + "loss": 0.2748351991176605, + "loss_ce": 0.015557845123112202, + "loss_iou": 1.0188287496566772, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 68259872, + "step": 395 + }, + { + "epoch": 0.15166602834163156, + "grad_norm": 24.958916511184206, + "learning_rate": 5e-06, + "loss": 0.4305, + "num_input_tokens_seen": 68429376, + "step": 396 + }, + { + "epoch": 0.15166602834163156, + "loss": 0.4136584997177124, + "loss_ce": 0.015221013687551022, + "loss_iou": 1.0223636627197266, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 68429376, + "step": 396 + }, + { + "epoch": 0.1520490233626963, + "grad_norm": 21.58147943119569, + "learning_rate": 5e-06, + "loss": 0.3588, + "num_input_tokens_seen": 68602144, + "step": 397 + }, + { + "epoch": 0.1520490233626963, + "loss": 0.3096686005592346, + "loss_ce": 0.015357056632637978, + "loss_iou": 1.025606393814087, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 68602144, + "step": 397 + }, + { + "epoch": 0.152432018383761, + "grad_norm": 23.00317159852234, + "learning_rate": 5e-06, + "loss": 0.4268, + "num_input_tokens_seen": 68775072, + "step": 398 + }, + { + "epoch": 0.152432018383761, + "loss": 0.4410243034362793, + "loss_ce": 0.014998898841440678, + "loss_iou": 1.0346786975860596, + "loss_num": 0.42578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 68775072, + "step": 398 + }, + { + "epoch": 0.15281501340482573, + "grad_norm": 22.980156215562417, + "learning_rate": 5e-06, + "loss": 0.319, + "num_input_tokens_seen": 68948048, + "step": 399 + }, + { + "epoch": 0.15281501340482573, + "loss": 0.3305703401565552, + "loss_ce": 0.014896501787006855, + "loss_iou": 1.210439920425415, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 68948048, + "step": 399 + }, + { + "epoch": 0.15319800842589046, + "grad_norm": 24.19557129912023, + "learning_rate": 5e-06, + "loss": 0.3657, + "num_input_tokens_seen": 69120840, + "step": 400 + }, + { + "epoch": 0.15319800842589046, + "loss": 0.3948410153388977, + "loss_ce": 0.014591965824365616, + "loss_iou": 0.9931958317756653, + "loss_num": 0.380859375, + "loss_xval": 0.380859375, + "num_input_tokens_seen": 69120840, + "step": 400 + }, + { + "epoch": 0.1535810034469552, + "grad_norm": 28.561895573863982, + "learning_rate": 5e-06, + "loss": 0.2773, + "num_input_tokens_seen": 69294032, + "step": 401 + }, + { + "epoch": 0.1535810034469552, + "loss": 0.23597465455532074, + "loss_ce": 0.015027390792965889, + "loss_iou": 1.0210425853729248, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 69294032, + "step": 401 + }, + { + "epoch": 0.15396399846801992, + "grad_norm": 16.847300127233122, + "learning_rate": 5e-06, + "loss": 0.4095, + "num_input_tokens_seen": 69466776, + "step": 402 + }, + { + "epoch": 0.15396399846801992, + "loss": 0.4185844659805298, + "loss_ce": 0.01477588526904583, + "loss_iou": 1.0442343950271606, + "loss_num": 0.404296875, + "loss_xval": 0.404296875, + "num_input_tokens_seen": 69466776, + "step": 402 + }, + { + "epoch": 0.15434699348908465, + "grad_norm": 16.16141403050447, + "learning_rate": 5e-06, + "loss": 0.2522, + "num_input_tokens_seen": 69640120, + "step": 403 + }, + { + "epoch": 0.15434699348908465, + "loss": 0.2614048719406128, + "loss_ce": 0.01586044766008854, + "loss_iou": 1.0320557355880737, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 69640120, + "step": 403 + }, + { + "epoch": 0.15472998851014938, + "grad_norm": 23.152463989977253, + "learning_rate": 5e-06, + "loss": 0.3147, + "num_input_tokens_seen": 69813408, + "step": 404 + }, + { + "epoch": 0.15472998851014938, + "loss": 0.2476850003004074, + "loss_ce": 0.013920358382165432, + "loss_iou": 1.002660870552063, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 69813408, + "step": 404 + }, + { + "epoch": 0.15511298353121408, + "grad_norm": 27.080087001699383, + "learning_rate": 5e-06, + "loss": 0.38, + "num_input_tokens_seen": 69986488, + "step": 405 + }, + { + "epoch": 0.15511298353121408, + "loss": 0.43006911873817444, + "loss_ce": 0.014541776850819588, + "loss_iou": 1.0204484462738037, + "loss_num": 0.416015625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 69986488, + "step": 405 + }, + { + "epoch": 0.1554959785522788, + "grad_norm": 27.415850120433483, + "learning_rate": 5e-06, + "loss": 0.3969, + "num_input_tokens_seen": 70159320, + "step": 406 + }, + { + "epoch": 0.1554959785522788, + "loss": 0.3625633120536804, + "loss_ce": 0.01588360406458378, + "loss_iou": 1.013737440109253, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 70159320, + "step": 406 + }, + { + "epoch": 0.15587897357334354, + "grad_norm": 23.287520317442905, + "learning_rate": 5e-06, + "loss": 0.3078, + "num_input_tokens_seen": 70332400, + "step": 407 + }, + { + "epoch": 0.15587897357334354, + "loss": 0.35009288787841797, + "loss_ce": 0.015376098453998566, + "loss_iou": 1.0652670860290527, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 70332400, + "step": 407 + }, + { + "epoch": 0.15626196859440827, + "grad_norm": 21.034823162950822, + "learning_rate": 5e-06, + "loss": 0.4469, + "num_input_tokens_seen": 70505272, + "step": 408 + }, + { + "epoch": 0.15626196859440827, + "loss": 0.44711577892303467, + "loss_ce": 0.014986889436841011, + "loss_iou": 1.0202596187591553, + "loss_num": 0.431640625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 70505272, + "step": 408 + }, + { + "epoch": 0.156644963615473, + "grad_norm": 20.809851831139678, + "learning_rate": 5e-06, + "loss": 0.2758, + "num_input_tokens_seen": 70677928, + "step": 409 + }, + { + "epoch": 0.156644963615473, + "loss": 0.30263593792915344, + "loss_ce": 0.016014840453863144, + "loss_iou": 1.1829955577850342, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 70677928, + "step": 409 + }, + { + "epoch": 0.15702795863653773, + "grad_norm": 18.86130465184379, + "learning_rate": 5e-06, + "loss": 0.3039, + "num_input_tokens_seen": 70851096, + "step": 410 + }, + { + "epoch": 0.15702795863653773, + "loss": 0.2668569087982178, + "loss_ce": 0.014537591487169266, + "loss_iou": 1.0007319450378418, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 70851096, + "step": 410 + }, + { + "epoch": 0.15741095365760246, + "grad_norm": 23.15169657964992, + "learning_rate": 5e-06, + "loss": 0.3236, + "num_input_tokens_seen": 71023624, + "step": 411 + }, + { + "epoch": 0.15741095365760246, + "loss": 0.26910457015037537, + "loss_ce": 0.014832114800810814, + "loss_iou": 1.0035874843597412, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 71023624, + "step": 411 + }, + { + "epoch": 0.15779394867866717, + "grad_norm": 29.247604662834046, + "learning_rate": 5e-06, + "loss": 0.4098, + "num_input_tokens_seen": 71196792, + "step": 412 + }, + { + "epoch": 0.15779394867866717, + "loss": 0.3962034583091736, + "loss_ce": 0.014245479367673397, + "loss_iou": 1.324742078781128, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 71196792, + "step": 412 + }, + { + "epoch": 0.1581769436997319, + "grad_norm": 18.173080177086746, + "learning_rate": 5e-06, + "loss": 0.2962, + "num_input_tokens_seen": 71370040, + "step": 413 + }, + { + "epoch": 0.1581769436997319, + "loss": 0.2685660123825073, + "loss_ce": 0.015025995671749115, + "loss_iou": 1.0040959119796753, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 71370040, + "step": 413 + }, + { + "epoch": 0.15855993872079663, + "grad_norm": 24.228713979434175, + "learning_rate": 5e-06, + "loss": 0.4035, + "num_input_tokens_seen": 71542984, + "step": 414 + }, + { + "epoch": 0.15855993872079663, + "loss": 0.451690673828125, + "loss_ce": 0.014312762767076492, + "loss_iou": 2.112351894378662, + "loss_num": 0.4375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 71542984, + "step": 414 + }, + { + "epoch": 0.15894293374186136, + "grad_norm": 28.466619697327705, + "learning_rate": 5e-06, + "loss": 0.3337, + "num_input_tokens_seen": 71715968, + "step": 415 + }, + { + "epoch": 0.15894293374186136, + "loss": 0.31649863719940186, + "loss_ce": 0.014374621212482452, + "loss_iou": 1.0054057836532593, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 71715968, + "step": 415 + }, + { + "epoch": 0.1593259287629261, + "grad_norm": 34.22332767495968, + "learning_rate": 5e-06, + "loss": 0.3714, + "num_input_tokens_seen": 71888720, + "step": 416 + }, + { + "epoch": 0.1593259287629261, + "loss": 0.3528294265270233, + "loss_ce": 0.014084311202168465, + "loss_iou": 1.0216398239135742, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 71888720, + "step": 416 + }, + { + "epoch": 0.15970892378399082, + "grad_norm": 24.9234179030379, + "learning_rate": 5e-06, + "loss": 0.3386, + "num_input_tokens_seen": 72061560, + "step": 417 + }, + { + "epoch": 0.15970892378399082, + "loss": 0.29265356063842773, + "loss_ce": 0.014943597838282585, + "loss_iou": 1.037266492843628, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 72061560, + "step": 417 + }, + { + "epoch": 0.16009191880505552, + "grad_norm": 26.353183066889333, + "learning_rate": 5e-06, + "loss": 0.3834, + "num_input_tokens_seen": 72234512, + "step": 418 + }, + { + "epoch": 0.16009191880505552, + "loss": 0.3900579810142517, + "loss_ce": 0.015057964250445366, + "loss_iou": 1.0375490188598633, + "loss_num": 0.375, + "loss_xval": 0.375, + "num_input_tokens_seen": 72234512, + "step": 418 + }, + { + "epoch": 0.16047491382612025, + "grad_norm": 24.393157376345613, + "learning_rate": 5e-06, + "loss": 0.2836, + "num_input_tokens_seen": 72407432, + "step": 419 + }, + { + "epoch": 0.16047491382612025, + "loss": 0.3062630295753479, + "loss_ce": 0.015979833900928497, + "loss_iou": 1.1124298572540283, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 72407432, + "step": 419 + }, + { + "epoch": 0.16085790884718498, + "grad_norm": 48.232176848351784, + "learning_rate": 5e-06, + "loss": 0.4717, + "num_input_tokens_seen": 72580496, + "step": 420 + }, + { + "epoch": 0.16085790884718498, + "loss": 0.45536208152770996, + "loss_ce": 0.014932382851839066, + "loss_iou": 1.0621998310089111, + "loss_num": 0.44140625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 72580496, + "step": 420 + }, + { + "epoch": 0.1612409038682497, + "grad_norm": 21.538410401949463, + "learning_rate": 5e-06, + "loss": 0.3602, + "num_input_tokens_seen": 72753192, + "step": 421 + }, + { + "epoch": 0.1612409038682497, + "loss": 0.3762405216693878, + "loss_ce": 0.01442410796880722, + "loss_iou": 1.0506818294525146, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 72753192, + "step": 421 + }, + { + "epoch": 0.16162389888931444, + "grad_norm": 19.17179364958482, + "learning_rate": 5e-06, + "loss": 0.4197, + "num_input_tokens_seen": 72926376, + "step": 422 + }, + { + "epoch": 0.16162389888931444, + "loss": 0.4543018341064453, + "loss_ce": 0.014116259291768074, + "loss_iou": 1.1327141523361206, + "loss_num": 0.439453125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 72926376, + "step": 422 + }, + { + "epoch": 0.16200689391037917, + "grad_norm": 22.442863660160192, + "learning_rate": 5e-06, + "loss": 0.3071, + "num_input_tokens_seen": 73099552, + "step": 423 + }, + { + "epoch": 0.16200689391037917, + "loss": 0.33814510703086853, + "loss_ce": 0.014292564243078232, + "loss_iou": 1.0207548141479492, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 73099552, + "step": 423 + }, + { + "epoch": 0.1623898889314439, + "grad_norm": 24.46670225734681, + "learning_rate": 5e-06, + "loss": 0.4117, + "num_input_tokens_seen": 73272544, + "step": 424 + }, + { + "epoch": 0.1623898889314439, + "loss": 0.4213331639766693, + "loss_ce": 0.01398453488945961, + "loss_iou": 0.9296959638595581, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 73272544, + "step": 424 + }, + { + "epoch": 0.1627728839525086, + "grad_norm": 22.183220184802156, + "learning_rate": 5e-06, + "loss": 0.3162, + "num_input_tokens_seen": 73445656, + "step": 425 + }, + { + "epoch": 0.1627728839525086, + "loss": 0.3421262800693512, + "loss_ce": 0.015466131269931793, + "loss_iou": 1.4988172054290771, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 73445656, + "step": 425 + }, + { + "epoch": 0.16315587897357334, + "grad_norm": 20.306979484014494, + "learning_rate": 5e-06, + "loss": 0.3868, + "num_input_tokens_seen": 73618632, + "step": 426 + }, + { + "epoch": 0.16315587897357334, + "loss": 0.43750306963920593, + "loss_ce": 0.015872187912464142, + "loss_iou": 1.0614721775054932, + "loss_num": 0.421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 73618632, + "step": 426 + }, + { + "epoch": 0.16353887399463807, + "grad_norm": 33.29914225204966, + "learning_rate": 5e-06, + "loss": 0.3026, + "num_input_tokens_seen": 73791496, + "step": 427 + }, + { + "epoch": 0.16353887399463807, + "loss": 0.2640039324760437, + "loss_ce": 0.015712920576334, + "loss_iou": 1.0259437561035156, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 73791496, + "step": 427 + }, + { + "epoch": 0.1639218690157028, + "grad_norm": 41.4513683196305, + "learning_rate": 5e-06, + "loss": 0.4092, + "num_input_tokens_seen": 73964424, + "step": 428 + }, + { + "epoch": 0.1639218690157028, + "loss": 0.36324575543403625, + "loss_ce": 0.014124661684036255, + "loss_iou": 1.0058783292770386, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 73964424, + "step": 428 + }, + { + "epoch": 0.16430486403676753, + "grad_norm": 18.832055606098304, + "learning_rate": 5e-06, + "loss": 0.2375, + "num_input_tokens_seen": 74137296, + "step": 429 + }, + { + "epoch": 0.16430486403676753, + "loss": 0.2725387215614319, + "loss_ce": 0.014482062309980392, + "loss_iou": 1.0127389430999756, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 74137296, + "step": 429 + }, + { + "epoch": 0.16468785905783226, + "grad_norm": 18.32240215523765, + "learning_rate": 5e-06, + "loss": 0.3873, + "num_input_tokens_seen": 74309776, + "step": 430 + }, + { + "epoch": 0.16468785905783226, + "loss": 0.35966914892196655, + "loss_ce": 0.013843945227563381, + "loss_iou": 2.9520955085754395, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 74309776, + "step": 430 + }, + { + "epoch": 0.16507085407889696, + "grad_norm": 17.887323166547127, + "learning_rate": 5e-06, + "loss": 0.2928, + "num_input_tokens_seen": 74483064, + "step": 431 + }, + { + "epoch": 0.16507085407889696, + "loss": 0.23529697954654694, + "loss_ce": 0.01593662239611149, + "loss_iou": 1.0336109399795532, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 74483064, + "step": 431 + }, + { + "epoch": 0.1654538490999617, + "grad_norm": 21.04844653031928, + "learning_rate": 5e-06, + "loss": 0.3283, + "num_input_tokens_seen": 74656048, + "step": 432 + }, + { + "epoch": 0.1654538490999617, + "loss": 0.36195388436317444, + "loss_ce": 0.014297634363174438, + "loss_iou": 1.1591129302978516, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 74656048, + "step": 432 + }, + { + "epoch": 0.16583684412102642, + "grad_norm": 31.312609701196052, + "learning_rate": 5e-06, + "loss": 0.338, + "num_input_tokens_seen": 74829040, + "step": 433 + }, + { + "epoch": 0.16583684412102642, + "loss": 0.3524959683418274, + "loss_ce": 0.015215707942843437, + "loss_iou": 1.11476469039917, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 74829040, + "step": 433 + }, + { + "epoch": 0.16621983914209115, + "grad_norm": 19.451149418750603, + "learning_rate": 5e-06, + "loss": 0.3024, + "num_input_tokens_seen": 75001784, + "step": 434 + }, + { + "epoch": 0.16621983914209115, + "loss": 0.2773982286453247, + "loss_ce": 0.015191186219453812, + "loss_iou": 1.0119487047195435, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 75001784, + "step": 434 + }, + { + "epoch": 0.16660283416315588, + "grad_norm": 23.831570436092203, + "learning_rate": 5e-06, + "loss": 0.3795, + "num_input_tokens_seen": 75170824, + "step": 435 + }, + { + "epoch": 0.16660283416315588, + "loss": 0.4131200909614563, + "loss_ce": 0.01443847268819809, + "loss_iou": 1.014352798461914, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 75170824, + "step": 435 + }, + { + "epoch": 0.1669858291842206, + "grad_norm": 29.056372361366613, + "learning_rate": 5e-06, + "loss": 0.3245, + "num_input_tokens_seen": 75343704, + "step": 436 + }, + { + "epoch": 0.1669858291842206, + "loss": 0.2704662084579468, + "loss_ce": 0.014973044395446777, + "loss_iou": 1.0120782852172852, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 75343704, + "step": 436 + }, + { + "epoch": 0.16736882420528534, + "grad_norm": 28.53054222649919, + "learning_rate": 5e-06, + "loss": 0.3515, + "num_input_tokens_seen": 75516808, + "step": 437 + }, + { + "epoch": 0.16736882420528534, + "loss": 0.37008148431777954, + "loss_ce": 0.014978909865021706, + "loss_iou": 1.0898067951202393, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 75516808, + "step": 437 + }, + { + "epoch": 0.16775181922635005, + "grad_norm": 29.32113828431669, + "learning_rate": 5e-06, + "loss": 0.3746, + "num_input_tokens_seen": 75689640, + "step": 438 + }, + { + "epoch": 0.16775181922635005, + "loss": 0.4055299162864685, + "loss_ce": 0.015271143987774849, + "loss_iou": 1.2104315757751465, + "loss_num": 0.390625, + "loss_xval": 0.390625, + "num_input_tokens_seen": 75689640, + "step": 438 + }, + { + "epoch": 0.16813481424741478, + "grad_norm": 37.7460884615262, + "learning_rate": 5e-06, + "loss": 0.3185, + "num_input_tokens_seen": 75862656, + "step": 439 + }, + { + "epoch": 0.16813481424741478, + "loss": 0.337505042552948, + "loss_ce": 0.015239392407238483, + "loss_iou": 1.0106298923492432, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 75862656, + "step": 439 + }, + { + "epoch": 0.1685178092684795, + "grad_norm": 53.12064814976083, + "learning_rate": 5e-06, + "loss": 0.4725, + "num_input_tokens_seen": 76035896, + "step": 440 + }, + { + "epoch": 0.1685178092684795, + "loss": 0.4797833561897278, + "loss_ce": 0.013474768027663231, + "loss_iou": 1.0127484798431396, + "loss_num": 0.466796875, + "loss_xval": 0.466796875, + "num_input_tokens_seen": 76035896, + "step": 440 + }, + { + "epoch": 0.16890080428954424, + "grad_norm": 28.55611723652522, + "learning_rate": 5e-06, + "loss": 0.3083, + "num_input_tokens_seen": 76208512, + "step": 441 + }, + { + "epoch": 0.16890080428954424, + "loss": 0.2897804379463196, + "loss_ce": 0.015854664146900177, + "loss_iou": 1.0168652534484863, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 76208512, + "step": 441 + }, + { + "epoch": 0.16928379931060897, + "grad_norm": 33.65285734014548, + "learning_rate": 5e-06, + "loss": 0.4504, + "num_input_tokens_seen": 76381496, + "step": 442 + }, + { + "epoch": 0.16928379931060897, + "loss": 0.45344850420951843, + "loss_ce": 0.01594851166009903, + "loss_iou": 1.0340341329574585, + "loss_num": 0.4375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 76381496, + "step": 442 + }, + { + "epoch": 0.1696667943316737, + "grad_norm": 19.83607376978284, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 76554016, + "step": 443 + }, + { + "epoch": 0.1696667943316737, + "loss": 0.2182447612285614, + "loss_ce": 0.01511977519840002, + "loss_iou": 1.00160813331604, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 76554016, + "step": 443 + }, + { + "epoch": 0.1700497893527384, + "grad_norm": 50.73054005217147, + "learning_rate": 5e-06, + "loss": 0.4392, + "num_input_tokens_seen": 76727072, + "step": 444 + }, + { + "epoch": 0.1700497893527384, + "loss": 0.4633931517601013, + "loss_ce": 0.013930247165262699, + "loss_iou": 1.4889075756072998, + "loss_num": 0.44921875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 76727072, + "step": 444 + }, + { + "epoch": 0.17043278437380313, + "grad_norm": 48.56503222833234, + "learning_rate": 5e-06, + "loss": 0.5239, + "num_input_tokens_seen": 76900224, + "step": 445 + }, + { + "epoch": 0.17043278437380313, + "loss": 0.5089391469955444, + "loss_ce": 0.015775062143802643, + "loss_iou": 1.053293228149414, + "loss_num": 0.4921875, + "loss_xval": 0.4921875, + "num_input_tokens_seen": 76900224, + "step": 445 + }, + { + "epoch": 0.17081577939486786, + "grad_norm": 20.773227639173367, + "learning_rate": 5e-06, + "loss": 0.4272, + "num_input_tokens_seen": 77073176, + "step": 446 + }, + { + "epoch": 0.17081577939486786, + "loss": 0.4128382205963135, + "loss_ce": 0.015865560621023178, + "loss_iou": 1.023116111755371, + "loss_num": 0.396484375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 77073176, + "step": 446 + }, + { + "epoch": 0.1711987744159326, + "grad_norm": 34.943028927085834, + "learning_rate": 5e-06, + "loss": 0.5892, + "num_input_tokens_seen": 77245872, + "step": 447 + }, + { + "epoch": 0.1711987744159326, + "loss": 0.5758577585220337, + "loss_ce": 0.0153108611702919, + "loss_iou": 1.0360980033874512, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 77245872, + "step": 447 + }, + { + "epoch": 0.17158176943699732, + "grad_norm": 51.89654268093125, + "learning_rate": 5e-06, + "loss": 0.4811, + "num_input_tokens_seen": 77419048, + "step": 448 + }, + { + "epoch": 0.17158176943699732, + "loss": 0.45400649309158325, + "loss_ce": 0.014797521755099297, + "loss_iou": 1.245399832725525, + "loss_num": 0.439453125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 77419048, + "step": 448 + }, + { + "epoch": 0.17196476445806205, + "grad_norm": 61.78119051181622, + "learning_rate": 5e-06, + "loss": 0.8629, + "num_input_tokens_seen": 77591896, + "step": 449 + }, + { + "epoch": 0.17196476445806205, + "loss": 0.7947770357131958, + "loss_ce": 0.014503620564937592, + "loss_iou": 1.0696353912353516, + "loss_num": 0.78125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 77591896, + "step": 449 + }, + { + "epoch": 0.17234775947912678, + "grad_norm": 28.53533006356215, + "learning_rate": 5e-06, + "loss": 0.7408, + "num_input_tokens_seen": 77761600, + "step": 450 + }, + { + "epoch": 0.17234775947912678, + "loss": 0.7695003747940063, + "loss_ce": 0.017059026286005974, + "loss_iou": 1.0799751281738281, + "loss_num": 0.75390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 77761600, + "step": 450 + }, + { + "epoch": 0.17273075450019149, + "grad_norm": 39.79383529554913, + "learning_rate": 5e-06, + "loss": 0.7015, + "num_input_tokens_seen": 77934504, + "step": 451 + }, + { + "epoch": 0.17273075450019149, + "loss": 0.7374422550201416, + "loss_ce": 0.016739122569561005, + "loss_iou": 1.1498925685882568, + "loss_num": 0.71875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 77934504, + "step": 451 + }, + { + "epoch": 0.17311374952125622, + "grad_norm": 52.263229433792276, + "learning_rate": 5e-06, + "loss": 0.7574, + "num_input_tokens_seen": 78107304, + "step": 452 + }, + { + "epoch": 0.17311374952125622, + "loss": 0.7194201350212097, + "loss_ce": 0.015318576246500015, + "loss_iou": 1.105474829673767, + "loss_num": 0.703125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 78107304, + "step": 452 + }, + { + "epoch": 0.17349674454232095, + "grad_norm": 37.7961219675428, + "learning_rate": 5e-06, + "loss": 0.6433, + "num_input_tokens_seen": 78279896, + "step": 453 + }, + { + "epoch": 0.17349674454232095, + "loss": 0.6548479199409485, + "loss_ce": 0.01666434109210968, + "loss_iou": 1.0721873044967651, + "loss_num": 0.63671875, + "loss_xval": 0.63671875, + "num_input_tokens_seen": 78279896, + "step": 453 + }, + { + "epoch": 0.17387973956338568, + "grad_norm": 30.33310550199955, + "learning_rate": 5e-06, + "loss": 0.6961, + "num_input_tokens_seen": 78453088, + "step": 454 + }, + { + "epoch": 0.17387973956338568, + "loss": 0.6686890125274658, + "loss_ce": 0.016345294192433357, + "loss_iou": 1.0560839176177979, + "loss_num": 0.65234375, + "loss_xval": 0.65234375, + "num_input_tokens_seen": 78453088, + "step": 454 + }, + { + "epoch": 0.1742627345844504, + "grad_norm": 27.849510545790412, + "learning_rate": 5e-06, + "loss": 0.4943, + "num_input_tokens_seen": 78626048, + "step": 455 + }, + { + "epoch": 0.1742627345844504, + "loss": 0.4955611824989319, + "loss_ce": 0.01704554632306099, + "loss_iou": 1.1379518508911133, + "loss_num": 0.478515625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 78626048, + "step": 455 + }, + { + "epoch": 0.17464572960551514, + "grad_norm": 35.00992202568288, + "learning_rate": 5e-06, + "loss": 0.5929, + "num_input_tokens_seen": 78798792, + "step": 456 + }, + { + "epoch": 0.17464572960551514, + "loss": 0.6162586808204651, + "loss_ce": 0.016649283468723297, + "loss_iou": 1.0767818689346313, + "loss_num": 0.6015625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 78798792, + "step": 456 + }, + { + "epoch": 0.17502872462657987, + "grad_norm": 24.680822280520914, + "learning_rate": 5e-06, + "loss": 0.3934, + "num_input_tokens_seen": 78971744, + "step": 457 + }, + { + "epoch": 0.17502872462657987, + "loss": 0.3981289863586426, + "loss_ce": 0.01531648263335228, + "loss_iou": 1.0487823486328125, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 78971744, + "step": 457 + }, + { + "epoch": 0.17541171964764457, + "grad_norm": 30.85913713643089, + "learning_rate": 5e-06, + "loss": 0.4519, + "num_input_tokens_seen": 79144760, + "step": 458 + }, + { + "epoch": 0.17541171964764457, + "loss": 0.46264809370040894, + "loss_ce": 0.01733560673892498, + "loss_iou": 1.066173791885376, + "loss_num": 0.4453125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 79144760, + "step": 458 + }, + { + "epoch": 0.1757947146687093, + "grad_norm": 32.07405643946138, + "learning_rate": 5e-06, + "loss": 0.413, + "num_input_tokens_seen": 79317632, + "step": 459 + }, + { + "epoch": 0.1757947146687093, + "loss": 0.43378493189811707, + "loss_ce": 0.017769280821084976, + "loss_iou": 1.0236985683441162, + "loss_num": 0.416015625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 79317632, + "step": 459 + }, + { + "epoch": 0.17617770968977403, + "grad_norm": 26.89925591070896, + "learning_rate": 5e-06, + "loss": 0.3841, + "num_input_tokens_seen": 79491000, + "step": 460 + }, + { + "epoch": 0.17617770968977403, + "loss": 0.37873604893684387, + "loss_ce": 0.016065148636698723, + "loss_iou": 1.0253320932388306, + "loss_num": 0.36328125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 79491000, + "step": 460 + }, + { + "epoch": 0.17656070471083876, + "grad_norm": 32.526180603764686, + "learning_rate": 5e-06, + "loss": 0.3972, + "num_input_tokens_seen": 79663888, + "step": 461 + }, + { + "epoch": 0.17656070471083876, + "loss": 0.4289093017578125, + "loss_ce": 0.015945417806506157, + "loss_iou": 1.243054986000061, + "loss_num": 0.412109375, + "loss_xval": 0.412109375, + "num_input_tokens_seen": 79663888, + "step": 461 + }, + { + "epoch": 0.1769436997319035, + "grad_norm": 29.189795519064553, + "learning_rate": 5e-06, + "loss": 0.42, + "num_input_tokens_seen": 79837168, + "step": 462 + }, + { + "epoch": 0.1769436997319035, + "loss": 0.4120938181877136, + "loss_ce": 0.01683015003800392, + "loss_iou": 1.0402803421020508, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 79837168, + "step": 462 + }, + { + "epoch": 0.17732669475296822, + "grad_norm": 23.71756012685803, + "learning_rate": 5e-06, + "loss": 0.3112, + "num_input_tokens_seen": 80009976, + "step": 463 + }, + { + "epoch": 0.17732669475296822, + "loss": 0.32907718420028687, + "loss_ce": 0.016821343451738358, + "loss_iou": 1.041823148727417, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 80009976, + "step": 463 + }, + { + "epoch": 0.17770968977403293, + "grad_norm": 27.954120116997547, + "learning_rate": 5e-06, + "loss": 0.3743, + "num_input_tokens_seen": 80182832, + "step": 464 + }, + { + "epoch": 0.17770968977403293, + "loss": 0.39653757214546204, + "loss_ce": 0.016898900270462036, + "loss_iou": 1.0429874658584595, + "loss_num": 0.37890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 80182832, + "step": 464 + }, + { + "epoch": 0.17809268479509766, + "grad_norm": 38.597161628003036, + "learning_rate": 5e-06, + "loss": 0.3038, + "num_input_tokens_seen": 80355696, + "step": 465 + }, + { + "epoch": 0.17809268479509766, + "loss": 0.2565761208534241, + "loss_ce": 0.01573140360414982, + "loss_iou": 1.0030584335327148, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 80355696, + "step": 465 + }, + { + "epoch": 0.17847567981616239, + "grad_norm": 35.18100461780769, + "learning_rate": 5e-06, + "loss": 0.353, + "num_input_tokens_seen": 80528896, + "step": 466 + }, + { + "epoch": 0.17847567981616239, + "loss": 0.3511597514152527, + "loss_ce": 0.017419513314962387, + "loss_iou": 1.0240516662597656, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 80528896, + "step": 466 + }, + { + "epoch": 0.17885867483722712, + "grad_norm": 31.213504805025785, + "learning_rate": 5e-06, + "loss": 0.3468, + "num_input_tokens_seen": 80702016, + "step": 467 + }, + { + "epoch": 0.17885867483722712, + "loss": 0.37608304619789124, + "loss_ce": 0.017562542110681534, + "loss_iou": 1.065513253211975, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 80702016, + "step": 467 + }, + { + "epoch": 0.17924166985829185, + "grad_norm": 28.5635739429278, + "learning_rate": 5e-06, + "loss": 0.3127, + "num_input_tokens_seen": 80874832, + "step": 468 + }, + { + "epoch": 0.17924166985829185, + "loss": 0.3625808358192444, + "loss_ce": 0.01699979603290558, + "loss_iou": 1.1267366409301758, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 80874832, + "step": 468 + }, + { + "epoch": 0.17962466487935658, + "grad_norm": 21.934907619342116, + "learning_rate": 5e-06, + "loss": 0.298, + "num_input_tokens_seen": 81047712, + "step": 469 + }, + { + "epoch": 0.17962466487935658, + "loss": 0.2947612404823303, + "loss_ce": 0.016807135194540024, + "loss_iou": 1.0040606260299683, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 81047712, + "step": 469 + }, + { + "epoch": 0.1800076599004213, + "grad_norm": 18.3671290820128, + "learning_rate": 5e-06, + "loss": 0.4184, + "num_input_tokens_seen": 81220952, + "step": 470 + }, + { + "epoch": 0.1800076599004213, + "loss": 0.4173802435398102, + "loss_ce": 0.01698961667716503, + "loss_iou": 1.0343729257583618, + "loss_num": 0.400390625, + "loss_xval": 0.400390625, + "num_input_tokens_seen": 81220952, + "step": 470 + }, + { + "epoch": 0.180390654921486, + "grad_norm": 21.93973986022741, + "learning_rate": 5e-06, + "loss": 0.3134, + "num_input_tokens_seen": 81393944, + "step": 471 + }, + { + "epoch": 0.180390654921486, + "loss": 0.27204078435897827, + "loss_ce": 0.015326913446187973, + "loss_iou": 1.014489769935608, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 81393944, + "step": 471 + }, + { + "epoch": 0.18077364994255074, + "grad_norm": 29.11599156888172, + "learning_rate": 5e-06, + "loss": 0.3602, + "num_input_tokens_seen": 81563264, + "step": 472 + }, + { + "epoch": 0.18077364994255074, + "loss": 0.3316724896430969, + "loss_ce": 0.01624278910458088, + "loss_iou": 1.0116811990737915, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 81563264, + "step": 472 + }, + { + "epoch": 0.18115664496361547, + "grad_norm": 21.048303326103163, + "learning_rate": 5e-06, + "loss": 0.2952, + "num_input_tokens_seen": 81736328, + "step": 473 + }, + { + "epoch": 0.18115664496361547, + "loss": 0.26047569513320923, + "loss_ce": 0.016823377460241318, + "loss_iou": 1.0194365978240967, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 81736328, + "step": 473 + }, + { + "epoch": 0.1815396399846802, + "grad_norm": 18.105913293272476, + "learning_rate": 5e-06, + "loss": 0.3966, + "num_input_tokens_seen": 81909576, + "step": 474 + }, + { + "epoch": 0.1815396399846802, + "loss": 0.3882972002029419, + "loss_ce": 0.01671518012881279, + "loss_iou": 1.0324071645736694, + "loss_num": 0.37109375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 81909576, + "step": 474 + }, + { + "epoch": 0.18192263500574493, + "grad_norm": 27.69496716833715, + "learning_rate": 5e-06, + "loss": 0.2466, + "num_input_tokens_seen": 82082520, + "step": 475 + }, + { + "epoch": 0.18192263500574493, + "loss": 0.2682589292526245, + "loss_ce": 0.017648600041866302, + "loss_iou": 1.0662319660186768, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 82082520, + "step": 475 + }, + { + "epoch": 0.18230563002680966, + "grad_norm": 39.11406909071093, + "learning_rate": 5e-06, + "loss": 0.413, + "num_input_tokens_seen": 82255176, + "step": 476 + }, + { + "epoch": 0.18230563002680966, + "loss": 0.3718685507774353, + "loss_ce": 0.015667375177145004, + "loss_iou": 1.026766061782837, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 82255176, + "step": 476 + }, + { + "epoch": 0.18268862504787436, + "grad_norm": 32.52377364497424, + "learning_rate": 5e-06, + "loss": 0.2955, + "num_input_tokens_seen": 82428096, + "step": 477 + }, + { + "epoch": 0.18268862504787436, + "loss": 0.3095782995223999, + "loss_ce": 0.016853690147399902, + "loss_iou": 1.0867146253585815, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 82428096, + "step": 477 + }, + { + "epoch": 0.1830716200689391, + "grad_norm": 17.85674849229512, + "learning_rate": 5e-06, + "loss": 0.3755, + "num_input_tokens_seen": 82600960, + "step": 478 + }, + { + "epoch": 0.1830716200689391, + "loss": 0.4125042259693146, + "loss_ce": 0.016019873321056366, + "loss_iou": 1.0462924242019653, + "loss_num": 0.396484375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 82600960, + "step": 478 + }, + { + "epoch": 0.18345461509000383, + "grad_norm": 16.392710654275135, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 82773936, + "step": 479 + }, + { + "epoch": 0.18345461509000383, + "loss": 0.19046780467033386, + "loss_ce": 0.015296910889446735, + "loss_iou": 1.0163350105285645, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 82773936, + "step": 479 + }, + { + "epoch": 0.18383761011106856, + "grad_norm": 21.91953546551012, + "learning_rate": 5e-06, + "loss": 0.2539, + "num_input_tokens_seen": 82946864, + "step": 480 + }, + { + "epoch": 0.18383761011106856, + "loss": 0.2376902997493744, + "loss_ce": 0.016315776854753494, + "loss_iou": 1.0335052013397217, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 82946864, + "step": 480 + }, + { + "epoch": 0.18422060513213329, + "grad_norm": 25.88124896687149, + "learning_rate": 5e-06, + "loss": 0.3046, + "num_input_tokens_seen": 83119272, + "step": 481 + }, + { + "epoch": 0.18422060513213329, + "loss": 0.29418930411338806, + "loss_ce": 0.013854829594492912, + "loss_iou": 1.0280160903930664, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 83119272, + "step": 481 + }, + { + "epoch": 0.18460360015319802, + "grad_norm": 25.5137148210727, + "learning_rate": 5e-06, + "loss": 0.3666, + "num_input_tokens_seen": 83292192, + "step": 482 + }, + { + "epoch": 0.18460360015319802, + "loss": 0.35649994015693665, + "loss_ce": 0.0181210245937109, + "loss_iou": 1.0152759552001953, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 83292192, + "step": 482 + }, + { + "epoch": 0.18498659517426275, + "grad_norm": 21.273354139372323, + "learning_rate": 5e-06, + "loss": 0.2382, + "num_input_tokens_seen": 83465240, + "step": 483 + }, + { + "epoch": 0.18498659517426275, + "loss": 0.2260705679655075, + "loss_ce": 0.01806274801492691, + "loss_iou": 1.017911672592163, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 83465240, + "step": 483 + }, + { + "epoch": 0.18536959019532745, + "grad_norm": 22.990545344758896, + "learning_rate": 5e-06, + "loss": 0.3979, + "num_input_tokens_seen": 83638088, + "step": 484 + }, + { + "epoch": 0.18536959019532745, + "loss": 0.4391933083534241, + "loss_ce": 0.016097616404294968, + "loss_iou": 1.2297831773757935, + "loss_num": 0.423828125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 83638088, + "step": 484 + }, + { + "epoch": 0.18575258521639218, + "grad_norm": 15.283715848072962, + "learning_rate": 5e-06, + "loss": 0.1777, + "num_input_tokens_seen": 83810856, + "step": 485 + }, + { + "epoch": 0.18575258521639218, + "loss": 0.16487838327884674, + "loss_ce": 0.015708457678556442, + "loss_iou": 1.0036348104476929, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 83810856, + "step": 485 + }, + { + "epoch": 0.1861355802374569, + "grad_norm": 24.842294893652895, + "learning_rate": 5e-06, + "loss": 0.3012, + "num_input_tokens_seen": 83983872, + "step": 486 + }, + { + "epoch": 0.1861355802374569, + "loss": 0.2701072096824646, + "loss_ce": 0.017177525907754898, + "loss_iou": 1.0082621574401855, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 83983872, + "step": 486 + }, + { + "epoch": 0.18651857525852164, + "grad_norm": 30.158734932416355, + "learning_rate": 5e-06, + "loss": 0.3803, + "num_input_tokens_seen": 84156688, + "step": 487 + }, + { + "epoch": 0.18651857525852164, + "loss": 0.34104037284851074, + "loss_ce": 0.01572299748659134, + "loss_iou": 1.020815134048462, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 84156688, + "step": 487 + }, + { + "epoch": 0.18690157027958637, + "grad_norm": 25.587891636501823, + "learning_rate": 5e-06, + "loss": 0.2957, + "num_input_tokens_seen": 84329472, + "step": 488 + }, + { + "epoch": 0.18690157027958637, + "loss": 0.26617318391799927, + "loss_ce": 0.016051091253757477, + "loss_iou": 1.0045745372772217, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 84329472, + "step": 488 + }, + { + "epoch": 0.1872845653006511, + "grad_norm": 22.26617325658685, + "learning_rate": 5e-06, + "loss": 0.3086, + "num_input_tokens_seen": 84498568, + "step": 489 + }, + { + "epoch": 0.1872845653006511, + "loss": 0.3364994525909424, + "loss_ce": 0.014966249465942383, + "loss_iou": 4.4488043785095215, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 84498568, + "step": 489 + }, + { + "epoch": 0.1876675603217158, + "grad_norm": 36.737892807519984, + "learning_rate": 5e-06, + "loss": 0.4123, + "num_input_tokens_seen": 84671392, + "step": 490 + }, + { + "epoch": 0.1876675603217158, + "loss": 0.41526561975479126, + "loss_ce": 0.016095727682113647, + "loss_iou": 1.0190294981002808, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 84671392, + "step": 490 + }, + { + "epoch": 0.18805055534278053, + "grad_norm": 24.295039691533272, + "learning_rate": 5e-06, + "loss": 0.4046, + "num_input_tokens_seen": 84844384, + "step": 491 + }, + { + "epoch": 0.18805055534278053, + "loss": 0.43257319927215576, + "loss_ce": 0.01655757799744606, + "loss_iou": 1.3567330837249756, + "loss_num": 0.416015625, + "loss_xval": 0.416015625, + "num_input_tokens_seen": 84844384, + "step": 491 + }, + { + "epoch": 0.18843355036384526, + "grad_norm": 25.16055679726767, + "learning_rate": 5e-06, + "loss": 0.38, + "num_input_tokens_seen": 85017424, + "step": 492 + }, + { + "epoch": 0.18843355036384526, + "loss": 0.3606225848197937, + "loss_ce": 0.017849139869213104, + "loss_iou": 1.0175882577896118, + "loss_num": 0.34375, + "loss_xval": 0.34375, + "num_input_tokens_seen": 85017424, + "step": 492 + }, + { + "epoch": 0.18881654538491, + "grad_norm": 25.70617228138958, + "learning_rate": 5e-06, + "loss": 0.3107, + "num_input_tokens_seen": 85190704, + "step": 493 + }, + { + "epoch": 0.18881654538491, + "loss": 0.27605336904525757, + "loss_ce": 0.016776029020547867, + "loss_iou": 1.0141079425811768, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 85190704, + "step": 493 + }, + { + "epoch": 0.18919954040597473, + "grad_norm": 29.598310923820303, + "learning_rate": 5e-06, + "loss": 0.4076, + "num_input_tokens_seen": 85363688, + "step": 494 + }, + { + "epoch": 0.18919954040597473, + "loss": 0.3656034469604492, + "loss_ce": 0.01672646962106228, + "loss_iou": 1.0188572406768799, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 85363688, + "step": 494 + }, + { + "epoch": 0.18958253542703946, + "grad_norm": 24.145249013799273, + "learning_rate": 5e-06, + "loss": 0.2913, + "num_input_tokens_seen": 85536736, + "step": 495 + }, + { + "epoch": 0.18958253542703946, + "loss": 0.28448039293289185, + "loss_ce": 0.016902253031730652, + "loss_iou": 1.0133237838745117, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 85536736, + "step": 495 + }, + { + "epoch": 0.1899655304481042, + "grad_norm": 20.78818998843579, + "learning_rate": 5e-06, + "loss": 0.4157, + "num_input_tokens_seen": 85709432, + "step": 496 + }, + { + "epoch": 0.1899655304481042, + "loss": 0.46383488178253174, + "loss_ce": 0.01559271290898323, + "loss_iou": 1.074376106262207, + "loss_num": 0.44921875, + "loss_xval": 0.44921875, + "num_input_tokens_seen": 85709432, + "step": 496 + }, + { + "epoch": 0.1903485254691689, + "grad_norm": 16.754483725629253, + "learning_rate": 5e-06, + "loss": 0.3812, + "num_input_tokens_seen": 85882104, + "step": 497 + }, + { + "epoch": 0.1903485254691689, + "loss": 0.3708678185939789, + "loss_ce": 0.017047010362148285, + "loss_iou": 2.423377752304077, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 85882104, + "step": 497 + }, + { + "epoch": 0.19073152049023362, + "grad_norm": 18.086234825479977, + "learning_rate": 5e-06, + "loss": 0.2875, + "num_input_tokens_seen": 86055216, + "step": 498 + }, + { + "epoch": 0.19073152049023362, + "loss": 0.27319908142089844, + "loss_ce": 0.01660730317234993, + "loss_iou": 1.0002715587615967, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 86055216, + "step": 498 + }, + { + "epoch": 0.19111451551129835, + "grad_norm": 28.51022351505333, + "learning_rate": 5e-06, + "loss": 0.3366, + "num_input_tokens_seen": 86228040, + "step": 499 + }, + { + "epoch": 0.19111451551129835, + "loss": 0.41052719950675964, + "loss_ce": 0.017582854256033897, + "loss_iou": 1.1613000631332397, + "loss_num": 0.392578125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 86228040, + "step": 499 + }, + { + "epoch": 0.19149751053236308, + "grad_norm": 31.11421551827568, + "learning_rate": 5e-06, + "loss": 0.3285, + "num_input_tokens_seen": 86401016, + "step": 500 + }, + { + "epoch": 0.19149751053236308, + "eval_websight_new_CIoU": 0.8393459916114807, + "eval_websight_new_GIoU": 0.8380286693572998, + "eval_websight_new_IoU": 0.8403661549091339, + "eval_websight_new_MAE_all": 0.01726989820599556, + "eval_websight_new_MAE_h": 0.01403492921963334, + "eval_websight_new_MAE_w": 0.025333072058856487, + "eval_websight_new_MAE_x": 0.015599821228533983, + "eval_websight_new_MAE_y": 0.01411177171394229, + "eval_websight_new_NUM_probability": 0.0007267279142979532, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.21308483183383942, + "eval_websight_new_loss_ce": 0.03257010132074356, + "eval_websight_new_loss_iou": 1.0014038681983948, + "eval_websight_new_loss_num": 0.18133544921875, + "eval_websight_new_loss_xval": 0.18133544921875, + "eval_websight_new_runtime": 55.4241, + "eval_websight_new_samples_per_second": 0.902, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 86401016, + "step": 500 + }, + { + "epoch": 0.19149751053236308, + "eval_seeclick_CIoU": 0.5755606889724731, + "eval_seeclick_GIoU": 0.5703078657388687, + "eval_seeclick_IoU": 0.6038848459720612, + "eval_seeclick_MAE_all": 0.06362584792077541, + "eval_seeclick_MAE_h": 0.05255229026079178, + "eval_seeclick_MAE_w": 0.07664298266172409, + "eval_seeclick_MAE_x": 0.0703110322356224, + "eval_seeclick_MAE_y": 0.05499708279967308, + "eval_seeclick_NUM_probability": 0.001212626346386969, + "eval_seeclick_inside_bbox": 0.8854166567325592, + "eval_seeclick_loss": 0.5587007999420166, + "eval_seeclick_loss_ce": 0.02439273614436388, + "eval_seeclick_loss_iou": 1.0927678346633911, + "eval_seeclick_loss_num": 0.481201171875, + "eval_seeclick_loss_xval": 0.481201171875, + "eval_seeclick_runtime": 81.5555, + "eval_seeclick_samples_per_second": 0.613, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 86401016, + "step": 500 + }, + { + "epoch": 0.19149751053236308, + "eval_icons_CIoU": 0.7974367737770081, + "eval_icons_GIoU": 0.7924989759922028, + "eval_icons_IoU": 0.8080669343471527, + "eval_icons_MAE_all": 0.028948302380740643, + "eval_icons_MAE_h": 0.024239955469965935, + "eval_icons_MAE_w": 0.03740705642849207, + "eval_icons_MAE_x": 0.03228426072746515, + "eval_icons_MAE_y": 0.02186194295063615, + "eval_icons_NUM_probability": 0.0007074489549268037, + "eval_icons_inside_bbox": 0.9722222089767456, + "eval_icons_loss": 0.21479381620883942, + "eval_icons_loss_ce": 0.02179853804409504, + "eval_icons_loss_iou": 1.0462433099746704, + "eval_icons_loss_num": 0.179931640625, + "eval_icons_loss_xval": 0.179931640625, + "eval_icons_runtime": 81.8569, + "eval_icons_samples_per_second": 0.611, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 86401016, + "step": 500 + }, + { + "epoch": 0.19149751053236308, + "loss": 0.23735055327415466, + "loss_ce": 0.021835414692759514, + "loss_iou": 1.0912595987319946, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 86401016, + "step": 500 + }, + { + "epoch": 0.1918805055534278, + "grad_norm": 31.044111730894407, + "learning_rate": 5e-06, + "loss": 0.2618, + "num_input_tokens_seen": 86574472, + "step": 501 + }, + { + "epoch": 0.1918805055534278, + "loss": 0.24594920873641968, + "loss_ce": 0.017677726224064827, + "loss_iou": 1.0402929782867432, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 86574472, + "step": 501 + }, + { + "epoch": 0.19226350057449254, + "grad_norm": 45.46735147246926, + "learning_rate": 5e-06, + "loss": 0.3916, + "num_input_tokens_seen": 86747880, + "step": 502 + }, + { + "epoch": 0.19226350057449254, + "loss": 0.3739714026451111, + "loss_ce": 0.018136456608772278, + "loss_iou": 1.0316057205200195, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 86747880, + "step": 502 + }, + { + "epoch": 0.19264649559555727, + "grad_norm": 42.728156691624314, + "learning_rate": 5e-06, + "loss": 0.3649, + "num_input_tokens_seen": 86921424, + "step": 503 + }, + { + "epoch": 0.19264649559555727, + "loss": 0.3752208352088928, + "loss_ce": 0.016578257083892822, + "loss_iou": 1.0332841873168945, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 86921424, + "step": 503 + }, + { + "epoch": 0.19302949061662197, + "grad_norm": 39.50231245137594, + "learning_rate": 5e-06, + "loss": 0.3695, + "num_input_tokens_seen": 87094280, + "step": 504 + }, + { + "epoch": 0.19302949061662197, + "loss": 0.3911907374858856, + "loss_ce": 0.01643488183617592, + "loss_iou": 1.0340772867202759, + "loss_num": 0.375, + "loss_xval": 0.375, + "num_input_tokens_seen": 87094280, + "step": 504 + }, + { + "epoch": 0.1934124856376867, + "grad_norm": 17.28129230275025, + "learning_rate": 5e-06, + "loss": 0.2657, + "num_input_tokens_seen": 87267304, + "step": 505 + }, + { + "epoch": 0.1934124856376867, + "loss": 0.2689210772514343, + "loss_ce": 0.015747249126434326, + "loss_iou": 1.0793946981430054, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 87267304, + "step": 505 + }, + { + "epoch": 0.19379548065875143, + "grad_norm": 21.802907956959405, + "learning_rate": 5e-06, + "loss": 0.304, + "num_input_tokens_seen": 87440528, + "step": 506 + }, + { + "epoch": 0.19379548065875143, + "loss": 0.198129802942276, + "loss_ce": 0.016794349998235703, + "loss_iou": 1.0124921798706055, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 87440528, + "step": 506 + }, + { + "epoch": 0.19417847567981616, + "grad_norm": 39.47269099258926, + "learning_rate": 5e-06, + "loss": 0.358, + "num_input_tokens_seen": 87613536, + "step": 507 + }, + { + "epoch": 0.19417847567981616, + "loss": 0.39029720425605774, + "loss_ce": 0.016517896205186844, + "loss_iou": 1.0555205345153809, + "loss_num": 0.373046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 87613536, + "step": 507 + }, + { + "epoch": 0.1945614707008809, + "grad_norm": 34.08258050384075, + "learning_rate": 5e-06, + "loss": 0.3682, + "num_input_tokens_seen": 87786504, + "step": 508 + }, + { + "epoch": 0.1945614707008809, + "loss": 0.33308690786361694, + "loss_ce": 0.016802724450826645, + "loss_iou": 1.0751721858978271, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 87786504, + "step": 508 + }, + { + "epoch": 0.19494446572194563, + "grad_norm": 22.0627334451357, + "learning_rate": 5e-06, + "loss": 0.3496, + "num_input_tokens_seen": 87959584, + "step": 509 + }, + { + "epoch": 0.19494446572194563, + "loss": 0.35061129927635193, + "loss_ce": 0.017237259075045586, + "loss_iou": 1.0571727752685547, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 87959584, + "step": 509 + }, + { + "epoch": 0.19532746074301033, + "grad_norm": 35.033535187685835, + "learning_rate": 5e-06, + "loss": 0.3539, + "num_input_tokens_seen": 88132872, + "step": 510 + }, + { + "epoch": 0.19532746074301033, + "loss": 0.35290834307670593, + "loss_ce": 0.017092909663915634, + "loss_iou": 1.0979015827178955, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 88132872, + "step": 510 + }, + { + "epoch": 0.19571045576407506, + "grad_norm": 38.48371967756046, + "learning_rate": 5e-06, + "loss": 0.3068, + "num_input_tokens_seen": 88305984, + "step": 511 + }, + { + "epoch": 0.19571045576407506, + "loss": 0.3534683287143707, + "loss_ce": 0.017042554914951324, + "loss_iou": 1.103144645690918, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 88305984, + "step": 511 + }, + { + "epoch": 0.1960934507851398, + "grad_norm": 46.43906185076253, + "learning_rate": 5e-06, + "loss": 0.4626, + "num_input_tokens_seen": 88479016, + "step": 512 + }, + { + "epoch": 0.1960934507851398, + "loss": 0.5185445547103882, + "loss_ce": 0.017812136560678482, + "loss_iou": 1.2927305698394775, + "loss_num": 0.5, + "loss_xval": 0.5, + "num_input_tokens_seen": 88479016, + "step": 512 + }, + { + "epoch": 0.19647644580620452, + "grad_norm": 29.630687927421953, + "learning_rate": 5e-06, + "loss": 0.3324, + "num_input_tokens_seen": 88651728, + "step": 513 + }, + { + "epoch": 0.19647644580620452, + "loss": 0.3619629144668579, + "loss_ce": 0.01638186164200306, + "loss_iou": 1.2043843269348145, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 88651728, + "step": 513 + }, + { + "epoch": 0.19685944082726925, + "grad_norm": 24.973558380069395, + "learning_rate": 5e-06, + "loss": 0.3532, + "num_input_tokens_seen": 88824632, + "step": 514 + }, + { + "epoch": 0.19685944082726925, + "loss": 0.3557354807853699, + "loss_ce": 0.016990378499031067, + "loss_iou": 1.0580698251724243, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 88824632, + "step": 514 + }, + { + "epoch": 0.19724243584833398, + "grad_norm": 39.994809863245415, + "learning_rate": 5e-06, + "loss": 0.306, + "num_input_tokens_seen": 88997800, + "step": 515 + }, + { + "epoch": 0.19724243584833398, + "loss": 0.3369375765323639, + "loss_ce": 0.017235442996025085, + "loss_iou": 1.0511664152145386, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 88997800, + "step": 515 + }, + { + "epoch": 0.1976254308693987, + "grad_norm": 36.78717112901744, + "learning_rate": 5e-06, + "loss": 0.5048, + "num_input_tokens_seen": 89170832, + "step": 516 + }, + { + "epoch": 0.1976254308693987, + "loss": 0.5017112493515015, + "loss_ce": 0.017092112451791763, + "loss_iou": 1.0794587135314941, + "loss_num": 0.484375, + "loss_xval": 0.484375, + "num_input_tokens_seen": 89170832, + "step": 516 + }, + { + "epoch": 0.1980084258904634, + "grad_norm": 12.642036382767236, + "learning_rate": 5e-06, + "loss": 0.2768, + "num_input_tokens_seen": 89343856, + "step": 517 + }, + { + "epoch": 0.1980084258904634, + "loss": 0.25486576557159424, + "loss_ce": 0.018537653610110283, + "loss_iou": 1.0257585048675537, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 89343856, + "step": 517 + }, + { + "epoch": 0.19839142091152814, + "grad_norm": 23.205769289092615, + "learning_rate": 5e-06, + "loss": 0.3597, + "num_input_tokens_seen": 89517368, + "step": 518 + }, + { + "epoch": 0.19839142091152814, + "loss": 0.36347025632858276, + "loss_ce": 0.01581399515271187, + "loss_iou": 1.0005018711090088, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 89517368, + "step": 518 + }, + { + "epoch": 0.19877441593259287, + "grad_norm": 42.05917832773593, + "learning_rate": 5e-06, + "loss": 0.409, + "num_input_tokens_seen": 89690656, + "step": 519 + }, + { + "epoch": 0.19877441593259287, + "loss": 0.4221374988555908, + "loss_ce": 0.016864046454429626, + "loss_iou": 1.0211807489395142, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 89690656, + "step": 519 + }, + { + "epoch": 0.1991574109536576, + "grad_norm": 42.61402662722592, + "learning_rate": 5e-06, + "loss": 0.4126, + "num_input_tokens_seen": 89863760, + "step": 520 + }, + { + "epoch": 0.1991574109536576, + "loss": 0.3987889587879181, + "loss_ce": 0.016953013837337494, + "loss_iou": 1.1257686614990234, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 89863760, + "step": 520 + }, + { + "epoch": 0.19954040597472233, + "grad_norm": 21.30541945935348, + "learning_rate": 5e-06, + "loss": 0.3476, + "num_input_tokens_seen": 90036776, + "step": 521 + }, + { + "epoch": 0.19954040597472233, + "loss": 0.3476550877094269, + "loss_ce": 0.016600407660007477, + "loss_iou": 1.0270713567733765, + "loss_num": 0.33203125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 90036776, + "step": 521 + }, + { + "epoch": 0.19992340099578707, + "grad_norm": 24.706978334124425, + "learning_rate": 5e-06, + "loss": 0.3107, + "num_input_tokens_seen": 90209568, + "step": 522 + }, + { + "epoch": 0.19992340099578707, + "loss": 0.27285027503967285, + "loss_ce": 0.016990892589092255, + "loss_iou": 1.004989743232727, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 90209568, + "step": 522 + }, + { + "epoch": 0.20030639601685177, + "grad_norm": 71.3725387294566, + "learning_rate": 5e-06, + "loss": 0.3188, + "num_input_tokens_seen": 90382840, + "step": 523 + }, + { + "epoch": 0.20030639601685177, + "loss": 0.2938591241836548, + "loss_ce": 0.01602707989513874, + "loss_iou": 1.00357985496521, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 90382840, + "step": 523 + }, + { + "epoch": 0.2006893910379165, + "grad_norm": 48.48247472073512, + "learning_rate": 5e-06, + "loss": 0.4242, + "num_input_tokens_seen": 90555888, + "step": 524 + }, + { + "epoch": 0.2006893910379165, + "loss": 0.45546627044677734, + "loss_ce": 0.016867617145180702, + "loss_iou": 1.0269975662231445, + "loss_num": 0.439453125, + "loss_xval": 0.439453125, + "num_input_tokens_seen": 90555888, + "step": 524 + }, + { + "epoch": 0.20107238605898123, + "grad_norm": 35.65064728873406, + "learning_rate": 5e-06, + "loss": 0.3202, + "num_input_tokens_seen": 90729064, + "step": 525 + }, + { + "epoch": 0.20107238605898123, + "loss": 0.283124715089798, + "loss_ce": 0.01774386689066887, + "loss_iou": 1.0232046842575073, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 90729064, + "step": 525 + }, + { + "epoch": 0.20145538108004596, + "grad_norm": 23.407253873121075, + "learning_rate": 5e-06, + "loss": 0.4186, + "num_input_tokens_seen": 90901784, + "step": 526 + }, + { + "epoch": 0.20145538108004596, + "loss": 0.3516148328781128, + "loss_ce": 0.01604355499148369, + "loss_iou": 1.0675827264785767, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 90901784, + "step": 526 + }, + { + "epoch": 0.2018383761011107, + "grad_norm": 35.4392944721751, + "learning_rate": 5e-06, + "loss": 0.3056, + "num_input_tokens_seen": 91074584, + "step": 527 + }, + { + "epoch": 0.2018383761011107, + "loss": 0.2868385910987854, + "loss_ce": 0.019138380885124207, + "loss_iou": 1.0470924377441406, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 91074584, + "step": 527 + }, + { + "epoch": 0.20222137112217542, + "grad_norm": 42.69896005227747, + "learning_rate": 5e-06, + "loss": 0.4509, + "num_input_tokens_seen": 91247608, + "step": 528 + }, + { + "epoch": 0.20222137112217542, + "loss": 0.4155544340610504, + "loss_ce": 0.016994860023260117, + "loss_iou": 1.0653858184814453, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 91247608, + "step": 528 + }, + { + "epoch": 0.20260436614324015, + "grad_norm": 23.307668553653006, + "learning_rate": 5e-06, + "loss": 0.3109, + "num_input_tokens_seen": 91420448, + "step": 529 + }, + { + "epoch": 0.20260436614324015, + "loss": 0.3076122999191284, + "loss_ce": 0.016474585980176926, + "loss_iou": 1.058153748512268, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 91420448, + "step": 529 + }, + { + "epoch": 0.20298736116430485, + "grad_norm": 20.113420299178443, + "learning_rate": 5e-06, + "loss": 0.2891, + "num_input_tokens_seen": 91593160, + "step": 530 + }, + { + "epoch": 0.20298736116430485, + "loss": 0.2598230242729187, + "loss_ce": 0.0186731219291687, + "loss_iou": 1.0503168106079102, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 91593160, + "step": 530 + }, + { + "epoch": 0.20337035618536958, + "grad_norm": 37.42454102058793, + "learning_rate": 5e-06, + "loss": 0.2736, + "num_input_tokens_seen": 91766320, + "step": 531 + }, + { + "epoch": 0.20337035618536958, + "loss": 0.2909454107284546, + "loss_ce": 0.017874110490083694, + "loss_iou": 1.0835816860198975, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 91766320, + "step": 531 + }, + { + "epoch": 0.2037533512064343, + "grad_norm": 33.95476265655711, + "learning_rate": 5e-06, + "loss": 0.2945, + "num_input_tokens_seen": 91939392, + "step": 532 + }, + { + "epoch": 0.2037533512064343, + "loss": 0.3024279773235321, + "loss_ce": 0.017515864223241806, + "loss_iou": 1.0425469875335693, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 91939392, + "step": 532 + }, + { + "epoch": 0.20413634622749904, + "grad_norm": 22.947047229652018, + "learning_rate": 5e-06, + "loss": 0.2747, + "num_input_tokens_seen": 92112552, + "step": 533 + }, + { + "epoch": 0.20413634622749904, + "loss": 0.3030683100223541, + "loss_ce": 0.017057567834854126, + "loss_iou": 1.0824424028396606, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 92112552, + "step": 533 + }, + { + "epoch": 0.20451934124856377, + "grad_norm": 21.074821286054245, + "learning_rate": 5e-06, + "loss": 0.263, + "num_input_tokens_seen": 92285512, + "step": 534 + }, + { + "epoch": 0.20451934124856377, + "loss": 0.26460492610931396, + "loss_ce": 0.01814495027065277, + "loss_iou": 1.002181887626648, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 92285512, + "step": 534 + }, + { + "epoch": 0.2049023362696285, + "grad_norm": 29.429532237334737, + "learning_rate": 5e-06, + "loss": 0.234, + "num_input_tokens_seen": 92458944, + "step": 535 + }, + { + "epoch": 0.2049023362696285, + "loss": 0.2297022044658661, + "loss_ce": 0.018398497253656387, + "loss_iou": 1.032349705696106, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 92458944, + "step": 535 + }, + { + "epoch": 0.20528533129069323, + "grad_norm": 36.55545150405923, + "learning_rate": 5e-06, + "loss": 0.4114, + "num_input_tokens_seen": 92631848, + "step": 536 + }, + { + "epoch": 0.20528533129069323, + "loss": 0.3448227345943451, + "loss_ce": 0.017186010256409645, + "loss_iou": 1.0075856447219849, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 92631848, + "step": 536 + }, + { + "epoch": 0.20566832631175794, + "grad_norm": 26.7778727902466, + "learning_rate": 5e-06, + "loss": 0.2932, + "num_input_tokens_seen": 92805192, + "step": 537 + }, + { + "epoch": 0.20566832631175794, + "loss": 0.24786943197250366, + "loss_ce": 0.01770586520433426, + "loss_iou": 1.0099345445632935, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 92805192, + "step": 537 + }, + { + "epoch": 0.20605132133282267, + "grad_norm": 17.330358694147566, + "learning_rate": 5e-06, + "loss": 0.2875, + "num_input_tokens_seen": 92978328, + "step": 538 + }, + { + "epoch": 0.20605132133282267, + "loss": 0.25674402713775635, + "loss_ce": 0.01748622953891754, + "loss_iou": 1.0039182901382446, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 92978328, + "step": 538 + }, + { + "epoch": 0.2064343163538874, + "grad_norm": 29.656036286325826, + "learning_rate": 5e-06, + "loss": 0.3293, + "num_input_tokens_seen": 93151264, + "step": 539 + }, + { + "epoch": 0.2064343163538874, + "loss": 0.29664346575737, + "loss_ce": 0.017102457582950592, + "loss_iou": 1.0337765216827393, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 93151264, + "step": 539 + }, + { + "epoch": 0.20681731137495213, + "grad_norm": 35.01041250985919, + "learning_rate": 5e-06, + "loss": 0.382, + "num_input_tokens_seen": 93324280, + "step": 540 + }, + { + "epoch": 0.20681731137495213, + "loss": 0.3642435669898987, + "loss_ce": 0.017563890665769577, + "loss_iou": 1.0405396223068237, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 93324280, + "step": 540 + }, + { + "epoch": 0.20720030639601686, + "grad_norm": 29.514321748938745, + "learning_rate": 5e-06, + "loss": 0.3654, + "num_input_tokens_seen": 93493904, + "step": 541 + }, + { + "epoch": 0.20720030639601686, + "loss": 0.4098755121231079, + "loss_ce": 0.015893571078777313, + "loss_iou": 1.1384707689285278, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 93493904, + "step": 541 + }, + { + "epoch": 0.2075833014170816, + "grad_norm": 29.661642873200357, + "learning_rate": 5e-06, + "loss": 0.3565, + "num_input_tokens_seen": 93667240, + "step": 542 + }, + { + "epoch": 0.2075833014170816, + "loss": 0.3676626980304718, + "loss_ce": 0.01780918426811695, + "loss_iou": 1.025194764137268, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 93667240, + "step": 542 + }, + { + "epoch": 0.2079662964381463, + "grad_norm": 29.83369255589317, + "learning_rate": 5e-06, + "loss": 0.2658, + "num_input_tokens_seen": 93840192, + "step": 543 + }, + { + "epoch": 0.2079662964381463, + "loss": 0.28363192081451416, + "loss_ce": 0.016969311982393265, + "loss_iou": 1.3747169971466064, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 93840192, + "step": 543 + }, + { + "epoch": 0.20834929145921102, + "grad_norm": 35.945204353559014, + "learning_rate": 5e-06, + "loss": 0.4175, + "num_input_tokens_seen": 94013360, + "step": 544 + }, + { + "epoch": 0.20834929145921102, + "loss": 0.4123837947845459, + "loss_ce": 0.017364269122481346, + "loss_iou": 1.116492509841919, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 94013360, + "step": 544 + }, + { + "epoch": 0.20873228648027575, + "grad_norm": 18.211661675213367, + "learning_rate": 5e-06, + "loss": 0.2616, + "num_input_tokens_seen": 94186696, + "step": 545 + }, + { + "epoch": 0.20873228648027575, + "loss": 0.259822815656662, + "loss_ce": 0.018062567338347435, + "loss_iou": 1.042894721031189, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 94186696, + "step": 545 + }, + { + "epoch": 0.20911528150134048, + "grad_norm": 23.812323088726416, + "learning_rate": 5e-06, + "loss": 0.3155, + "num_input_tokens_seen": 94359896, + "step": 546 + }, + { + "epoch": 0.20911528150134048, + "loss": 0.348230242729187, + "loss_ce": 0.017907964065670967, + "loss_iou": 1.0230144262313843, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 94359896, + "step": 546 + }, + { + "epoch": 0.2094982765224052, + "grad_norm": 32.28595258584155, + "learning_rate": 5e-06, + "loss": 0.3224, + "num_input_tokens_seen": 94532536, + "step": 547 + }, + { + "epoch": 0.2094982765224052, + "loss": 0.3262408375740051, + "loss_ce": 0.018745729699730873, + "loss_iou": 1.0238080024719238, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 94532536, + "step": 547 + }, + { + "epoch": 0.20988127154346994, + "grad_norm": 37.37970388133116, + "learning_rate": 5e-06, + "loss": 0.2765, + "num_input_tokens_seen": 94705584, + "step": 548 + }, + { + "epoch": 0.20988127154346994, + "loss": 0.24450857937335968, + "loss_ce": 0.01819021813571453, + "loss_iou": 1.003622055053711, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 94705584, + "step": 548 + }, + { + "epoch": 0.21026426656453467, + "grad_norm": 21.037453351974072, + "learning_rate": 5e-06, + "loss": 0.3306, + "num_input_tokens_seen": 94878360, + "step": 549 + }, + { + "epoch": 0.21026426656453467, + "loss": 0.3464343845844269, + "loss_ce": 0.01647833362221718, + "loss_iou": 2.519430160522461, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 94878360, + "step": 549 + }, + { + "epoch": 0.21064726158559938, + "grad_norm": 28.012647090995817, + "learning_rate": 5e-06, + "loss": 0.4348, + "num_input_tokens_seen": 95051360, + "step": 550 + }, + { + "epoch": 0.21064726158559938, + "loss": 0.44148677587509155, + "loss_ce": 0.015095168724656105, + "loss_iou": 1.05269455909729, + "loss_num": 0.42578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 95051360, + "step": 550 + }, + { + "epoch": 0.2110302566066641, + "grad_norm": 30.856241730449078, + "learning_rate": 5e-06, + "loss": 0.2847, + "num_input_tokens_seen": 95224592, + "step": 551 + }, + { + "epoch": 0.2110302566066641, + "loss": 0.2634372115135193, + "loss_ce": 0.018320009112358093, + "loss_iou": 1.0076944828033447, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 95224592, + "step": 551 + }, + { + "epoch": 0.21141325162772884, + "grad_norm": 48.2047248051334, + "learning_rate": 5e-06, + "loss": 0.4344, + "num_input_tokens_seen": 95398152, + "step": 552 + }, + { + "epoch": 0.21141325162772884, + "loss": 0.4549892544746399, + "loss_ce": 0.017977558076381683, + "loss_iou": 1.051296591758728, + "loss_num": 0.4375, + "loss_xval": 0.4375, + "num_input_tokens_seen": 95398152, + "step": 552 + }, + { + "epoch": 0.21179624664879357, + "grad_norm": 31.21265694216636, + "learning_rate": 5e-06, + "loss": 0.2824, + "num_input_tokens_seen": 95570656, + "step": 553 + }, + { + "epoch": 0.21179624664879357, + "loss": 0.307553231716156, + "loss_ce": 0.017880387604236603, + "loss_iou": 1.0418583154678345, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 95570656, + "step": 553 + }, + { + "epoch": 0.2121792416698583, + "grad_norm": 20.617360425417267, + "learning_rate": 5e-06, + "loss": 0.3892, + "num_input_tokens_seen": 95743560, + "step": 554 + }, + { + "epoch": 0.2121792416698583, + "loss": 0.4240306615829468, + "loss_ce": 0.017658598721027374, + "loss_iou": 1.1064558029174805, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 95743560, + "step": 554 + }, + { + "epoch": 0.21256223669092303, + "grad_norm": 17.537898251891384, + "learning_rate": 5e-06, + "loss": 0.2828, + "num_input_tokens_seen": 95916504, + "step": 555 + }, + { + "epoch": 0.21256223669092303, + "loss": 0.284464567899704, + "loss_ce": 0.017130576074123383, + "loss_iou": 1.0634559392929077, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 95916504, + "step": 555 + }, + { + "epoch": 0.21294523171198773, + "grad_norm": 29.95540871787839, + "learning_rate": 5e-06, + "loss": 0.3323, + "num_input_tokens_seen": 96089144, + "step": 556 + }, + { + "epoch": 0.21294523171198773, + "loss": 0.3734918236732483, + "loss_ce": 0.0176568403840065, + "loss_iou": 1.041034460067749, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 96089144, + "step": 556 + }, + { + "epoch": 0.21332822673305246, + "grad_norm": 26.88730291109121, + "learning_rate": 5e-06, + "loss": 0.3439, + "num_input_tokens_seen": 96262456, + "step": 557 + }, + { + "epoch": 0.21332822673305246, + "loss": 0.34446585178375244, + "loss_ce": 0.016462940722703934, + "loss_iou": 1.0123193264007568, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 96262456, + "step": 557 + }, + { + "epoch": 0.2137112217541172, + "grad_norm": 20.921247055091808, + "learning_rate": 5e-06, + "loss": 0.3568, + "num_input_tokens_seen": 96435336, + "step": 558 + }, + { + "epoch": 0.2137112217541172, + "loss": 0.3399205207824707, + "loss_ce": 0.016312118619680405, + "loss_iou": 1.1698265075683594, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 96435336, + "step": 558 + }, + { + "epoch": 0.21409421677518192, + "grad_norm": 35.29154381337728, + "learning_rate": 5e-06, + "loss": 0.1949, + "num_input_tokens_seen": 96604584, + "step": 559 + }, + { + "epoch": 0.21409421677518192, + "loss": 0.19075900316238403, + "loss_ce": 0.01717502251267433, + "loss_iou": 1.0107824802398682, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 96604584, + "step": 559 + }, + { + "epoch": 0.21447721179624665, + "grad_norm": 34.094667343184106, + "learning_rate": 5e-06, + "loss": 0.3641, + "num_input_tokens_seen": 96777680, + "step": 560 + }, + { + "epoch": 0.21447721179624665, + "loss": 0.3789745569229126, + "loss_ce": 0.017280228435993195, + "loss_iou": 1.017249345779419, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 96777680, + "step": 560 + }, + { + "epoch": 0.21486020681731138, + "grad_norm": 20.445151200248425, + "learning_rate": 5e-06, + "loss": 0.2588, + "num_input_tokens_seen": 96950376, + "step": 561 + }, + { + "epoch": 0.21486020681731138, + "loss": 0.2581597566604614, + "loss_ce": 0.01816953346133232, + "loss_iou": 1.057889699935913, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 96950376, + "step": 561 + }, + { + "epoch": 0.2152432018383761, + "grad_norm": 28.099325370464555, + "learning_rate": 5e-06, + "loss": 0.3773, + "num_input_tokens_seen": 97123608, + "step": 562 + }, + { + "epoch": 0.2152432018383761, + "loss": 0.34945181012153625, + "loss_ce": 0.018885396420955658, + "loss_iou": 1.0347191095352173, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 97123608, + "step": 562 + }, + { + "epoch": 0.21562619685944082, + "grad_norm": 31.315819000716427, + "learning_rate": 5e-06, + "loss": 0.2773, + "num_input_tokens_seen": 97296712, + "step": 563 + }, + { + "epoch": 0.21562619685944082, + "loss": 0.24266687035560608, + "loss_ce": 0.01708093471825123, + "loss_iou": 0.8753404021263123, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 97296712, + "step": 563 + }, + { + "epoch": 0.21600919188050555, + "grad_norm": 36.60482388882205, + "learning_rate": 5e-06, + "loss": 0.2586, + "num_input_tokens_seen": 97469784, + "step": 564 + }, + { + "epoch": 0.21600919188050555, + "loss": 0.2576292157173157, + "loss_ce": 0.018493477255105972, + "loss_iou": 1.0151938199996948, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 97469784, + "step": 564 + }, + { + "epoch": 0.21639218690157028, + "grad_norm": 28.37729372949139, + "learning_rate": 5e-06, + "loss": 0.313, + "num_input_tokens_seen": 97642760, + "step": 565 + }, + { + "epoch": 0.21639218690157028, + "loss": 0.3322356343269348, + "loss_ce": 0.01692803017795086, + "loss_iou": 1.0117013454437256, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 97642760, + "step": 565 + }, + { + "epoch": 0.216775181922635, + "grad_norm": 22.15718872563187, + "learning_rate": 5e-06, + "loss": 0.3209, + "num_input_tokens_seen": 97815760, + "step": 566 + }, + { + "epoch": 0.216775181922635, + "loss": 0.32263487577438354, + "loss_ce": 0.017337003722786903, + "loss_iou": 1.0038520097732544, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 97815760, + "step": 566 + }, + { + "epoch": 0.21715817694369974, + "grad_norm": 36.35316914881609, + "learning_rate": 5e-06, + "loss": 0.3312, + "num_input_tokens_seen": 97988544, + "step": 567 + }, + { + "epoch": 0.21715817694369974, + "loss": 0.38007500767707825, + "loss_ce": 0.017892412841320038, + "loss_iou": 1.1017529964447021, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 97988544, + "step": 567 + }, + { + "epoch": 0.21754117196476447, + "grad_norm": 25.120149700187525, + "learning_rate": 5e-06, + "loss": 0.2921, + "num_input_tokens_seen": 98157904, + "step": 568 + }, + { + "epoch": 0.21754117196476447, + "loss": 0.2887548804283142, + "loss_ce": 0.017148425802588463, + "loss_iou": 1.0177134275436401, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 98157904, + "step": 568 + }, + { + "epoch": 0.21792416698582917, + "grad_norm": 45.31235160902148, + "learning_rate": 5e-06, + "loss": 0.3036, + "num_input_tokens_seen": 98330504, + "step": 569 + }, + { + "epoch": 0.21792416698582917, + "loss": 0.33930450677871704, + "loss_ce": 0.01752716302871704, + "loss_iou": 1.066689133644104, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 98330504, + "step": 569 + }, + { + "epoch": 0.2183071620068939, + "grad_norm": 27.820946939019912, + "learning_rate": 5e-06, + "loss": 0.4104, + "num_input_tokens_seen": 98503352, + "step": 570 + }, + { + "epoch": 0.2183071620068939, + "loss": 0.4757930636405945, + "loss_ce": 0.017541078850626945, + "loss_iou": 1.1921957731246948, + "loss_num": 0.458984375, + "loss_xval": 0.458984375, + "num_input_tokens_seen": 98503352, + "step": 570 + }, + { + "epoch": 0.21869015702795863, + "grad_norm": 26.298287161026817, + "learning_rate": 5e-06, + "loss": 0.2843, + "num_input_tokens_seen": 98676928, + "step": 571 + }, + { + "epoch": 0.21869015702795863, + "loss": 0.2664155960083008, + "loss_ce": 0.017514213919639587, + "loss_iou": 1.0240974426269531, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 98676928, + "step": 571 + }, + { + "epoch": 0.21907315204902336, + "grad_norm": 22.399804899954194, + "learning_rate": 5e-06, + "loss": 0.4406, + "num_input_tokens_seen": 98849736, + "step": 572 + }, + { + "epoch": 0.21907315204902336, + "loss": 0.48206478357315063, + "loss_ce": 0.01673276349902153, + "loss_iou": 1.0492788553237915, + "loss_num": 0.46484375, + "loss_xval": 0.46484375, + "num_input_tokens_seen": 98849736, + "step": 572 + }, + { + "epoch": 0.2194561470700881, + "grad_norm": 14.489176092642333, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 99022592, + "step": 573 + }, + { + "epoch": 0.2194561470700881, + "loss": 0.2183997631072998, + "loss_ce": 0.01747203804552555, + "loss_iou": 1.0570449829101562, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 99022592, + "step": 573 + }, + { + "epoch": 0.21983914209115282, + "grad_norm": 26.137905967601572, + "learning_rate": 5e-06, + "loss": 0.309, + "num_input_tokens_seen": 99195680, + "step": 574 + }, + { + "epoch": 0.21983914209115282, + "loss": 0.37402984499931335, + "loss_ce": 0.017950735986232758, + "loss_iou": 1.2666800022125244, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 99195680, + "step": 574 + }, + { + "epoch": 0.22022213711221755, + "grad_norm": 25.845565662558503, + "learning_rate": 5e-06, + "loss": 0.3863, + "num_input_tokens_seen": 99368472, + "step": 575 + }, + { + "epoch": 0.22022213711221755, + "loss": 0.35913804173469543, + "loss_ce": 0.017463242635130882, + "loss_iou": 1.1200839281082153, + "loss_num": 0.341796875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 99368472, + "step": 575 + }, + { + "epoch": 0.22060513213328226, + "grad_norm": 22.702387579466556, + "learning_rate": 5e-06, + "loss": 0.3197, + "num_input_tokens_seen": 99541504, + "step": 576 + }, + { + "epoch": 0.22060513213328226, + "loss": 0.3078612983226776, + "loss_ce": 0.018310517072677612, + "loss_iou": 1.1169304847717285, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 99541504, + "step": 576 + }, + { + "epoch": 0.22098812715434699, + "grad_norm": 31.942708726688444, + "learning_rate": 5e-06, + "loss": 0.321, + "num_input_tokens_seen": 99714288, + "step": 577 + }, + { + "epoch": 0.22098812715434699, + "loss": 0.32894328236579895, + "loss_ce": 0.018152287229895592, + "loss_iou": 1.015682578086853, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 99714288, + "step": 577 + }, + { + "epoch": 0.22137112217541172, + "grad_norm": 42.0077178120743, + "learning_rate": 5e-06, + "loss": 0.2944, + "num_input_tokens_seen": 99887304, + "step": 578 + }, + { + "epoch": 0.22137112217541172, + "loss": 0.3271752595901489, + "loss_ce": 0.017482876777648926, + "loss_iou": 1.07436203956604, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 99887304, + "step": 578 + }, + { + "epoch": 0.22175411719647645, + "grad_norm": 30.29576574211237, + "learning_rate": 5e-06, + "loss": 0.3389, + "num_input_tokens_seen": 100060096, + "step": 579 + }, + { + "epoch": 0.22175411719647645, + "loss": 0.3681296110153198, + "loss_ce": 0.01668919436633587, + "loss_iou": 1.1137621402740479, + "loss_num": 0.3515625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 100060096, + "step": 579 + }, + { + "epoch": 0.22213711221754118, + "grad_norm": 42.32461320236619, + "learning_rate": 5e-06, + "loss": 0.3718, + "num_input_tokens_seen": 100232920, + "step": 580 + }, + { + "epoch": 0.22213711221754118, + "loss": 0.33574941754341125, + "loss_ce": 0.018244553357362747, + "loss_iou": 1.0672136545181274, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 100232920, + "step": 580 + }, + { + "epoch": 0.2225201072386059, + "grad_norm": 32.82938766636816, + "learning_rate": 5e-06, + "loss": 0.2831, + "num_input_tokens_seen": 100405672, + "step": 581 + }, + { + "epoch": 0.2225201072386059, + "loss": 0.2923923432826996, + "loss_ce": 0.017245858907699585, + "loss_iou": 1.0123100280761719, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 100405672, + "step": 581 + }, + { + "epoch": 0.22290310225967064, + "grad_norm": 45.72324256936807, + "learning_rate": 5e-06, + "loss": 0.3289, + "num_input_tokens_seen": 100575224, + "step": 582 + }, + { + "epoch": 0.22290310225967064, + "loss": 0.33408015966415405, + "loss_ce": 0.018040090799331665, + "loss_iou": 1.0105905532836914, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 100575224, + "step": 582 + }, + { + "epoch": 0.22328609728073534, + "grad_norm": 27.20456589802419, + "learning_rate": 5e-06, + "loss": 0.3253, + "num_input_tokens_seen": 100747824, + "step": 583 + }, + { + "epoch": 0.22328609728073534, + "loss": 0.30941587686538696, + "loss_ce": 0.01632506772875786, + "loss_iou": 1.0055162906646729, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 100747824, + "step": 583 + }, + { + "epoch": 0.22366909230180007, + "grad_norm": 36.605260599158925, + "learning_rate": 5e-06, + "loss": 0.3869, + "num_input_tokens_seen": 100920728, + "step": 584 + }, + { + "epoch": 0.22366909230180007, + "loss": 0.34552252292633057, + "loss_ce": 0.017763737589120865, + "loss_iou": 1.0450878143310547, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 100920728, + "step": 584 + }, + { + "epoch": 0.2240520873228648, + "grad_norm": 41.24873530783497, + "learning_rate": 5e-06, + "loss": 0.3717, + "num_input_tokens_seen": 101093472, + "step": 585 + }, + { + "epoch": 0.2240520873228648, + "loss": 0.3666747212409973, + "loss_ce": 0.016699114814400673, + "loss_iou": 1.1095600128173828, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 101093472, + "step": 585 + }, + { + "epoch": 0.22443508234392953, + "grad_norm": 53.24347873920217, + "learning_rate": 5e-06, + "loss": 0.5541, + "num_input_tokens_seen": 101266568, + "step": 586 + }, + { + "epoch": 0.22443508234392953, + "loss": 0.5650396347045898, + "loss_ce": 0.01718807965517044, + "loss_iou": 1.0786261558532715, + "loss_num": 0.546875, + "loss_xval": 0.546875, + "num_input_tokens_seen": 101266568, + "step": 586 + }, + { + "epoch": 0.22481807736499426, + "grad_norm": 25.188296835415493, + "learning_rate": 5e-06, + "loss": 0.4724, + "num_input_tokens_seen": 101439928, + "step": 587 + }, + { + "epoch": 0.22481807736499426, + "loss": 0.4406231641769409, + "loss_ce": 0.01752748340368271, + "loss_iou": 1.0750447511672974, + "loss_num": 0.423828125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 101439928, + "step": 587 + }, + { + "epoch": 0.225201072386059, + "grad_norm": 33.085510112399156, + "learning_rate": 5e-06, + "loss": 0.5682, + "num_input_tokens_seen": 101613016, + "step": 588 + }, + { + "epoch": 0.225201072386059, + "loss": 0.5826241970062256, + "loss_ce": 0.01817106083035469, + "loss_iou": 1.0221844911575317, + "loss_num": 0.5625, + "loss_xval": 0.5625, + "num_input_tokens_seen": 101613016, + "step": 588 + }, + { + "epoch": 0.2255840674071237, + "grad_norm": 68.14967060371285, + "learning_rate": 5e-06, + "loss": 0.4348, + "num_input_tokens_seen": 101786296, + "step": 589 + }, + { + "epoch": 0.2255840674071237, + "loss": 0.4092572331428528, + "loss_ce": 0.016923263669013977, + "loss_iou": 1.0072649717330933, + "loss_num": 0.392578125, + "loss_xval": 0.392578125, + "num_input_tokens_seen": 101786296, + "step": 589 + }, + { + "epoch": 0.22596706242818843, + "grad_norm": 58.539577797948795, + "learning_rate": 5e-06, + "loss": 0.9328, + "num_input_tokens_seen": 101958720, + "step": 590 + }, + { + "epoch": 0.22596706242818843, + "loss": 0.9331662654876709, + "loss_ce": 0.01715068519115448, + "loss_iou": 1.5227229595184326, + "loss_num": 0.9140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 101958720, + "step": 590 + }, + { + "epoch": 0.22635005744925316, + "grad_norm": 29.474031494480535, + "learning_rate": 5e-06, + "loss": 0.5005, + "num_input_tokens_seen": 102131728, + "step": 591 + }, + { + "epoch": 0.22635005744925316, + "loss": 0.4739513397216797, + "loss_ce": 0.018384922295808792, + "loss_iou": 1.0300683975219727, + "loss_num": 0.455078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 102131728, + "step": 591 + }, + { + "epoch": 0.2267330524703179, + "grad_norm": 38.89440333259645, + "learning_rate": 5e-06, + "loss": 0.6574, + "num_input_tokens_seen": 102304592, + "step": 592 + }, + { + "epoch": 0.2267330524703179, + "loss": 0.640261709690094, + "loss_ce": 0.0186796635389328, + "loss_iou": 1.0453568696975708, + "loss_num": 0.62109375, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 102304592, + "step": 592 + }, + { + "epoch": 0.22711604749138262, + "grad_norm": 41.35986263831221, + "learning_rate": 5e-06, + "loss": 0.4902, + "num_input_tokens_seen": 102477688, + "step": 593 + }, + { + "epoch": 0.22711604749138262, + "loss": 0.5372838973999023, + "loss_ce": 0.01921752840280533, + "loss_iou": 1.0303370952606201, + "loss_num": 0.51953125, + "loss_xval": 0.51953125, + "num_input_tokens_seen": 102477688, + "step": 593 + }, + { + "epoch": 0.22749904251244735, + "grad_norm": 37.08661584549743, + "learning_rate": 5e-06, + "loss": 0.5568, + "num_input_tokens_seen": 102650608, + "step": 594 + }, + { + "epoch": 0.22749904251244735, + "loss": 0.5767726898193359, + "loss_ce": 0.0179347712546587, + "loss_iou": 1.0251212120056152, + "loss_num": 0.55859375, + "loss_xval": 0.55859375, + "num_input_tokens_seen": 102650608, + "step": 594 + }, + { + "epoch": 0.22788203753351208, + "grad_norm": 16.649004377658006, + "learning_rate": 5e-06, + "loss": 0.3834, + "num_input_tokens_seen": 102823688, + "step": 595 + }, + { + "epoch": 0.22788203753351208, + "loss": 0.44109010696411133, + "loss_ce": 0.017872339114546776, + "loss_iou": 1.0665128231048584, + "loss_num": 0.423828125, + "loss_xval": 0.423828125, + "num_input_tokens_seen": 102823688, + "step": 595 + }, + { + "epoch": 0.22826503255457678, + "grad_norm": 16.37457310632788, + "learning_rate": 5e-06, + "loss": 0.2507, + "num_input_tokens_seen": 102996344, + "step": 596 + }, + { + "epoch": 0.22826503255457678, + "loss": 0.23729053139686584, + "loss_ce": 0.018662605434656143, + "loss_iou": 1.0226633548736572, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 102996344, + "step": 596 + }, + { + "epoch": 0.2286480275756415, + "grad_norm": 21.53348319267111, + "learning_rate": 5e-06, + "loss": 0.4048, + "num_input_tokens_seen": 103168896, + "step": 597 + }, + { + "epoch": 0.2286480275756415, + "loss": 0.39744117856025696, + "loss_ce": 0.018534936010837555, + "loss_iou": 1.139091968536377, + "loss_num": 0.37890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 103168896, + "step": 597 + }, + { + "epoch": 0.22903102259670624, + "grad_norm": 24.527654157341697, + "learning_rate": 5e-06, + "loss": 0.2627, + "num_input_tokens_seen": 103341832, + "step": 598 + }, + { + "epoch": 0.22903102259670624, + "loss": 0.31931132078170776, + "loss_ce": 0.018407993018627167, + "loss_iou": 1.044081687927246, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 103341832, + "step": 598 + }, + { + "epoch": 0.22941401761777097, + "grad_norm": 25.416895779872892, + "learning_rate": 5e-06, + "loss": 0.3839, + "num_input_tokens_seen": 103514584, + "step": 599 + }, + { + "epoch": 0.22941401761777097, + "loss": 0.4270893633365631, + "loss_ce": 0.018031757324934006, + "loss_iou": 1.1228041648864746, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 103514584, + "step": 599 + }, + { + "epoch": 0.2297970126388357, + "grad_norm": 25.281019377135085, + "learning_rate": 5e-06, + "loss": 0.2995, + "num_input_tokens_seen": 103687432, + "step": 600 + }, + { + "epoch": 0.2297970126388357, + "loss": 0.27492815256118774, + "loss_ce": 0.01723773032426834, + "loss_iou": 1.144974708557129, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 103687432, + "step": 600 + }, + { + "epoch": 0.23018000765990043, + "grad_norm": 26.228245945257914, + "learning_rate": 5e-06, + "loss": 0.3585, + "num_input_tokens_seen": 103856976, + "step": 601 + }, + { + "epoch": 0.23018000765990043, + "loss": 0.3667566180229187, + "loss_ce": 0.019344517961144447, + "loss_iou": 1.0777132511138916, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 103856976, + "step": 601 + }, + { + "epoch": 0.23056300268096513, + "grad_norm": 41.11046425286124, + "learning_rate": 5e-06, + "loss": 0.2708, + "num_input_tokens_seen": 104029936, + "step": 602 + }, + { + "epoch": 0.23056300268096513, + "loss": 0.22370639443397522, + "loss_ce": 0.018323099240660667, + "loss_iou": 1.0170555114746094, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 104029936, + "step": 602 + }, + { + "epoch": 0.23094599770202986, + "grad_norm": 41.591703374258714, + "learning_rate": 5e-06, + "loss": 0.3554, + "num_input_tokens_seen": 104202896, + "step": 603 + }, + { + "epoch": 0.23094599770202986, + "loss": 0.38865795731544495, + "loss_ce": 0.017564216628670692, + "loss_iou": 1.0423808097839355, + "loss_num": 0.37109375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 104202896, + "step": 603 + }, + { + "epoch": 0.2313289927230946, + "grad_norm": 30.252928274985575, + "learning_rate": 5e-06, + "loss": 0.2503, + "num_input_tokens_seen": 104376000, + "step": 604 + }, + { + "epoch": 0.2313289927230946, + "loss": 0.2540701627731323, + "loss_ce": 0.018352385610342026, + "loss_iou": 1.1092627048492432, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 104376000, + "step": 604 + }, + { + "epoch": 0.23171198774415933, + "grad_norm": 28.653239475474283, + "learning_rate": 5e-06, + "loss": 0.2782, + "num_input_tokens_seen": 104548936, + "step": 605 + }, + { + "epoch": 0.23171198774415933, + "loss": 0.284741073846817, + "loss_ce": 0.019116077572107315, + "loss_iou": 1.1011557579040527, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 104548936, + "step": 605 + }, + { + "epoch": 0.23209498276522406, + "grad_norm": 30.154135033335002, + "learning_rate": 5e-06, + "loss": 0.2832, + "num_input_tokens_seen": 104721440, + "step": 606 + }, + { + "epoch": 0.23209498276522406, + "loss": 0.3105369806289673, + "loss_ce": 0.017690293490886688, + "loss_iou": 1.0124354362487793, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 104721440, + "step": 606 + }, + { + "epoch": 0.2324779777862888, + "grad_norm": 27.52309361013761, + "learning_rate": 5e-06, + "loss": 0.3772, + "num_input_tokens_seen": 104894456, + "step": 607 + }, + { + "epoch": 0.2324779777862888, + "loss": 0.413565456867218, + "loss_ce": 0.018179725855588913, + "loss_iou": 1.1098564863204956, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 104894456, + "step": 607 + }, + { + "epoch": 0.23286097280735352, + "grad_norm": 39.99339674434345, + "learning_rate": 5e-06, + "loss": 0.3057, + "num_input_tokens_seen": 105067744, + "step": 608 + }, + { + "epoch": 0.23286097280735352, + "loss": 0.30731672048568726, + "loss_ce": 0.018010085448622704, + "loss_iou": 1.0802106857299805, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 105067744, + "step": 608 + }, + { + "epoch": 0.23324396782841822, + "grad_norm": 33.07776467900004, + "learning_rate": 5e-06, + "loss": 0.34, + "num_input_tokens_seen": 105240768, + "step": 609 + }, + { + "epoch": 0.23324396782841822, + "loss": 0.42627090215682983, + "loss_ce": 0.018800202757120132, + "loss_iou": 1.2617712020874023, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 105240768, + "step": 609 + }, + { + "epoch": 0.23362696284948295, + "grad_norm": 27.82244250656852, + "learning_rate": 5e-06, + "loss": 0.2098, + "num_input_tokens_seen": 105414232, + "step": 610 + }, + { + "epoch": 0.23362696284948295, + "loss": 0.2339729517698288, + "loss_ce": 0.0188850536942482, + "loss_iou": 1.027437686920166, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 105414232, + "step": 610 + }, + { + "epoch": 0.23400995787054768, + "grad_norm": 28.753221166597708, + "learning_rate": 5e-06, + "loss": 0.2854, + "num_input_tokens_seen": 105587168, + "step": 611 + }, + { + "epoch": 0.23400995787054768, + "loss": 0.31163489818573, + "loss_ce": 0.017201315611600876, + "loss_iou": 1.035597562789917, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 105587168, + "step": 611 + }, + { + "epoch": 0.2343929528916124, + "grad_norm": 30.02519181773402, + "learning_rate": 5e-06, + "loss": 0.2157, + "num_input_tokens_seen": 105760128, + "step": 612 + }, + { + "epoch": 0.2343929528916124, + "loss": 0.18196620047092438, + "loss_ce": 0.018941298127174377, + "loss_iou": 1.0053131580352783, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 105760128, + "step": 612 + }, + { + "epoch": 0.23477594791267714, + "grad_norm": 27.270895360600054, + "learning_rate": 5e-06, + "loss": 0.2809, + "num_input_tokens_seen": 105933064, + "step": 613 + }, + { + "epoch": 0.23477594791267714, + "loss": 0.2941964864730835, + "loss_ce": 0.018073420971632004, + "loss_iou": 1.1391850709915161, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 105933064, + "step": 613 + }, + { + "epoch": 0.23515894293374187, + "grad_norm": 32.75790616386231, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 106106080, + "step": 614 + }, + { + "epoch": 0.23515894293374187, + "loss": 0.2357349693775177, + "loss_ce": 0.018022585660219193, + "loss_iou": 1.0676966905593872, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 106106080, + "step": 614 + }, + { + "epoch": 0.23554193795480657, + "grad_norm": 35.36961784872434, + "learning_rate": 5e-06, + "loss": 0.2747, + "num_input_tokens_seen": 106278944, + "step": 615 + }, + { + "epoch": 0.23554193795480657, + "loss": 0.22893795371055603, + "loss_ce": 0.018122535198926926, + "loss_iou": 1.0038223266601562, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 106278944, + "step": 615 + }, + { + "epoch": 0.2359249329758713, + "grad_norm": 31.206872306799447, + "learning_rate": 5e-06, + "loss": 0.2448, + "num_input_tokens_seen": 106451968, + "step": 616 + }, + { + "epoch": 0.2359249329758713, + "loss": 0.2548496127128601, + "loss_ce": 0.017422843724489212, + "loss_iou": 1.0527714490890503, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 106451968, + "step": 616 + }, + { + "epoch": 0.23630792799693603, + "grad_norm": 24.20033365261073, + "learning_rate": 5e-06, + "loss": 0.2879, + "num_input_tokens_seen": 106625288, + "step": 617 + }, + { + "epoch": 0.23630792799693603, + "loss": 0.2967357635498047, + "loss_ce": 0.018537502735853195, + "loss_iou": 1.0783098936080933, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 106625288, + "step": 617 + }, + { + "epoch": 0.23669092301800077, + "grad_norm": 23.365551448031216, + "learning_rate": 5e-06, + "loss": 0.2135, + "num_input_tokens_seen": 106797944, + "step": 618 + }, + { + "epoch": 0.23669092301800077, + "loss": 0.20067939162254333, + "loss_ce": 0.017329787835478783, + "loss_iou": 1.0012857913970947, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 106797944, + "step": 618 + }, + { + "epoch": 0.2370739180390655, + "grad_norm": 22.931989760098126, + "learning_rate": 5e-06, + "loss": 0.3273, + "num_input_tokens_seen": 106970904, + "step": 619 + }, + { + "epoch": 0.2370739180390655, + "loss": 0.3136942386627197, + "loss_ce": 0.017795825377106667, + "loss_iou": 1.0238518714904785, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 106970904, + "step": 619 + }, + { + "epoch": 0.23745691306013023, + "grad_norm": 28.813060915646872, + "learning_rate": 5e-06, + "loss": 0.2295, + "num_input_tokens_seen": 107144096, + "step": 620 + }, + { + "epoch": 0.23745691306013023, + "loss": 0.2313339114189148, + "loss_ce": 0.0186874121427536, + "loss_iou": 1.010434627532959, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 107144096, + "step": 620 + }, + { + "epoch": 0.23783990808119496, + "grad_norm": 25.08585719487234, + "learning_rate": 5e-06, + "loss": 0.3269, + "num_input_tokens_seen": 107316976, + "step": 621 + }, + { + "epoch": 0.23783990808119496, + "loss": 0.3422039747238159, + "loss_ce": 0.01798522099852562, + "loss_iou": 1.014849305152893, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 107316976, + "step": 621 + }, + { + "epoch": 0.23822290310225966, + "grad_norm": 30.021678511783954, + "learning_rate": 5e-06, + "loss": 0.2982, + "num_input_tokens_seen": 107489816, + "step": 622 + }, + { + "epoch": 0.23822290310225966, + "loss": 0.30236726999282837, + "loss_ce": 0.016600700095295906, + "loss_iou": 1.0681586265563965, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 107489816, + "step": 622 + }, + { + "epoch": 0.2386058981233244, + "grad_norm": 36.570672047053655, + "learning_rate": 5e-06, + "loss": 0.2883, + "num_input_tokens_seen": 107663056, + "step": 623 + }, + { + "epoch": 0.2386058981233244, + "loss": 0.2441415935754776, + "loss_ce": 0.0194101482629776, + "loss_iou": 1.016437292098999, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 107663056, + "step": 623 + }, + { + "epoch": 0.23898889314438912, + "grad_norm": 26.877624242395285, + "learning_rate": 5e-06, + "loss": 0.2423, + "num_input_tokens_seen": 107836160, + "step": 624 + }, + { + "epoch": 0.23898889314438912, + "loss": 0.21540260314941406, + "loss_ce": 0.01789284497499466, + "loss_iou": 1.008943796157837, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 107836160, + "step": 624 + }, + { + "epoch": 0.23937188816545385, + "grad_norm": 25.400953958313494, + "learning_rate": 5e-06, + "loss": 0.3771, + "num_input_tokens_seen": 108009496, + "step": 625 + }, + { + "epoch": 0.23937188816545385, + "loss": 0.42162907123565674, + "loss_ce": 0.019529465585947037, + "loss_iou": 1.0877376794815063, + "loss_num": 0.40234375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 108009496, + "step": 625 + }, + { + "epoch": 0.23975488318651858, + "grad_norm": 19.350539524761597, + "learning_rate": 5e-06, + "loss": 0.2422, + "num_input_tokens_seen": 108182224, + "step": 626 + }, + { + "epoch": 0.23975488318651858, + "loss": 0.19275686144828796, + "loss_ce": 0.018196308985352516, + "loss_iou": 1.022761583328247, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 108182224, + "step": 626 + }, + { + "epoch": 0.2401378782075833, + "grad_norm": 40.621139366261346, + "learning_rate": 5e-06, + "loss": 0.3272, + "num_input_tokens_seen": 108355280, + "step": 627 + }, + { + "epoch": 0.2401378782075833, + "loss": 0.38383835554122925, + "loss_ce": 0.01909228041768074, + "loss_iou": 1.0367767810821533, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 108355280, + "step": 627 + }, + { + "epoch": 0.24052087322864804, + "grad_norm": 32.69516719464062, + "learning_rate": 5e-06, + "loss": 0.3202, + "num_input_tokens_seen": 108524448, + "step": 628 + }, + { + "epoch": 0.24052087322864804, + "loss": 0.3452039957046509, + "loss_ce": 0.017811425030231476, + "loss_iou": 1.049802303314209, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 108524448, + "step": 628 + }, + { + "epoch": 0.24090386824971274, + "grad_norm": 21.751452752988556, + "learning_rate": 5e-06, + "loss": 0.2627, + "num_input_tokens_seen": 108697320, + "step": 629 + }, + { + "epoch": 0.24090386824971274, + "loss": 0.2546536922454834, + "loss_ce": 0.019057990983128548, + "loss_iou": 1.059018850326538, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 108697320, + "step": 629 + }, + { + "epoch": 0.24128686327077747, + "grad_norm": 29.944794638579904, + "learning_rate": 5e-06, + "loss": 0.3938, + "num_input_tokens_seen": 108869976, + "step": 630 + }, + { + "epoch": 0.24128686327077747, + "loss": 0.4710794985294342, + "loss_ce": 0.018076570704579353, + "loss_iou": 1.235339641571045, + "loss_num": 0.453125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 108869976, + "step": 630 + }, + { + "epoch": 0.2416698582918422, + "grad_norm": 32.84861963867553, + "learning_rate": 5e-06, + "loss": 0.2684, + "num_input_tokens_seen": 109043056, + "step": 631 + }, + { + "epoch": 0.2416698582918422, + "loss": 0.2961300015449524, + "loss_ce": 0.020251117646694183, + "loss_iou": 1.0154446363449097, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 109043056, + "step": 631 + }, + { + "epoch": 0.24205285331290693, + "grad_norm": 37.01900979423141, + "learning_rate": 5e-06, + "loss": 0.3095, + "num_input_tokens_seen": 109215736, + "step": 632 + }, + { + "epoch": 0.24205285331290693, + "loss": 0.3424670100212097, + "loss_ce": 0.01745479181408882, + "loss_iou": 1.0393264293670654, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 109215736, + "step": 632 + }, + { + "epoch": 0.24243584833397167, + "grad_norm": 34.48651243305176, + "learning_rate": 5e-06, + "loss": 0.3381, + "num_input_tokens_seen": 109388664, + "step": 633 + }, + { + "epoch": 0.24243584833397167, + "loss": 0.32500481605529785, + "loss_ce": 0.018120069056749344, + "loss_iou": 1.0546152591705322, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 109388664, + "step": 633 + }, + { + "epoch": 0.2428188433550364, + "grad_norm": 41.596791970752896, + "learning_rate": 5e-06, + "loss": 0.2942, + "num_input_tokens_seen": 109561584, + "step": 634 + }, + { + "epoch": 0.2428188433550364, + "loss": 0.31500840187072754, + "loss_ce": 0.01776721328496933, + "loss_iou": 1.0234500169754028, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 109561584, + "step": 634 + }, + { + "epoch": 0.2432018383761011, + "grad_norm": 36.33209925241172, + "learning_rate": 5e-06, + "loss": 0.2624, + "num_input_tokens_seen": 109734984, + "step": 635 + }, + { + "epoch": 0.2432018383761011, + "loss": 0.27469685673713684, + "loss_ce": 0.01847127452492714, + "loss_iou": 1.02968168258667, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 109734984, + "step": 635 + }, + { + "epoch": 0.24358483339716583, + "grad_norm": 40.307370612902695, + "learning_rate": 5e-06, + "loss": 0.2789, + "num_input_tokens_seen": 109907896, + "step": 636 + }, + { + "epoch": 0.24358483339716583, + "loss": 0.294486403465271, + "loss_ce": 0.01787509024143219, + "loss_iou": 1.0207655429840088, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 109907896, + "step": 636 + }, + { + "epoch": 0.24396782841823056, + "grad_norm": 32.377875645670116, + "learning_rate": 5e-06, + "loss": 0.3075, + "num_input_tokens_seen": 110081008, + "step": 637 + }, + { + "epoch": 0.24396782841823056, + "loss": 0.3352136015892029, + "loss_ce": 0.018196996301412582, + "loss_iou": 1.7230377197265625, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 110081008, + "step": 637 + }, + { + "epoch": 0.2443508234392953, + "grad_norm": 31.849941337676345, + "learning_rate": 5e-06, + "loss": 0.2119, + "num_input_tokens_seen": 110254528, + "step": 638 + }, + { + "epoch": 0.2443508234392953, + "loss": 0.19193212687969208, + "loss_ce": 0.018165042623877525, + "loss_iou": 0.9950222969055176, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 110254528, + "step": 638 + }, + { + "epoch": 0.24473381846036002, + "grad_norm": 39.34174564072891, + "learning_rate": 5e-06, + "loss": 0.3409, + "num_input_tokens_seen": 110427536, + "step": 639 + }, + { + "epoch": 0.24473381846036002, + "loss": 0.3895414471626282, + "loss_ce": 0.018325595185160637, + "loss_iou": 1.1946970224380493, + "loss_num": 0.37109375, + "loss_xval": 0.37109375, + "num_input_tokens_seen": 110427536, + "step": 639 + }, + { + "epoch": 0.24511681348142475, + "grad_norm": 29.075125285500715, + "learning_rate": 5e-06, + "loss": 0.2193, + "num_input_tokens_seen": 110600384, + "step": 640 + }, + { + "epoch": 0.24511681348142475, + "loss": 0.21013250946998596, + "loss_ce": 0.018848339095711708, + "loss_iou": 1.004418134689331, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 110600384, + "step": 640 + }, + { + "epoch": 0.24549980850248948, + "grad_norm": 32.01044314154312, + "learning_rate": 5e-06, + "loss": 0.3213, + "num_input_tokens_seen": 110773248, + "step": 641 + }, + { + "epoch": 0.24549980850248948, + "loss": 0.35537731647491455, + "loss_ce": 0.018097028136253357, + "loss_iou": 1.1918798685073853, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 110773248, + "step": 641 + }, + { + "epoch": 0.24588280352355418, + "grad_norm": 31.34468948038098, + "learning_rate": 5e-06, + "loss": 0.1919, + "num_input_tokens_seen": 110946488, + "step": 642 + }, + { + "epoch": 0.24588280352355418, + "loss": 0.19471794366836548, + "loss_ce": 0.01979117840528488, + "loss_iou": 1.0147979259490967, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 110946488, + "step": 642 + }, + { + "epoch": 0.2462657985446189, + "grad_norm": 36.15331658548177, + "learning_rate": 5e-06, + "loss": 0.2914, + "num_input_tokens_seen": 111119800, + "step": 643 + }, + { + "epoch": 0.2462657985446189, + "loss": 0.25731784105300903, + "loss_ce": 0.01769382134079933, + "loss_iou": 1.0024409294128418, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 111119800, + "step": 643 + }, + { + "epoch": 0.24664879356568364, + "grad_norm": 30.42264469721433, + "learning_rate": 5e-06, + "loss": 0.3181, + "num_input_tokens_seen": 111293128, + "step": 644 + }, + { + "epoch": 0.24664879356568364, + "loss": 0.32497987151145935, + "loss_ce": 0.01821719855070114, + "loss_iou": 1.0627901554107666, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 111293128, + "step": 644 + }, + { + "epoch": 0.24703178858674837, + "grad_norm": 26.060083123614064, + "learning_rate": 5e-06, + "loss": 0.2957, + "num_input_tokens_seen": 111465896, + "step": 645 + }, + { + "epoch": 0.24703178858674837, + "loss": 0.3195127844810486, + "loss_ce": 0.018121197819709778, + "loss_iou": 1.0172330141067505, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 111465896, + "step": 645 + }, + { + "epoch": 0.2474147836078131, + "grad_norm": 17.570419821790672, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 111639200, + "step": 646 + }, + { + "epoch": 0.2474147836078131, + "loss": 0.19249209761619568, + "loss_ce": 0.018847081810235977, + "loss_iou": 1.0150803327560425, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 111639200, + "step": 646 + }, + { + "epoch": 0.24779777862887784, + "grad_norm": 32.037862727923766, + "learning_rate": 5e-06, + "loss": 0.3627, + "num_input_tokens_seen": 111812008, + "step": 647 + }, + { + "epoch": 0.24779777862887784, + "loss": 0.30420738458633423, + "loss_ce": 0.01892906054854393, + "loss_iou": 1.00568687915802, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 111812008, + "step": 647 + }, + { + "epoch": 0.24818077364994254, + "grad_norm": 26.462807343965828, + "learning_rate": 5e-06, + "loss": 0.3076, + "num_input_tokens_seen": 111984760, + "step": 648 + }, + { + "epoch": 0.24818077364994254, + "loss": 0.3766970932483673, + "loss_ce": 0.017200034111738205, + "loss_iou": 1.0598634481430054, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 111984760, + "step": 648 + }, + { + "epoch": 0.24856376867100727, + "grad_norm": 24.18097412725299, + "learning_rate": 5e-06, + "loss": 0.2638, + "num_input_tokens_seen": 112157824, + "step": 649 + }, + { + "epoch": 0.24856376867100727, + "loss": 0.245339497923851, + "loss_ce": 0.019265277311205864, + "loss_iou": 1.0034204721450806, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 112157824, + "step": 649 + }, + { + "epoch": 0.248946763692072, + "grad_norm": 26.809266064922014, + "learning_rate": 5e-06, + "loss": 0.3174, + "num_input_tokens_seen": 112330656, + "step": 650 + }, + { + "epoch": 0.248946763692072, + "loss": 0.37160152196884155, + "loss_ce": 0.018330035731196404, + "loss_iou": 1.0580711364746094, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 112330656, + "step": 650 + }, + { + "epoch": 0.24932975871313673, + "grad_norm": 43.82264882663346, + "learning_rate": 5e-06, + "loss": 0.3323, + "num_input_tokens_seen": 112503760, + "step": 651 + }, + { + "epoch": 0.24932975871313673, + "loss": 0.350696861743927, + "loss_ce": 0.018787670880556107, + "loss_iou": 1.0264992713928223, + "loss_num": 0.33203125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 112503760, + "step": 651 + }, + { + "epoch": 0.24971275373420146, + "grad_norm": 34.4973935234838, + "learning_rate": 5e-06, + "loss": 0.2624, + "num_input_tokens_seen": 112677072, + "step": 652 + }, + { + "epoch": 0.24971275373420146, + "loss": 0.2623905539512634, + "loss_ce": 0.017639588564634323, + "loss_iou": 0.9752378463745117, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 112677072, + "step": 652 + }, + { + "epoch": 0.2500957487552662, + "grad_norm": 44.05348458776447, + "learning_rate": 5e-06, + "loss": 0.4013, + "num_input_tokens_seen": 112850512, + "step": 653 + }, + { + "epoch": 0.2500957487552662, + "loss": 0.4051348567008972, + "loss_ce": 0.01902644708752632, + "loss_iou": 1.0465726852416992, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 112850512, + "step": 653 + }, + { + "epoch": 0.2504787437763309, + "grad_norm": 14.702397870918162, + "learning_rate": 5e-06, + "loss": 0.209, + "num_input_tokens_seen": 113023680, + "step": 654 + }, + { + "epoch": 0.2504787437763309, + "loss": 0.2019750326871872, + "loss_ce": 0.018625423312187195, + "loss_iou": 1.009512186050415, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 113023680, + "step": 654 + }, + { + "epoch": 0.25086173879739565, + "grad_norm": 12.47110250304027, + "learning_rate": 5e-06, + "loss": 0.2805, + "num_input_tokens_seen": 113196272, + "step": 655 + }, + { + "epoch": 0.25086173879739565, + "loss": 0.24651247262954712, + "loss_ce": 0.018363066017627716, + "loss_iou": 1.0092339515686035, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 113196272, + "step": 655 + }, + { + "epoch": 0.25124473381846035, + "grad_norm": 24.930653773392503, + "learning_rate": 5e-06, + "loss": 0.294, + "num_input_tokens_seen": 113369432, + "step": 656 + }, + { + "epoch": 0.25124473381846035, + "loss": 0.3188003897666931, + "loss_ce": 0.01789705827832222, + "loss_iou": 1.006160020828247, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 113369432, + "step": 656 + }, + { + "epoch": 0.2516277288395251, + "grad_norm": 44.84435832027603, + "learning_rate": 5e-06, + "loss": 0.2957, + "num_input_tokens_seen": 113542424, + "step": 657 + }, + { + "epoch": 0.2516277288395251, + "loss": 0.3221891522407532, + "loss_ce": 0.019454795867204666, + "loss_iou": 1.0263134241104126, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 113542424, + "step": 657 + }, + { + "epoch": 0.2520107238605898, + "grad_norm": 51.24750214164252, + "learning_rate": 5e-06, + "loss": 0.3477, + "num_input_tokens_seen": 113715368, + "step": 658 + }, + { + "epoch": 0.2520107238605898, + "loss": 0.39300888776779175, + "loss_ce": 0.017886806279420853, + "loss_iou": 1.098710536956787, + "loss_num": 0.375, + "loss_xval": 0.375, + "num_input_tokens_seen": 113715368, + "step": 658 + }, + { + "epoch": 0.2523937188816545, + "grad_norm": 19.62696854079827, + "learning_rate": 5e-06, + "loss": 0.303, + "num_input_tokens_seen": 113888304, + "step": 659 + }, + { + "epoch": 0.2523937188816545, + "loss": 0.2846168577671051, + "loss_ce": 0.0204567052423954, + "loss_iou": 1.025336503982544, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 113888304, + "step": 659 + }, + { + "epoch": 0.2527767139027193, + "grad_norm": 25.6402536421642, + "learning_rate": 5e-06, + "loss": 0.2787, + "num_input_tokens_seen": 114061288, + "step": 660 + }, + { + "epoch": 0.2527767139027193, + "loss": 0.29709354043006897, + "loss_ce": 0.01730838418006897, + "loss_iou": 1.0613741874694824, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 114061288, + "step": 660 + }, + { + "epoch": 0.253159708923784, + "grad_norm": 31.309285262466563, + "learning_rate": 5e-06, + "loss": 0.314, + "num_input_tokens_seen": 114234264, + "step": 661 + }, + { + "epoch": 0.253159708923784, + "loss": 0.34084004163742065, + "loss_ce": 0.018574416637420654, + "loss_iou": 1.0365395545959473, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 114234264, + "step": 661 + }, + { + "epoch": 0.25354270394484874, + "grad_norm": 28.455632602900838, + "learning_rate": 5e-06, + "loss": 0.2957, + "num_input_tokens_seen": 114407216, + "step": 662 + }, + { + "epoch": 0.25354270394484874, + "loss": 0.3027651011943817, + "loss_ce": 0.018097132444381714, + "loss_iou": 1.0344314575195312, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 114407216, + "step": 662 + }, + { + "epoch": 0.25392569896591344, + "grad_norm": 20.7908543989196, + "learning_rate": 5e-06, + "loss": 0.3477, + "num_input_tokens_seen": 114579936, + "step": 663 + }, + { + "epoch": 0.25392569896591344, + "loss": 0.3468393087387085, + "loss_ce": 0.018470175564289093, + "loss_iou": 1.138266921043396, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 114579936, + "step": 663 + }, + { + "epoch": 0.2543086939869782, + "grad_norm": 29.421606992895377, + "learning_rate": 5e-06, + "loss": 0.3197, + "num_input_tokens_seen": 114752832, + "step": 664 + }, + { + "epoch": 0.2543086939869782, + "loss": 0.3257991075515747, + "loss_ce": 0.019036393612623215, + "loss_iou": 1.0928757190704346, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 114752832, + "step": 664 + }, + { + "epoch": 0.2546916890080429, + "grad_norm": 36.50931809186374, + "learning_rate": 5e-06, + "loss": 0.3505, + "num_input_tokens_seen": 114922392, + "step": 665 + }, + { + "epoch": 0.2546916890080429, + "loss": 0.36523759365081787, + "loss_ce": 0.019900668412446976, + "loss_iou": 1.0270261764526367, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 114922392, + "step": 665 + }, + { + "epoch": 0.2550746840291076, + "grad_norm": 29.69985389568634, + "learning_rate": 5e-06, + "loss": 0.3025, + "num_input_tokens_seen": 115095088, + "step": 666 + }, + { + "epoch": 0.2550746840291076, + "loss": 0.2568197548389435, + "loss_ce": 0.016890564933419228, + "loss_iou": 1.021826982498169, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 115095088, + "step": 666 + }, + { + "epoch": 0.25545767905017236, + "grad_norm": 34.42256022799013, + "learning_rate": 5e-06, + "loss": 0.2657, + "num_input_tokens_seen": 115268008, + "step": 667 + }, + { + "epoch": 0.25545767905017236, + "loss": 0.25539347529411316, + "loss_ce": 0.01876017078757286, + "loss_iou": 1.009194254875183, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 115268008, + "step": 667 + }, + { + "epoch": 0.25584067407123706, + "grad_norm": 46.15606482411902, + "learning_rate": 5e-06, + "loss": 0.3934, + "num_input_tokens_seen": 115441136, + "step": 668 + }, + { + "epoch": 0.25584067407123706, + "loss": 0.4060421586036682, + "loss_ce": 0.01798064261674881, + "loss_iou": 1.1865606307983398, + "loss_num": 0.388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 115441136, + "step": 668 + }, + { + "epoch": 0.2562236690923018, + "grad_norm": 37.288022716329095, + "learning_rate": 5e-06, + "loss": 0.2792, + "num_input_tokens_seen": 115614112, + "step": 669 + }, + { + "epoch": 0.2562236690923018, + "loss": 0.2116086184978485, + "loss_ce": 0.017883025109767914, + "loss_iou": 1.0063433647155762, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 115614112, + "step": 669 + }, + { + "epoch": 0.2566066641133665, + "grad_norm": 37.04308286123183, + "learning_rate": 5e-06, + "loss": 0.3174, + "num_input_tokens_seen": 115787224, + "step": 670 + }, + { + "epoch": 0.2566066641133665, + "loss": 0.3867374658584595, + "loss_ce": 0.017352700233459473, + "loss_iou": 1.0728284120559692, + "loss_num": 0.369140625, + "loss_xval": 0.369140625, + "num_input_tokens_seen": 115787224, + "step": 670 + }, + { + "epoch": 0.2569896591344312, + "grad_norm": 36.98854574218791, + "learning_rate": 5e-06, + "loss": 0.2975, + "num_input_tokens_seen": 115960176, + "step": 671 + }, + { + "epoch": 0.2569896591344312, + "loss": 0.256584107875824, + "loss_ce": 0.01866905391216278, + "loss_iou": 1.019071102142334, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 115960176, + "step": 671 + }, + { + "epoch": 0.257372654155496, + "grad_norm": 38.27793314515744, + "learning_rate": 5e-06, + "loss": 0.2906, + "num_input_tokens_seen": 116132992, + "step": 672 + }, + { + "epoch": 0.257372654155496, + "loss": 0.26381030678749084, + "loss_ce": 0.018326912075281143, + "loss_iou": 1.0126886367797852, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 116132992, + "step": 672 + }, + { + "epoch": 0.2577556491765607, + "grad_norm": 34.093893551307985, + "learning_rate": 5e-06, + "loss": 0.4332, + "num_input_tokens_seen": 116306232, + "step": 673 + }, + { + "epoch": 0.2577556491765607, + "loss": 0.34484460949897766, + "loss_ce": 0.020503807812929153, + "loss_iou": 1.0959699153900146, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 116306232, + "step": 673 + }, + { + "epoch": 0.25813864419762544, + "grad_norm": 28.261585383287276, + "learning_rate": 5e-06, + "loss": 0.3172, + "num_input_tokens_seen": 116479264, + "step": 674 + }, + { + "epoch": 0.25813864419762544, + "loss": 0.3202231228351593, + "loss_ce": 0.018892554566264153, + "loss_iou": 1.0380526781082153, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 116479264, + "step": 674 + }, + { + "epoch": 0.25852163921869015, + "grad_norm": 42.29597171093613, + "learning_rate": 5e-06, + "loss": 0.2876, + "num_input_tokens_seen": 116652608, + "step": 675 + }, + { + "epoch": 0.25852163921869015, + "loss": 0.2780504822731018, + "loss_ce": 0.019627634435892105, + "loss_iou": 1.023488163948059, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 116652608, + "step": 675 + }, + { + "epoch": 0.2589046342397549, + "grad_norm": 39.768614094798515, + "learning_rate": 5e-06, + "loss": 0.3281, + "num_input_tokens_seen": 116825904, + "step": 676 + }, + { + "epoch": 0.2589046342397549, + "loss": 0.3559473156929016, + "loss_ce": 0.01878909207880497, + "loss_iou": 1.0924371480941772, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 116825904, + "step": 676 + }, + { + "epoch": 0.2592876292608196, + "grad_norm": 30.636294971818533, + "learning_rate": 5e-06, + "loss": 0.4043, + "num_input_tokens_seen": 116998536, + "step": 677 + }, + { + "epoch": 0.2592876292608196, + "loss": 0.32248398661613464, + "loss_ce": 0.018284771591424942, + "loss_iou": 1.0102880001068115, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 116998536, + "step": 677 + }, + { + "epoch": 0.2596706242818843, + "grad_norm": 28.465009096568117, + "learning_rate": 5e-06, + "loss": 0.2849, + "num_input_tokens_seen": 117171440, + "step": 678 + }, + { + "epoch": 0.2596706242818843, + "loss": 0.3148580491542816, + "loss_ce": 0.016762344166636467, + "loss_iou": 1.0402758121490479, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 117171440, + "step": 678 + }, + { + "epoch": 0.26005361930294907, + "grad_norm": 39.52164357894705, + "learning_rate": 5e-06, + "loss": 0.3934, + "num_input_tokens_seen": 117344528, + "step": 679 + }, + { + "epoch": 0.26005361930294907, + "loss": 0.39193177223205566, + "loss_ce": 0.018884912133216858, + "loss_iou": 1.0810632705688477, + "loss_num": 0.373046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 117344528, + "step": 679 + }, + { + "epoch": 0.26043661432401377, + "grad_norm": 36.19760168487612, + "learning_rate": 5e-06, + "loss": 0.271, + "num_input_tokens_seen": 117514016, + "step": 680 + }, + { + "epoch": 0.26043661432401377, + "loss": 0.2822980284690857, + "loss_ce": 0.018992377445101738, + "loss_iou": 1.0114190578460693, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 117514016, + "step": 680 + }, + { + "epoch": 0.26081960934507853, + "grad_norm": 28.301497706787487, + "learning_rate": 5e-06, + "loss": 0.38, + "num_input_tokens_seen": 117686864, + "step": 681 + }, + { + "epoch": 0.26081960934507853, + "loss": 0.3316190242767334, + "loss_ce": 0.01765417493879795, + "loss_iou": 1.019181251525879, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 117686864, + "step": 681 + }, + { + "epoch": 0.26120260436614323, + "grad_norm": 29.967158323525172, + "learning_rate": 5e-06, + "loss": 0.3462, + "num_input_tokens_seen": 117859824, + "step": 682 + }, + { + "epoch": 0.26120260436614323, + "loss": 0.35776767134666443, + "loss_ce": 0.01890048012137413, + "loss_iou": 1.0225400924682617, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 117859824, + "step": 682 + }, + { + "epoch": 0.261585599387208, + "grad_norm": 45.81503050852084, + "learning_rate": 5e-06, + "loss": 0.4081, + "num_input_tokens_seen": 118032872, + "step": 683 + }, + { + "epoch": 0.261585599387208, + "loss": 0.3803051710128784, + "loss_ce": 0.018732909113168716, + "loss_iou": 1.0778758525848389, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 118032872, + "step": 683 + }, + { + "epoch": 0.2619685944082727, + "grad_norm": 41.75583928817496, + "learning_rate": 5e-06, + "loss": 0.3874, + "num_input_tokens_seen": 118205704, + "step": 684 + }, + { + "epoch": 0.2619685944082727, + "loss": 0.36764055490493774, + "loss_ce": 0.016078053042292595, + "loss_iou": 1.0661917924880981, + "loss_num": 0.3515625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 118205704, + "step": 684 + }, + { + "epoch": 0.2623515894293374, + "grad_norm": 34.19024593736843, + "learning_rate": 5e-06, + "loss": 0.3418, + "num_input_tokens_seen": 118378656, + "step": 685 + }, + { + "epoch": 0.2623515894293374, + "loss": 0.3572046756744385, + "loss_ce": 0.018947839736938477, + "loss_iou": 1.0371094942092896, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 118378656, + "step": 685 + }, + { + "epoch": 0.26273458445040215, + "grad_norm": 32.897054384579484, + "learning_rate": 5e-06, + "loss": 0.4488, + "num_input_tokens_seen": 118551760, + "step": 686 + }, + { + "epoch": 0.26273458445040215, + "loss": 0.4040653109550476, + "loss_ce": 0.018078990280628204, + "loss_iou": 1.0203449726104736, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 118551760, + "step": 686 + }, + { + "epoch": 0.26311757947146686, + "grad_norm": 52.02144318734267, + "learning_rate": 5e-06, + "loss": 0.4311, + "num_input_tokens_seen": 118724664, + "step": 687 + }, + { + "epoch": 0.26311757947146686, + "loss": 0.42668282985687256, + "loss_ce": 0.018845928832888603, + "loss_iou": 1.1407008171081543, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 118724664, + "step": 687 + }, + { + "epoch": 0.2635005744925316, + "grad_norm": 51.556657707207776, + "learning_rate": 5e-06, + "loss": 0.4464, + "num_input_tokens_seen": 118897632, + "step": 688 + }, + { + "epoch": 0.2635005744925316, + "loss": 0.4025242328643799, + "loss_ce": 0.019223466515541077, + "loss_iou": 1.0114976167678833, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 118897632, + "step": 688 + }, + { + "epoch": 0.2638835695135963, + "grad_norm": 31.65263708218579, + "learning_rate": 5e-06, + "loss": 0.4013, + "num_input_tokens_seen": 119070440, + "step": 689 + }, + { + "epoch": 0.2638835695135963, + "loss": 0.40110763907432556, + "loss_ce": 0.01805099844932556, + "loss_iou": 1.0903512239456177, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 119070440, + "step": 689 + }, + { + "epoch": 0.2642665645346611, + "grad_norm": 36.34307834204635, + "learning_rate": 5e-06, + "loss": 0.2601, + "num_input_tokens_seen": 119243032, + "step": 690 + }, + { + "epoch": 0.2642665645346611, + "loss": 0.257997989654541, + "loss_ce": 0.018251921981573105, + "loss_iou": 1.0377609729766846, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 119243032, + "step": 690 + }, + { + "epoch": 0.2646495595557258, + "grad_norm": 47.70778236126298, + "learning_rate": 5e-06, + "loss": 0.4739, + "num_input_tokens_seen": 119416104, + "step": 691 + }, + { + "epoch": 0.2646495595557258, + "loss": 0.4721728563308716, + "loss_ce": 0.019536111503839493, + "loss_iou": 1.0260049104690552, + "loss_num": 0.453125, + "loss_xval": 0.453125, + "num_input_tokens_seen": 119416104, + "step": 691 + }, + { + "epoch": 0.2650325545767905, + "grad_norm": 29.64229602834131, + "learning_rate": 5e-06, + "loss": 0.4062, + "num_input_tokens_seen": 119589064, + "step": 692 + }, + { + "epoch": 0.2650325545767905, + "loss": 0.3716251254081726, + "loss_ce": 0.018292631953954697, + "loss_iou": 0.9940216541290283, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 119589064, + "step": 692 + }, + { + "epoch": 0.26541554959785524, + "grad_norm": 32.23904198008546, + "learning_rate": 5e-06, + "loss": 0.3232, + "num_input_tokens_seen": 119761960, + "step": 693 + }, + { + "epoch": 0.26541554959785524, + "loss": 0.2772480249404907, + "loss_ce": 0.018092773854732513, + "loss_iou": 1.0166699886322021, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 119761960, + "step": 693 + }, + { + "epoch": 0.26579854461891994, + "grad_norm": 31.76869273383469, + "learning_rate": 5e-06, + "loss": 0.258, + "num_input_tokens_seen": 119935400, + "step": 694 + }, + { + "epoch": 0.26579854461891994, + "loss": 0.22908979654312134, + "loss_ce": 0.017175735905766487, + "loss_iou": 1.0116028785705566, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 119935400, + "step": 694 + }, + { + "epoch": 0.2661815396399847, + "grad_norm": 26.302439294025707, + "learning_rate": 5e-06, + "loss": 0.2365, + "num_input_tokens_seen": 120108416, + "step": 695 + }, + { + "epoch": 0.2661815396399847, + "loss": 0.27466124296188354, + "loss_ce": 0.018435679376125336, + "loss_iou": 1.0388991832733154, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 120108416, + "step": 695 + }, + { + "epoch": 0.2665645346610494, + "grad_norm": 30.018702947426444, + "learning_rate": 5e-06, + "loss": 0.2747, + "num_input_tokens_seen": 120281088, + "step": 696 + }, + { + "epoch": 0.2665645346610494, + "loss": 0.2880048155784607, + "loss_ce": 0.018351484090089798, + "loss_iou": 1.003197431564331, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 120281088, + "step": 696 + }, + { + "epoch": 0.26694752968211416, + "grad_norm": 51.20962525091946, + "learning_rate": 5e-06, + "loss": 0.3086, + "num_input_tokens_seen": 120454352, + "step": 697 + }, + { + "epoch": 0.26694752968211416, + "loss": 0.26430341601371765, + "loss_ce": 0.02138349786400795, + "loss_iou": 1.0231826305389404, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 120454352, + "step": 697 + }, + { + "epoch": 0.26733052470317886, + "grad_norm": 32.316880168240395, + "learning_rate": 5e-06, + "loss": 0.2535, + "num_input_tokens_seen": 120627288, + "step": 698 + }, + { + "epoch": 0.26733052470317886, + "loss": 0.2823450267314911, + "loss_ce": 0.017818670719861984, + "loss_iou": 1.0081267356872559, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 120627288, + "step": 698 + }, + { + "epoch": 0.26771351972424356, + "grad_norm": 28.501295156169963, + "learning_rate": 5e-06, + "loss": 0.3048, + "num_input_tokens_seen": 120800696, + "step": 699 + }, + { + "epoch": 0.26771351972424356, + "loss": 0.2860148549079895, + "loss_ce": 0.019779490306973457, + "loss_iou": 1.0732073783874512, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 120800696, + "step": 699 + }, + { + "epoch": 0.2680965147453083, + "grad_norm": 38.19213868256434, + "learning_rate": 5e-06, + "loss": 0.3346, + "num_input_tokens_seen": 120973608, + "step": 700 + }, + { + "epoch": 0.2680965147453083, + "loss": 0.33420705795288086, + "loss_ce": 0.019143562763929367, + "loss_iou": 1.0907044410705566, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 120973608, + "step": 700 + }, + { + "epoch": 0.268479509766373, + "grad_norm": 25.42889342675567, + "learning_rate": 5e-06, + "loss": 0.4255, + "num_input_tokens_seen": 121146512, + "step": 701 + }, + { + "epoch": 0.268479509766373, + "loss": 0.4972996115684509, + "loss_ce": 0.017807435244321823, + "loss_iou": 1.0941216945648193, + "loss_num": 0.48046875, + "loss_xval": 0.48046875, + "num_input_tokens_seen": 121146512, + "step": 701 + }, + { + "epoch": 0.2688625047874378, + "grad_norm": 19.2893195257886, + "learning_rate": 5e-06, + "loss": 0.2672, + "num_input_tokens_seen": 121319352, + "step": 702 + }, + { + "epoch": 0.2688625047874378, + "loss": 0.22158023715019226, + "loss_ce": 0.019187651574611664, + "loss_iou": 1.0422577857971191, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 121319352, + "step": 702 + }, + { + "epoch": 0.2692454998085025, + "grad_norm": 25.817232074067945, + "learning_rate": 5e-06, + "loss": 0.3147, + "num_input_tokens_seen": 121492040, + "step": 703 + }, + { + "epoch": 0.2692454998085025, + "loss": 0.33373576402664185, + "loss_ce": 0.01842815801501274, + "loss_iou": 1.0259368419647217, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 121492040, + "step": 703 + }, + { + "epoch": 0.2696284948295672, + "grad_norm": 24.595472462666415, + "learning_rate": 5e-06, + "loss": 0.2248, + "num_input_tokens_seen": 121664760, + "step": 704 + }, + { + "epoch": 0.2696284948295672, + "loss": 0.2683330774307251, + "loss_ce": 0.0193096324801445, + "loss_iou": 1.021988868713379, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 121664760, + "step": 704 + }, + { + "epoch": 0.27001148985063195, + "grad_norm": 31.826092860934732, + "learning_rate": 5e-06, + "loss": 0.2983, + "num_input_tokens_seen": 121837976, + "step": 705 + }, + { + "epoch": 0.27001148985063195, + "loss": 0.3716413080692291, + "loss_ce": 0.019468463957309723, + "loss_iou": 1.185765027999878, + "loss_num": 0.3515625, + "loss_xval": 0.3515625, + "num_input_tokens_seen": 121837976, + "step": 705 + }, + { + "epoch": 0.27039448487169665, + "grad_norm": 27.515951260871756, + "learning_rate": 5e-06, + "loss": 0.2401, + "num_input_tokens_seen": 122010944, + "step": 706 + }, + { + "epoch": 0.27039448487169665, + "loss": 0.21705345809459686, + "loss_ce": 0.018994377925992012, + "loss_iou": 1.081105351448059, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 122010944, + "step": 706 + }, + { + "epoch": 0.2707774798927614, + "grad_norm": 20.71376535604538, + "learning_rate": 5e-06, + "loss": 0.3434, + "num_input_tokens_seen": 122184408, + "step": 707 + }, + { + "epoch": 0.2707774798927614, + "loss": 0.3039228916168213, + "loss_ce": 0.018766645342111588, + "loss_iou": 1.0301353931427002, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 122184408, + "step": 707 + }, + { + "epoch": 0.2711604749138261, + "grad_norm": 25.93834274200068, + "learning_rate": 5e-06, + "loss": 0.2279, + "num_input_tokens_seen": 122357696, + "step": 708 + }, + { + "epoch": 0.2711604749138261, + "loss": 0.18875019252300262, + "loss_ce": 0.01858418434858322, + "loss_iou": 1.0017107725143433, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 122357696, + "step": 708 + }, + { + "epoch": 0.27154346993489087, + "grad_norm": 29.21920944343291, + "learning_rate": 5e-06, + "loss": 0.2946, + "num_input_tokens_seen": 122531104, + "step": 709 + }, + { + "epoch": 0.27154346993489087, + "loss": 0.25699907541275024, + "loss_ce": 0.019938526675105095, + "loss_iou": 1.0130640268325806, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 122531104, + "step": 709 + }, + { + "epoch": 0.27192646495595557, + "grad_norm": 27.400755121419984, + "learning_rate": 5e-06, + "loss": 0.2769, + "num_input_tokens_seen": 122703288, + "step": 710 + }, + { + "epoch": 0.27192646495595557, + "loss": 0.30395543575286865, + "loss_ce": 0.018310904502868652, + "loss_iou": 1.0546436309814453, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 122703288, + "step": 710 + }, + { + "epoch": 0.2723094599770203, + "grad_norm": 28.7883081324073, + "learning_rate": 5e-06, + "loss": 0.2618, + "num_input_tokens_seen": 122876424, + "step": 711 + }, + { + "epoch": 0.2723094599770203, + "loss": 0.20747819542884827, + "loss_ce": 0.02022233046591282, + "loss_iou": 1.0015647411346436, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 122876424, + "step": 711 + }, + { + "epoch": 0.27269245499808503, + "grad_norm": 33.229815548894656, + "learning_rate": 5e-06, + "loss": 0.2744, + "num_input_tokens_seen": 123049448, + "step": 712 + }, + { + "epoch": 0.27269245499808503, + "loss": 0.23115840554237366, + "loss_ce": 0.019732624292373657, + "loss_iou": 1.0099749565124512, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 123049448, + "step": 712 + }, + { + "epoch": 0.27307545001914973, + "grad_norm": 28.783978451444586, + "learning_rate": 5e-06, + "loss": 0.2834, + "num_input_tokens_seen": 123221864, + "step": 713 + }, + { + "epoch": 0.27307545001914973, + "loss": 0.30808210372924805, + "loss_ce": 0.0177988912910223, + "loss_iou": 0.89212566614151, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 123221864, + "step": 713 + }, + { + "epoch": 0.2734584450402145, + "grad_norm": 20.492719049018284, + "learning_rate": 5e-06, + "loss": 0.288, + "num_input_tokens_seen": 123395040, + "step": 714 + }, + { + "epoch": 0.2734584450402145, + "loss": 0.26109039783477783, + "loss_ce": 0.01865876093506813, + "loss_iou": 1.037696123123169, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 123395040, + "step": 714 + }, + { + "epoch": 0.2738414400612792, + "grad_norm": 27.808046903025076, + "learning_rate": 5e-06, + "loss": 0.2707, + "num_input_tokens_seen": 123568200, + "step": 715 + }, + { + "epoch": 0.2738414400612792, + "loss": 0.24071624875068665, + "loss_ce": 0.018914487212896347, + "loss_iou": 1.0531505346298218, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 123568200, + "step": 715 + }, + { + "epoch": 0.27422443508234395, + "grad_norm": 27.648231101059913, + "learning_rate": 5e-06, + "loss": 0.3143, + "num_input_tokens_seen": 123740960, + "step": 716 + }, + { + "epoch": 0.27422443508234395, + "loss": 0.3292272090911865, + "loss_ce": 0.017703771591186523, + "loss_iou": 1.1081669330596924, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 123740960, + "step": 716 + }, + { + "epoch": 0.27460743010340866, + "grad_norm": 44.2006124857482, + "learning_rate": 5e-06, + "loss": 0.3903, + "num_input_tokens_seen": 123913648, + "step": 717 + }, + { + "epoch": 0.27460743010340866, + "loss": 0.3485841155052185, + "loss_ce": 0.019848760217428207, + "loss_iou": 1.073115348815918, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 123913648, + "step": 717 + }, + { + "epoch": 0.27499042512447336, + "grad_norm": 36.791535567538524, + "learning_rate": 5e-06, + "loss": 0.2921, + "num_input_tokens_seen": 124086576, + "step": 718 + }, + { + "epoch": 0.27499042512447336, + "loss": 0.3415762782096863, + "loss_ce": 0.020287182182073593, + "loss_iou": 1.0090223550796509, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 124086576, + "step": 718 + }, + { + "epoch": 0.2753734201455381, + "grad_norm": 25.83204726799368, + "learning_rate": 5e-06, + "loss": 0.3082, + "num_input_tokens_seen": 124259544, + "step": 719 + }, + { + "epoch": 0.2753734201455381, + "loss": 0.316400945186615, + "loss_ce": 0.018854565918445587, + "loss_iou": 1.0233391523361206, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 124259544, + "step": 719 + }, + { + "epoch": 0.2757564151666028, + "grad_norm": 39.520129299910394, + "learning_rate": 5e-06, + "loss": 0.2282, + "num_input_tokens_seen": 124432128, + "step": 720 + }, + { + "epoch": 0.2757564151666028, + "loss": 0.19742247462272644, + "loss_ce": 0.018772579729557037, + "loss_iou": 1.0239720344543457, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 124432128, + "step": 720 + }, + { + "epoch": 0.2761394101876676, + "grad_norm": 25.857038261453084, + "learning_rate": 5e-06, + "loss": 0.2858, + "num_input_tokens_seen": 124604608, + "step": 721 + }, + { + "epoch": 0.2761394101876676, + "loss": 0.3113729953765869, + "loss_ce": 0.01718352548778057, + "loss_iou": 1.0240404605865479, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 124604608, + "step": 721 + }, + { + "epoch": 0.2765224052087323, + "grad_norm": 25.072404068900614, + "learning_rate": 5e-06, + "loss": 0.2671, + "num_input_tokens_seen": 124777552, + "step": 722 + }, + { + "epoch": 0.2765224052087323, + "loss": 0.29605644941329956, + "loss_ce": 0.01920097880065441, + "loss_iou": 1.0405561923980713, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 124777552, + "step": 722 + }, + { + "epoch": 0.27690540022979704, + "grad_norm": 34.50832431840458, + "learning_rate": 5e-06, + "loss": 0.3078, + "num_input_tokens_seen": 124950512, + "step": 723 + }, + { + "epoch": 0.27690540022979704, + "loss": 0.32187792658805847, + "loss_ce": 0.017922839149832726, + "loss_iou": 1.0352263450622559, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 124950512, + "step": 723 + }, + { + "epoch": 0.27728839525086174, + "grad_norm": 25.935439943930284, + "learning_rate": 5e-06, + "loss": 0.3003, + "num_input_tokens_seen": 125123896, + "step": 724 + }, + { + "epoch": 0.27728839525086174, + "loss": 0.3337087631225586, + "loss_ce": 0.01925564929842949, + "loss_iou": 1.0178894996643066, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 125123896, + "step": 724 + }, + { + "epoch": 0.27767139027192644, + "grad_norm": 24.152246508505147, + "learning_rate": 5e-06, + "loss": 0.3073, + "num_input_tokens_seen": 125296896, + "step": 725 + }, + { + "epoch": 0.27767139027192644, + "loss": 0.2829008102416992, + "loss_ce": 0.018984809517860413, + "loss_iou": 1.0293943881988525, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 125296896, + "step": 725 + }, + { + "epoch": 0.2780543852929912, + "grad_norm": 18.329942555997228, + "learning_rate": 5e-06, + "loss": 0.2388, + "num_input_tokens_seen": 125469584, + "step": 726 + }, + { + "epoch": 0.2780543852929912, + "loss": 0.23900489509105682, + "loss_ce": 0.019156260415911674, + "loss_iou": 1.197575330734253, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 125469584, + "step": 726 + }, + { + "epoch": 0.2784373803140559, + "grad_norm": 21.43154241942784, + "learning_rate": 5e-06, + "loss": 0.3144, + "num_input_tokens_seen": 125642448, + "step": 727 + }, + { + "epoch": 0.2784373803140559, + "loss": 0.34105658531188965, + "loss_ce": 0.018913013860583305, + "loss_iou": 1.100663185119629, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 125642448, + "step": 727 + }, + { + "epoch": 0.27882037533512066, + "grad_norm": 27.170960597013888, + "learning_rate": 5e-06, + "loss": 0.3035, + "num_input_tokens_seen": 125815328, + "step": 728 + }, + { + "epoch": 0.27882037533512066, + "loss": 0.3259619474411011, + "loss_ce": 0.01773439720273018, + "loss_iou": 1.0813090801239014, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 125815328, + "step": 728 + }, + { + "epoch": 0.27920337035618537, + "grad_norm": 27.85329724527193, + "learning_rate": 5e-06, + "loss": 0.3797, + "num_input_tokens_seen": 125988392, + "step": 729 + }, + { + "epoch": 0.27920337035618537, + "loss": 0.4470553696155548, + "loss_ce": 0.01822238229215145, + "loss_iou": 1.1166222095489502, + "loss_num": 0.4296875, + "loss_xval": 0.4296875, + "num_input_tokens_seen": 125988392, + "step": 729 + }, + { + "epoch": 0.2795863653772501, + "grad_norm": 27.964041799621693, + "learning_rate": 5e-06, + "loss": 0.2793, + "num_input_tokens_seen": 126161264, + "step": 730 + }, + { + "epoch": 0.2795863653772501, + "loss": 0.3219105005264282, + "loss_ce": 0.01954232156276703, + "loss_iou": 1.0590667724609375, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 126161264, + "step": 730 + }, + { + "epoch": 0.2799693603983148, + "grad_norm": 27.277962054056832, + "learning_rate": 5e-06, + "loss": 0.3276, + "num_input_tokens_seen": 126334072, + "step": 731 + }, + { + "epoch": 0.2799693603983148, + "loss": 0.29830729961395264, + "loss_ce": 0.01864420250058174, + "loss_iou": 1.041162371635437, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 126334072, + "step": 731 + }, + { + "epoch": 0.28035235541937953, + "grad_norm": 29.51935930158991, + "learning_rate": 5e-06, + "loss": 0.2424, + "num_input_tokens_seen": 126507056, + "step": 732 + }, + { + "epoch": 0.28035235541937953, + "loss": 0.24611225724220276, + "loss_ce": 0.020038019865751266, + "loss_iou": 1.0084537267684937, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 126507056, + "step": 732 + }, + { + "epoch": 0.2807353504404443, + "grad_norm": 25.375005606094607, + "learning_rate": 5e-06, + "loss": 0.3309, + "num_input_tokens_seen": 126680400, + "step": 733 + }, + { + "epoch": 0.2807353504404443, + "loss": 0.3649820387363434, + "loss_ce": 0.018180258572101593, + "loss_iou": 1.1179325580596924, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 126680400, + "step": 733 + }, + { + "epoch": 0.281118345461509, + "grad_norm": 26.700877001288095, + "learning_rate": 5e-06, + "loss": 0.2212, + "num_input_tokens_seen": 126853240, + "step": 734 + }, + { + "epoch": 0.281118345461509, + "loss": 0.19777071475982666, + "loss_ce": 0.018693555146455765, + "loss_iou": 1.0232832431793213, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 126853240, + "step": 734 + }, + { + "epoch": 0.28150134048257375, + "grad_norm": 39.93003839780164, + "learning_rate": 5e-06, + "loss": 0.3455, + "num_input_tokens_seen": 127026200, + "step": 735 + }, + { + "epoch": 0.28150134048257375, + "loss": 0.3599078357219696, + "loss_ce": 0.019331656396389008, + "loss_iou": 1.0984218120574951, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 127026200, + "step": 735 + }, + { + "epoch": 0.28188433550363845, + "grad_norm": 29.79316016423221, + "learning_rate": 5e-06, + "loss": 0.2482, + "num_input_tokens_seen": 127199528, + "step": 736 + }, + { + "epoch": 0.28188433550363845, + "loss": 0.2514440417289734, + "loss_ce": 0.019022157415747643, + "loss_iou": 1.055418610572815, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 127199528, + "step": 736 + }, + { + "epoch": 0.28226733052470315, + "grad_norm": 26.06499652291949, + "learning_rate": 5e-06, + "loss": 0.2795, + "num_input_tokens_seen": 127372704, + "step": 737 + }, + { + "epoch": 0.28226733052470315, + "loss": 0.28856316208839417, + "loss_ce": 0.019276045262813568, + "loss_iou": 1.009185791015625, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 127372704, + "step": 737 + }, + { + "epoch": 0.2826503255457679, + "grad_norm": 15.524540017190272, + "learning_rate": 5e-06, + "loss": 0.26, + "num_input_tokens_seen": 127545696, + "step": 738 + }, + { + "epoch": 0.2826503255457679, + "loss": 0.21141722798347473, + "loss_ce": 0.01915648952126503, + "loss_iou": 1.0185959339141846, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 127545696, + "step": 738 + }, + { + "epoch": 0.2830333205668326, + "grad_norm": 27.915596055871134, + "learning_rate": 5e-06, + "loss": 0.2655, + "num_input_tokens_seen": 127718712, + "step": 739 + }, + { + "epoch": 0.2830333205668326, + "loss": 0.26364731788635254, + "loss_ce": 0.01779773086309433, + "loss_iou": 1.0782397985458374, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 127718712, + "step": 739 + }, + { + "epoch": 0.28341631558789737, + "grad_norm": 37.87314443952695, + "learning_rate": 5e-06, + "loss": 0.3398, + "num_input_tokens_seen": 127891736, + "step": 740 + }, + { + "epoch": 0.28341631558789737, + "loss": 0.28291386365890503, + "loss_ce": 0.01924198493361473, + "loss_iou": 1.0338754653930664, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 127891736, + "step": 740 + }, + { + "epoch": 0.2837993106089621, + "grad_norm": 31.10362567038928, + "learning_rate": 5e-06, + "loss": 0.3584, + "num_input_tokens_seen": 128065056, + "step": 741 + }, + { + "epoch": 0.2837993106089621, + "loss": 0.407207190990448, + "loss_ce": 0.018779434263706207, + "loss_iou": 1.0766929388046265, + "loss_num": 0.388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 128065056, + "step": 741 + }, + { + "epoch": 0.28418230563002683, + "grad_norm": 31.388919914621702, + "learning_rate": 5e-06, + "loss": 0.2627, + "num_input_tokens_seen": 128238128, + "step": 742 + }, + { + "epoch": 0.28418230563002683, + "loss": 0.3214864134788513, + "loss_ce": 0.01966756023466587, + "loss_iou": 1.1697998046875, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 128238128, + "step": 742 + }, + { + "epoch": 0.28456530065109154, + "grad_norm": 36.49353980146307, + "learning_rate": 5e-06, + "loss": 0.3421, + "num_input_tokens_seen": 128411368, + "step": 743 + }, + { + "epoch": 0.28456530065109154, + "loss": 0.3016979694366455, + "loss_ce": 0.01959346793591976, + "loss_iou": 1.0393989086151123, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 128411368, + "step": 743 + }, + { + "epoch": 0.28494829567215624, + "grad_norm": 31.74476571965495, + "learning_rate": 5e-06, + "loss": 0.26, + "num_input_tokens_seen": 128584456, + "step": 744 + }, + { + "epoch": 0.28494829567215624, + "loss": 0.268583744764328, + "loss_ce": 0.01919407956302166, + "loss_iou": 1.0217280387878418, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 128584456, + "step": 744 + }, + { + "epoch": 0.285331290693221, + "grad_norm": 25.740209441166378, + "learning_rate": 5e-06, + "loss": 0.3807, + "num_input_tokens_seen": 128756616, + "step": 745 + }, + { + "epoch": 0.285331290693221, + "loss": 0.47400274872779846, + "loss_ce": 0.018070142716169357, + "loss_iou": 1.2395962476730347, + "loss_num": 0.455078125, + "loss_xval": 0.455078125, + "num_input_tokens_seen": 128756616, + "step": 745 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 22.444852305414027, + "learning_rate": 5e-06, + "loss": 0.2382, + "num_input_tokens_seen": 128929456, + "step": 746 + }, + { + "epoch": 0.2857142857142857, + "loss": 0.24783599376678467, + "loss_ce": 0.02029692381620407, + "loss_iou": 1.0339313745498657, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 128929456, + "step": 746 + }, + { + "epoch": 0.28609728073535046, + "grad_norm": 31.30095918600143, + "learning_rate": 5e-06, + "loss": 0.2983, + "num_input_tokens_seen": 129102544, + "step": 747 + }, + { + "epoch": 0.28609728073535046, + "loss": 0.30754023790359497, + "loss_ce": 0.01835567131638527, + "loss_iou": 1.047607183456421, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 129102544, + "step": 747 + }, + { + "epoch": 0.28648027575641516, + "grad_norm": 35.098898699133656, + "learning_rate": 5e-06, + "loss": 0.3248, + "num_input_tokens_seen": 129275432, + "step": 748 + }, + { + "epoch": 0.28648027575641516, + "loss": 0.2544245719909668, + "loss_ce": 0.018340574577450752, + "loss_iou": 1.0043387413024902, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 129275432, + "step": 748 + }, + { + "epoch": 0.2868632707774799, + "grad_norm": 34.0628159275406, + "learning_rate": 5e-06, + "loss": 0.3995, + "num_input_tokens_seen": 129448312, + "step": 749 + }, + { + "epoch": 0.2868632707774799, + "loss": 0.37420105934143066, + "loss_ce": 0.018976427614688873, + "loss_iou": 1.2287843227386475, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 129448312, + "step": 749 + }, + { + "epoch": 0.2872462657985446, + "grad_norm": 28.776119314117416, + "learning_rate": 5e-06, + "loss": 0.2512, + "num_input_tokens_seen": 129620848, + "step": 750 + }, + { + "epoch": 0.2872462657985446, + "eval_websight_new_CIoU": 0.8060732185840607, + "eval_websight_new_GIoU": 0.8037431240081787, + "eval_websight_new_IoU": 0.8077706694602966, + "eval_websight_new_MAE_all": 0.02072321344166994, + "eval_websight_new_MAE_h": 0.01762983202934265, + "eval_websight_new_MAE_w": 0.02194000780582428, + "eval_websight_new_MAE_x": 0.020607253536581993, + "eval_websight_new_MAE_y": 0.02271575853228569, + "eval_websight_new_NUM_probability": 0.00020467791182454675, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.2679460048675537, + "eval_websight_new_loss_ce": 0.037874773144721985, + "eval_websight_new_loss_iou": 1.0019870400428772, + "eval_websight_new_loss_num": 0.221923828125, + "eval_websight_new_loss_xval": 0.221923828125, + "eval_websight_new_runtime": 55.4103, + "eval_websight_new_samples_per_second": 0.902, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 129620848, + "step": 750 + }, + { + "epoch": 0.2872462657985446, + "eval_seeclick_CIoU": 0.6069816946983337, + "eval_seeclick_GIoU": 0.6073736250400543, + "eval_seeclick_IoU": 0.6343135833740234, + "eval_seeclick_MAE_all": 0.06425964087247849, + "eval_seeclick_MAE_h": 0.046736148186028004, + "eval_seeclick_MAE_w": 0.08796555548906326, + "eval_seeclick_MAE_x": 0.07397038117051125, + "eval_seeclick_MAE_y": 0.04836645536124706, + "eval_seeclick_NUM_probability": 0.0002977608673973009, + "eval_seeclick_inside_bbox": 0.8975694477558136, + "eval_seeclick_loss": 0.5584555864334106, + "eval_seeclick_loss_ce": 0.029193194583058357, + "eval_seeclick_loss_iou": 1.1114333868026733, + "eval_seeclick_loss_num": 0.4971923828125, + "eval_seeclick_loss_xval": 0.4971923828125, + "eval_seeclick_runtime": 81.4777, + "eval_seeclick_samples_per_second": 0.614, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 129620848, + "step": 750 + }, + { + "epoch": 0.2872462657985446, + "eval_icons_CIoU": 0.7668884098529816, + "eval_icons_GIoU": 0.7602577805519104, + "eval_icons_IoU": 0.7727258205413818, + "eval_icons_MAE_all": 0.028916318900883198, + "eval_icons_MAE_h": 0.02443860098719597, + "eval_icons_MAE_w": 0.03288193140178919, + "eval_icons_MAE_x": 0.029078337363898754, + "eval_icons_MAE_y": 0.02926640771329403, + "eval_icons_NUM_probability": 0.00020542916900012642, + "eval_icons_inside_bbox": 0.9722222089767456, + "eval_icons_loss": 0.30261683464050293, + "eval_icons_loss_ce": 0.025362316519021988, + "eval_icons_loss_iou": 1.0172606706619263, + "eval_icons_loss_num": 0.27044677734375, + "eval_icons_loss_xval": 0.27044677734375, + "eval_icons_runtime": 82.4836, + "eval_icons_samples_per_second": 0.606, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 129620848, + "step": 750 + }, + { + "epoch": 0.2872462657985446, + "loss": 0.3204486072063446, + "loss_ce": 0.025404643267393112, + "loss_iou": 1.0329668521881104, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 129620848, + "step": 750 + }, + { + "epoch": 0.2876292608196093, + "grad_norm": 31.849347898039117, + "learning_rate": 5e-06, + "loss": 0.3413, + "num_input_tokens_seen": 129793888, + "step": 751 + }, + { + "epoch": 0.2876292608196093, + "loss": 0.3468954861164093, + "loss_ce": 0.0186484232544899, + "loss_iou": 1.0144870281219482, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 129793888, + "step": 751 + }, + { + "epoch": 0.2880122558406741, + "grad_norm": 37.12241440467024, + "learning_rate": 5e-06, + "loss": 0.2601, + "num_input_tokens_seen": 129966816, + "step": 752 + }, + { + "epoch": 0.2880122558406741, + "loss": 0.25784942507743835, + "loss_ce": 0.017126768827438354, + "loss_iou": 1.0310940742492676, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 129966816, + "step": 752 + }, + { + "epoch": 0.2883952508617388, + "grad_norm": 24.471411990030894, + "learning_rate": 5e-06, + "loss": 0.2579, + "num_input_tokens_seen": 130139432, + "step": 753 + }, + { + "epoch": 0.2883952508617388, + "loss": 0.27429860830307007, + "loss_ce": 0.017706789076328278, + "loss_iou": 1.004685401916504, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 130139432, + "step": 753 + }, + { + "epoch": 0.28877824588280354, + "grad_norm": 25.500871018993212, + "learning_rate": 5e-06, + "loss": 0.2831, + "num_input_tokens_seen": 130312408, + "step": 754 + }, + { + "epoch": 0.28877824588280354, + "loss": 0.241877943277359, + "loss_ce": 0.020137224346399307, + "loss_iou": 1.0630152225494385, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 130312408, + "step": 754 + }, + { + "epoch": 0.28916124090386824, + "grad_norm": 29.8151233620471, + "learning_rate": 5e-06, + "loss": 0.3143, + "num_input_tokens_seen": 130485576, + "step": 755 + }, + { + "epoch": 0.28916124090386824, + "loss": 0.30824464559555054, + "loss_ce": 0.018815917894244194, + "loss_iou": 1.1340521574020386, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 130485576, + "step": 755 + }, + { + "epoch": 0.289544235924933, + "grad_norm": 28.68845671130155, + "learning_rate": 5e-06, + "loss": 0.2707, + "num_input_tokens_seen": 130658200, + "step": 756 + }, + { + "epoch": 0.289544235924933, + "loss": 0.2569507956504822, + "loss_ce": 0.01903577148914337, + "loss_iou": 1.1343841552734375, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 130658200, + "step": 756 + }, + { + "epoch": 0.2899272309459977, + "grad_norm": 31.452580410051898, + "learning_rate": 5e-06, + "loss": 0.2497, + "num_input_tokens_seen": 130830656, + "step": 757 + }, + { + "epoch": 0.2899272309459977, + "loss": 0.25728702545166016, + "loss_ce": 0.018273361027240753, + "loss_iou": 0.8802897930145264, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 130830656, + "step": 757 + }, + { + "epoch": 0.2903102259670624, + "grad_norm": 18.518207113485683, + "learning_rate": 5e-06, + "loss": 0.2537, + "num_input_tokens_seen": 131003400, + "step": 758 + }, + { + "epoch": 0.2903102259670624, + "loss": 0.2733742296695709, + "loss_ce": 0.020322469994425774, + "loss_iou": 1.0106251239776611, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 131003400, + "step": 758 + }, + { + "epoch": 0.29069322098812717, + "grad_norm": 29.93569981388942, + "learning_rate": 5e-06, + "loss": 0.3701, + "num_input_tokens_seen": 131176440, + "step": 759 + }, + { + "epoch": 0.29069322098812717, + "loss": 0.3478788733482361, + "loss_ce": 0.020242154598236084, + "loss_iou": 1.3264551162719727, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 131176440, + "step": 759 + }, + { + "epoch": 0.29107621600919187, + "grad_norm": 34.59169012607738, + "learning_rate": 5e-06, + "loss": 0.3407, + "num_input_tokens_seen": 131349016, + "step": 760 + }, + { + "epoch": 0.29107621600919187, + "loss": 0.2980373203754425, + "loss_ce": 0.019045621156692505, + "loss_iou": 1.052343487739563, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 131349016, + "step": 760 + }, + { + "epoch": 0.2914592110302566, + "grad_norm": 29.391192734076682, + "learning_rate": 5e-06, + "loss": 0.2276, + "num_input_tokens_seen": 131521896, + "step": 761 + }, + { + "epoch": 0.2914592110302566, + "loss": 0.2416970133781433, + "loss_ce": 0.018308322876691818, + "loss_iou": 1.0112210512161255, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 131521896, + "step": 761 + }, + { + "epoch": 0.29184220605132133, + "grad_norm": 25.01338260168904, + "learning_rate": 5e-06, + "loss": 0.3266, + "num_input_tokens_seen": 131694616, + "step": 762 + }, + { + "epoch": 0.29184220605132133, + "loss": 0.3483281135559082, + "loss_ce": 0.018982430920004845, + "loss_iou": 1.1844432353973389, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 131694616, + "step": 762 + }, + { + "epoch": 0.29222520107238603, + "grad_norm": 24.561687064793826, + "learning_rate": 5e-06, + "loss": 0.2714, + "num_input_tokens_seen": 131867904, + "step": 763 + }, + { + "epoch": 0.29222520107238603, + "loss": 0.2406422197818756, + "loss_ce": 0.018596317619085312, + "loss_iou": 1.0086827278137207, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 131867904, + "step": 763 + }, + { + "epoch": 0.2926081960934508, + "grad_norm": 29.799083783300127, + "learning_rate": 5e-06, + "loss": 0.3068, + "num_input_tokens_seen": 132040896, + "step": 764 + }, + { + "epoch": 0.2926081960934508, + "loss": 0.26021048426628113, + "loss_ce": 0.019060572609305382, + "loss_iou": 1.0764524936676025, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 132040896, + "step": 764 + }, + { + "epoch": 0.2929911911145155, + "grad_norm": 40.476805309914155, + "learning_rate": 5e-06, + "loss": 0.3188, + "num_input_tokens_seen": 132213992, + "step": 765 + }, + { + "epoch": 0.2929911911145155, + "loss": 0.31016725301742554, + "loss_ce": 0.01768677681684494, + "loss_iou": 1.0198488235473633, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 132213992, + "step": 765 + }, + { + "epoch": 0.29337418613558025, + "grad_norm": 21.384278854128862, + "learning_rate": 5e-06, + "loss": 0.2179, + "num_input_tokens_seen": 132386728, + "step": 766 + }, + { + "epoch": 0.29337418613558025, + "loss": 0.25086313486099243, + "loss_ce": 0.019906092435121536, + "loss_iou": 1.0787184238433838, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 132386728, + "step": 766 + }, + { + "epoch": 0.29375718115664495, + "grad_norm": 22.781816524931717, + "learning_rate": 5e-06, + "loss": 0.3074, + "num_input_tokens_seen": 132559584, + "step": 767 + }, + { + "epoch": 0.29375718115664495, + "loss": 0.24777740240097046, + "loss_ce": 0.01913970522582531, + "loss_iou": 1.0119497776031494, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 132559584, + "step": 767 + }, + { + "epoch": 0.2941401761777097, + "grad_norm": 25.50660002640219, + "learning_rate": 5e-06, + "loss": 0.2968, + "num_input_tokens_seen": 132732688, + "step": 768 + }, + { + "epoch": 0.2941401761777097, + "loss": 0.2769160866737366, + "loss_ce": 0.017760805785655975, + "loss_iou": 0.9788704514503479, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 132732688, + "step": 768 + }, + { + "epoch": 0.2945231711987744, + "grad_norm": 27.785423557385965, + "learning_rate": 5e-06, + "loss": 0.307, + "num_input_tokens_seen": 132905752, + "step": 769 + }, + { + "epoch": 0.2945231711987744, + "loss": 0.3069145083427429, + "loss_ce": 0.02017132379114628, + "loss_iou": 1.0028711557388306, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 132905752, + "step": 769 + }, + { + "epoch": 0.2949061662198391, + "grad_norm": 32.232262113044406, + "learning_rate": 5e-06, + "loss": 0.2295, + "num_input_tokens_seen": 133078728, + "step": 770 + }, + { + "epoch": 0.2949061662198391, + "loss": 0.2590344548225403, + "loss_ce": 0.01977665163576603, + "loss_iou": 1.0257396697998047, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 133078728, + "step": 770 + }, + { + "epoch": 0.2952891612409039, + "grad_norm": 25.545717091142183, + "learning_rate": 5e-06, + "loss": 0.309, + "num_input_tokens_seen": 133252048, + "step": 771 + }, + { + "epoch": 0.2952891612409039, + "loss": 0.20813432335853577, + "loss_ce": 0.018681202083826065, + "loss_iou": 1.003133773803711, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 133252048, + "step": 771 + }, + { + "epoch": 0.2956721562619686, + "grad_norm": 30.70035907777987, + "learning_rate": 5e-06, + "loss": 0.3112, + "num_input_tokens_seen": 133425080, + "step": 772 + }, + { + "epoch": 0.2956721562619686, + "loss": 0.307090699672699, + "loss_ce": 0.01912684738636017, + "loss_iou": 1.2478001117706299, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 133425080, + "step": 772 + }, + { + "epoch": 0.29605515128303334, + "grad_norm": 36.01061968484035, + "learning_rate": 5e-06, + "loss": 0.2957, + "num_input_tokens_seen": 133598400, + "step": 773 + }, + { + "epoch": 0.29605515128303334, + "loss": 0.2688688635826111, + "loss_ce": 0.019113002344965935, + "loss_iou": 1.0169953107833862, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 133598400, + "step": 773 + }, + { + "epoch": 0.29643814630409804, + "grad_norm": 41.38894880087424, + "learning_rate": 5e-06, + "loss": 0.2532, + "num_input_tokens_seen": 133771272, + "step": 774 + }, + { + "epoch": 0.29643814630409804, + "loss": 0.2356131374835968, + "loss_ce": 0.0201590396463871, + "loss_iou": 1.0057514905929565, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 133771272, + "step": 774 + }, + { + "epoch": 0.2968211413251628, + "grad_norm": 32.49854990049222, + "learning_rate": 5e-06, + "loss": 0.3348, + "num_input_tokens_seen": 133944504, + "step": 775 + }, + { + "epoch": 0.2968211413251628, + "loss": 0.3293522596359253, + "loss_ce": 0.01965990662574768, + "loss_iou": 1.0076303482055664, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 133944504, + "step": 775 + }, + { + "epoch": 0.2972041363462275, + "grad_norm": 22.050109669731302, + "learning_rate": 5e-06, + "loss": 0.2193, + "num_input_tokens_seen": 134117504, + "step": 776 + }, + { + "epoch": 0.2972041363462275, + "loss": 0.1762588620185852, + "loss_ce": 0.018971281126141548, + "loss_iou": 1.001531720161438, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 134117504, + "step": 776 + }, + { + "epoch": 0.2975871313672922, + "grad_norm": 35.12163201054286, + "learning_rate": 5e-06, + "loss": 0.3317, + "num_input_tokens_seen": 134290544, + "step": 777 + }, + { + "epoch": 0.2975871313672922, + "loss": 0.31508395075798035, + "loss_ce": 0.019307564944028854, + "loss_iou": 0.992944598197937, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 134290544, + "step": 777 + }, + { + "epoch": 0.29797012638835696, + "grad_norm": 31.412389288023597, + "learning_rate": 5e-06, + "loss": 0.2951, + "num_input_tokens_seen": 134463448, + "step": 778 + }, + { + "epoch": 0.29797012638835696, + "loss": 0.302811861038208, + "loss_ce": 0.0210736021399498, + "loss_iou": 1.0193989276885986, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 134463448, + "step": 778 + }, + { + "epoch": 0.29835312140942166, + "grad_norm": 38.631078016427374, + "learning_rate": 5e-06, + "loss": 0.3086, + "num_input_tokens_seen": 134636536, + "step": 779 + }, + { + "epoch": 0.29835312140942166, + "loss": 0.26435214281082153, + "loss_ce": 0.018502552062273026, + "loss_iou": 1.0163710117340088, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 134636536, + "step": 779 + }, + { + "epoch": 0.2987361164304864, + "grad_norm": 39.02751751222748, + "learning_rate": 5e-06, + "loss": 0.2279, + "num_input_tokens_seen": 134809632, + "step": 780 + }, + { + "epoch": 0.2987361164304864, + "loss": 0.2375146448612213, + "loss_ce": 0.01913086511194706, + "loss_iou": 1.0060218572616577, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 134809632, + "step": 780 + }, + { + "epoch": 0.2991191114515511, + "grad_norm": 23.628170294683574, + "learning_rate": 5e-06, + "loss": 0.3677, + "num_input_tokens_seen": 134982376, + "step": 781 + }, + { + "epoch": 0.2991191114515511, + "loss": 0.37691134214401245, + "loss_ce": 0.016437701880931854, + "loss_iou": 1.006775975227356, + "loss_num": 0.361328125, + "loss_xval": 0.361328125, + "num_input_tokens_seen": 134982376, + "step": 781 + }, + { + "epoch": 0.2995021064726159, + "grad_norm": 24.990854764855325, + "learning_rate": 5e-06, + "loss": 0.2969, + "num_input_tokens_seen": 135155400, + "step": 782 + }, + { + "epoch": 0.2995021064726159, + "loss": 0.2875215411186218, + "loss_ce": 0.01896686479449272, + "loss_iou": 1.0138508081436157, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 135155400, + "step": 782 + }, + { + "epoch": 0.2998851014936806, + "grad_norm": 27.848857295651754, + "learning_rate": 5e-06, + "loss": 0.3309, + "num_input_tokens_seen": 135328296, + "step": 783 + }, + { + "epoch": 0.2998851014936806, + "loss": 0.3307138681411743, + "loss_ce": 0.017847657203674316, + "loss_iou": 1.03713858127594, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 135328296, + "step": 783 + }, + { + "epoch": 0.3002680965147453, + "grad_norm": 27.869984789088633, + "learning_rate": 5e-06, + "loss": 0.3094, + "num_input_tokens_seen": 135501560, + "step": 784 + }, + { + "epoch": 0.3002680965147453, + "loss": 0.30624768137931824, + "loss_ce": 0.018405893817543983, + "loss_iou": 1.1890881061553955, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 135501560, + "step": 784 + }, + { + "epoch": 0.30065109153581004, + "grad_norm": 31.86857918382479, + "learning_rate": 5e-06, + "loss": 0.358, + "num_input_tokens_seen": 135674512, + "step": 785 + }, + { + "epoch": 0.30065109153581004, + "loss": 0.32729578018188477, + "loss_ce": 0.021021369844675064, + "loss_iou": 1.0098835229873657, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 135674512, + "step": 785 + }, + { + "epoch": 0.30103408655687475, + "grad_norm": 27.724965314133073, + "learning_rate": 5e-06, + "loss": 0.2584, + "num_input_tokens_seen": 135847352, + "step": 786 + }, + { + "epoch": 0.30103408655687475, + "loss": 0.2529115378856659, + "loss_ce": 0.020794840529561043, + "loss_iou": 1.0112671852111816, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 135847352, + "step": 786 + }, + { + "epoch": 0.3014170815779395, + "grad_norm": 33.8414322542126, + "learning_rate": 5e-06, + "loss": 0.4155, + "num_input_tokens_seen": 136020400, + "step": 787 + }, + { + "epoch": 0.3014170815779395, + "loss": 0.3727918267250061, + "loss_ce": 0.017078936100006104, + "loss_iou": 1.0145914554595947, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 136020400, + "step": 787 + }, + { + "epoch": 0.3018000765990042, + "grad_norm": 29.200512375102747, + "learning_rate": 5e-06, + "loss": 0.2671, + "num_input_tokens_seen": 136193440, + "step": 788 + }, + { + "epoch": 0.3018000765990042, + "loss": 0.3048609495162964, + "loss_ce": 0.020986445248126984, + "loss_iou": 1.0601730346679688, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 136193440, + "step": 788 + }, + { + "epoch": 0.30218307162006897, + "grad_norm": 28.613521881724473, + "learning_rate": 5e-06, + "loss": 0.3944, + "num_input_tokens_seen": 136366792, + "step": 789 + }, + { + "epoch": 0.30218307162006897, + "loss": 0.4070029854774475, + "loss_ce": 0.019429761916399002, + "loss_iou": 1.359428882598877, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 136366792, + "step": 789 + }, + { + "epoch": 0.30256606664113367, + "grad_norm": 25.204892331329287, + "learning_rate": 5e-06, + "loss": 0.2787, + "num_input_tokens_seen": 136539808, + "step": 790 + }, + { + "epoch": 0.30256606664113367, + "loss": 0.28682586550712585, + "loss_ce": 0.01936980150640011, + "loss_iou": 1.015669345855713, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 136539808, + "step": 790 + }, + { + "epoch": 0.30294906166219837, + "grad_norm": 39.99448952895715, + "learning_rate": 5e-06, + "loss": 0.3394, + "num_input_tokens_seen": 136712568, + "step": 791 + }, + { + "epoch": 0.30294906166219837, + "loss": 0.31899553537368774, + "loss_ce": 0.019434988498687744, + "loss_iou": 1.0062270164489746, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 136712568, + "step": 791 + }, + { + "epoch": 0.30333205668326313, + "grad_norm": 29.618683629226744, + "learning_rate": 5e-06, + "loss": 0.3589, + "num_input_tokens_seen": 136885576, + "step": 792 + }, + { + "epoch": 0.30333205668326313, + "loss": 0.29877734184265137, + "loss_ce": 0.019114281982183456, + "loss_iou": 1.3513352870941162, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 136885576, + "step": 792 + }, + { + "epoch": 0.30371505170432783, + "grad_norm": 23.868257252018164, + "learning_rate": 5e-06, + "loss": 0.3871, + "num_input_tokens_seen": 137058208, + "step": 793 + }, + { + "epoch": 0.30371505170432783, + "loss": 0.3736051023006439, + "loss_ce": 0.019967414438724518, + "loss_iou": 1.1055314540863037, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 137058208, + "step": 793 + }, + { + "epoch": 0.3040980467253926, + "grad_norm": 28.612832145842777, + "learning_rate": 5e-06, + "loss": 0.2283, + "num_input_tokens_seen": 137231120, + "step": 794 + }, + { + "epoch": 0.3040980467253926, + "loss": 0.2663195729255676, + "loss_ce": 0.02077515423297882, + "loss_iou": 1.237123727798462, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 137231120, + "step": 794 + }, + { + "epoch": 0.3044810417464573, + "grad_norm": 21.584350122426212, + "learning_rate": 5e-06, + "loss": 0.2675, + "num_input_tokens_seen": 137404216, + "step": 795 + }, + { + "epoch": 0.3044810417464573, + "loss": 0.2667836546897888, + "loss_ce": 0.018980927765369415, + "loss_iou": 1.0179177522659302, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 137404216, + "step": 795 + }, + { + "epoch": 0.304864036767522, + "grad_norm": 17.3797470257764, + "learning_rate": 5e-06, + "loss": 0.2406, + "num_input_tokens_seen": 137577296, + "step": 796 + }, + { + "epoch": 0.304864036767522, + "loss": 0.21924707293510437, + "loss_ce": 0.017647963017225266, + "loss_iou": 1.01650071144104, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 137577296, + "step": 796 + }, + { + "epoch": 0.30524703178858675, + "grad_norm": 19.681467637573444, + "learning_rate": 5e-06, + "loss": 0.2729, + "num_input_tokens_seen": 137746704, + "step": 797 + }, + { + "epoch": 0.30524703178858675, + "loss": 0.3018743395805359, + "loss_ce": 0.017633602023124695, + "loss_iou": 1.007925033569336, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 137746704, + "step": 797 + }, + { + "epoch": 0.30563002680965146, + "grad_norm": 27.588431696498727, + "learning_rate": 5e-06, + "loss": 0.2344, + "num_input_tokens_seen": 137920112, + "step": 798 + }, + { + "epoch": 0.30563002680965146, + "loss": 0.22935327887535095, + "loss_ce": 0.018659912049770355, + "loss_iou": 1.0037200450897217, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 137920112, + "step": 798 + }, + { + "epoch": 0.3060130218307162, + "grad_norm": 31.444068762978855, + "learning_rate": 5e-06, + "loss": 0.3038, + "num_input_tokens_seen": 138092696, + "step": 799 + }, + { + "epoch": 0.3060130218307162, + "loss": 0.2891741991043091, + "loss_ce": 0.018300168216228485, + "loss_iou": 1.0806390047073364, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 138092696, + "step": 799 + }, + { + "epoch": 0.3063960168517809, + "grad_norm": 25.85811328706055, + "learning_rate": 5e-06, + "loss": 0.2525, + "num_input_tokens_seen": 138265656, + "step": 800 + }, + { + "epoch": 0.3063960168517809, + "loss": 0.2749815881252289, + "loss_ce": 0.018389806151390076, + "loss_iou": 1.0961915254592896, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 138265656, + "step": 800 + }, + { + "epoch": 0.3067790118728457, + "grad_norm": 19.998868990620483, + "learning_rate": 5e-06, + "loss": 0.3475, + "num_input_tokens_seen": 138438640, + "step": 801 + }, + { + "epoch": 0.3067790118728457, + "loss": 0.2885781526565552, + "loss_ce": 0.01941310614347458, + "loss_iou": 1.0149998664855957, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 138438640, + "step": 801 + }, + { + "epoch": 0.3071620068939104, + "grad_norm": 20.921510079443348, + "learning_rate": 5e-06, + "loss": 0.2429, + "num_input_tokens_seen": 138611832, + "step": 802 + }, + { + "epoch": 0.3071620068939104, + "loss": 0.24668265879154205, + "loss_ce": 0.018289102241396904, + "loss_iou": 1.1573847532272339, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 138611832, + "step": 802 + }, + { + "epoch": 0.3075450019149751, + "grad_norm": 27.405235343015118, + "learning_rate": 5e-06, + "loss": 0.2824, + "num_input_tokens_seen": 138784816, + "step": 803 + }, + { + "epoch": 0.3075450019149751, + "loss": 0.3569422960281372, + "loss_ce": 0.01905165985226631, + "loss_iou": 1.0295093059539795, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 138784816, + "step": 803 + }, + { + "epoch": 0.30792799693603984, + "grad_norm": 32.963114559822415, + "learning_rate": 5e-06, + "loss": 0.3284, + "num_input_tokens_seen": 138958144, + "step": 804 + }, + { + "epoch": 0.30792799693603984, + "loss": 0.316406786441803, + "loss_ce": 0.021118704229593277, + "loss_iou": 1.2123587131500244, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 138958144, + "step": 804 + }, + { + "epoch": 0.30831099195710454, + "grad_norm": 34.84300997985131, + "learning_rate": 5e-06, + "loss": 0.3417, + "num_input_tokens_seen": 139131544, + "step": 805 + }, + { + "epoch": 0.30831099195710454, + "loss": 0.3351994752883911, + "loss_ce": 0.019403565675020218, + "loss_iou": 1.6786623001098633, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 139131544, + "step": 805 + }, + { + "epoch": 0.3086939869781693, + "grad_norm": 35.45536605472206, + "learning_rate": 5e-06, + "loss": 0.2027, + "num_input_tokens_seen": 139304696, + "step": 806 + }, + { + "epoch": 0.3086939869781693, + "loss": 0.2326219379901886, + "loss_ce": 0.02009751833975315, + "loss_iou": 1.007890224456787, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 139304696, + "step": 806 + }, + { + "epoch": 0.309076981999234, + "grad_norm": 38.75706380675298, + "learning_rate": 5e-06, + "loss": 0.3064, + "num_input_tokens_seen": 139477816, + "step": 807 + }, + { + "epoch": 0.309076981999234, + "loss": 0.29244673252105713, + "loss_ce": 0.019375447183847427, + "loss_iou": 1.0221316814422607, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 139477816, + "step": 807 + }, + { + "epoch": 0.30945997702029876, + "grad_norm": 41.831056947268316, + "learning_rate": 5e-06, + "loss": 0.3134, + "num_input_tokens_seen": 139650344, + "step": 808 + }, + { + "epoch": 0.30945997702029876, + "loss": 0.26006972789764404, + "loss_ce": 0.01885880157351494, + "loss_iou": 1.0169495344161987, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 139650344, + "step": 808 + }, + { + "epoch": 0.30984297204136346, + "grad_norm": 50.82809307784458, + "learning_rate": 5e-06, + "loss": 0.3306, + "num_input_tokens_seen": 139823688, + "step": 809 + }, + { + "epoch": 0.30984297204136346, + "loss": 0.2908038794994354, + "loss_ce": 0.018831226974725723, + "loss_iou": 1.0181925296783447, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 139823688, + "step": 809 + }, + { + "epoch": 0.31022596706242817, + "grad_norm": 38.353784907422515, + "learning_rate": 5e-06, + "loss": 0.3149, + "num_input_tokens_seen": 139996600, + "step": 810 + }, + { + "epoch": 0.31022596706242817, + "loss": 0.30111557245254517, + "loss_ce": 0.018644865602254868, + "loss_iou": 1.0038089752197266, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 139996600, + "step": 810 + }, + { + "epoch": 0.3106089620834929, + "grad_norm": 28.165365237091542, + "learning_rate": 5e-06, + "loss": 0.4093, + "num_input_tokens_seen": 140169192, + "step": 811 + }, + { + "epoch": 0.3106089620834929, + "loss": 0.4380716383457184, + "loss_ce": 0.016684912145137787, + "loss_iou": 1.016355037689209, + "loss_num": 0.421875, + "loss_xval": 0.421875, + "num_input_tokens_seen": 140169192, + "step": 811 + }, + { + "epoch": 0.3109919571045576, + "grad_norm": 36.22693262684854, + "learning_rate": 5e-06, + "loss": 0.3467, + "num_input_tokens_seen": 140342288, + "step": 812 + }, + { + "epoch": 0.3109919571045576, + "loss": 0.40430572628974915, + "loss_ce": 0.021249089390039444, + "loss_iou": 1.0298683643341064, + "loss_num": 0.3828125, + "loss_xval": 0.3828125, + "num_input_tokens_seen": 140342288, + "step": 812 + }, + { + "epoch": 0.3113749521256224, + "grad_norm": 68.34275654606967, + "learning_rate": 5e-06, + "loss": 0.439, + "num_input_tokens_seen": 140515280, + "step": 813 + }, + { + "epoch": 0.3113749521256224, + "loss": 0.4387940466403961, + "loss_ce": 0.01862802915275097, + "loss_iou": 1.0701375007629395, + "loss_num": 0.419921875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 140515280, + "step": 813 + }, + { + "epoch": 0.3117579471466871, + "grad_norm": 30.29609036013505, + "learning_rate": 5e-06, + "loss": 0.3877, + "num_input_tokens_seen": 140687808, + "step": 814 + }, + { + "epoch": 0.3117579471466871, + "loss": 0.41614609956741333, + "loss_ce": 0.017464444041252136, + "loss_iou": 1.0300133228302002, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 140687808, + "step": 814 + }, + { + "epoch": 0.31214094216775184, + "grad_norm": 46.158913572281875, + "learning_rate": 5e-06, + "loss": 0.4618, + "num_input_tokens_seen": 140860720, + "step": 815 + }, + { + "epoch": 0.31214094216775184, + "loss": 0.45985519886016846, + "loss_ce": 0.019181368872523308, + "loss_iou": 1.0219111442565918, + "loss_num": 0.44140625, + "loss_xval": 0.44140625, + "num_input_tokens_seen": 140860720, + "step": 815 + }, + { + "epoch": 0.31252393718881655, + "grad_norm": 48.979671790420646, + "learning_rate": 5e-06, + "loss": 0.4771, + "num_input_tokens_seen": 141033824, + "step": 816 + }, + { + "epoch": 0.31252393718881655, + "loss": 0.44618505239486694, + "loss_ce": 0.01918311044573784, + "loss_iou": 1.0167924165725708, + "loss_num": 0.427734375, + "loss_xval": 0.427734375, + "num_input_tokens_seen": 141033824, + "step": 816 + }, + { + "epoch": 0.31290693220988125, + "grad_norm": 79.25767496978227, + "learning_rate": 5e-06, + "loss": 0.6913, + "num_input_tokens_seen": 141206736, + "step": 817 + }, + { + "epoch": 0.31290693220988125, + "loss": 0.6464214324951172, + "loss_ce": 0.02044481784105301, + "loss_iou": 1.0313572883605957, + "loss_num": 0.625, + "loss_xval": 0.625, + "num_input_tokens_seen": 141206736, + "step": 817 + }, + { + "epoch": 0.313289927230946, + "grad_norm": 59.700056047035524, + "learning_rate": 5e-06, + "loss": 0.486, + "num_input_tokens_seen": 141379880, + "step": 818 + }, + { + "epoch": 0.313289927230946, + "loss": 0.5164886116981506, + "loss_ce": 0.020150743424892426, + "loss_iou": 1.0376331806182861, + "loss_num": 0.49609375, + "loss_xval": 0.49609375, + "num_input_tokens_seen": 141379880, + "step": 818 + }, + { + "epoch": 0.3136729222520107, + "grad_norm": 29.73039959881463, + "learning_rate": 5e-06, + "loss": 0.4492, + "num_input_tokens_seen": 141552624, + "step": 819 + }, + { + "epoch": 0.3136729222520107, + "loss": 0.49371808767318726, + "loss_ce": 0.01959698647260666, + "loss_iou": 1.1965837478637695, + "loss_num": 0.474609375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 141552624, + "step": 819 + }, + { + "epoch": 0.31405591727307547, + "grad_norm": 29.265472210379624, + "learning_rate": 5e-06, + "loss": 0.4304, + "num_input_tokens_seen": 141725424, + "step": 820 + }, + { + "epoch": 0.31405591727307547, + "loss": 0.4803914427757263, + "loss_ce": 0.01945391856133938, + "loss_iou": 0.9033697843551636, + "loss_num": 0.4609375, + "loss_xval": 0.4609375, + "num_input_tokens_seen": 141725424, + "step": 820 + }, + { + "epoch": 0.31443891229414017, + "grad_norm": 37.26799133330325, + "learning_rate": 5e-06, + "loss": 0.3368, + "num_input_tokens_seen": 141898224, + "step": 821 + }, + { + "epoch": 0.31443891229414017, + "loss": 0.3688926696777344, + "loss_ce": 0.019039157778024673, + "loss_iou": 1.0247838497161865, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 141898224, + "step": 821 + }, + { + "epoch": 0.31482190731520493, + "grad_norm": 42.61123158430906, + "learning_rate": 5e-06, + "loss": 0.3873, + "num_input_tokens_seen": 142071184, + "step": 822 + }, + { + "epoch": 0.31482190731520493, + "loss": 0.36723488569259644, + "loss_ce": 0.017869655042886734, + "loss_iou": 1.1218441724777222, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 142071184, + "step": 822 + }, + { + "epoch": 0.31520490233626963, + "grad_norm": 33.9693213636612, + "learning_rate": 5e-06, + "loss": 0.3381, + "num_input_tokens_seen": 142244352, + "step": 823 + }, + { + "epoch": 0.31520490233626963, + "loss": 0.35591232776641846, + "loss_ce": 0.01789965108036995, + "loss_iou": 1.0056263208389282, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 142244352, + "step": 823 + }, + { + "epoch": 0.31558789735733433, + "grad_norm": 18.745058443742334, + "learning_rate": 5e-06, + "loss": 0.3706, + "num_input_tokens_seen": 142417576, + "step": 824 + }, + { + "epoch": 0.31558789735733433, + "loss": 0.38489848375320435, + "loss_ce": 0.01978621818125248, + "loss_iou": 1.0301867723464966, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 142417576, + "step": 824 + }, + { + "epoch": 0.3159708923783991, + "grad_norm": 33.33675359937986, + "learning_rate": 5e-06, + "loss": 0.2685, + "num_input_tokens_seen": 142590552, + "step": 825 + }, + { + "epoch": 0.3159708923783991, + "loss": 0.27861785888671875, + "loss_ce": 0.0215377788990736, + "loss_iou": 1.0413572788238525, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 142590552, + "step": 825 + }, + { + "epoch": 0.3163538873994638, + "grad_norm": 41.838821770096324, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 142763592, + "step": 826 + }, + { + "epoch": 0.3163538873994638, + "loss": 0.2080744355916977, + "loss_ce": 0.020574431866407394, + "loss_iou": 1.0031614303588867, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 142763592, + "step": 826 + }, + { + "epoch": 0.31673688242052855, + "grad_norm": 52.73453060565837, + "learning_rate": 5e-06, + "loss": 0.3533, + "num_input_tokens_seen": 142936720, + "step": 827 + }, + { + "epoch": 0.31673688242052855, + "loss": 0.36775538325309753, + "loss_ce": 0.019854985177516937, + "loss_iou": 0.9030606150627136, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 142936720, + "step": 827 + }, + { + "epoch": 0.31711987744159326, + "grad_norm": 20.633616115057396, + "learning_rate": 5e-06, + "loss": 0.2597, + "num_input_tokens_seen": 143109240, + "step": 828 + }, + { + "epoch": 0.31711987744159326, + "loss": 0.27895843982696533, + "loss_ce": 0.019559025764465332, + "loss_iou": 1.019107460975647, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 143109240, + "step": 828 + }, + { + "epoch": 0.31750287246265796, + "grad_norm": 28.53891989940894, + "learning_rate": 5e-06, + "loss": 0.3038, + "num_input_tokens_seen": 143282320, + "step": 829 + }, + { + "epoch": 0.31750287246265796, + "loss": 0.3055585026741028, + "loss_ce": 0.02015809714794159, + "loss_iou": 1.046279788017273, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 143282320, + "step": 829 + }, + { + "epoch": 0.3178858674837227, + "grad_norm": 27.62069897087929, + "learning_rate": 5e-06, + "loss": 0.3134, + "num_input_tokens_seen": 143454920, + "step": 830 + }, + { + "epoch": 0.3178858674837227, + "loss": 0.31350505352020264, + "loss_ce": 0.017972828820347786, + "loss_iou": 1.0730494260787964, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 143454920, + "step": 830 + }, + { + "epoch": 0.3182688625047874, + "grad_norm": 31.893708533509034, + "learning_rate": 5e-06, + "loss": 0.2461, + "num_input_tokens_seen": 143627840, + "step": 831 + }, + { + "epoch": 0.3182688625047874, + "loss": 0.3111610412597656, + "loss_ce": 0.01794816553592682, + "loss_iou": 1.034367561340332, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 143627840, + "step": 831 + }, + { + "epoch": 0.3186518575258522, + "grad_norm": 45.06367253498661, + "learning_rate": 5e-06, + "loss": 0.2579, + "num_input_tokens_seen": 143801048, + "step": 832 + }, + { + "epoch": 0.3186518575258522, + "loss": 0.2307465523481369, + "loss_ce": 0.020541470497846603, + "loss_iou": 1.044113278388977, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 143801048, + "step": 832 + }, + { + "epoch": 0.3190348525469169, + "grad_norm": 34.95905333874307, + "learning_rate": 5e-06, + "loss": 0.268, + "num_input_tokens_seen": 143973608, + "step": 833 + }, + { + "epoch": 0.3190348525469169, + "loss": 0.296118825674057, + "loss_ce": 0.019141294062137604, + "loss_iou": 1.2847325801849365, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 143973608, + "step": 833 + }, + { + "epoch": 0.31941784756798164, + "grad_norm": 37.812370808504184, + "learning_rate": 5e-06, + "loss": 0.2669, + "num_input_tokens_seen": 144146480, + "step": 834 + }, + { + "epoch": 0.31941784756798164, + "loss": 0.251359224319458, + "loss_ce": 0.02040218561887741, + "loss_iou": 1.1682796478271484, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 144146480, + "step": 834 + }, + { + "epoch": 0.31980084258904634, + "grad_norm": 28.85114403789628, + "learning_rate": 5e-06, + "loss": 0.2441, + "num_input_tokens_seen": 144319496, + "step": 835 + }, + { + "epoch": 0.31980084258904634, + "loss": 0.2518581748008728, + "loss_ce": 0.019558370113372803, + "loss_iou": 1.008854866027832, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 144319496, + "step": 835 + }, + { + "epoch": 0.32018383761011104, + "grad_norm": 29.92956153922238, + "learning_rate": 5e-06, + "loss": 0.2753, + "num_input_tokens_seen": 144492864, + "step": 836 + }, + { + "epoch": 0.32018383761011104, + "loss": 0.28272461891174316, + "loss_ce": 0.02027345821261406, + "loss_iou": 1.1115727424621582, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 144492864, + "step": 836 + }, + { + "epoch": 0.3205668326311758, + "grad_norm": 21.881929259341096, + "learning_rate": 5e-06, + "loss": 0.2654, + "num_input_tokens_seen": 144665848, + "step": 837 + }, + { + "epoch": 0.3205668326311758, + "loss": 0.25894656777381897, + "loss_ce": 0.01895633339881897, + "loss_iou": 1.0287256240844727, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 144665848, + "step": 837 + }, + { + "epoch": 0.3209498276522405, + "grad_norm": 21.29026285801437, + "learning_rate": 5e-06, + "loss": 0.2512, + "num_input_tokens_seen": 144838976, + "step": 838 + }, + { + "epoch": 0.3209498276522405, + "loss": 0.22368401288986206, + "loss_ce": 0.018544865772128105, + "loss_iou": 1.0287506580352783, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 144838976, + "step": 838 + }, + { + "epoch": 0.32133282267330526, + "grad_norm": 26.849038724261195, + "learning_rate": 5e-06, + "loss": 0.2713, + "num_input_tokens_seen": 145011864, + "step": 839 + }, + { + "epoch": 0.32133282267330526, + "loss": 0.3127955198287964, + "loss_ce": 0.020070888102054596, + "loss_iou": 1.1161601543426514, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 145011864, + "step": 839 + }, + { + "epoch": 0.32171581769436997, + "grad_norm": 33.585060968994426, + "learning_rate": 5e-06, + "loss": 0.2158, + "num_input_tokens_seen": 145184768, + "step": 840 + }, + { + "epoch": 0.32171581769436997, + "loss": 0.19624000787734985, + "loss_ce": 0.020641859620809555, + "loss_iou": 1.0071518421173096, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 145184768, + "step": 840 + }, + { + "epoch": 0.3220988127154347, + "grad_norm": 31.301923816413904, + "learning_rate": 5e-06, + "loss": 0.2532, + "num_input_tokens_seen": 145357272, + "step": 841 + }, + { + "epoch": 0.3220988127154347, + "loss": 0.30623412132263184, + "loss_ce": 0.020162362605333328, + "loss_iou": 1.032745361328125, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 145357272, + "step": 841 + }, + { + "epoch": 0.3224818077364994, + "grad_norm": 25.348758789640222, + "learning_rate": 5e-06, + "loss": 0.22, + "num_input_tokens_seen": 145530344, + "step": 842 + }, + { + "epoch": 0.3224818077364994, + "loss": 0.19132661819458008, + "loss_ce": 0.019512636587023735, + "loss_iou": 1.0022637844085693, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 145530344, + "step": 842 + }, + { + "epoch": 0.32286480275756413, + "grad_norm": 24.638971635835045, + "learning_rate": 5e-06, + "loss": 0.2721, + "num_input_tokens_seen": 145703064, + "step": 843 + }, + { + "epoch": 0.32286480275756413, + "loss": 0.22540715336799622, + "loss_ce": 0.019230403006076813, + "loss_iou": 1.0078539848327637, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 145703064, + "step": 843 + }, + { + "epoch": 0.3232477977786289, + "grad_norm": 44.55137979394336, + "learning_rate": 5e-06, + "loss": 0.2526, + "num_input_tokens_seen": 145876192, + "step": 844 + }, + { + "epoch": 0.3232477977786289, + "loss": 0.24295461177825928, + "loss_ce": 0.02127492055296898, + "loss_iou": 1.0485103130340576, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 145876192, + "step": 844 + }, + { + "epoch": 0.3236307927996936, + "grad_norm": 24.890352961647036, + "learning_rate": 5e-06, + "loss": 0.2727, + "num_input_tokens_seen": 146049144, + "step": 845 + }, + { + "epoch": 0.3236307927996936, + "loss": 0.23165491223335266, + "loss_ce": 0.019496705383062363, + "loss_iou": 1.009504795074463, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 146049144, + "step": 845 + }, + { + "epoch": 0.32401378782075835, + "grad_norm": 25.007161446809004, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 146222448, + "step": 846 + }, + { + "epoch": 0.32401378782075835, + "loss": 0.20678061246871948, + "loss_ce": 0.018731294199824333, + "loss_iou": 1.0130515098571777, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 146222448, + "step": 846 + }, + { + "epoch": 0.32439678284182305, + "grad_norm": 25.81087641029904, + "learning_rate": 5e-06, + "loss": 0.2753, + "num_input_tokens_seen": 146395592, + "step": 847 + }, + { + "epoch": 0.32439678284182305, + "loss": 0.27316826581954956, + "loss_ce": 0.01950616016983986, + "loss_iou": 1.010009527206421, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 146395592, + "step": 847 + }, + { + "epoch": 0.3247797778628878, + "grad_norm": 27.322352176353736, + "learning_rate": 5e-06, + "loss": 0.2061, + "num_input_tokens_seen": 146568568, + "step": 848 + }, + { + "epoch": 0.3247797778628878, + "loss": 0.18458572030067444, + "loss_ce": 0.018814239650964737, + "loss_iou": 1.0022218227386475, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 146568568, + "step": 848 + }, + { + "epoch": 0.3251627728839525, + "grad_norm": 24.522639268896462, + "learning_rate": 5e-06, + "loss": 0.2667, + "num_input_tokens_seen": 146741328, + "step": 849 + }, + { + "epoch": 0.3251627728839525, + "loss": 0.27562767267227173, + "loss_ce": 0.020378652960062027, + "loss_iou": 1.0394654273986816, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 146741328, + "step": 849 + }, + { + "epoch": 0.3255457679050172, + "grad_norm": 15.574133460724482, + "learning_rate": 5e-06, + "loss": 0.1955, + "num_input_tokens_seen": 146914040, + "step": 850 + }, + { + "epoch": 0.3255457679050172, + "loss": 0.21060767769813538, + "loss_ce": 0.018285904079675674, + "loss_iou": 1.0735305547714233, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 146914040, + "step": 850 + }, + { + "epoch": 0.32592876292608197, + "grad_norm": 18.379237703982493, + "learning_rate": 5e-06, + "loss": 0.2648, + "num_input_tokens_seen": 147087104, + "step": 851 + }, + { + "epoch": 0.32592876292608197, + "loss": 0.24579739570617676, + "loss_ce": 0.01929592713713646, + "loss_iou": 1.0109920501708984, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 147087104, + "step": 851 + }, + { + "epoch": 0.3263117579471467, + "grad_norm": 28.601822137055382, + "learning_rate": 5e-06, + "loss": 0.2195, + "num_input_tokens_seen": 147259904, + "step": 852 + }, + { + "epoch": 0.3263117579471467, + "loss": 0.2619357109069824, + "loss_ce": 0.019992347806692123, + "loss_iou": 1.066663384437561, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 147259904, + "step": 852 + }, + { + "epoch": 0.32669475296821143, + "grad_norm": 27.97334658629691, + "learning_rate": 5e-06, + "loss": 0.2733, + "num_input_tokens_seen": 147433096, + "step": 853 + }, + { + "epoch": 0.32669475296821143, + "loss": 0.27185162901878357, + "loss_ce": 0.02038678154349327, + "loss_iou": 0.9701149463653564, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 147433096, + "step": 853 + }, + { + "epoch": 0.32707774798927614, + "grad_norm": 32.71851833006168, + "learning_rate": 5e-06, + "loss": 0.255, + "num_input_tokens_seen": 147605824, + "step": 854 + }, + { + "epoch": 0.32707774798927614, + "loss": 0.2587239146232605, + "loss_ce": 0.017024705186486244, + "loss_iou": 0.9123989343643188, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 147605824, + "step": 854 + }, + { + "epoch": 0.32746074301034084, + "grad_norm": 22.92444775658955, + "learning_rate": 5e-06, + "loss": 0.2654, + "num_input_tokens_seen": 147778928, + "step": 855 + }, + { + "epoch": 0.32746074301034084, + "loss": 0.2387467622756958, + "loss_ce": 0.018898125737905502, + "loss_iou": 1.0224438905715942, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 147778928, + "step": 855 + }, + { + "epoch": 0.3278437380314056, + "grad_norm": 25.280672116603366, + "learning_rate": 5e-06, + "loss": 0.2834, + "num_input_tokens_seen": 147951744, + "step": 856 + }, + { + "epoch": 0.3278437380314056, + "loss": 0.21689927577972412, + "loss_ce": 0.020121928304433823, + "loss_iou": 0.9918257594108582, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 147951744, + "step": 856 + }, + { + "epoch": 0.3282267330524703, + "grad_norm": 28.654276497458522, + "learning_rate": 5e-06, + "loss": 0.2463, + "num_input_tokens_seen": 148124912, + "step": 857 + }, + { + "epoch": 0.3282267330524703, + "loss": 0.27426350116729736, + "loss_ce": 0.020479325205087662, + "loss_iou": 1.02805495262146, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 148124912, + "step": 857 + }, + { + "epoch": 0.32860972807353506, + "grad_norm": 33.29380106819679, + "learning_rate": 5e-06, + "loss": 0.2217, + "num_input_tokens_seen": 148298080, + "step": 858 + }, + { + "epoch": 0.32860972807353506, + "loss": 0.21757923066616058, + "loss_ce": 0.020557750016450882, + "loss_iou": 1.0261526107788086, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 148298080, + "step": 858 + }, + { + "epoch": 0.32899272309459976, + "grad_norm": 33.55535640775121, + "learning_rate": 5e-06, + "loss": 0.2818, + "num_input_tokens_seen": 148471016, + "step": 859 + }, + { + "epoch": 0.32899272309459976, + "loss": 0.21814215183258057, + "loss_ce": 0.018923403695225716, + "loss_iou": 1.0018761157989502, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 148471016, + "step": 859 + }, + { + "epoch": 0.3293757181156645, + "grad_norm": 26.54593700223138, + "learning_rate": 5e-06, + "loss": 0.2736, + "num_input_tokens_seen": 148644016, + "step": 860 + }, + { + "epoch": 0.3293757181156645, + "loss": 0.31072235107421875, + "loss_ce": 0.019523631781339645, + "loss_iou": 1.087246298789978, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 148644016, + "step": 860 + }, + { + "epoch": 0.3297587131367292, + "grad_norm": 22.016117691997643, + "learning_rate": 5e-06, + "loss": 0.2776, + "num_input_tokens_seen": 148816904, + "step": 861 + }, + { + "epoch": 0.3297587131367292, + "loss": 0.2748348116874695, + "loss_ce": 0.01958579383790493, + "loss_iou": 1.0558685064315796, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 148816904, + "step": 861 + }, + { + "epoch": 0.3301417081577939, + "grad_norm": 27.05117190953951, + "learning_rate": 5e-06, + "loss": 0.2274, + "num_input_tokens_seen": 148989896, + "step": 862 + }, + { + "epoch": 0.3301417081577939, + "loss": 0.20970948040485382, + "loss_ce": 0.020439468324184418, + "loss_iou": 1.0353190898895264, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 148989896, + "step": 862 + }, + { + "epoch": 0.3305247031788587, + "grad_norm": 35.11845782318814, + "learning_rate": 5e-06, + "loss": 0.2434, + "num_input_tokens_seen": 149163184, + "step": 863 + }, + { + "epoch": 0.3305247031788587, + "loss": 0.271926611661911, + "loss_ce": 0.019851425662636757, + "loss_iou": 1.0185465812683105, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 149163184, + "step": 863 + }, + { + "epoch": 0.3309076981999234, + "grad_norm": 26.226221844466277, + "learning_rate": 5e-06, + "loss": 0.3065, + "num_input_tokens_seen": 149336352, + "step": 864 + }, + { + "epoch": 0.3309076981999234, + "loss": 0.28416454792022705, + "loss_ce": 0.019516095519065857, + "loss_iou": 1.0216724872589111, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 149336352, + "step": 864 + }, + { + "epoch": 0.33129069322098814, + "grad_norm": 23.19677223917612, + "learning_rate": 5e-06, + "loss": 0.2548, + "num_input_tokens_seen": 149509128, + "step": 865 + }, + { + "epoch": 0.33129069322098814, + "loss": 0.3117525577545166, + "loss_ce": 0.019394144415855408, + "loss_iou": 1.027750849723816, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 149509128, + "step": 865 + }, + { + "epoch": 0.33167368824205284, + "grad_norm": 22.600602819517086, + "learning_rate": 5e-06, + "loss": 0.2817, + "num_input_tokens_seen": 149682136, + "step": 866 + }, + { + "epoch": 0.33167368824205284, + "loss": 0.23056963086128235, + "loss_ce": 0.01926591619849205, + "loss_iou": 0.9869059324264526, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 149682136, + "step": 866 + }, + { + "epoch": 0.3320566832631176, + "grad_norm": 34.16005746791917, + "learning_rate": 5e-06, + "loss": 0.2815, + "num_input_tokens_seen": 149855344, + "step": 867 + }, + { + "epoch": 0.3320566832631176, + "loss": 0.28401294350624084, + "loss_ce": 0.020463157445192337, + "loss_iou": 1.9210349321365356, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 149855344, + "step": 867 + }, + { + "epoch": 0.3324396782841823, + "grad_norm": 29.907646899782975, + "learning_rate": 5e-06, + "loss": 0.2703, + "num_input_tokens_seen": 150028720, + "step": 868 + }, + { + "epoch": 0.3324396782841823, + "loss": 0.2970985174179077, + "loss_ce": 0.019998911768198013, + "loss_iou": 1.049875259399414, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 150028720, + "step": 868 + }, + { + "epoch": 0.332822673305247, + "grad_norm": 29.99771694528357, + "learning_rate": 5e-06, + "loss": 0.3178, + "num_input_tokens_seen": 150201784, + "step": 869 + }, + { + "epoch": 0.332822673305247, + "loss": 0.3909516930580139, + "loss_ce": 0.018515150994062424, + "loss_iou": 1.0345795154571533, + "loss_num": 0.373046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 150201784, + "step": 869 + }, + { + "epoch": 0.33320566832631177, + "grad_norm": 33.004047019120286, + "learning_rate": 5e-06, + "loss": 0.2027, + "num_input_tokens_seen": 150374760, + "step": 870 + }, + { + "epoch": 0.33320566832631177, + "loss": 0.180461585521698, + "loss_ce": 0.021342938765883446, + "loss_iou": 1.0032970905303955, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 150374760, + "step": 870 + }, + { + "epoch": 0.33358866334737647, + "grad_norm": 45.27232694325867, + "learning_rate": 5e-06, + "loss": 0.3349, + "num_input_tokens_seen": 150547536, + "step": 871 + }, + { + "epoch": 0.33358866334737647, + "loss": 0.35715845227241516, + "loss_ce": 0.018901610746979713, + "loss_iou": 1.0309621095657349, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 150547536, + "step": 871 + }, + { + "epoch": 0.3339716583684412, + "grad_norm": 31.843471049279653, + "learning_rate": 5e-06, + "loss": 0.2672, + "num_input_tokens_seen": 150720560, + "step": 872 + }, + { + "epoch": 0.3339716583684412, + "loss": 0.23046442866325378, + "loss_ce": 0.01879449561238289, + "loss_iou": 1.0066142082214355, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 150720560, + "step": 872 + }, + { + "epoch": 0.33435465338950593, + "grad_norm": 33.7652737493065, + "learning_rate": 5e-06, + "loss": 0.262, + "num_input_tokens_seen": 150893480, + "step": 873 + }, + { + "epoch": 0.33435465338950593, + "loss": 0.22619307041168213, + "loss_ce": 0.02123701572418213, + "loss_iou": 1.004620909690857, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 150893480, + "step": 873 + }, + { + "epoch": 0.3347376484105707, + "grad_norm": 33.864588355012586, + "learning_rate": 5e-06, + "loss": 0.2216, + "num_input_tokens_seen": 151063080, + "step": 874 + }, + { + "epoch": 0.3347376484105707, + "loss": 0.2183486968278885, + "loss_ce": 0.02132720872759819, + "loss_iou": 1.015843391418457, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 151063080, + "step": 874 + }, + { + "epoch": 0.3351206434316354, + "grad_norm": 27.357396846645152, + "learning_rate": 5e-06, + "loss": 0.3265, + "num_input_tokens_seen": 151235760, + "step": 875 + }, + { + "epoch": 0.3351206434316354, + "loss": 0.3101559281349182, + "loss_ce": 0.019018245860934258, + "loss_iou": 1.107253909111023, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 151235760, + "step": 875 + }, + { + "epoch": 0.3355036384527001, + "grad_norm": 13.869717694026997, + "learning_rate": 5e-06, + "loss": 0.2955, + "num_input_tokens_seen": 151408968, + "step": 876 + }, + { + "epoch": 0.3355036384527001, + "loss": 0.3140192925930023, + "loss_ce": 0.01970777101814747, + "loss_iou": 1.2262418270111084, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 151408968, + "step": 876 + }, + { + "epoch": 0.33588663347376485, + "grad_norm": 14.395864679785616, + "learning_rate": 5e-06, + "loss": 0.2199, + "num_input_tokens_seen": 151581912, + "step": 877 + }, + { + "epoch": 0.33588663347376485, + "loss": 0.25289204716682434, + "loss_ce": 0.01924947090446949, + "loss_iou": 1.035520076751709, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 151581912, + "step": 877 + }, + { + "epoch": 0.33626962849482955, + "grad_norm": 28.536779401306894, + "learning_rate": 5e-06, + "loss": 0.2443, + "num_input_tokens_seen": 151754712, + "step": 878 + }, + { + "epoch": 0.33626962849482955, + "loss": 0.25656014680862427, + "loss_ce": 0.017912685871124268, + "loss_iou": 1.0007470846176147, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 151754712, + "step": 878 + }, + { + "epoch": 0.3366526235158943, + "grad_norm": 31.989938231085112, + "learning_rate": 5e-06, + "loss": 0.205, + "num_input_tokens_seen": 151927232, + "step": 879 + }, + { + "epoch": 0.3366526235158943, + "loss": 0.17606347799301147, + "loss_ce": 0.018714847043156624, + "loss_iou": 1.0011568069458008, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 151927232, + "step": 879 + }, + { + "epoch": 0.337035618536959, + "grad_norm": 33.091155354932745, + "learning_rate": 5e-06, + "loss": 0.1916, + "num_input_tokens_seen": 152100176, + "step": 880 + }, + { + "epoch": 0.337035618536959, + "loss": 0.16588345170021057, + "loss_ce": 0.019704259932041168, + "loss_iou": 1.0007708072662354, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 152100176, + "step": 880 + }, + { + "epoch": 0.33741861355802377, + "grad_norm": 35.506378999432776, + "learning_rate": 5e-06, + "loss": 0.2679, + "num_input_tokens_seen": 152272968, + "step": 881 + }, + { + "epoch": 0.33741861355802377, + "loss": 0.2750643491744995, + "loss_ce": 0.019143931567668915, + "loss_iou": 1.0157005786895752, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 152272968, + "step": 881 + }, + { + "epoch": 0.3378016085790885, + "grad_norm": 29.668205512706834, + "learning_rate": 5e-06, + "loss": 0.2678, + "num_input_tokens_seen": 152446016, + "step": 882 + }, + { + "epoch": 0.3378016085790885, + "loss": 0.311052531003952, + "loss_ce": 0.01869412139058113, + "loss_iou": 1.227002739906311, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 152446016, + "step": 882 + }, + { + "epoch": 0.3381846036001532, + "grad_norm": 27.83597592754868, + "learning_rate": 5e-06, + "loss": 0.2616, + "num_input_tokens_seen": 152618800, + "step": 883 + }, + { + "epoch": 0.3381846036001532, + "loss": 0.2664565145969391, + "loss_ce": 0.01950826123356819, + "loss_iou": 1.0522820949554443, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 152618800, + "step": 883 + }, + { + "epoch": 0.33856759862121794, + "grad_norm": 31.058857810008302, + "learning_rate": 5e-06, + "loss": 0.2731, + "num_input_tokens_seen": 152792032, + "step": 884 + }, + { + "epoch": 0.33856759862121794, + "loss": 0.3341100811958313, + "loss_ce": 0.019229726865887642, + "loss_iou": 1.10325288772583, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 152792032, + "step": 884 + }, + { + "epoch": 0.33895059364228264, + "grad_norm": 22.04291331215238, + "learning_rate": 5e-06, + "loss": 0.2674, + "num_input_tokens_seen": 152965144, + "step": 885 + }, + { + "epoch": 0.33895059364228264, + "loss": 0.24300506711006165, + "loss_ce": 0.018273629248142242, + "loss_iou": 1.0013980865478516, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 152965144, + "step": 885 + }, + { + "epoch": 0.3393335886633474, + "grad_norm": 22.290170546120653, + "learning_rate": 5e-06, + "loss": 0.2737, + "num_input_tokens_seen": 153137984, + "step": 886 + }, + { + "epoch": 0.3393335886633474, + "loss": 0.2488933652639389, + "loss_ce": 0.019523244351148605, + "loss_iou": 2.009737968444824, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 153137984, + "step": 886 + }, + { + "epoch": 0.3397165836844121, + "grad_norm": 32.707924870130725, + "learning_rate": 5e-06, + "loss": 0.2528, + "num_input_tokens_seen": 153311136, + "step": 887 + }, + { + "epoch": 0.3397165836844121, + "loss": 0.2415219247341156, + "loss_ce": 0.019476018846035004, + "loss_iou": 1.016276478767395, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 153311136, + "step": 887 + }, + { + "epoch": 0.3400995787054768, + "grad_norm": 34.992697891143266, + "learning_rate": 5e-06, + "loss": 0.2079, + "num_input_tokens_seen": 153483800, + "step": 888 + }, + { + "epoch": 0.3400995787054768, + "loss": 0.2507489323616028, + "loss_ce": 0.018876362591981888, + "loss_iou": 1.0302425622940063, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 153483800, + "step": 888 + }, + { + "epoch": 0.34048257372654156, + "grad_norm": 41.754336205978944, + "learning_rate": 5e-06, + "loss": 0.2532, + "num_input_tokens_seen": 153656496, + "step": 889 + }, + { + "epoch": 0.34048257372654156, + "loss": 0.23359987139701843, + "loss_ce": 0.020953377708792686, + "loss_iou": 1.0017032623291016, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 153656496, + "step": 889 + }, + { + "epoch": 0.34086556874760626, + "grad_norm": 30.40687800194179, + "learning_rate": 5e-06, + "loss": 0.2286, + "num_input_tokens_seen": 153829608, + "step": 890 + }, + { + "epoch": 0.34086556874760626, + "loss": 0.25328874588012695, + "loss_ce": 0.019218916073441505, + "loss_iou": 1.0172107219696045, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 153829608, + "step": 890 + }, + { + "epoch": 0.341248563768671, + "grad_norm": 31.11285113679715, + "learning_rate": 5e-06, + "loss": 0.3226, + "num_input_tokens_seen": 154002776, + "step": 891 + }, + { + "epoch": 0.341248563768671, + "loss": 0.2829424738883972, + "loss_ce": 0.020247183740139008, + "loss_iou": 1.0092939138412476, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 154002776, + "step": 891 + }, + { + "epoch": 0.3416315587897357, + "grad_norm": 47.443313103501666, + "learning_rate": 5e-06, + "loss": 0.2477, + "num_input_tokens_seen": 154176184, + "step": 892 + }, + { + "epoch": 0.3416315587897357, + "loss": 0.16495199501514435, + "loss_ce": 0.02164144441485405, + "loss_iou": 1.0047979354858398, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 154176184, + "step": 892 + }, + { + "epoch": 0.3420145538108005, + "grad_norm": 60.4096998334582, + "learning_rate": 5e-06, + "loss": 0.3608, + "num_input_tokens_seen": 154349120, + "step": 893 + }, + { + "epoch": 0.3420145538108005, + "loss": 0.3488176465034485, + "loss_ce": 0.01996021717786789, + "loss_iou": 1.0872275829315186, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 154349120, + "step": 893 + }, + { + "epoch": 0.3423975488318652, + "grad_norm": 48.137410809081906, + "learning_rate": 5e-06, + "loss": 0.2708, + "num_input_tokens_seen": 154522336, + "step": 894 + }, + { + "epoch": 0.3423975488318652, + "loss": 0.30305665731430054, + "loss_ce": 0.01973147690296173, + "loss_iou": 1.0144085884094238, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 154522336, + "step": 894 + }, + { + "epoch": 0.3427805438529299, + "grad_norm": 38.15102960082304, + "learning_rate": 5e-06, + "loss": 0.2898, + "num_input_tokens_seen": 154695152, + "step": 895 + }, + { + "epoch": 0.3427805438529299, + "loss": 0.3265007734298706, + "loss_ce": 0.019371863454580307, + "loss_iou": 1.1647114753723145, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 154695152, + "step": 895 + }, + { + "epoch": 0.34316353887399464, + "grad_norm": 39.93498885433648, + "learning_rate": 5e-06, + "loss": 0.247, + "num_input_tokens_seen": 154868480, + "step": 896 + }, + { + "epoch": 0.34316353887399464, + "loss": 0.2490977942943573, + "loss_ce": 0.019605603069067, + "loss_iou": 1.0045669078826904, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 154868480, + "step": 896 + }, + { + "epoch": 0.34354653389505935, + "grad_norm": 49.53133487615153, + "learning_rate": 5e-06, + "loss": 0.3085, + "num_input_tokens_seen": 155041736, + "step": 897 + }, + { + "epoch": 0.34354653389505935, + "loss": 0.28766751289367676, + "loss_ce": 0.0194790530949831, + "loss_iou": 1.0107684135437012, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 155041736, + "step": 897 + }, + { + "epoch": 0.3439295289161241, + "grad_norm": 43.41462576045239, + "learning_rate": 5e-06, + "loss": 0.2817, + "num_input_tokens_seen": 155214920, + "step": 898 + }, + { + "epoch": 0.3439295289161241, + "loss": 0.29736804962158203, + "loss_ce": 0.020695675164461136, + "loss_iou": 1.0137401819229126, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 155214920, + "step": 898 + }, + { + "epoch": 0.3443125239371888, + "grad_norm": 29.071497778161586, + "learning_rate": 5e-06, + "loss": 0.2676, + "num_input_tokens_seen": 155387760, + "step": 899 + }, + { + "epoch": 0.3443125239371888, + "loss": 0.2834978699684143, + "loss_ce": 0.019337717443704605, + "loss_iou": 1.027559518814087, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 155387760, + "step": 899 + }, + { + "epoch": 0.34469551895825357, + "grad_norm": 40.99302143203642, + "learning_rate": 5e-06, + "loss": 0.3124, + "num_input_tokens_seen": 155557064, + "step": 900 + }, + { + "epoch": 0.34469551895825357, + "loss": 0.33803874254226685, + "loss_ce": 0.019801409915089607, + "loss_iou": 1.0807641744613647, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 155557064, + "step": 900 + }, + { + "epoch": 0.34507851397931827, + "grad_norm": 42.64613280035388, + "learning_rate": 5e-06, + "loss": 0.3903, + "num_input_tokens_seen": 155729992, + "step": 901 + }, + { + "epoch": 0.34507851397931827, + "loss": 0.46559372544288635, + "loss_ce": 0.020037095993757248, + "loss_iou": 1.1090466976165771, + "loss_num": 0.4453125, + "loss_xval": 0.4453125, + "num_input_tokens_seen": 155729992, + "step": 901 + }, + { + "epoch": 0.34546150900038297, + "grad_norm": 25.745717808459887, + "learning_rate": 5e-06, + "loss": 0.2969, + "num_input_tokens_seen": 155902824, + "step": 902 + }, + { + "epoch": 0.34546150900038297, + "loss": 0.3537680506706238, + "loss_ce": 0.01917332038283348, + "loss_iou": 1.0919166803359985, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 155902824, + "step": 902 + }, + { + "epoch": 0.34584450402144773, + "grad_norm": 38.51981430922358, + "learning_rate": 5e-06, + "loss": 0.3012, + "num_input_tokens_seen": 156075952, + "step": 903 + }, + { + "epoch": 0.34584450402144773, + "loss": 0.31221455335617065, + "loss_ce": 0.02022237703204155, + "loss_iou": 1.0048507452011108, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 156075952, + "step": 903 + }, + { + "epoch": 0.34622749904251243, + "grad_norm": 42.086478438408925, + "learning_rate": 5e-06, + "loss": 0.3136, + "num_input_tokens_seen": 156248960, + "step": 904 + }, + { + "epoch": 0.34622749904251243, + "loss": 0.3787992000579834, + "loss_ce": 0.020522834733128548, + "loss_iou": 1.0666258335113525, + "loss_num": 0.357421875, + "loss_xval": 0.357421875, + "num_input_tokens_seen": 156248960, + "step": 904 + }, + { + "epoch": 0.3466104940635772, + "grad_norm": 31.18795904043135, + "learning_rate": 5e-06, + "loss": 0.3417, + "num_input_tokens_seen": 156422032, + "step": 905 + }, + { + "epoch": 0.3466104940635772, + "loss": 0.3480222821235657, + "loss_ce": 0.01892072893679142, + "loss_iou": 1.045752763748169, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 156422032, + "step": 905 + }, + { + "epoch": 0.3469934890846419, + "grad_norm": 19.604571605873183, + "learning_rate": 5e-06, + "loss": 0.2438, + "num_input_tokens_seen": 156594896, + "step": 906 + }, + { + "epoch": 0.3469934890846419, + "loss": 0.2784842252731323, + "loss_ce": 0.019878283143043518, + "loss_iou": 1.0400936603546143, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 156594896, + "step": 906 + }, + { + "epoch": 0.34737648410570665, + "grad_norm": 27.065435221215285, + "learning_rate": 5e-06, + "loss": 0.3149, + "num_input_tokens_seen": 156767496, + "step": 907 + }, + { + "epoch": 0.34737648410570665, + "loss": 0.3503131866455078, + "loss_ce": 0.02182195335626602, + "loss_iou": 1.635839819908142, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 156767496, + "step": 907 + }, + { + "epoch": 0.34775947912677135, + "grad_norm": 34.892979096375406, + "learning_rate": 5e-06, + "loss": 0.2396, + "num_input_tokens_seen": 156940544, + "step": 908 + }, + { + "epoch": 0.34775947912677135, + "loss": 0.2266232967376709, + "loss_ce": 0.020629655569791794, + "loss_iou": 1.0288121700286865, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 156940544, + "step": 908 + }, + { + "epoch": 0.34814247414783606, + "grad_norm": 37.6920864330686, + "learning_rate": 5e-06, + "loss": 0.2843, + "num_input_tokens_seen": 157113304, + "step": 909 + }, + { + "epoch": 0.34814247414783606, + "loss": 0.29115408658981323, + "loss_ce": 0.020402126014232635, + "loss_iou": 1.0215134620666504, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 157113304, + "step": 909 + }, + { + "epoch": 0.3485254691689008, + "grad_norm": 33.315665964062156, + "learning_rate": 5e-06, + "loss": 0.2281, + "num_input_tokens_seen": 157286088, + "step": 910 + }, + { + "epoch": 0.3485254691689008, + "loss": 0.2402557134628296, + "loss_ce": 0.01888120174407959, + "loss_iou": 1.0174598693847656, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 157286088, + "step": 910 + }, + { + "epoch": 0.3489084641899655, + "grad_norm": 28.743804817260262, + "learning_rate": 5e-06, + "loss": 0.3037, + "num_input_tokens_seen": 157459064, + "step": 911 + }, + { + "epoch": 0.3489084641899655, + "loss": 0.24579393863677979, + "loss_ce": 0.019231446087360382, + "loss_iou": 1.5633665323257446, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 157459064, + "step": 911 + }, + { + "epoch": 0.3492914592110303, + "grad_norm": 32.52219973162685, + "learning_rate": 5e-06, + "loss": 0.2491, + "num_input_tokens_seen": 157631888, + "step": 912 + }, + { + "epoch": 0.3492914592110303, + "loss": 0.2055998146533966, + "loss_ce": 0.020419150590896606, + "loss_iou": 1.0066945552825928, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 157631888, + "step": 912 + }, + { + "epoch": 0.349674454232095, + "grad_norm": 43.165692368411264, + "learning_rate": 5e-06, + "loss": 0.246, + "num_input_tokens_seen": 157801072, + "step": 913 + }, + { + "epoch": 0.349674454232095, + "loss": 0.2419818788766861, + "loss_ce": 0.019142530858516693, + "loss_iou": 1.0147067308425903, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 157801072, + "step": 913 + }, + { + "epoch": 0.35005744925315974, + "grad_norm": 39.8351373397396, + "learning_rate": 5e-06, + "loss": 0.2307, + "num_input_tokens_seen": 157973896, + "step": 914 + }, + { + "epoch": 0.35005744925315974, + "loss": 0.23154376447200775, + "loss_ce": 0.01853107661008835, + "loss_iou": 1.0227255821228027, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 157973896, + "step": 914 + }, + { + "epoch": 0.35044044427422444, + "grad_norm": 30.211194241758076, + "learning_rate": 5e-06, + "loss": 0.2847, + "num_input_tokens_seen": 158147080, + "step": 915 + }, + { + "epoch": 0.35044044427422444, + "loss": 0.3091517686843872, + "loss_ce": 0.021309982985258102, + "loss_iou": 1.5687074661254883, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 158147080, + "step": 915 + }, + { + "epoch": 0.35082343929528914, + "grad_norm": 37.68092787773169, + "learning_rate": 5e-06, + "loss": 0.3188, + "num_input_tokens_seen": 158320312, + "step": 916 + }, + { + "epoch": 0.35082343929528914, + "loss": 0.21438775956630707, + "loss_ce": 0.020173899829387665, + "loss_iou": 1.0433349609375, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 158320312, + "step": 916 + }, + { + "epoch": 0.3512064343163539, + "grad_norm": 44.983632818646086, + "learning_rate": 5e-06, + "loss": 0.2567, + "num_input_tokens_seen": 158493192, + "step": 917 + }, + { + "epoch": 0.3512064343163539, + "loss": 0.2533189654350281, + "loss_ce": 0.020469840615987778, + "loss_iou": 1.1834526062011719, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 158493192, + "step": 917 + }, + { + "epoch": 0.3515894293374186, + "grad_norm": 32.39357558877045, + "learning_rate": 5e-06, + "loss": 0.2413, + "num_input_tokens_seen": 158666176, + "step": 918 + }, + { + "epoch": 0.3515894293374186, + "loss": 0.17894810438156128, + "loss_ce": 0.01915806531906128, + "loss_iou": 1.0037028789520264, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 158666176, + "step": 918 + }, + { + "epoch": 0.35197242435848336, + "grad_norm": 27.62549462577862, + "learning_rate": 5e-06, + "loss": 0.3017, + "num_input_tokens_seen": 158839064, + "step": 919 + }, + { + "epoch": 0.35197242435848336, + "loss": 0.3494510352611542, + "loss_ce": 0.01851840503513813, + "loss_iou": 1.0774567127227783, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 158839064, + "step": 919 + }, + { + "epoch": 0.35235541937954806, + "grad_norm": 26.037858753567132, + "learning_rate": 5e-06, + "loss": 0.2658, + "num_input_tokens_seen": 159012456, + "step": 920 + }, + { + "epoch": 0.35235541937954806, + "loss": 0.2600114047527313, + "loss_ce": 0.02099773660302162, + "loss_iou": 1.0828306674957275, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 159012456, + "step": 920 + }, + { + "epoch": 0.35273841440061277, + "grad_norm": 33.2862942978842, + "learning_rate": 5e-06, + "loss": 0.2344, + "num_input_tokens_seen": 159185584, + "step": 921 + }, + { + "epoch": 0.35273841440061277, + "loss": 0.24772392213344574, + "loss_ce": 0.018842089921236038, + "loss_iou": 1.0024566650390625, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 159185584, + "step": 921 + }, + { + "epoch": 0.3531214094216775, + "grad_norm": 42.377583632534844, + "learning_rate": 5e-06, + "loss": 0.2958, + "num_input_tokens_seen": 159358416, + "step": 922 + }, + { + "epoch": 0.3531214094216775, + "loss": 0.27739599347114563, + "loss_ce": 0.01958349347114563, + "loss_iou": 1.0809643268585205, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 159358416, + "step": 922 + }, + { + "epoch": 0.3535044044427422, + "grad_norm": 42.38230605939041, + "learning_rate": 5e-06, + "loss": 0.3198, + "num_input_tokens_seen": 159531552, + "step": 923 + }, + { + "epoch": 0.3535044044427422, + "loss": 0.34952735900878906, + "loss_ce": 0.021219246089458466, + "loss_iou": 1.0483672618865967, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 159531552, + "step": 923 + }, + { + "epoch": 0.353887399463807, + "grad_norm": 31.06636704009796, + "learning_rate": 5e-06, + "loss": 0.2219, + "num_input_tokens_seen": 159704552, + "step": 924 + }, + { + "epoch": 0.353887399463807, + "loss": 0.21600431203842163, + "loss_ce": 0.01941007934510708, + "loss_iou": 1.0015068054199219, + "loss_num": 0.1962890625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 159704552, + "step": 924 + }, + { + "epoch": 0.3542703944848717, + "grad_norm": 27.49356051820737, + "learning_rate": 5e-06, + "loss": 0.2664, + "num_input_tokens_seen": 159877752, + "step": 925 + }, + { + "epoch": 0.3542703944848717, + "loss": 0.21517300605773926, + "loss_ce": 0.01973842643201351, + "loss_iou": 1.0043420791625977, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 159877752, + "step": 925 + }, + { + "epoch": 0.35465338950593644, + "grad_norm": 26.41876018406636, + "learning_rate": 5e-06, + "loss": 0.2788, + "num_input_tokens_seen": 160051032, + "step": 926 + }, + { + "epoch": 0.35465338950593644, + "loss": 0.2924797534942627, + "loss_ce": 0.0201408751308918, + "loss_iou": 1.050068974494934, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 160051032, + "step": 926 + }, + { + "epoch": 0.35503638452700115, + "grad_norm": 29.833120520130493, + "learning_rate": 5e-06, + "loss": 0.3168, + "num_input_tokens_seen": 160224368, + "step": 927 + }, + { + "epoch": 0.35503638452700115, + "loss": 0.2220856249332428, + "loss_ce": 0.02042545937001705, + "loss_iou": 1.002797245979309, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 160224368, + "step": 927 + }, + { + "epoch": 0.35541937954806585, + "grad_norm": 27.609107074489003, + "learning_rate": 5e-06, + "loss": 0.2503, + "num_input_tokens_seen": 160397280, + "step": 928 + }, + { + "epoch": 0.35541937954806585, + "loss": 0.2418726682662964, + "loss_ce": 0.02049814537167549, + "loss_iou": 1.1102688312530518, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 160397280, + "step": 928 + }, + { + "epoch": 0.3558023745691306, + "grad_norm": 29.57272954416723, + "learning_rate": 5e-06, + "loss": 0.2923, + "num_input_tokens_seen": 160569992, + "step": 929 + }, + { + "epoch": 0.3558023745691306, + "loss": 0.2873765826225281, + "loss_ce": 0.02004261687397957, + "loss_iou": 1.100219964981079, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 160569992, + "step": 929 + }, + { + "epoch": 0.3561853695901953, + "grad_norm": 29.217580457996434, + "learning_rate": 5e-06, + "loss": 0.2273, + "num_input_tokens_seen": 160743128, + "step": 930 + }, + { + "epoch": 0.3561853695901953, + "loss": 0.22324742376804352, + "loss_ce": 0.019634131342172623, + "loss_iou": 1.0148777961730957, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 160743128, + "step": 930 + }, + { + "epoch": 0.35656836461126007, + "grad_norm": 36.5321377163354, + "learning_rate": 5e-06, + "loss": 0.3072, + "num_input_tokens_seen": 160916272, + "step": 931 + }, + { + "epoch": 0.35656836461126007, + "loss": 0.2983270287513733, + "loss_ce": 0.021471569314599037, + "loss_iou": 1.0132410526275635, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 160916272, + "step": 931 + }, + { + "epoch": 0.35695135963232477, + "grad_norm": 27.108967710770894, + "learning_rate": 5e-06, + "loss": 0.176, + "num_input_tokens_seen": 161089152, + "step": 932 + }, + { + "epoch": 0.35695135963232477, + "loss": 0.18797306716442108, + "loss_ce": 0.019638102501630783, + "loss_iou": 1.0014984607696533, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 161089152, + "step": 932 + }, + { + "epoch": 0.35733435465338953, + "grad_norm": 24.419209023342862, + "learning_rate": 5e-06, + "loss": 0.2917, + "num_input_tokens_seen": 161262408, + "step": 933 + }, + { + "epoch": 0.35733435465338953, + "loss": 0.3132162094116211, + "loss_ce": 0.0211019366979599, + "loss_iou": 1.067854642868042, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 161262408, + "step": 933 + }, + { + "epoch": 0.35771734967445423, + "grad_norm": 32.067596818972596, + "learning_rate": 5e-06, + "loss": 0.2357, + "num_input_tokens_seen": 161435616, + "step": 934 + }, + { + "epoch": 0.35771734967445423, + "loss": 0.20871080458164215, + "loss_ce": 0.020478377118706703, + "loss_iou": 1.0352756977081299, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 161435616, + "step": 934 + }, + { + "epoch": 0.35810034469551894, + "grad_norm": 36.608693997731365, + "learning_rate": 5e-06, + "loss": 0.2748, + "num_input_tokens_seen": 161608768, + "step": 935 + }, + { + "epoch": 0.35810034469551894, + "loss": 0.29362761974334717, + "loss_ce": 0.020190125331282616, + "loss_iou": 1.0156643390655518, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 161608768, + "step": 935 + }, + { + "epoch": 0.3584833397165837, + "grad_norm": 24.677787951327748, + "learning_rate": 5e-06, + "loss": 0.1886, + "num_input_tokens_seen": 161781680, + "step": 936 + }, + { + "epoch": 0.3584833397165837, + "loss": 0.17715074121952057, + "loss_ce": 0.020351428538560867, + "loss_iou": 0.9857987761497498, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 161781680, + "step": 936 + }, + { + "epoch": 0.3588663347376484, + "grad_norm": 22.374263056377668, + "learning_rate": 5e-06, + "loss": 0.305, + "num_input_tokens_seen": 161954624, + "step": 937 + }, + { + "epoch": 0.3588663347376484, + "loss": 0.21580612659454346, + "loss_ce": 0.01921188458800316, + "loss_iou": 0.886029839515686, + "loss_num": 0.1962890625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 161954624, + "step": 937 + }, + { + "epoch": 0.35924932975871315, + "grad_norm": 27.39624654170563, + "learning_rate": 5e-06, + "loss": 0.3217, + "num_input_tokens_seen": 162127632, + "step": 938 + }, + { + "epoch": 0.35924932975871315, + "loss": 0.32278984785079956, + "loss_ce": 0.020543742924928665, + "loss_iou": 1.0597132444381714, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 162127632, + "step": 938 + }, + { + "epoch": 0.35963232477977786, + "grad_norm": 35.36408821697388, + "learning_rate": 5e-06, + "loss": 0.3753, + "num_input_tokens_seen": 162300440, + "step": 939 + }, + { + "epoch": 0.35963232477977786, + "loss": 0.35825634002685547, + "loss_ce": 0.02134224772453308, + "loss_iou": 1.1178550720214844, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 162300440, + "step": 939 + }, + { + "epoch": 0.3600153198008426, + "grad_norm": 32.27612707323375, + "learning_rate": 5e-06, + "loss": 0.2525, + "num_input_tokens_seen": 162473368, + "step": 940 + }, + { + "epoch": 0.3600153198008426, + "loss": 0.2508542835712433, + "loss_ce": 0.018920686095952988, + "loss_iou": 1.0336934328079224, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 162473368, + "step": 940 + }, + { + "epoch": 0.3603983148219073, + "grad_norm": 26.328718140221437, + "learning_rate": 5e-06, + "loss": 0.3067, + "num_input_tokens_seen": 162646824, + "step": 941 + }, + { + "epoch": 0.3603983148219073, + "loss": 0.34090521931648254, + "loss_ce": 0.021813416853547096, + "loss_iou": 1.027925729751587, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 162646824, + "step": 941 + }, + { + "epoch": 0.360781309842972, + "grad_norm": 36.2819084239107, + "learning_rate": 5e-06, + "loss": 0.2025, + "num_input_tokens_seen": 162819664, + "step": 942 + }, + { + "epoch": 0.360781309842972, + "loss": 0.20151524245738983, + "loss_ce": 0.02054600790143013, + "loss_iou": 1.0331640243530273, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 162819664, + "step": 942 + }, + { + "epoch": 0.3611643048640368, + "grad_norm": 45.38164385254464, + "learning_rate": 5e-06, + "loss": 0.2846, + "num_input_tokens_seen": 162992528, + "step": 943 + }, + { + "epoch": 0.3611643048640368, + "loss": 0.2944408357143402, + "loss_ce": 0.01892814412713051, + "loss_iou": 1.0125874280929565, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 162992528, + "step": 943 + }, + { + "epoch": 0.3615472998851015, + "grad_norm": 25.859829660421223, + "learning_rate": 5e-06, + "loss": 0.2095, + "num_input_tokens_seen": 163165904, + "step": 944 + }, + { + "epoch": 0.3615472998851015, + "loss": 0.25529569387435913, + "loss_ce": 0.019242238253355026, + "loss_iou": 1.001347303390503, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 163165904, + "step": 944 + }, + { + "epoch": 0.36193029490616624, + "grad_norm": 28.6788539541314, + "learning_rate": 5e-06, + "loss": 0.3256, + "num_input_tokens_seen": 163339152, + "step": 945 + }, + { + "epoch": 0.36193029490616624, + "loss": 0.3408098816871643, + "loss_ce": 0.019154615700244904, + "loss_iou": 1.025606393814087, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 163339152, + "step": 945 + }, + { + "epoch": 0.36231328992723094, + "grad_norm": 21.10913626917847, + "learning_rate": 5e-06, + "loss": 0.2721, + "num_input_tokens_seen": 163511832, + "step": 946 + }, + { + "epoch": 0.36231328992723094, + "loss": 0.259053111076355, + "loss_ce": 0.020161528140306473, + "loss_iou": 1.0623010396957397, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 163511832, + "step": 946 + }, + { + "epoch": 0.3626962849482957, + "grad_norm": 44.33962548458138, + "learning_rate": 5e-06, + "loss": 0.2804, + "num_input_tokens_seen": 163684696, + "step": 947 + }, + { + "epoch": 0.3626962849482957, + "loss": 0.27441030740737915, + "loss_ce": 0.02087029069662094, + "loss_iou": 1.0241224765777588, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 163684696, + "step": 947 + }, + { + "epoch": 0.3630792799693604, + "grad_norm": 51.65153418796951, + "learning_rate": 5e-06, + "loss": 0.2737, + "num_input_tokens_seen": 163857912, + "step": 948 + }, + { + "epoch": 0.3630792799693604, + "loss": 0.291856974363327, + "loss_ce": 0.02025051787495613, + "loss_iou": 1.0185136795043945, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 163857912, + "step": 948 + }, + { + "epoch": 0.3634622749904251, + "grad_norm": 29.070846081236027, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 164031016, + "step": 949 + }, + { + "epoch": 0.3634622749904251, + "loss": 0.26171785593032837, + "loss_ce": 0.019286222755908966, + "loss_iou": 1.0417400598526, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 164031016, + "step": 949 + }, + { + "epoch": 0.36384527001148986, + "grad_norm": 23.19025535792982, + "learning_rate": 5e-06, + "loss": 0.2316, + "num_input_tokens_seen": 164203672, + "step": 950 + }, + { + "epoch": 0.36384527001148986, + "loss": 0.23933814465999603, + "loss_ce": 0.01802467182278633, + "loss_iou": 1.0510921478271484, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 164203672, + "step": 950 + }, + { + "epoch": 0.36422826503255457, + "grad_norm": 38.49785846617074, + "learning_rate": 5e-06, + "loss": 0.2897, + "num_input_tokens_seen": 164376880, + "step": 951 + }, + { + "epoch": 0.36422826503255457, + "loss": 0.28677451610565186, + "loss_ce": 0.019684676080942154, + "loss_iou": 1.0165079832077026, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 164376880, + "step": 951 + }, + { + "epoch": 0.3646112600536193, + "grad_norm": 31.328935383781246, + "learning_rate": 5e-06, + "loss": 0.3101, + "num_input_tokens_seen": 164550000, + "step": 952 + }, + { + "epoch": 0.3646112600536193, + "loss": 0.3002566993236542, + "loss_ce": 0.018640486523509026, + "loss_iou": 1.0196378231048584, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 164550000, + "step": 952 + }, + { + "epoch": 0.364994255074684, + "grad_norm": 31.244400096108578, + "learning_rate": 5e-06, + "loss": 0.3367, + "num_input_tokens_seen": 164722936, + "step": 953 + }, + { + "epoch": 0.364994255074684, + "loss": 0.29647910594940186, + "loss_ce": 0.02023400366306305, + "loss_iou": 1.0197391510009766, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 164722936, + "step": 953 + }, + { + "epoch": 0.36537725009574873, + "grad_norm": 36.71793962885349, + "learning_rate": 5e-06, + "loss": 0.2579, + "num_input_tokens_seen": 164896328, + "step": 954 + }, + { + "epoch": 0.36537725009574873, + "loss": 0.262016624212265, + "loss_ce": 0.02129395306110382, + "loss_iou": 1.035705804824829, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 164896328, + "step": 954 + }, + { + "epoch": 0.3657602451168135, + "grad_norm": 37.574584377052794, + "learning_rate": 5e-06, + "loss": 0.3782, + "num_input_tokens_seen": 165069352, + "step": 955 + }, + { + "epoch": 0.3657602451168135, + "loss": 0.4396094083786011, + "loss_ce": 0.020053725689649582, + "loss_iou": 3.2485127449035645, + "loss_num": 0.419921875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 165069352, + "step": 955 + }, + { + "epoch": 0.3661432401378782, + "grad_norm": 34.42679518876968, + "learning_rate": 5e-06, + "loss": 0.3147, + "num_input_tokens_seen": 165242528, + "step": 956 + }, + { + "epoch": 0.3661432401378782, + "loss": 0.2801949381828308, + "loss_ce": 0.02042931132018566, + "loss_iou": 1.007472276687622, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 165242528, + "step": 956 + }, + { + "epoch": 0.36652623515894295, + "grad_norm": 27.517040916418587, + "learning_rate": 5e-06, + "loss": 0.3127, + "num_input_tokens_seen": 165411808, + "step": 957 + }, + { + "epoch": 0.36652623515894295, + "loss": 0.29710865020751953, + "loss_ce": 0.019764892756938934, + "loss_iou": 1.1402711868286133, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 165411808, + "step": 957 + }, + { + "epoch": 0.36690923018000765, + "grad_norm": 25.34036629920117, + "learning_rate": 5e-06, + "loss": 0.2089, + "num_input_tokens_seen": 165584792, + "step": 958 + }, + { + "epoch": 0.36690923018000765, + "loss": 0.19531746208667755, + "loss_ce": 0.019475182518363, + "loss_iou": 1.0212349891662598, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 165584792, + "step": 958 + }, + { + "epoch": 0.3672922252010724, + "grad_norm": 31.0376230069948, + "learning_rate": 5e-06, + "loss": 0.3084, + "num_input_tokens_seen": 165757536, + "step": 959 + }, + { + "epoch": 0.3672922252010724, + "loss": 0.23202432692050934, + "loss_ce": 0.01901163160800934, + "loss_iou": 1.0255075693130493, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 165757536, + "step": 959 + }, + { + "epoch": 0.3676752202221371, + "grad_norm": 32.45066158774197, + "learning_rate": 5e-06, + "loss": 0.1919, + "num_input_tokens_seen": 165930696, + "step": 960 + }, + { + "epoch": 0.3676752202221371, + "loss": 0.18574444949626923, + "loss_ce": 0.019423654302954674, + "loss_iou": 1.0056147575378418, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 165930696, + "step": 960 + }, + { + "epoch": 0.3680582152432018, + "grad_norm": 24.452173233060154, + "learning_rate": 5e-06, + "loss": 0.2577, + "num_input_tokens_seen": 166103496, + "step": 961 + }, + { + "epoch": 0.3680582152432018, + "loss": 0.2453029602766037, + "loss_ce": 0.019167695194482803, + "loss_iou": 1.0065820217132568, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 166103496, + "step": 961 + }, + { + "epoch": 0.36844121026426657, + "grad_norm": 31.50527693117424, + "learning_rate": 5e-06, + "loss": 0.196, + "num_input_tokens_seen": 166276544, + "step": 962 + }, + { + "epoch": 0.36844121026426657, + "loss": 0.18020428717136383, + "loss_ce": 0.02047528699040413, + "loss_iou": 1.0018154382705688, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 166276544, + "step": 962 + }, + { + "epoch": 0.3688242052853313, + "grad_norm": 35.310253666246936, + "learning_rate": 5e-06, + "loss": 0.236, + "num_input_tokens_seen": 166449960, + "step": 963 + }, + { + "epoch": 0.3688242052853313, + "loss": 0.2314654141664505, + "loss_ce": 0.020955156534910202, + "loss_iou": 1.0441038608551025, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 166449960, + "step": 963 + }, + { + "epoch": 0.36920720030639603, + "grad_norm": 34.72278989600349, + "learning_rate": 5e-06, + "loss": 0.2881, + "num_input_tokens_seen": 166623200, + "step": 964 + }, + { + "epoch": 0.36920720030639603, + "loss": 0.32761016488075256, + "loss_ce": 0.01919952780008316, + "loss_iou": 1.1988705396652222, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 166623200, + "step": 964 + }, + { + "epoch": 0.36959019532746074, + "grad_norm": 35.99209920451824, + "learning_rate": 5e-06, + "loss": 0.2629, + "num_input_tokens_seen": 166796008, + "step": 965 + }, + { + "epoch": 0.36959019532746074, + "loss": 0.2871081233024597, + "loss_ce": 0.01953001320362091, + "loss_iou": 1.0272150039672852, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 166796008, + "step": 965 + }, + { + "epoch": 0.3699731903485255, + "grad_norm": 34.54876742428441, + "learning_rate": 5e-06, + "loss": 0.224, + "num_input_tokens_seen": 166968864, + "step": 966 + }, + { + "epoch": 0.3699731903485255, + "loss": 0.23276183009147644, + "loss_ce": 0.02017638087272644, + "loss_iou": 1.0384440422058105, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 166968864, + "step": 966 + }, + { + "epoch": 0.3703561853695902, + "grad_norm": 31.537146617666913, + "learning_rate": 5e-06, + "loss": 0.2377, + "num_input_tokens_seen": 167142048, + "step": 967 + }, + { + "epoch": 0.3703561853695902, + "loss": 0.2386997640132904, + "loss_ce": 0.018484920263290405, + "loss_iou": 1.0021443367004395, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 167142048, + "step": 967 + }, + { + "epoch": 0.3707391803906549, + "grad_norm": 28.718798089964558, + "learning_rate": 5e-06, + "loss": 0.2094, + "num_input_tokens_seen": 167315016, + "step": 968 + }, + { + "epoch": 0.3707391803906549, + "loss": 0.1817106306552887, + "loss_ce": 0.019662294536828995, + "loss_iou": 1.0258190631866455, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 167315016, + "step": 968 + }, + { + "epoch": 0.37112217541171966, + "grad_norm": 40.4887294725314, + "learning_rate": 5e-06, + "loss": 0.2375, + "num_input_tokens_seen": 167488120, + "step": 969 + }, + { + "epoch": 0.37112217541171966, + "loss": 0.2075423300266266, + "loss_ce": 0.021629242226481438, + "loss_iou": 1.0055090188980103, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 167488120, + "step": 969 + }, + { + "epoch": 0.37150517043278436, + "grad_norm": 42.83308812336441, + "learning_rate": 5e-06, + "loss": 0.2509, + "num_input_tokens_seen": 167657296, + "step": 970 + }, + { + "epoch": 0.37150517043278436, + "loss": 0.24004191160202026, + "loss_ce": 0.021413978189229965, + "loss_iou": 1.0232372283935547, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 167657296, + "step": 970 + }, + { + "epoch": 0.3718881654538491, + "grad_norm": 27.347654952228595, + "learning_rate": 5e-06, + "loss": 0.3097, + "num_input_tokens_seen": 167830448, + "step": 971 + }, + { + "epoch": 0.3718881654538491, + "loss": 0.30894187092781067, + "loss_ce": 0.02036764845252037, + "loss_iou": 1.0451247692108154, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 167830448, + "step": 971 + }, + { + "epoch": 0.3722711604749138, + "grad_norm": 37.39125530343754, + "learning_rate": 5e-06, + "loss": 0.2068, + "num_input_tokens_seen": 168003512, + "step": 972 + }, + { + "epoch": 0.3722711604749138, + "loss": 0.2168152779340744, + "loss_ce": 0.018878266215324402, + "loss_iou": 1.0135071277618408, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 168003512, + "step": 972 + }, + { + "epoch": 0.3726541554959786, + "grad_norm": 49.908204652296874, + "learning_rate": 5e-06, + "loss": 0.2756, + "num_input_tokens_seen": 168176136, + "step": 973 + }, + { + "epoch": 0.3726541554959786, + "loss": 0.29072046279907227, + "loss_ce": 0.019480209797620773, + "loss_iou": 1.0204592943191528, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 168176136, + "step": 973 + }, + { + "epoch": 0.3730371505170433, + "grad_norm": 31.16355749647373, + "learning_rate": 5e-06, + "loss": 0.2951, + "num_input_tokens_seen": 168349208, + "step": 974 + }, + { + "epoch": 0.3730371505170433, + "loss": 0.30966952443122864, + "loss_ce": 0.02072908729314804, + "loss_iou": 1.0145212411880493, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 168349208, + "step": 974 + }, + { + "epoch": 0.373420145538108, + "grad_norm": 25.088461228350013, + "learning_rate": 5e-06, + "loss": 0.3297, + "num_input_tokens_seen": 168522392, + "step": 975 + }, + { + "epoch": 0.373420145538108, + "loss": 0.3033157289028168, + "loss_ce": 0.01999053731560707, + "loss_iou": 1.001996636390686, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 168522392, + "step": 975 + }, + { + "epoch": 0.37380314055917274, + "grad_norm": 40.386975520720455, + "learning_rate": 5e-06, + "loss": 0.3908, + "num_input_tokens_seen": 168695216, + "step": 976 + }, + { + "epoch": 0.37380314055917274, + "loss": 0.33824622631073, + "loss_ce": 0.019764790311455727, + "loss_iou": 1.0352516174316406, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 168695216, + "step": 976 + }, + { + "epoch": 0.37418613558023744, + "grad_norm": 57.71792347077378, + "learning_rate": 5e-06, + "loss": 0.4084, + "num_input_tokens_seen": 168868456, + "step": 977 + }, + { + "epoch": 0.37418613558023744, + "loss": 0.42148301005363464, + "loss_ce": 0.020115818828344345, + "loss_iou": 1.053954839706421, + "loss_num": 0.40234375, + "loss_xval": 0.40234375, + "num_input_tokens_seen": 168868456, + "step": 977 + }, + { + "epoch": 0.3745691306013022, + "grad_norm": 48.833072329454026, + "learning_rate": 5e-06, + "loss": 0.3615, + "num_input_tokens_seen": 169041200, + "step": 978 + }, + { + "epoch": 0.3745691306013022, + "loss": 0.3466894030570984, + "loss_ce": 0.022104421630501747, + "loss_iou": 1.0623598098754883, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 169041200, + "step": 978 + }, + { + "epoch": 0.3749521256223669, + "grad_norm": 37.36730018726415, + "learning_rate": 5e-06, + "loss": 0.4141, + "num_input_tokens_seen": 169213848, + "step": 979 + }, + { + "epoch": 0.3749521256223669, + "loss": 0.4149620234966278, + "loss_ce": 0.020186619833111763, + "loss_iou": 1.0112701654434204, + "loss_num": 0.39453125, + "loss_xval": 0.39453125, + "num_input_tokens_seen": 169213848, + "step": 979 + }, + { + "epoch": 0.3753351206434316, + "grad_norm": 46.6991870180064, + "learning_rate": 5e-06, + "loss": 0.4204, + "num_input_tokens_seen": 169386720, + "step": 980 + }, + { + "epoch": 0.3753351206434316, + "loss": 0.4489254355430603, + "loss_ce": 0.02241174876689911, + "loss_iou": 1.0938462018966675, + "loss_num": 0.42578125, + "loss_xval": 0.42578125, + "num_input_tokens_seen": 169386720, + "step": 980 + }, + { + "epoch": 0.37571811566449637, + "grad_norm": 68.94776622161878, + "learning_rate": 5e-06, + "loss": 0.6308, + "num_input_tokens_seen": 169559888, + "step": 981 + }, + { + "epoch": 0.37571811566449637, + "loss": 0.6707226037979126, + "loss_ce": 0.021308545023202896, + "loss_iou": 1.3989322185516357, + "loss_num": 0.6484375, + "loss_xval": 0.6484375, + "num_input_tokens_seen": 169559888, + "step": 981 + }, + { + "epoch": 0.37610111068556107, + "grad_norm": 55.08751091480485, + "learning_rate": 5e-06, + "loss": 0.4322, + "num_input_tokens_seen": 169733104, + "step": 982 + }, + { + "epoch": 0.37610111068556107, + "loss": 0.42639780044555664, + "loss_ce": 0.020391955971717834, + "loss_iou": 1.0782774686813354, + "loss_num": 0.40625, + "loss_xval": 0.40625, + "num_input_tokens_seen": 169733104, + "step": 982 + }, + { + "epoch": 0.3764841057066258, + "grad_norm": 38.44895143776549, + "learning_rate": 5e-06, + "loss": 0.4994, + "num_input_tokens_seen": 169906048, + "step": 983 + }, + { + "epoch": 0.3764841057066258, + "loss": 0.4617816209793091, + "loss_ce": 0.018422234803438187, + "loss_iou": 0.9563711285591125, + "loss_num": 0.443359375, + "loss_xval": 0.443359375, + "num_input_tokens_seen": 169906048, + "step": 983 + }, + { + "epoch": 0.37686710072769053, + "grad_norm": 18.926519580396565, + "learning_rate": 5e-06, + "loss": 0.3627, + "num_input_tokens_seen": 170078736, + "step": 984 + }, + { + "epoch": 0.37686710072769053, + "loss": 0.32097262144088745, + "loss_ce": 0.019825173541903496, + "loss_iou": 1.0138651132583618, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 170078736, + "step": 984 + }, + { + "epoch": 0.3772500957487553, + "grad_norm": 13.845202092258038, + "learning_rate": 5e-06, + "loss": 0.2875, + "num_input_tokens_seen": 170252032, + "step": 985 + }, + { + "epoch": 0.3772500957487553, + "loss": 0.31729811429977417, + "loss_ce": 0.02091141790151596, + "loss_iou": 1.0532989501953125, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 170252032, + "step": 985 + }, + { + "epoch": 0.37763309076982, + "grad_norm": 29.6947170824272, + "learning_rate": 5e-06, + "loss": 0.3102, + "num_input_tokens_seen": 170425024, + "step": 986 + }, + { + "epoch": 0.37763309076982, + "loss": 0.3302353024482727, + "loss_ce": 0.02335052751004696, + "loss_iou": 1.0863749980926514, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 170425024, + "step": 986 + }, + { + "epoch": 0.3780160857908847, + "grad_norm": 12.067861650195477, + "learning_rate": 5e-06, + "loss": 0.2124, + "num_input_tokens_seen": 170598016, + "step": 987 + }, + { + "epoch": 0.3780160857908847, + "loss": 0.2587829530239105, + "loss_ce": 0.020379632711410522, + "loss_iou": 1.0715415477752686, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 170598016, + "step": 987 + }, + { + "epoch": 0.37839908081194945, + "grad_norm": 15.67376693155364, + "learning_rate": 5e-06, + "loss": 0.2562, + "num_input_tokens_seen": 170770912, + "step": 988 + }, + { + "epoch": 0.37839908081194945, + "loss": 0.24709764122962952, + "loss_ce": 0.020352039486169815, + "loss_iou": 1.096572756767273, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 170770912, + "step": 988 + }, + { + "epoch": 0.37878207583301415, + "grad_norm": 32.01541760856049, + "learning_rate": 5e-06, + "loss": 0.2612, + "num_input_tokens_seen": 170943712, + "step": 989 + }, + { + "epoch": 0.37878207583301415, + "loss": 0.254817396402359, + "loss_ce": 0.020930690690875053, + "loss_iou": 1.0075466632843018, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 170943712, + "step": 989 + }, + { + "epoch": 0.3791650708540789, + "grad_norm": 30.10256252096975, + "learning_rate": 5e-06, + "loss": 0.2557, + "num_input_tokens_seen": 171116320, + "step": 990 + }, + { + "epoch": 0.3791650708540789, + "loss": 0.24919039011001587, + "loss_ce": 0.021285109221935272, + "loss_iou": 1.0024375915527344, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 171116320, + "step": 990 + }, + { + "epoch": 0.3795480658751436, + "grad_norm": 36.33932731783155, + "learning_rate": 5e-06, + "loss": 0.2792, + "num_input_tokens_seen": 171289368, + "step": 991 + }, + { + "epoch": 0.3795480658751436, + "loss": 0.36623942852020264, + "loss_ce": 0.01846110261976719, + "loss_iou": 1.0178117752075195, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 171289368, + "step": 991 + }, + { + "epoch": 0.3799310608962084, + "grad_norm": 23.561074635829417, + "learning_rate": 5e-06, + "loss": 0.2706, + "num_input_tokens_seen": 171461888, + "step": 992 + }, + { + "epoch": 0.3799310608962084, + "loss": 0.22868096828460693, + "loss_ce": 0.021405581384897232, + "loss_iou": 1.0184024572372437, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 171461888, + "step": 992 + }, + { + "epoch": 0.3803140559172731, + "grad_norm": 69.65429230110702, + "learning_rate": 5e-06, + "loss": 0.2282, + "num_input_tokens_seen": 171634728, + "step": 993 + }, + { + "epoch": 0.3803140559172731, + "loss": 0.1933428943157196, + "loss_ce": 0.020674441009759903, + "loss_iou": 1.048812985420227, + "loss_num": 0.1728515625, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 171634728, + "step": 993 + }, + { + "epoch": 0.3806970509383378, + "grad_norm": 47.02556831642122, + "learning_rate": 5e-06, + "loss": 0.2991, + "num_input_tokens_seen": 171807632, + "step": 994 + }, + { + "epoch": 0.3806970509383378, + "loss": 0.25592872500419617, + "loss_ce": 0.02253028377890587, + "loss_iou": 1.0186436176300049, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 171807632, + "step": 994 + }, + { + "epoch": 0.38108004595940254, + "grad_norm": 26.723063321849086, + "learning_rate": 5e-06, + "loss": 0.2258, + "num_input_tokens_seen": 171980448, + "step": 995 + }, + { + "epoch": 0.38108004595940254, + "loss": 0.19038596749305725, + "loss_ce": 0.018755123019218445, + "loss_iou": 1.0027767419815063, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 171980448, + "step": 995 + }, + { + "epoch": 0.38146304098046724, + "grad_norm": 22.90353645456606, + "learning_rate": 5e-06, + "loss": 0.2788, + "num_input_tokens_seen": 172153312, + "step": 996 + }, + { + "epoch": 0.38146304098046724, + "loss": 0.357512503862381, + "loss_ce": 0.020720507949590683, + "loss_iou": 1.1017611026763916, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 172153312, + "step": 996 + }, + { + "epoch": 0.381846036001532, + "grad_norm": 38.48221669166512, + "learning_rate": 5e-06, + "loss": 0.2013, + "num_input_tokens_seen": 172326240, + "step": 997 + }, + { + "epoch": 0.381846036001532, + "loss": 0.18817144632339478, + "loss_ce": 0.01983649656176567, + "loss_iou": 1.0173728466033936, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 172326240, + "step": 997 + }, + { + "epoch": 0.3822290310225967, + "grad_norm": 36.67878110625945, + "learning_rate": 5e-06, + "loss": 0.2996, + "num_input_tokens_seen": 172499128, + "step": 998 + }, + { + "epoch": 0.3822290310225967, + "loss": 0.3369196355342865, + "loss_ce": 0.01880439557135105, + "loss_iou": 1.6968703269958496, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 172499128, + "step": 998 + }, + { + "epoch": 0.38261202604366146, + "grad_norm": 28.376835361028178, + "learning_rate": 5e-06, + "loss": 0.2075, + "num_input_tokens_seen": 172671712, + "step": 999 + }, + { + "epoch": 0.38261202604366146, + "loss": 0.23104539513587952, + "loss_ce": 0.020107891410589218, + "loss_iou": 1.0930787324905396, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 172671712, + "step": 999 + }, + { + "epoch": 0.38299502106472616, + "grad_norm": 34.65065629131397, + "learning_rate": 5e-06, + "loss": 0.2881, + "num_input_tokens_seen": 172844576, + "step": 1000 + }, + { + "epoch": 0.38299502106472616, + "eval_websight_new_CIoU": 0.88560551404953, + "eval_websight_new_GIoU": 0.885301798582077, + "eval_websight_new_IoU": 0.886048287153244, + "eval_websight_new_MAE_all": 0.011711293365806341, + "eval_websight_new_MAE_h": 0.012148695066571236, + "eval_websight_new_MAE_w": 0.015516783576458693, + "eval_websight_new_MAE_x": 0.011325123254209757, + "eval_websight_new_MAE_y": 0.00785457226447761, + "eval_websight_new_NUM_probability": 0.0001039152812154498, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.1705828160047531, + "eval_websight_new_loss_ce": 0.040730688720941544, + "eval_websight_new_loss_iou": 1.0006657838821411, + "eval_websight_new_loss_num": 0.13189697265625, + "eval_websight_new_loss_xval": 0.13189697265625, + "eval_websight_new_runtime": 60.6628, + "eval_websight_new_samples_per_second": 0.824, + "eval_websight_new_steps_per_second": 0.033, + "num_input_tokens_seen": 172844576, + "step": 1000 + }, + { + "epoch": 0.38299502106472616, + "eval_seeclick_CIoU": 0.6596595346927643, + "eval_seeclick_GIoU": 0.6570102870464325, + "eval_seeclick_IoU": 0.6813171207904816, + "eval_seeclick_MAE_all": 0.04912090487778187, + "eval_seeclick_MAE_h": 0.04407734237611294, + "eval_seeclick_MAE_w": 0.0640980452299118, + "eval_seeclick_MAE_x": 0.04707919806241989, + "eval_seeclick_MAE_y": 0.041229039430618286, + "eval_seeclick_NUM_probability": 0.00013923477672506124, + "eval_seeclick_inside_bbox": 0.9409722089767456, + "eval_seeclick_loss": 0.4747886657714844, + "eval_seeclick_loss_ce": 0.03171798028051853, + "eval_seeclick_loss_iou": 1.0753774046897888, + "eval_seeclick_loss_num": 0.4178466796875, + "eval_seeclick_loss_xval": 0.4178466796875, + "eval_seeclick_runtime": 87.6366, + "eval_seeclick_samples_per_second": 0.571, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 172844576, + "step": 1000 + }, + { + "epoch": 0.38299502106472616, + "eval_icons_CIoU": 0.8593625128269196, + "eval_icons_GIoU": 0.8555968403816223, + "eval_icons_IoU": 0.8631813824176788, + "eval_icons_MAE_all": 0.01792115345597267, + "eval_icons_MAE_h": 0.020976515486836433, + "eval_icons_MAE_w": 0.02032420039176941, + "eval_icons_MAE_x": 0.015029226895421743, + "eval_icons_MAE_y": 0.015354669652879238, + "eval_icons_NUM_probability": 0.00010661216583685018, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.16257581114768982, + "eval_icons_loss_ce": 0.02727161906659603, + "eval_icons_loss_iou": 1.0135286450386047, + "eval_icons_loss_num": 0.126068115234375, + "eval_icons_loss_xval": 0.126068115234375, + "eval_icons_runtime": 90.5649, + "eval_icons_samples_per_second": 0.552, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 172844576, + "step": 1000 + }, + { + "epoch": 0.38299502106472616, + "loss": 0.17055588960647583, + "loss_ce": 0.02730637788772583, + "loss_iou": 1.0265284776687622, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 172844576, + "step": 1000 + }, + { + "epoch": 0.38337801608579086, + "grad_norm": 34.91923648615059, + "learning_rate": 5e-06, + "loss": 0.1858, + "num_input_tokens_seen": 173017632, + "step": 1001 + }, + { + "epoch": 0.38337801608579086, + "loss": 0.18923646211624146, + "loss_ce": 0.020779425278306007, + "loss_iou": 1.0047352313995361, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 173017632, + "step": 1001 + }, + { + "epoch": 0.3837610111068556, + "grad_norm": 35.66388745742093, + "learning_rate": 5e-06, + "loss": 0.2295, + "num_input_tokens_seen": 173190344, + "step": 1002 + }, + { + "epoch": 0.3837610111068556, + "loss": 0.2730959951877594, + "loss_ce": 0.019678032025694847, + "loss_iou": 1.074703335762024, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 173190344, + "step": 1002 + }, + { + "epoch": 0.3841440061279203, + "grad_norm": 37.025768383792325, + "learning_rate": 5e-06, + "loss": 0.24, + "num_input_tokens_seen": 173363256, + "step": 1003 + }, + { + "epoch": 0.3841440061279203, + "loss": 0.25298523902893066, + "loss_ce": 0.020868537947535515, + "loss_iou": 1.1796908378601074, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 173363256, + "step": 1003 + }, + { + "epoch": 0.3845270011489851, + "grad_norm": 38.128815178417504, + "learning_rate": 5e-06, + "loss": 0.3525, + "num_input_tokens_seen": 173536208, + "step": 1004 + }, + { + "epoch": 0.3845270011489851, + "loss": 0.43987375497817993, + "loss_ce": 0.020318102091550827, + "loss_iou": 1.224792242050171, + "loss_num": 0.419921875, + "loss_xval": 0.419921875, + "num_input_tokens_seen": 173536208, + "step": 1004 + }, + { + "epoch": 0.3849099961700498, + "grad_norm": 39.33223346405527, + "learning_rate": 5e-06, + "loss": 0.1855, + "num_input_tokens_seen": 173709080, + "step": 1005 + }, + { + "epoch": 0.3849099961700498, + "loss": 0.21229764819145203, + "loss_ce": 0.020342081785202026, + "loss_iou": 1.0204285383224487, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 173709080, + "step": 1005 + }, + { + "epoch": 0.38529299119111454, + "grad_norm": 32.986916212887074, + "learning_rate": 5e-06, + "loss": 0.2482, + "num_input_tokens_seen": 173881672, + "step": 1006 + }, + { + "epoch": 0.38529299119111454, + "loss": 0.2718982696533203, + "loss_ce": 0.019334791228175163, + "loss_iou": 0.9743728637695312, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 173881672, + "step": 1006 + }, + { + "epoch": 0.38567598621217924, + "grad_norm": 33.071792353423476, + "learning_rate": 5e-06, + "loss": 0.2528, + "num_input_tokens_seen": 174054688, + "step": 1007 + }, + { + "epoch": 0.38567598621217924, + "loss": 0.21027815341949463, + "loss_ce": 0.019482256844639778, + "loss_iou": 1.053891658782959, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 174054688, + "step": 1007 + }, + { + "epoch": 0.38605898123324395, + "grad_norm": 45.872281069364874, + "learning_rate": 5e-06, + "loss": 0.2233, + "num_input_tokens_seen": 174227720, + "step": 1008 + }, + { + "epoch": 0.38605898123324395, + "loss": 0.24927006661891937, + "loss_ce": 0.019350633025169373, + "loss_iou": 1.0087388753890991, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 174227720, + "step": 1008 + }, + { + "epoch": 0.3864419762543087, + "grad_norm": 28.142361946977083, + "learning_rate": 5e-06, + "loss": 0.2527, + "num_input_tokens_seen": 174400504, + "step": 1009 + }, + { + "epoch": 0.3864419762543087, + "loss": 0.26419174671173096, + "loss_ce": 0.019806988537311554, + "loss_iou": 1.026214361190796, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 174400504, + "step": 1009 + }, + { + "epoch": 0.3868249712753734, + "grad_norm": 26.611276161604945, + "learning_rate": 5e-06, + "loss": 0.241, + "num_input_tokens_seen": 174573072, + "step": 1010 + }, + { + "epoch": 0.3868249712753734, + "loss": 0.2983134984970093, + "loss_ce": 0.01968800649046898, + "loss_iou": 1.1066608428955078, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 174573072, + "step": 1010 + }, + { + "epoch": 0.38720796629643817, + "grad_norm": 30.461948311421125, + "learning_rate": 5e-06, + "loss": 0.2167, + "num_input_tokens_seen": 174745704, + "step": 1011 + }, + { + "epoch": 0.38720796629643817, + "loss": 0.24395182728767395, + "loss_ce": 0.019708681851625443, + "loss_iou": 1.0080559253692627, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 174745704, + "step": 1011 + }, + { + "epoch": 0.38759096131750287, + "grad_norm": 32.797647663022616, + "learning_rate": 5e-06, + "loss": 0.1919, + "num_input_tokens_seen": 174919040, + "step": 1012 + }, + { + "epoch": 0.38759096131750287, + "loss": 0.2338232696056366, + "loss_ce": 0.019650910049676895, + "loss_iou": 1.0448970794677734, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 174919040, + "step": 1012 + }, + { + "epoch": 0.38797395633856757, + "grad_norm": 30.065078518528374, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 175092104, + "step": 1013 + }, + { + "epoch": 0.38797395633856757, + "loss": 0.32156360149383545, + "loss_ce": 0.020110974088311195, + "loss_iou": 1.0617436170578003, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 175092104, + "step": 1013 + }, + { + "epoch": 0.38835695135963233, + "grad_norm": 28.867010484729118, + "learning_rate": 5e-06, + "loss": 0.2178, + "num_input_tokens_seen": 175264816, + "step": 1014 + }, + { + "epoch": 0.38835695135963233, + "loss": 0.18044480681419373, + "loss_ce": 0.018396452069282532, + "loss_iou": 1.0876814126968384, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 175264816, + "step": 1014 + }, + { + "epoch": 0.38873994638069703, + "grad_norm": 28.792328412821917, + "learning_rate": 5e-06, + "loss": 0.2482, + "num_input_tokens_seen": 175437672, + "step": 1015 + }, + { + "epoch": 0.38873994638069703, + "loss": 0.2683184742927551, + "loss_ce": 0.018562620505690575, + "loss_iou": 1.0557260513305664, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 175437672, + "step": 1015 + }, + { + "epoch": 0.3891229414017618, + "grad_norm": 24.516995066221835, + "learning_rate": 5e-06, + "loss": 0.2651, + "num_input_tokens_seen": 175610552, + "step": 1016 + }, + { + "epoch": 0.3891229414017618, + "loss": 0.20224417746067047, + "loss_ce": 0.020542524755001068, + "loss_iou": 1.0099539756774902, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 175610552, + "step": 1016 + }, + { + "epoch": 0.3895059364228265, + "grad_norm": 29.986105631087796, + "learning_rate": 5e-06, + "loss": 0.3219, + "num_input_tokens_seen": 175783464, + "step": 1017 + }, + { + "epoch": 0.3895059364228265, + "loss": 0.3537331223487854, + "loss_ce": 0.02060324139893055, + "loss_iou": 1.188672661781311, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 175783464, + "step": 1017 + }, + { + "epoch": 0.38988893144389125, + "grad_norm": 37.60632462935342, + "learning_rate": 5e-06, + "loss": 0.2641, + "num_input_tokens_seen": 175956424, + "step": 1018 + }, + { + "epoch": 0.38988893144389125, + "loss": 0.28005295991897583, + "loss_ce": 0.021630097180604935, + "loss_iou": 1.034566879272461, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 175956424, + "step": 1018 + }, + { + "epoch": 0.39027192646495595, + "grad_norm": 26.55092653731164, + "learning_rate": 5e-06, + "loss": 0.299, + "num_input_tokens_seen": 176125440, + "step": 1019 + }, + { + "epoch": 0.39027192646495595, + "loss": 0.2950381338596344, + "loss_ce": 0.020502010360360146, + "loss_iou": 1.079911470413208, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 176125440, + "step": 1019 + }, + { + "epoch": 0.39065492148602066, + "grad_norm": 24.91511831938555, + "learning_rate": 5e-06, + "loss": 0.2435, + "num_input_tokens_seen": 176298344, + "step": 1020 + }, + { + "epoch": 0.39065492148602066, + "loss": 0.2348160445690155, + "loss_ce": 0.0199722982943058, + "loss_iou": 0.9842574596405029, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 176298344, + "step": 1020 + }, + { + "epoch": 0.3910379165070854, + "grad_norm": 36.35647838615096, + "learning_rate": 5e-06, + "loss": 0.2235, + "num_input_tokens_seen": 176471520, + "step": 1021 + }, + { + "epoch": 0.3910379165070854, + "loss": 0.20562788844108582, + "loss_ce": 0.02185102179646492, + "loss_iou": 0.9844660758972168, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 176471520, + "step": 1021 + }, + { + "epoch": 0.3914209115281501, + "grad_norm": 34.59426683529261, + "learning_rate": 5e-06, + "loss": 0.2816, + "num_input_tokens_seen": 176644144, + "step": 1022 + }, + { + "epoch": 0.3914209115281501, + "loss": 0.2733522653579712, + "loss_ce": 0.01700459234416485, + "loss_iou": 1.3063082695007324, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 176644144, + "step": 1022 + }, + { + "epoch": 0.3918039065492149, + "grad_norm": 13.813531849418283, + "learning_rate": 5e-06, + "loss": 0.2646, + "num_input_tokens_seen": 176817016, + "step": 1023 + }, + { + "epoch": 0.3918039065492149, + "loss": 0.2954465448856354, + "loss_ce": 0.018224865198135376, + "loss_iou": 1.582000732421875, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 176817016, + "step": 1023 + }, + { + "epoch": 0.3921869015702796, + "grad_norm": 28.728838579840332, + "learning_rate": 5e-06, + "loss": 0.2812, + "num_input_tokens_seen": 176989848, + "step": 1024 + }, + { + "epoch": 0.3921869015702796, + "loss": 0.3526727557182312, + "loss_ce": 0.019359752535820007, + "loss_iou": 1.1450241804122925, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 176989848, + "step": 1024 + }, + { + "epoch": 0.39256989659134434, + "grad_norm": 17.574054634642955, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 177162880, + "step": 1025 + }, + { + "epoch": 0.39256989659134434, + "loss": 0.25614941120147705, + "loss_ce": 0.02116405963897705, + "loss_iou": 1.0470411777496338, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 177162880, + "step": 1025 + }, + { + "epoch": 0.39295289161240904, + "grad_norm": 31.87346085801817, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 177335720, + "step": 1026 + }, + { + "epoch": 0.39295289161240904, + "loss": 0.2296457290649414, + "loss_ce": 0.02029513008892536, + "loss_iou": 1.0434712171554565, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 177335720, + "step": 1026 + }, + { + "epoch": 0.39333588663347374, + "grad_norm": 34.07962343740927, + "learning_rate": 5e-06, + "loss": 0.2748, + "num_input_tokens_seen": 177508480, + "step": 1027 + }, + { + "epoch": 0.39333588663347374, + "loss": 0.2981407046318054, + "loss_ce": 0.020308684557676315, + "loss_iou": 1.0449728965759277, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 177508480, + "step": 1027 + }, + { + "epoch": 0.3937188816545385, + "grad_norm": 21.19770177391623, + "learning_rate": 5e-06, + "loss": 0.2962, + "num_input_tokens_seen": 177681640, + "step": 1028 + }, + { + "epoch": 0.3937188816545385, + "loss": 0.32453253865242004, + "loss_ce": 0.019722970202565193, + "loss_iou": 1.0778220891952515, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 177681640, + "step": 1028 + }, + { + "epoch": 0.3941018766756032, + "grad_norm": 33.706028660777044, + "learning_rate": 5e-06, + "loss": 0.231, + "num_input_tokens_seen": 177854520, + "step": 1029 + }, + { + "epoch": 0.3941018766756032, + "loss": 0.2419479936361313, + "loss_ce": 0.020390372723340988, + "loss_iou": 1.0999131202697754, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 177854520, + "step": 1029 + }, + { + "epoch": 0.39448487169666796, + "grad_norm": 35.236379668839774, + "learning_rate": 5e-06, + "loss": 0.2638, + "num_input_tokens_seen": 178020376, + "step": 1030 + }, + { + "epoch": 0.39448487169666796, + "loss": 0.23927438259124756, + "loss_ce": 0.022111300379037857, + "loss_iou": 1.0072345733642578, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 178020376, + "step": 1030 + }, + { + "epoch": 0.39486786671773266, + "grad_norm": 32.1285055354119, + "learning_rate": 5e-06, + "loss": 0.245, + "num_input_tokens_seen": 178193424, + "step": 1031 + }, + { + "epoch": 0.39486786671773266, + "loss": 0.29661643505096436, + "loss_ce": 0.019333723932504654, + "loss_iou": 1.0214552879333496, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 178193424, + "step": 1031 + }, + { + "epoch": 0.3952508617387974, + "grad_norm": 25.32324873100988, + "learning_rate": 5e-06, + "loss": 0.3053, + "num_input_tokens_seen": 178366336, + "step": 1032 + }, + { + "epoch": 0.3952508617387974, + "loss": 0.34949928522109985, + "loss_ce": 0.019421163946390152, + "loss_iou": 1.5528998374938965, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 178366336, + "step": 1032 + }, + { + "epoch": 0.3956338567598621, + "grad_norm": 27.588990325820333, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 178539032, + "step": 1033 + }, + { + "epoch": 0.3956338567598621, + "loss": 0.22613750398159027, + "loss_ce": 0.019716612994670868, + "loss_iou": 1.0760841369628906, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 178539032, + "step": 1033 + }, + { + "epoch": 0.3960168517809268, + "grad_norm": 35.87362400033478, + "learning_rate": 5e-06, + "loss": 0.3129, + "num_input_tokens_seen": 178711576, + "step": 1034 + }, + { + "epoch": 0.3960168517809268, + "loss": 0.31826305389404297, + "loss_ce": 0.01943489909172058, + "loss_iou": 1.1315784454345703, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 178711576, + "step": 1034 + }, + { + "epoch": 0.3963998468019916, + "grad_norm": 28.41816637255368, + "learning_rate": 5e-06, + "loss": 0.2566, + "num_input_tokens_seen": 178884296, + "step": 1035 + }, + { + "epoch": 0.3963998468019916, + "loss": 0.1878519058227539, + "loss_ce": 0.019883153960108757, + "loss_iou": 1.001306176185608, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 178884296, + "step": 1035 + }, + { + "epoch": 0.3967828418230563, + "grad_norm": 28.305814305305066, + "learning_rate": 5e-06, + "loss": 0.2616, + "num_input_tokens_seen": 179057336, + "step": 1036 + }, + { + "epoch": 0.3967828418230563, + "loss": 0.24126334488391876, + "loss_ce": 0.018362944945693016, + "loss_iou": 1.0048975944519043, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 179057336, + "step": 1036 + }, + { + "epoch": 0.39716583684412105, + "grad_norm": 27.021356199118504, + "learning_rate": 5e-06, + "loss": 0.228, + "num_input_tokens_seen": 179230528, + "step": 1037 + }, + { + "epoch": 0.39716583684412105, + "loss": 0.2893359661102295, + "loss_ce": 0.020354025065898895, + "loss_iou": 1.10371732711792, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 179230528, + "step": 1037 + }, + { + "epoch": 0.39754883186518575, + "grad_norm": 34.33560302695971, + "learning_rate": 5e-06, + "loss": 0.2778, + "num_input_tokens_seen": 179403520, + "step": 1038 + }, + { + "epoch": 0.39754883186518575, + "loss": 0.256139874458313, + "loss_ce": 0.018468990921974182, + "loss_iou": 1.0579614639282227, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 179403520, + "step": 1038 + }, + { + "epoch": 0.3979318268862505, + "grad_norm": 34.447395900685464, + "learning_rate": 5e-06, + "loss": 0.2138, + "num_input_tokens_seen": 179576640, + "step": 1039 + }, + { + "epoch": 0.3979318268862505, + "loss": 0.24738307297229767, + "loss_ce": 0.02027124911546707, + "loss_iou": 1.0358725786209106, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 179576640, + "step": 1039 + }, + { + "epoch": 0.3983148219073152, + "grad_norm": 25.621526396280814, + "learning_rate": 5e-06, + "loss": 0.2689, + "num_input_tokens_seen": 179749928, + "step": 1040 + }, + { + "epoch": 0.3983148219073152, + "loss": 0.2736883759498596, + "loss_ce": 0.019904205575585365, + "loss_iou": 1.008162498474121, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 179749928, + "step": 1040 + }, + { + "epoch": 0.3986978169283799, + "grad_norm": 24.693164082863355, + "learning_rate": 5e-06, + "loss": 0.21, + "num_input_tokens_seen": 179922960, + "step": 1041 + }, + { + "epoch": 0.3986978169283799, + "loss": 0.22558265924453735, + "loss_ce": 0.020565571263432503, + "loss_iou": 1.0665814876556396, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 179922960, + "step": 1041 + }, + { + "epoch": 0.39908081194944467, + "grad_norm": 27.018742048274287, + "learning_rate": 5e-06, + "loss": 0.2807, + "num_input_tokens_seen": 180095264, + "step": 1042 + }, + { + "epoch": 0.39908081194944467, + "loss": 0.2431066781282425, + "loss_ce": 0.020145248621702194, + "loss_iou": 1.0317829847335815, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 180095264, + "step": 1042 + }, + { + "epoch": 0.39946380697050937, + "grad_norm": 28.78757936901894, + "learning_rate": 5e-06, + "loss": 0.3031, + "num_input_tokens_seen": 180268200, + "step": 1043 + }, + { + "epoch": 0.39946380697050937, + "loss": 0.25353485345840454, + "loss_ce": 0.021296095103025436, + "loss_iou": 1.0509358644485474, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 180268200, + "step": 1043 + }, + { + "epoch": 0.39984680199157413, + "grad_norm": 28.53706200030292, + "learning_rate": 5e-06, + "loss": 0.2743, + "num_input_tokens_seen": 180441240, + "step": 1044 + }, + { + "epoch": 0.39984680199157413, + "loss": 0.27864956855773926, + "loss_ce": 0.019067034125328064, + "loss_iou": 1.0073164701461792, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 180441240, + "step": 1044 + }, + { + "epoch": 0.40022979701263883, + "grad_norm": 28.757712763800257, + "learning_rate": 5e-06, + "loss": 0.2324, + "num_input_tokens_seen": 180614320, + "step": 1045 + }, + { + "epoch": 0.40022979701263883, + "loss": 0.21844510734081268, + "loss_ce": 0.020508088171482086, + "loss_iou": 1.0111403465270996, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 180614320, + "step": 1045 + }, + { + "epoch": 0.40061279203370354, + "grad_norm": 36.304606140461196, + "learning_rate": 5e-06, + "loss": 0.2559, + "num_input_tokens_seen": 180787416, + "step": 1046 + }, + { + "epoch": 0.40061279203370354, + "loss": 0.2342616319656372, + "loss_ce": 0.019906163215637207, + "loss_iou": 1.0033844709396362, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 180787416, + "step": 1046 + }, + { + "epoch": 0.4009957870547683, + "grad_norm": 24.178970934702544, + "learning_rate": 5e-06, + "loss": 0.2158, + "num_input_tokens_seen": 180960432, + "step": 1047 + }, + { + "epoch": 0.4009957870547683, + "loss": 0.24342913925647736, + "loss_ce": 0.018758729100227356, + "loss_iou": 1.027082085609436, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 180960432, + "step": 1047 + }, + { + "epoch": 0.401378782075833, + "grad_norm": 23.55980725206559, + "learning_rate": 5e-06, + "loss": 0.2725, + "num_input_tokens_seen": 181132816, + "step": 1048 + }, + { + "epoch": 0.401378782075833, + "loss": 0.24806642532348633, + "loss_ce": 0.019550804048776627, + "loss_iou": 1.0464789867401123, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 181132816, + "step": 1048 + }, + { + "epoch": 0.40176177709689775, + "grad_norm": 15.16717833608982, + "learning_rate": 5e-06, + "loss": 0.2624, + "num_input_tokens_seen": 181305688, + "step": 1049 + }, + { + "epoch": 0.40176177709689775, + "loss": 0.2303747683763504, + "loss_ce": 0.021390395238995552, + "loss_iou": 1.0903661251068115, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 181305688, + "step": 1049 + }, + { + "epoch": 0.40214477211796246, + "grad_norm": 19.53155959200495, + "learning_rate": 5e-06, + "loss": 0.2944, + "num_input_tokens_seen": 181477952, + "step": 1050 + }, + { + "epoch": 0.40214477211796246, + "loss": 0.3223775029182434, + "loss_ce": 0.019276905804872513, + "loss_iou": 1.0366268157958984, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 181477952, + "step": 1050 + }, + { + "epoch": 0.4025277671390272, + "grad_norm": 28.92356228415835, + "learning_rate": 5e-06, + "loss": 0.2129, + "num_input_tokens_seen": 181650616, + "step": 1051 + }, + { + "epoch": 0.4025277671390272, + "loss": 0.2253831923007965, + "loss_ce": 0.020427130162715912, + "loss_iou": 1.0213334560394287, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 181650616, + "step": 1051 + }, + { + "epoch": 0.4029107621600919, + "grad_norm": 37.07564566175605, + "learning_rate": 5e-06, + "loss": 0.2537, + "num_input_tokens_seen": 181823720, + "step": 1052 + }, + { + "epoch": 0.4029107621600919, + "loss": 0.23519429564476013, + "loss_ce": 0.01882467046380043, + "loss_iou": 1.0054073333740234, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 181823720, + "step": 1052 + }, + { + "epoch": 0.4032937571811566, + "grad_norm": 31.201015046885313, + "learning_rate": 5e-06, + "loss": 0.2164, + "num_input_tokens_seen": 181996632, + "step": 1053 + }, + { + "epoch": 0.4032937571811566, + "loss": 0.20811310410499573, + "loss_ce": 0.01975860819220543, + "loss_iou": 1.007824182510376, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 181996632, + "step": 1053 + }, + { + "epoch": 0.4036767522022214, + "grad_norm": 29.227328404145602, + "learning_rate": 5e-06, + "loss": 0.2954, + "num_input_tokens_seen": 182169560, + "step": 1054 + }, + { + "epoch": 0.4036767522022214, + "loss": 0.27629297971725464, + "loss_ce": 0.019334979355335236, + "loss_iou": 1.0095736980438232, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 182169560, + "step": 1054 + }, + { + "epoch": 0.4040597472232861, + "grad_norm": 17.552602461062378, + "learning_rate": 5e-06, + "loss": 0.2578, + "num_input_tokens_seen": 182342216, + "step": 1055 + }, + { + "epoch": 0.4040597472232861, + "loss": 0.33272290229797363, + "loss_ce": 0.019917726516723633, + "loss_iou": 1.024327039718628, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 182342216, + "step": 1055 + }, + { + "epoch": 0.40444274224435084, + "grad_norm": 29.33588487581412, + "learning_rate": 5e-06, + "loss": 0.2677, + "num_input_tokens_seen": 182515128, + "step": 1056 + }, + { + "epoch": 0.40444274224435084, + "loss": 0.2414645403623581, + "loss_ce": 0.020639337599277496, + "loss_iou": 1.0498523712158203, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 182515128, + "step": 1056 + }, + { + "epoch": 0.40482573726541554, + "grad_norm": 31.24753138828455, + "learning_rate": 5e-06, + "loss": 0.2141, + "num_input_tokens_seen": 182688608, + "step": 1057 + }, + { + "epoch": 0.40482573726541554, + "loss": 0.2143682837486267, + "loss_ce": 0.02027648501098156, + "loss_iou": 1.0015480518341064, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 182688608, + "step": 1057 + }, + { + "epoch": 0.4052087322864803, + "grad_norm": 30.426761376657293, + "learning_rate": 5e-06, + "loss": 0.2727, + "num_input_tokens_seen": 182861744, + "step": 1058 + }, + { + "epoch": 0.4052087322864803, + "loss": 0.21315500140190125, + "loss_ce": 0.02089424803853035, + "loss_iou": 1.0028371810913086, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 182861744, + "step": 1058 + }, + { + "epoch": 0.405591727307545, + "grad_norm": 40.38200187104438, + "learning_rate": 5e-06, + "loss": 0.2406, + "num_input_tokens_seen": 183034680, + "step": 1059 + }, + { + "epoch": 0.405591727307545, + "loss": 0.2552831172943115, + "loss_ce": 0.02084709331393242, + "loss_iou": 1.0171141624450684, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 183034680, + "step": 1059 + }, + { + "epoch": 0.4059747223286097, + "grad_norm": 46.59560384369054, + "learning_rate": 5e-06, + "loss": 0.3161, + "num_input_tokens_seen": 183207984, + "step": 1060 + }, + { + "epoch": 0.4059747223286097, + "loss": 0.32938194274902344, + "loss_ce": 0.019811611622571945, + "loss_iou": 1.0379105806350708, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 183207984, + "step": 1060 + }, + { + "epoch": 0.40635771734967446, + "grad_norm": 37.40181740370994, + "learning_rate": 5e-06, + "loss": 0.3163, + "num_input_tokens_seen": 183381376, + "step": 1061 + }, + { + "epoch": 0.40635771734967446, + "loss": 0.33082133531570435, + "loss_ce": 0.020884817466139793, + "loss_iou": 1.0592249631881714, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 183381376, + "step": 1061 + }, + { + "epoch": 0.40674071237073917, + "grad_norm": 29.14551515879792, + "learning_rate": 5e-06, + "loss": 0.2702, + "num_input_tokens_seen": 183554152, + "step": 1062 + }, + { + "epoch": 0.40674071237073917, + "loss": 0.2896021604537964, + "loss_ce": 0.019826781004667282, + "loss_iou": 1.015427827835083, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 183554152, + "step": 1062 + }, + { + "epoch": 0.4071237073918039, + "grad_norm": 28.24061135290386, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 183727000, + "step": 1063 + }, + { + "epoch": 0.4071237073918039, + "loss": 0.2897298038005829, + "loss_ce": 0.020686831325292587, + "loss_iou": 1.2125548124313354, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 183727000, + "step": 1063 + }, + { + "epoch": 0.4075067024128686, + "grad_norm": 33.52604644553763, + "learning_rate": 5e-06, + "loss": 0.3711, + "num_input_tokens_seen": 183899816, + "step": 1064 + }, + { + "epoch": 0.4075067024128686, + "loss": 0.4948132634162903, + "loss_ce": 0.019593555480241776, + "loss_iou": 1.2182378768920898, + "loss_num": 0.474609375, + "loss_xval": 0.474609375, + "num_input_tokens_seen": 183899816, + "step": 1064 + }, + { + "epoch": 0.4078896974339334, + "grad_norm": 27.88108930750279, + "learning_rate": 5e-06, + "loss": 0.2385, + "num_input_tokens_seen": 184072696, + "step": 1065 + }, + { + "epoch": 0.4078896974339334, + "loss": 0.21623888611793518, + "loss_ce": 0.020682232454419136, + "loss_iou": 1.0074313879013062, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 184072696, + "step": 1065 + }, + { + "epoch": 0.4082726924549981, + "grad_norm": 37.1247900528633, + "learning_rate": 5e-06, + "loss": 0.2424, + "num_input_tokens_seen": 184245136, + "step": 1066 + }, + { + "epoch": 0.4082726924549981, + "loss": 0.22287967801094055, + "loss_ce": 0.019205372780561447, + "loss_iou": 1.0023624897003174, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 184245136, + "step": 1066 + }, + { + "epoch": 0.4086556874760628, + "grad_norm": 26.491031650768395, + "learning_rate": 5e-06, + "loss": 0.3627, + "num_input_tokens_seen": 184418320, + "step": 1067 + }, + { + "epoch": 0.4086556874760628, + "loss": 0.3207070827484131, + "loss_ce": 0.021024484187364578, + "loss_iou": 1.0953075885772705, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 184418320, + "step": 1067 + }, + { + "epoch": 0.40903868249712755, + "grad_norm": 29.791257760310767, + "learning_rate": 5e-06, + "loss": 0.2265, + "num_input_tokens_seen": 184591320, + "step": 1068 + }, + { + "epoch": 0.40903868249712755, + "loss": 0.24238575994968414, + "loss_ce": 0.019729506224393845, + "loss_iou": 4.7769131098520414e+35, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 184591320, + "step": 1068 + }, + { + "epoch": 0.40942167751819225, + "grad_norm": 37.98331683212382, + "learning_rate": 5e-06, + "loss": 0.2225, + "num_input_tokens_seen": 184764320, + "step": 1069 + }, + { + "epoch": 0.40942167751819225, + "loss": 0.22537072002887726, + "loss_ce": 0.02084190584719181, + "loss_iou": 1.002504587173462, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 184764320, + "step": 1069 + }, + { + "epoch": 0.409804672539257, + "grad_norm": 35.89718177327458, + "learning_rate": 5e-06, + "loss": 0.375, + "num_input_tokens_seen": 184937048, + "step": 1070 + }, + { + "epoch": 0.409804672539257, + "loss": 0.3266220688819885, + "loss_ce": 0.01985936611890793, + "loss_iou": 1.1617192029953003, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 184937048, + "step": 1070 + }, + { + "epoch": 0.4101876675603217, + "grad_norm": 21.651350807757197, + "learning_rate": 5e-06, + "loss": 0.1698, + "num_input_tokens_seen": 185110016, + "step": 1071 + }, + { + "epoch": 0.4101876675603217, + "loss": 0.15666621923446655, + "loss_ce": 0.020985066890716553, + "loss_iou": 1.0026936531066895, + "loss_num": 0.1357421875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 185110016, + "step": 1071 + }, + { + "epoch": 0.41057066258138647, + "grad_norm": 48.532328506967026, + "learning_rate": 5e-06, + "loss": 0.2417, + "num_input_tokens_seen": 185283120, + "step": 1072 + }, + { + "epoch": 0.41057066258138647, + "loss": 0.24449491500854492, + "loss_ce": 0.019214145839214325, + "loss_iou": 1.0388288497924805, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 185283120, + "step": 1072 + }, + { + "epoch": 0.4109536576024512, + "grad_norm": 32.26843347837672, + "learning_rate": 5e-06, + "loss": 0.2295, + "num_input_tokens_seen": 185455952, + "step": 1073 + }, + { + "epoch": 0.4109536576024512, + "loss": 0.21386288106441498, + "loss_ce": 0.020625578239560127, + "loss_iou": 1.003318190574646, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 185455952, + "step": 1073 + }, + { + "epoch": 0.4113366526235159, + "grad_norm": 25.19425817777292, + "learning_rate": 5e-06, + "loss": 0.2858, + "num_input_tokens_seen": 185628432, + "step": 1074 + }, + { + "epoch": 0.4113366526235159, + "loss": 0.2122112363576889, + "loss_ce": 0.018485646694898605, + "loss_iou": 1.0039255619049072, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 185628432, + "step": 1074 + }, + { + "epoch": 0.41171964764458063, + "grad_norm": 13.217673976589335, + "learning_rate": 5e-06, + "loss": 0.1704, + "num_input_tokens_seen": 185801592, + "step": 1075 + }, + { + "epoch": 0.41171964764458063, + "loss": 0.17114916443824768, + "loss_ce": 0.020758548751473427, + "loss_iou": 1.0100396871566772, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 185801592, + "step": 1075 + }, + { + "epoch": 0.41210264266564534, + "grad_norm": 14.260577675080459, + "learning_rate": 5e-06, + "loss": 0.2129, + "num_input_tokens_seen": 185974600, + "step": 1076 + }, + { + "epoch": 0.41210264266564534, + "loss": 0.24450165033340454, + "loss_ce": 0.018915709108114243, + "loss_iou": 1.0633727312088013, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 185974600, + "step": 1076 + }, + { + "epoch": 0.4124856376867101, + "grad_norm": 28.824251015629905, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 186147568, + "step": 1077 + }, + { + "epoch": 0.4124856376867101, + "loss": 0.23406794667243958, + "loss_ce": 0.01995660364627838, + "loss_iou": 1.0066158771514893, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 186147568, + "step": 1077 + }, + { + "epoch": 0.4128686327077748, + "grad_norm": 36.032062956592505, + "learning_rate": 5e-06, + "loss": 0.3205, + "num_input_tokens_seen": 186320584, + "step": 1078 + }, + { + "epoch": 0.4128686327077748, + "loss": 0.3176344633102417, + "loss_ce": 0.019416697323322296, + "loss_iou": 1.0374646186828613, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 186320584, + "step": 1078 + }, + { + "epoch": 0.4132516277288395, + "grad_norm": 30.008842799861185, + "learning_rate": 5e-06, + "loss": 0.2067, + "num_input_tokens_seen": 186493616, + "step": 1079 + }, + { + "epoch": 0.4132516277288395, + "loss": 0.20071427524089813, + "loss_ce": 0.01980607956647873, + "loss_iou": 1.0139362812042236, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 186493616, + "step": 1079 + }, + { + "epoch": 0.41363462274990426, + "grad_norm": 25.83195018400052, + "learning_rate": 5e-06, + "loss": 0.3091, + "num_input_tokens_seen": 186666664, + "step": 1080 + }, + { + "epoch": 0.41363462274990426, + "loss": 0.25226709246635437, + "loss_ce": 0.019417980685830116, + "loss_iou": 1.0057278871536255, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 186666664, + "step": 1080 + }, + { + "epoch": 0.41401761777096896, + "grad_norm": 25.59488449260052, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 186839648, + "step": 1081 + }, + { + "epoch": 0.41401761777096896, + "loss": 0.24771007895469666, + "loss_ce": 0.02007945626974106, + "loss_iou": 1.0573749542236328, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 186839648, + "step": 1081 + }, + { + "epoch": 0.4144006127920337, + "grad_norm": 36.715223570817095, + "learning_rate": 5e-06, + "loss": 0.2796, + "num_input_tokens_seen": 187012432, + "step": 1082 + }, + { + "epoch": 0.4144006127920337, + "loss": 0.3474213182926178, + "loss_ce": 0.019174234941601753, + "loss_iou": 0.8901275396347046, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 187012432, + "step": 1082 + }, + { + "epoch": 0.4147836078130984, + "grad_norm": 36.67370334258155, + "learning_rate": 5e-06, + "loss": 0.3268, + "num_input_tokens_seen": 187185240, + "step": 1083 + }, + { + "epoch": 0.4147836078130984, + "loss": 0.2963693141937256, + "loss_ce": 0.018720384687185287, + "loss_iou": 1.019195318222046, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 187185240, + "step": 1083 + }, + { + "epoch": 0.4151666028341632, + "grad_norm": 30.295862631894526, + "learning_rate": 5e-06, + "loss": 0.32, + "num_input_tokens_seen": 187356408, + "step": 1084 + }, + { + "epoch": 0.4151666028341632, + "loss": 0.3452816605567932, + "loss_ce": 0.019353941082954407, + "loss_iou": 1.0064892768859863, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 187356408, + "step": 1084 + }, + { + "epoch": 0.4155495978552279, + "grad_norm": 36.726952729545765, + "learning_rate": 5e-06, + "loss": 0.186, + "num_input_tokens_seen": 187529288, + "step": 1085 + }, + { + "epoch": 0.4155495978552279, + "loss": 0.1999548077583313, + "loss_ce": 0.02014523558318615, + "loss_iou": 1.0213425159454346, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 187529288, + "step": 1085 + }, + { + "epoch": 0.4159325928762926, + "grad_norm": 41.84504012812776, + "learning_rate": 5e-06, + "loss": 0.3025, + "num_input_tokens_seen": 187702224, + "step": 1086 + }, + { + "epoch": 0.4159325928762926, + "loss": 0.30090370774269104, + "loss_ce": 0.019653702154755592, + "loss_iou": 1.0435850620269775, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 187702224, + "step": 1086 + }, + { + "epoch": 0.41631558789735734, + "grad_norm": 35.58417190733154, + "learning_rate": 5e-06, + "loss": 0.2655, + "num_input_tokens_seen": 187875016, + "step": 1087 + }, + { + "epoch": 0.41631558789735734, + "loss": 0.34806880354881287, + "loss_ce": 0.018478956073522568, + "loss_iou": 1.1121827363967896, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 187875016, + "step": 1087 + }, + { + "epoch": 0.41669858291842204, + "grad_norm": 57.372595580243946, + "learning_rate": 5e-06, + "loss": 0.2721, + "num_input_tokens_seen": 188048056, + "step": 1088 + }, + { + "epoch": 0.41669858291842204, + "loss": 0.24286219477653503, + "loss_ce": 0.018741104751825333, + "loss_iou": 1.0283300876617432, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 188048056, + "step": 1088 + }, + { + "epoch": 0.4170815779394868, + "grad_norm": 35.09600697549438, + "learning_rate": 5e-06, + "loss": 0.2756, + "num_input_tokens_seen": 188221192, + "step": 1089 + }, + { + "epoch": 0.4170815779394868, + "loss": 0.32690510153770447, + "loss_ce": 0.019532054662704468, + "loss_iou": 0.9916969537734985, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 188221192, + "step": 1089 + }, + { + "epoch": 0.4174645729605515, + "grad_norm": 35.8157320365255, + "learning_rate": 5e-06, + "loss": 0.2852, + "num_input_tokens_seen": 188394200, + "step": 1090 + }, + { + "epoch": 0.4174645729605515, + "loss": 0.287636935710907, + "loss_ce": 0.018838126212358475, + "loss_iou": 1.0336689949035645, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 188394200, + "step": 1090 + }, + { + "epoch": 0.41784756798161626, + "grad_norm": 37.08378410038575, + "learning_rate": 5e-06, + "loss": 0.2278, + "num_input_tokens_seen": 188567032, + "step": 1091 + }, + { + "epoch": 0.41784756798161626, + "loss": 0.24460598826408386, + "loss_ce": 0.020790068432688713, + "loss_iou": 1.0342530012130737, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 188567032, + "step": 1091 + }, + { + "epoch": 0.41823056300268097, + "grad_norm": 37.41860078335122, + "learning_rate": 5e-06, + "loss": 0.2608, + "num_input_tokens_seen": 188739480, + "step": 1092 + }, + { + "epoch": 0.41823056300268097, + "loss": 0.2734837532043457, + "loss_ce": 0.019089236855506897, + "loss_iou": 0.9870978593826294, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 188739480, + "step": 1092 + }, + { + "epoch": 0.41861355802374567, + "grad_norm": 20.193221961767293, + "learning_rate": 5e-06, + "loss": 0.2244, + "num_input_tokens_seen": 188912824, + "step": 1093 + }, + { + "epoch": 0.41861355802374567, + "loss": 0.20880837738513947, + "loss_ce": 0.02075905352830887, + "loss_iou": 1.0575461387634277, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 188912824, + "step": 1093 + }, + { + "epoch": 0.4189965530448104, + "grad_norm": 26.32238347120246, + "learning_rate": 5e-06, + "loss": 0.2898, + "num_input_tokens_seen": 189085880, + "step": 1094 + }, + { + "epoch": 0.4189965530448104, + "loss": 0.2794140577316284, + "loss_ce": 0.020563945174217224, + "loss_iou": 1.1031012535095215, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 189085880, + "step": 1094 + }, + { + "epoch": 0.41937954806587513, + "grad_norm": 37.26817002701847, + "learning_rate": 5e-06, + "loss": 0.2058, + "num_input_tokens_seen": 189258864, + "step": 1095 + }, + { + "epoch": 0.41937954806587513, + "loss": 0.1816214621067047, + "loss_ce": 0.019695177674293518, + "loss_iou": 1.0097739696502686, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 189258864, + "step": 1095 + }, + { + "epoch": 0.4197625430869399, + "grad_norm": 34.95408758265353, + "learning_rate": 5e-06, + "loss": 0.2835, + "num_input_tokens_seen": 189431648, + "step": 1096 + }, + { + "epoch": 0.4197625430869399, + "loss": 0.29773491621017456, + "loss_ce": 0.01990288496017456, + "loss_iou": 1.141841173171997, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 189431648, + "step": 1096 + }, + { + "epoch": 0.4201455381080046, + "grad_norm": 35.4006169941124, + "learning_rate": 5e-06, + "loss": 0.2949, + "num_input_tokens_seen": 189604664, + "step": 1097 + }, + { + "epoch": 0.4201455381080046, + "loss": 0.2436686009168625, + "loss_ce": 0.021439600735902786, + "loss_iou": 1.0211408138275146, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 189604664, + "step": 1097 + }, + { + "epoch": 0.42052853312906935, + "grad_norm": 34.428038870408685, + "learning_rate": 5e-06, + "loss": 0.2832, + "num_input_tokens_seen": 189778208, + "step": 1098 + }, + { + "epoch": 0.42052853312906935, + "loss": 0.2590641677379608, + "loss_ce": 0.020294636487960815, + "loss_iou": 1.025743007659912, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 189778208, + "step": 1098 + }, + { + "epoch": 0.42091152815013405, + "grad_norm": 35.0534447485883, + "learning_rate": 5e-06, + "loss": 0.2622, + "num_input_tokens_seen": 189950968, + "step": 1099 + }, + { + "epoch": 0.42091152815013405, + "loss": 0.2537471055984497, + "loss_ce": 0.019250061362981796, + "loss_iou": 1.0193878412246704, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 189950968, + "step": 1099 + }, + { + "epoch": 0.42129452317119875, + "grad_norm": 35.467707534962194, + "learning_rate": 5e-06, + "loss": 0.3142, + "num_input_tokens_seen": 190123976, + "step": 1100 + }, + { + "epoch": 0.42129452317119875, + "loss": 0.3694373369216919, + "loss_ce": 0.019095528870821, + "loss_iou": 1.0063408613204956, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 190123976, + "step": 1100 + }, + { + "epoch": 0.4216775181922635, + "grad_norm": 30.95793796142663, + "learning_rate": 5e-06, + "loss": 0.2187, + "num_input_tokens_seen": 190297032, + "step": 1101 + }, + { + "epoch": 0.4216775181922635, + "loss": 0.23031185567378998, + "loss_ce": 0.02138851210474968, + "loss_iou": 1.0118106603622437, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 190297032, + "step": 1101 + }, + { + "epoch": 0.4220605132133282, + "grad_norm": 35.56362632816114, + "learning_rate": 5e-06, + "loss": 0.281, + "num_input_tokens_seen": 190470000, + "step": 1102 + }, + { + "epoch": 0.4220605132133282, + "loss": 0.22374743223190308, + "loss_ce": 0.019890017807483673, + "loss_iou": 1.0248589515686035, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 190470000, + "step": 1102 + }, + { + "epoch": 0.422443508234393, + "grad_norm": 48.711027174685476, + "learning_rate": 5e-06, + "loss": 0.2976, + "num_input_tokens_seen": 190643112, + "step": 1103 + }, + { + "epoch": 0.422443508234393, + "loss": 0.29828524589538574, + "loss_ce": 0.021185625344514847, + "loss_iou": 1.0907062292099, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 190643112, + "step": 1103 + }, + { + "epoch": 0.4228265032554577, + "grad_norm": 37.48421738750284, + "learning_rate": 5e-06, + "loss": 0.2993, + "num_input_tokens_seen": 190815712, + "step": 1104 + }, + { + "epoch": 0.4228265032554577, + "loss": 0.2960641384124756, + "loss_ce": 0.019452795386314392, + "loss_iou": 1.012925624847412, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 190815712, + "step": 1104 + }, + { + "epoch": 0.4232094982765224, + "grad_norm": 30.2858820801016, + "learning_rate": 5e-06, + "loss": 0.2165, + "num_input_tokens_seen": 190988688, + "step": 1105 + }, + { + "epoch": 0.4232094982765224, + "loss": 0.22336909174919128, + "loss_ce": 0.020122017711400986, + "loss_iou": 1.015170931816101, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 190988688, + "step": 1105 + }, + { + "epoch": 0.42359249329758714, + "grad_norm": 36.10718236901174, + "learning_rate": 5e-06, + "loss": 0.2552, + "num_input_tokens_seen": 191161624, + "step": 1106 + }, + { + "epoch": 0.42359249329758714, + "loss": 0.2612323760986328, + "loss_ce": 0.02002144604921341, + "loss_iou": 1.019909143447876, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 191161624, + "step": 1106 + }, + { + "epoch": 0.42397548831865184, + "grad_norm": 43.646195988064775, + "learning_rate": 5e-06, + "loss": 0.3127, + "num_input_tokens_seen": 191331472, + "step": 1107 + }, + { + "epoch": 0.42397548831865184, + "loss": 0.3050917088985443, + "loss_ce": 0.02323136292397976, + "loss_iou": 1.1774249076843262, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 191331472, + "step": 1107 + }, + { + "epoch": 0.4243584833397166, + "grad_norm": 28.77716085555061, + "learning_rate": 5e-06, + "loss": 0.316, + "num_input_tokens_seen": 191504464, + "step": 1108 + }, + { + "epoch": 0.4243584833397166, + "loss": 0.3153734803199768, + "loss_ce": 0.021306123584508896, + "loss_iou": 1.0083816051483154, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 191504464, + "step": 1108 + }, + { + "epoch": 0.4247414783607813, + "grad_norm": 23.328156987166356, + "learning_rate": 5e-06, + "loss": 0.2327, + "num_input_tokens_seen": 191677864, + "step": 1109 + }, + { + "epoch": 0.4247414783607813, + "loss": 0.17955052852630615, + "loss_ce": 0.020676013082265854, + "loss_iou": 1.0050337314605713, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 191677864, + "step": 1109 + }, + { + "epoch": 0.42512447338184606, + "grad_norm": 44.71531405551023, + "learning_rate": 5e-06, + "loss": 0.2593, + "num_input_tokens_seen": 191850688, + "step": 1110 + }, + { + "epoch": 0.42512447338184606, + "loss": 0.2549092173576355, + "loss_ce": 0.019313503056764603, + "loss_iou": 1.0502129793167114, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 191850688, + "step": 1110 + }, + { + "epoch": 0.42550746840291076, + "grad_norm": 45.485111903636096, + "learning_rate": 5e-06, + "loss": 0.2858, + "num_input_tokens_seen": 192023720, + "step": 1111 + }, + { + "epoch": 0.42550746840291076, + "loss": 0.30052047967910767, + "loss_ce": 0.019636696204543114, + "loss_iou": 0.8777676820755005, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 192023720, + "step": 1111 + }, + { + "epoch": 0.42589046342397546, + "grad_norm": 30.37565301000588, + "learning_rate": 5e-06, + "loss": 0.321, + "num_input_tokens_seen": 192196616, + "step": 1112 + }, + { + "epoch": 0.42589046342397546, + "loss": 0.26790064573287964, + "loss_ce": 0.01985376700758934, + "loss_iou": 1.0034968852996826, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 192196616, + "step": 1112 + }, + { + "epoch": 0.4262734584450402, + "grad_norm": 23.429370774409772, + "learning_rate": 5e-06, + "loss": 0.3124, + "num_input_tokens_seen": 192369312, + "step": 1113 + }, + { + "epoch": 0.4262734584450402, + "loss": 0.25368160009384155, + "loss_ce": 0.018452120944857597, + "loss_iou": 1.0222837924957275, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 192369312, + "step": 1113 + }, + { + "epoch": 0.4266564534661049, + "grad_norm": 49.81728447039158, + "learning_rate": 5e-06, + "loss": 0.2437, + "num_input_tokens_seen": 192541976, + "step": 1114 + }, + { + "epoch": 0.4266564534661049, + "loss": 0.19863948225975037, + "loss_ce": 0.01913510262966156, + "loss_iou": 1.0239115953445435, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 192541976, + "step": 1114 + }, + { + "epoch": 0.4270394484871697, + "grad_norm": 53.96170294298491, + "learning_rate": 5e-06, + "loss": 0.3647, + "num_input_tokens_seen": 192715040, + "step": 1115 + }, + { + "epoch": 0.4270394484871697, + "loss": 0.38576173782348633, + "loss_ce": 0.021381856873631477, + "loss_iou": 1.0469461679458618, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 192715040, + "step": 1115 + }, + { + "epoch": 0.4274224435082344, + "grad_norm": 30.996940663427463, + "learning_rate": 5e-06, + "loss": 0.3304, + "num_input_tokens_seen": 192887808, + "step": 1116 + }, + { + "epoch": 0.4274224435082344, + "loss": 0.32507526874542236, + "loss_ce": 0.018556740134954453, + "loss_iou": 1.0313948392868042, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 192887808, + "step": 1116 + }, + { + "epoch": 0.42780543852929914, + "grad_norm": 23.36501609384908, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 193060816, + "step": 1117 + }, + { + "epoch": 0.42780543852929914, + "loss": 0.17319723963737488, + "loss_ce": 0.022196270525455475, + "loss_iou": 1.0087032318115234, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 193060816, + "step": 1117 + }, + { + "epoch": 0.42818843355036385, + "grad_norm": 43.03438693532102, + "learning_rate": 5e-06, + "loss": 0.3621, + "num_input_tokens_seen": 193233944, + "step": 1118 + }, + { + "epoch": 0.42818843355036385, + "loss": 0.39938050508499146, + "loss_ce": 0.022183235734701157, + "loss_iou": 1.0878607034683228, + "loss_num": 0.376953125, + "loss_xval": 0.376953125, + "num_input_tokens_seen": 193233944, + "step": 1118 + }, + { + "epoch": 0.42857142857142855, + "grad_norm": 31.289737397930423, + "learning_rate": 5e-06, + "loss": 0.2237, + "num_input_tokens_seen": 193407104, + "step": 1119 + }, + { + "epoch": 0.42857142857142855, + "loss": 0.2626212537288666, + "loss_ce": 0.02067788690328598, + "loss_iou": 1.1765764951705933, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 193407104, + "step": 1119 + }, + { + "epoch": 0.4289544235924933, + "grad_norm": 28.985363759382338, + "learning_rate": 5e-06, + "loss": 0.2763, + "num_input_tokens_seen": 193576656, + "step": 1120 + }, + { + "epoch": 0.4289544235924933, + "loss": 0.29900774359703064, + "loss_ce": 0.019955020397901535, + "loss_iou": 1.0166741609573364, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 193576656, + "step": 1120 + }, + { + "epoch": 0.429337418613558, + "grad_norm": 22.460035617900825, + "learning_rate": 5e-06, + "loss": 0.1649, + "num_input_tokens_seen": 193749608, + "step": 1121 + }, + { + "epoch": 0.429337418613558, + "loss": 0.15083445608615875, + "loss_ce": 0.019730933010578156, + "loss_iou": 1.0127614736557007, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 193749608, + "step": 1121 + }, + { + "epoch": 0.42972041363462277, + "grad_norm": 38.12969874093416, + "learning_rate": 5e-06, + "loss": 0.2681, + "num_input_tokens_seen": 193922552, + "step": 1122 + }, + { + "epoch": 0.42972041363462277, + "loss": 0.3112063407897949, + "loss_ce": 0.019946586340665817, + "loss_iou": 0.9104921817779541, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 193922552, + "step": 1122 + }, + { + "epoch": 0.43010340865568747, + "grad_norm": 35.74217380806701, + "learning_rate": 5e-06, + "loss": 0.2094, + "num_input_tokens_seen": 194095616, + "step": 1123 + }, + { + "epoch": 0.43010340865568747, + "loss": 0.2030068337917328, + "loss_ce": 0.02014549821615219, + "loss_iou": 1.064756155014038, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 194095616, + "step": 1123 + }, + { + "epoch": 0.4304864036767522, + "grad_norm": 37.04493085369391, + "learning_rate": 5e-06, + "loss": 0.2416, + "num_input_tokens_seen": 194268272, + "step": 1124 + }, + { + "epoch": 0.4304864036767522, + "loss": 0.2464175522327423, + "loss_ce": 0.020343340933322906, + "loss_iou": 1.0350528955459595, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 194268272, + "step": 1124 + }, + { + "epoch": 0.43086939869781693, + "grad_norm": 38.20645250797387, + "learning_rate": 5e-06, + "loss": 0.2287, + "num_input_tokens_seen": 194441376, + "step": 1125 + }, + { + "epoch": 0.43086939869781693, + "loss": 0.21355994045734406, + "loss_ce": 0.020688854157924652, + "loss_iou": 1.0069462060928345, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 194441376, + "step": 1125 + }, + { + "epoch": 0.43125239371888163, + "grad_norm": 28.52901901475493, + "learning_rate": 5e-06, + "loss": 0.2658, + "num_input_tokens_seen": 194614168, + "step": 1126 + }, + { + "epoch": 0.43125239371888163, + "loss": 0.2590653896331787, + "loss_ce": 0.020051725208759308, + "loss_iou": 1.050733208656311, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 194614168, + "step": 1126 + }, + { + "epoch": 0.4316353887399464, + "grad_norm": 23.461995361491727, + "learning_rate": 5e-06, + "loss": 0.2763, + "num_input_tokens_seen": 194787328, + "step": 1127 + }, + { + "epoch": 0.4316353887399464, + "loss": 0.2598300576210022, + "loss_ce": 0.020206015557050705, + "loss_iou": 1.007830262184143, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 194787328, + "step": 1127 + }, + { + "epoch": 0.4320183837610111, + "grad_norm": 29.33121127397308, + "learning_rate": 5e-06, + "loss": 0.2646, + "num_input_tokens_seen": 194959912, + "step": 1128 + }, + { + "epoch": 0.4320183837610111, + "loss": 0.23208530247211456, + "loss_ce": 0.017912931740283966, + "loss_iou": 1.0141150951385498, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 194959912, + "step": 1128 + }, + { + "epoch": 0.43240137878207585, + "grad_norm": 39.34486748573164, + "learning_rate": 5e-06, + "loss": 0.2952, + "num_input_tokens_seen": 195132584, + "step": 1129 + }, + { + "epoch": 0.43240137878207585, + "loss": 0.23368357121944427, + "loss_ce": 0.02024361863732338, + "loss_iou": 1.0584793090820312, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 195132584, + "step": 1129 + }, + { + "epoch": 0.43278437380314055, + "grad_norm": 22.854477573257608, + "learning_rate": 5e-06, + "loss": 0.2324, + "num_input_tokens_seen": 195305600, + "step": 1130 + }, + { + "epoch": 0.43278437380314055, + "loss": 0.31261980533599854, + "loss_ce": 0.020383477210998535, + "loss_iou": 1.2783775329589844, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 195305600, + "step": 1130 + }, + { + "epoch": 0.4331673688242053, + "grad_norm": 33.972667461606015, + "learning_rate": 5e-06, + "loss": 0.227, + "num_input_tokens_seen": 195478744, + "step": 1131 + }, + { + "epoch": 0.4331673688242053, + "loss": 0.19473661482334137, + "loss_ce": 0.02170194685459137, + "loss_iou": 1.0100171566009521, + "loss_num": 0.1728515625, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 195478744, + "step": 1131 + }, + { + "epoch": 0.43355036384527, + "grad_norm": 33.56408684153845, + "learning_rate": 5e-06, + "loss": 0.3593, + "num_input_tokens_seen": 195651632, + "step": 1132 + }, + { + "epoch": 0.43355036384527, + "loss": 0.280134379863739, + "loss_ce": 0.01988050900399685, + "loss_iou": 1.0125410556793213, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 195651632, + "step": 1132 + }, + { + "epoch": 0.4339333588663347, + "grad_norm": 27.666942950446835, + "learning_rate": 5e-06, + "loss": 0.2947, + "num_input_tokens_seen": 195824592, + "step": 1133 + }, + { + "epoch": 0.4339333588663347, + "loss": 0.35064661502838135, + "loss_ce": 0.02154507488012314, + "loss_iou": 1.8595023155212402, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 195824592, + "step": 1133 + }, + { + "epoch": 0.4343163538873995, + "grad_norm": 26.545706769596773, + "learning_rate": 5e-06, + "loss": 0.2278, + "num_input_tokens_seen": 195997792, + "step": 1134 + }, + { + "epoch": 0.4343163538873995, + "loss": 0.23813310265541077, + "loss_ce": 0.021336231380701065, + "loss_iou": 1.0221257209777832, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 195997792, + "step": 1134 + }, + { + "epoch": 0.4346993489084642, + "grad_norm": 31.21511075386116, + "learning_rate": 5e-06, + "loss": 0.2317, + "num_input_tokens_seen": 196170952, + "step": 1135 + }, + { + "epoch": 0.4346993489084642, + "loss": 0.23391905426979065, + "loss_ce": 0.021943967789411545, + "loss_iou": 1.0050550699234009, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 196170952, + "step": 1135 + }, + { + "epoch": 0.43508234392952894, + "grad_norm": 34.490804779413054, + "learning_rate": 5e-06, + "loss": 0.3355, + "num_input_tokens_seen": 196343768, + "step": 1136 + }, + { + "epoch": 0.43508234392952894, + "loss": 0.3720143437385559, + "loss_ce": 0.021550467237830162, + "loss_iou": 1.0862687826156616, + "loss_num": 0.349609375, + "loss_xval": 0.349609375, + "num_input_tokens_seen": 196343768, + "step": 1136 + }, + { + "epoch": 0.43546533895059364, + "grad_norm": 32.27694300787197, + "learning_rate": 5e-06, + "loss": 0.2392, + "num_input_tokens_seen": 196516592, + "step": 1137 + }, + { + "epoch": 0.43546533895059364, + "loss": 0.25271502137184143, + "loss_ce": 0.019682809710502625, + "loss_iou": 1.098147988319397, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 196516592, + "step": 1137 + }, + { + "epoch": 0.43584833397165834, + "grad_norm": 29.930265659285894, + "learning_rate": 5e-06, + "loss": 0.275, + "num_input_tokens_seen": 196689856, + "step": 1138 + }, + { + "epoch": 0.43584833397165834, + "loss": 0.256893128156662, + "loss_ce": 0.01958843693137169, + "loss_iou": 1.010021448135376, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 196689856, + "step": 1138 + }, + { + "epoch": 0.4362313289927231, + "grad_norm": 40.63195635884255, + "learning_rate": 5e-06, + "loss": 0.2234, + "num_input_tokens_seen": 196862832, + "step": 1139 + }, + { + "epoch": 0.4362313289927231, + "loss": 0.20894160866737366, + "loss_ce": 0.020404014736413956, + "loss_iou": 1.120874285697937, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 196862832, + "step": 1139 + }, + { + "epoch": 0.4366143240137878, + "grad_norm": 50.467821313162986, + "learning_rate": 5e-06, + "loss": 0.2889, + "num_input_tokens_seen": 197035712, + "step": 1140 + }, + { + "epoch": 0.4366143240137878, + "loss": 0.2801207900047302, + "loss_ce": 0.020721379667520523, + "loss_iou": 1.0571398735046387, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 197035712, + "step": 1140 + }, + { + "epoch": 0.43699731903485256, + "grad_norm": 35.97720727841275, + "learning_rate": 5e-06, + "loss": 0.2381, + "num_input_tokens_seen": 197208320, + "step": 1141 + }, + { + "epoch": 0.43699731903485256, + "loss": 0.22430667281150818, + "loss_ce": 0.022646527737379074, + "loss_iou": 1.0075345039367676, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 197208320, + "step": 1141 + }, + { + "epoch": 0.43738031405591726, + "grad_norm": 34.950341336361674, + "learning_rate": 5e-06, + "loss": 0.2609, + "num_input_tokens_seen": 197381760, + "step": 1142 + }, + { + "epoch": 0.43738031405591726, + "loss": 0.24490168690681458, + "loss_ce": 0.020902663469314575, + "loss_iou": 1.0394790172576904, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 197381760, + "step": 1142 + }, + { + "epoch": 0.437763309076982, + "grad_norm": 25.107315876505393, + "learning_rate": 5e-06, + "loss": 0.2079, + "num_input_tokens_seen": 197554856, + "step": 1143 + }, + { + "epoch": 0.437763309076982, + "loss": 0.24060098826885223, + "loss_ce": 0.020508214831352234, + "loss_iou": 1.3005890846252441, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 197554856, + "step": 1143 + }, + { + "epoch": 0.4381463040980467, + "grad_norm": 41.9365348406118, + "learning_rate": 5e-06, + "loss": 0.3391, + "num_input_tokens_seen": 197727792, + "step": 1144 + }, + { + "epoch": 0.4381463040980467, + "loss": 0.3874055743217468, + "loss_ce": 0.01972980797290802, + "loss_iou": 1.177030324935913, + "loss_num": 0.3671875, + "loss_xval": 0.3671875, + "num_input_tokens_seen": 197727792, + "step": 1144 + }, + { + "epoch": 0.4385292991191114, + "grad_norm": 41.52103336616536, + "learning_rate": 5e-06, + "loss": 0.2384, + "num_input_tokens_seen": 197900872, + "step": 1145 + }, + { + "epoch": 0.4385292991191114, + "loss": 0.2604897618293762, + "loss_ce": 0.022025402635335922, + "loss_iou": 1.2552176713943481, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 197900872, + "step": 1145 + }, + { + "epoch": 0.4389122941401762, + "grad_norm": 29.365917048394262, + "learning_rate": 5e-06, + "loss": 0.2574, + "num_input_tokens_seen": 198073696, + "step": 1146 + }, + { + "epoch": 0.4389122941401762, + "loss": 0.2711096405982971, + "loss_ce": 0.01976683922111988, + "loss_iou": 1.0339949131011963, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 198073696, + "step": 1146 + }, + { + "epoch": 0.4392952891612409, + "grad_norm": 24.33312632490962, + "learning_rate": 5e-06, + "loss": 0.2307, + "num_input_tokens_seen": 198246672, + "step": 1147 + }, + { + "epoch": 0.4392952891612409, + "loss": 0.22538858652114868, + "loss_ce": 0.02092081494629383, + "loss_iou": 1.0422980785369873, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 198246672, + "step": 1147 + }, + { + "epoch": 0.43967828418230565, + "grad_norm": 27.943519811091573, + "learning_rate": 5e-06, + "loss": 0.2625, + "num_input_tokens_seen": 198419504, + "step": 1148 + }, + { + "epoch": 0.43967828418230565, + "loss": 0.22157150506973267, + "loss_ce": 0.020765848457813263, + "loss_iou": 1.0049946308135986, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 198419504, + "step": 1148 + }, + { + "epoch": 0.44006127920337035, + "grad_norm": 27.988290899295784, + "learning_rate": 5e-06, + "loss": 0.275, + "num_input_tokens_seen": 198592888, + "step": 1149 + }, + { + "epoch": 0.44006127920337035, + "loss": 0.23316389322280884, + "loss_ce": 0.019784986972808838, + "loss_iou": 1.017301082611084, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 198592888, + "step": 1149 + }, + { + "epoch": 0.4404442742244351, + "grad_norm": 30.494644210178567, + "learning_rate": 5e-06, + "loss": 0.3304, + "num_input_tokens_seen": 198761976, + "step": 1150 + }, + { + "epoch": 0.4404442742244351, + "loss": 0.4319886863231659, + "loss_ce": 0.023297281935811043, + "loss_iou": 1.8764491081237793, + "loss_num": 0.408203125, + "loss_xval": 0.408203125, + "num_input_tokens_seen": 198761976, + "step": 1150 + }, + { + "epoch": 0.4408272692454998, + "grad_norm": 26.69361483665594, + "learning_rate": 5e-06, + "loss": 0.254, + "num_input_tokens_seen": 198934792, + "step": 1151 + }, + { + "epoch": 0.4408272692454998, + "loss": 0.23793041706085205, + "loss_ce": 0.021011468023061752, + "loss_iou": 1.084611177444458, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 198934792, + "step": 1151 + }, + { + "epoch": 0.4412102642665645, + "grad_norm": 39.02427073739685, + "learning_rate": 5e-06, + "loss": 0.2906, + "num_input_tokens_seen": 199108008, + "step": 1152 + }, + { + "epoch": 0.4412102642665645, + "loss": 0.3364917039871216, + "loss_ce": 0.020329583436250687, + "loss_iou": 1.074775218963623, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 199108008, + "step": 1152 + }, + { + "epoch": 0.44159325928762927, + "grad_norm": 35.111010496577286, + "learning_rate": 5e-06, + "loss": 0.2216, + "num_input_tokens_seen": 199280944, + "step": 1153 + }, + { + "epoch": 0.44159325928762927, + "loss": 0.2469591498374939, + "loss_ce": 0.02064078487455845, + "loss_iou": 1.0694262981414795, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 199280944, + "step": 1153 + }, + { + "epoch": 0.44197625430869397, + "grad_norm": 24.67561900275964, + "learning_rate": 5e-06, + "loss": 0.2682, + "num_input_tokens_seen": 199453416, + "step": 1154 + }, + { + "epoch": 0.44197625430869397, + "loss": 0.2632145583629608, + "loss_ce": 0.018585659563541412, + "loss_iou": 1.0391876697540283, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 199453416, + "step": 1154 + }, + { + "epoch": 0.44235924932975873, + "grad_norm": 17.922251172974647, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 199626208, + "step": 1155 + }, + { + "epoch": 0.44235924932975873, + "loss": 0.24920615553855896, + "loss_ce": 0.018859483301639557, + "loss_iou": 1.166675090789795, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 199626208, + "step": 1155 + }, + { + "epoch": 0.44274224435082343, + "grad_norm": 26.41346711504118, + "learning_rate": 5e-06, + "loss": 0.2129, + "num_input_tokens_seen": 199799312, + "step": 1156 + }, + { + "epoch": 0.44274224435082343, + "loss": 0.24005916714668274, + "loss_ce": 0.020271575078368187, + "loss_iou": 1.0121383666992188, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 199799312, + "step": 1156 + }, + { + "epoch": 0.4431252393718882, + "grad_norm": 32.11700929479766, + "learning_rate": 5e-06, + "loss": 0.2136, + "num_input_tokens_seen": 199972096, + "step": 1157 + }, + { + "epoch": 0.4431252393718882, + "loss": 0.21976438164710999, + "loss_ce": 0.020057355985045433, + "loss_iou": 1.0306508541107178, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 199972096, + "step": 1157 + }, + { + "epoch": 0.4435082343929529, + "grad_norm": 30.826752734022154, + "learning_rate": 5e-06, + "loss": 0.2375, + "num_input_tokens_seen": 200145200, + "step": 1158 + }, + { + "epoch": 0.4435082343929529, + "loss": 0.20936432480812073, + "loss_ce": 0.019606031477451324, + "loss_iou": 1.0025780200958252, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 200145200, + "step": 1158 + }, + { + "epoch": 0.4438912294140176, + "grad_norm": 25.947265547466927, + "learning_rate": 5e-06, + "loss": 0.2716, + "num_input_tokens_seen": 200318200, + "step": 1159 + }, + { + "epoch": 0.4438912294140176, + "loss": 0.2963043451309204, + "loss_ce": 0.02054748684167862, + "loss_iou": 1.0963495969772339, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 200318200, + "step": 1159 + }, + { + "epoch": 0.44427422443508235, + "grad_norm": 24.302641455107704, + "learning_rate": 5e-06, + "loss": 0.2201, + "num_input_tokens_seen": 200491392, + "step": 1160 + }, + { + "epoch": 0.44427422443508235, + "loss": 0.18968833982944489, + "loss_ce": 0.020620953291654587, + "loss_iou": 1.037597894668579, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 200491392, + "step": 1160 + }, + { + "epoch": 0.44465721945614706, + "grad_norm": 28.242728423249723, + "learning_rate": 5e-06, + "loss": 0.3088, + "num_input_tokens_seen": 200660680, + "step": 1161 + }, + { + "epoch": 0.44465721945614706, + "loss": 0.32645246386528015, + "loss_ce": 0.01968976855278015, + "loss_iou": 1.0893526077270508, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 200660680, + "step": 1161 + }, + { + "epoch": 0.4450402144772118, + "grad_norm": 31.27411261250214, + "learning_rate": 5e-06, + "loss": 0.311, + "num_input_tokens_seen": 200833776, + "step": 1162 + }, + { + "epoch": 0.4450402144772118, + "loss": 0.3027549982070923, + "loss_ce": 0.019734999164938927, + "loss_iou": 1.0339843034744263, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 200833776, + "step": 1162 + }, + { + "epoch": 0.4454232094982765, + "grad_norm": 28.83713172926187, + "learning_rate": 5e-06, + "loss": 0.2088, + "num_input_tokens_seen": 201007016, + "step": 1163 + }, + { + "epoch": 0.4454232094982765, + "loss": 0.1710694581270218, + "loss_ce": 0.021411247551441193, + "loss_iou": 1.0376994609832764, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 201007016, + "step": 1163 + }, + { + "epoch": 0.4458062045193413, + "grad_norm": 24.68230334788439, + "learning_rate": 5e-06, + "loss": 0.2782, + "num_input_tokens_seen": 201180064, + "step": 1164 + }, + { + "epoch": 0.4458062045193413, + "loss": 0.3124706745147705, + "loss_ce": 0.019440874457359314, + "loss_iou": 1.0577409267425537, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 201180064, + "step": 1164 + }, + { + "epoch": 0.446189199540406, + "grad_norm": 27.25612711491665, + "learning_rate": 5e-06, + "loss": 0.1769, + "num_input_tokens_seen": 201353120, + "step": 1165 + }, + { + "epoch": 0.446189199540406, + "loss": 0.18721093237400055, + "loss_ce": 0.02034081518650055, + "loss_iou": 1.023526906967163, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 201353120, + "step": 1165 + }, + { + "epoch": 0.4465721945614707, + "grad_norm": 43.52114898506472, + "learning_rate": 5e-06, + "loss": 0.2885, + "num_input_tokens_seen": 201526272, + "step": 1166 + }, + { + "epoch": 0.4465721945614707, + "loss": 0.3110736906528473, + "loss_ce": 0.02005806751549244, + "loss_iou": 1.0133700370788574, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 201526272, + "step": 1166 + }, + { + "epoch": 0.44695518958253544, + "grad_norm": 23.933824697037963, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 201699400, + "step": 1167 + }, + { + "epoch": 0.44695518958253544, + "loss": 0.23789691925048828, + "loss_ce": 0.018963806331157684, + "loss_iou": 1.0314826965332031, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 201699400, + "step": 1167 + }, + { + "epoch": 0.44733818460360014, + "grad_norm": 24.68663628579167, + "learning_rate": 5e-06, + "loss": 0.3253, + "num_input_tokens_seen": 201872616, + "step": 1168 + }, + { + "epoch": 0.44733818460360014, + "loss": 0.3806021809577942, + "loss_ce": 0.021593384444713593, + "loss_iou": 1.2345356941223145, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 201872616, + "step": 1168 + }, + { + "epoch": 0.4477211796246649, + "grad_norm": 28.08736080430726, + "learning_rate": 5e-06, + "loss": 0.2538, + "num_input_tokens_seen": 202045792, + "step": 1169 + }, + { + "epoch": 0.4477211796246649, + "loss": 0.2737443447113037, + "loss_ce": 0.019593939185142517, + "loss_iou": 1.0906858444213867, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 202045792, + "step": 1169 + }, + { + "epoch": 0.4481041746457296, + "grad_norm": 39.28701170625111, + "learning_rate": 5e-06, + "loss": 0.29, + "num_input_tokens_seen": 202218752, + "step": 1170 + }, + { + "epoch": 0.4481041746457296, + "loss": 0.28742527961730957, + "loss_ce": 0.019969239830970764, + "loss_iou": 1.0095866918563843, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 202218752, + "step": 1170 + }, + { + "epoch": 0.4484871696667943, + "grad_norm": 33.28356494069342, + "learning_rate": 5e-06, + "loss": 0.2831, + "num_input_tokens_seen": 202391632, + "step": 1171 + }, + { + "epoch": 0.4484871696667943, + "loss": 0.2931366562843323, + "loss_ce": 0.020248495042324066, + "loss_iou": 1.1357561349868774, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 202391632, + "step": 1171 + }, + { + "epoch": 0.44887016468785906, + "grad_norm": 27.589215773927542, + "learning_rate": 5e-06, + "loss": 0.2546, + "num_input_tokens_seen": 202564520, + "step": 1172 + }, + { + "epoch": 0.44887016468785906, + "loss": 0.25870171189308167, + "loss_ce": 0.01993216946721077, + "loss_iou": 1.0024595260620117, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 202564520, + "step": 1172 + }, + { + "epoch": 0.44925315970892377, + "grad_norm": 35.92483723023636, + "learning_rate": 5e-06, + "loss": 0.2835, + "num_input_tokens_seen": 202737576, + "step": 1173 + }, + { + "epoch": 0.44925315970892377, + "loss": 0.2748756408691406, + "loss_ce": 0.020236970856785774, + "loss_iou": 1.0638651847839355, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 202737576, + "step": 1173 + }, + { + "epoch": 0.4496361547299885, + "grad_norm": 30.022342219911604, + "learning_rate": 5e-06, + "loss": 0.285, + "num_input_tokens_seen": 202910528, + "step": 1174 + }, + { + "epoch": 0.4496361547299885, + "loss": 0.2753847539424896, + "loss_ce": 0.020013663917779922, + "loss_iou": 1.0376520156860352, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 202910528, + "step": 1174 + }, + { + "epoch": 0.4500191497510532, + "grad_norm": 32.25365793654511, + "learning_rate": 5e-06, + "loss": 0.2748, + "num_input_tokens_seen": 203083688, + "step": 1175 + }, + { + "epoch": 0.4500191497510532, + "loss": 0.3484739065170288, + "loss_ce": 0.018884049728512764, + "loss_iou": 1.121877908706665, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 203083688, + "step": 1175 + }, + { + "epoch": 0.450402144772118, + "grad_norm": 28.833320703308424, + "learning_rate": 5e-06, + "loss": 0.2602, + "num_input_tokens_seen": 203257080, + "step": 1176 + }, + { + "epoch": 0.450402144772118, + "loss": 0.23676294088363647, + "loss_ce": 0.021675053983926773, + "loss_iou": 1.1116487979888916, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 203257080, + "step": 1176 + }, + { + "epoch": 0.4507851397931827, + "grad_norm": 31.152714343773237, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 203430104, + "step": 1177 + }, + { + "epoch": 0.4507851397931827, + "loss": 0.2718905508518219, + "loss_ce": 0.0206698477268219, + "loss_iou": 1.0138851404190063, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 203430104, + "step": 1177 + }, + { + "epoch": 0.4511681348142474, + "grad_norm": 32.222635202745295, + "learning_rate": 5e-06, + "loss": 0.2379, + "num_input_tokens_seen": 203603160, + "step": 1178 + }, + { + "epoch": 0.4511681348142474, + "loss": 0.2524706721305847, + "loss_ce": 0.01931639388203621, + "loss_iou": 1.0933088064193726, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 203603160, + "step": 1178 + }, + { + "epoch": 0.45155112983531215, + "grad_norm": 28.513552595286786, + "learning_rate": 5e-06, + "loss": 0.2808, + "num_input_tokens_seen": 203776208, + "step": 1179 + }, + { + "epoch": 0.45155112983531215, + "loss": 0.24923914670944214, + "loss_ce": 0.01986902952194214, + "loss_iou": 1.0238929986953735, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 203776208, + "step": 1179 + }, + { + "epoch": 0.45193412485637685, + "grad_norm": 21.249686000705484, + "learning_rate": 5e-06, + "loss": 0.2621, + "num_input_tokens_seen": 203949184, + "step": 1180 + }, + { + "epoch": 0.45193412485637685, + "loss": 0.25357022881507874, + "loss_ce": 0.02023281529545784, + "loss_iou": 1.0669682025909424, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 203949184, + "step": 1180 + }, + { + "epoch": 0.4523171198774416, + "grad_norm": 26.917478057523944, + "learning_rate": 5e-06, + "loss": 0.2158, + "num_input_tokens_seen": 204122024, + "step": 1181 + }, + { + "epoch": 0.4523171198774416, + "loss": 0.22131076455116272, + "loss_ce": 0.01873507723212242, + "loss_iou": 1.0175056457519531, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 204122024, + "step": 1181 + }, + { + "epoch": 0.4527001148985063, + "grad_norm": 30.28490368060961, + "learning_rate": 5e-06, + "loss": 0.2525, + "num_input_tokens_seen": 204294960, + "step": 1182 + }, + { + "epoch": 0.4527001148985063, + "loss": 0.2743193209171295, + "loss_ce": 0.020291002467274666, + "loss_iou": 1.040724754333496, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 204294960, + "step": 1182 + }, + { + "epoch": 0.45308310991957107, + "grad_norm": 22.87707927828686, + "learning_rate": 5e-06, + "loss": 0.2321, + "num_input_tokens_seen": 204467368, + "step": 1183 + }, + { + "epoch": 0.45308310991957107, + "loss": 0.2445080280303955, + "loss_ce": 0.018800020217895508, + "loss_iou": 1.1185842752456665, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 204467368, + "step": 1183 + }, + { + "epoch": 0.4534661049406358, + "grad_norm": 20.696879568895486, + "learning_rate": 5e-06, + "loss": 0.2679, + "num_input_tokens_seen": 204640280, + "step": 1184 + }, + { + "epoch": 0.4534661049406358, + "loss": 0.2555941045284271, + "loss_ce": 0.021341171115636826, + "loss_iou": 1.0109895467758179, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 204640280, + "step": 1184 + }, + { + "epoch": 0.4538490999617005, + "grad_norm": 26.268558075458888, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 204813240, + "step": 1185 + }, + { + "epoch": 0.4538490999617005, + "loss": 0.25668853521347046, + "loss_ce": 0.021214904263615608, + "loss_iou": 1.0567761659622192, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 204813240, + "step": 1185 + }, + { + "epoch": 0.45423209498276523, + "grad_norm": 32.61383844153683, + "learning_rate": 5e-06, + "loss": 0.2827, + "num_input_tokens_seen": 204986088, + "step": 1186 + }, + { + "epoch": 0.45423209498276523, + "loss": 0.33852893114089966, + "loss_ce": 0.019315045326948166, + "loss_iou": 1.038220763206482, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 204986088, + "step": 1186 + }, + { + "epoch": 0.45461509000382994, + "grad_norm": 28.72239339332055, + "learning_rate": 5e-06, + "loss": 0.2099, + "num_input_tokens_seen": 205159464, + "step": 1187 + }, + { + "epoch": 0.45461509000382994, + "loss": 0.14363889396190643, + "loss_ce": 0.02016477659344673, + "loss_iou": 1.0007100105285645, + "loss_num": 0.12353515625, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 205159464, + "step": 1187 + }, + { + "epoch": 0.4549980850248947, + "grad_norm": 29.17153599753967, + "learning_rate": 5e-06, + "loss": 0.2577, + "num_input_tokens_seen": 205332696, + "step": 1188 + }, + { + "epoch": 0.4549980850248947, + "loss": 0.266764760017395, + "loss_ce": 0.020182739943265915, + "loss_iou": 1.0024018287658691, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 205332696, + "step": 1188 + }, + { + "epoch": 0.4553810800459594, + "grad_norm": 28.532039716512504, + "learning_rate": 5e-06, + "loss": 0.1976, + "num_input_tokens_seen": 205505880, + "step": 1189 + }, + { + "epoch": 0.4553810800459594, + "loss": 0.2205185443162918, + "loss_ce": 0.02087254822254181, + "loss_iou": 1.0607324838638306, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 205505880, + "step": 1189 + }, + { + "epoch": 0.45576407506702415, + "grad_norm": 37.53982498463436, + "learning_rate": 5e-06, + "loss": 0.2872, + "num_input_tokens_seen": 205678504, + "step": 1190 + }, + { + "epoch": 0.45576407506702415, + "loss": 0.31850773096084595, + "loss_ce": 0.02065618336200714, + "loss_iou": 1.0170081853866577, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 205678504, + "step": 1190 + }, + { + "epoch": 0.45614707008808886, + "grad_norm": 29.4571782160019, + "learning_rate": 5e-06, + "loss": 0.2473, + "num_input_tokens_seen": 205851752, + "step": 1191 + }, + { + "epoch": 0.45614707008808886, + "loss": 0.26461970806121826, + "loss_ce": 0.020173925906419754, + "loss_iou": 1.0164811611175537, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 205851752, + "step": 1191 + }, + { + "epoch": 0.45653006510915356, + "grad_norm": 30.10232987030623, + "learning_rate": 5e-06, + "loss": 0.3273, + "num_input_tokens_seen": 206024416, + "step": 1192 + }, + { + "epoch": 0.45653006510915356, + "loss": 0.2999013662338257, + "loss_ce": 0.020177241414785385, + "loss_iou": 1.0649040937423706, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 206024416, + "step": 1192 + }, + { + "epoch": 0.4569130601302183, + "grad_norm": 46.115333340752976, + "learning_rate": 5e-06, + "loss": 0.2377, + "num_input_tokens_seen": 206197440, + "step": 1193 + }, + { + "epoch": 0.4569130601302183, + "loss": 0.23769903182983398, + "loss_ce": 0.021268364042043686, + "loss_iou": 1.0191850662231445, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 206197440, + "step": 1193 + }, + { + "epoch": 0.457296055151283, + "grad_norm": 43.7152421239042, + "learning_rate": 5e-06, + "loss": 0.2804, + "num_input_tokens_seen": 206370312, + "step": 1194 + }, + { + "epoch": 0.457296055151283, + "loss": 0.3282549977302551, + "loss_ce": 0.020637793466448784, + "loss_iou": 0.9924590587615967, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 206370312, + "step": 1194 + }, + { + "epoch": 0.4576790501723478, + "grad_norm": 33.74389350078248, + "learning_rate": 5e-06, + "loss": 0.2559, + "num_input_tokens_seen": 206543424, + "step": 1195 + }, + { + "epoch": 0.4576790501723478, + "loss": 0.3125259578227997, + "loss_ce": 0.020289622247219086, + "loss_iou": 1.0566866397857666, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 206543424, + "step": 1195 + }, + { + "epoch": 0.4580620451934125, + "grad_norm": 39.31242299560097, + "learning_rate": 5e-06, + "loss": 0.2625, + "num_input_tokens_seen": 206716040, + "step": 1196 + }, + { + "epoch": 0.4580620451934125, + "loss": 0.21807287633419037, + "loss_ce": 0.019098270684480667, + "loss_iou": 1.010589838027954, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 206716040, + "step": 1196 + }, + { + "epoch": 0.4584450402144772, + "grad_norm": 49.293738253540674, + "learning_rate": 5e-06, + "loss": 0.2648, + "num_input_tokens_seen": 206888584, + "step": 1197 + }, + { + "epoch": 0.4584450402144772, + "loss": 0.2763878107070923, + "loss_ce": 0.02113880030810833, + "loss_iou": 1.055002212524414, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 206888584, + "step": 1197 + }, + { + "epoch": 0.45882803523554194, + "grad_norm": 33.281452562256966, + "learning_rate": 5e-06, + "loss": 0.3585, + "num_input_tokens_seen": 207061368, + "step": 1198 + }, + { + "epoch": 0.45882803523554194, + "loss": 0.4192292094230652, + "loss_ce": 0.02018134854733944, + "loss_iou": 1.1942784786224365, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 207061368, + "step": 1198 + }, + { + "epoch": 0.45921103025660664, + "grad_norm": 27.32779327998403, + "learning_rate": 5e-06, + "loss": 0.233, + "num_input_tokens_seen": 207234448, + "step": 1199 + }, + { + "epoch": 0.45921103025660664, + "loss": 0.26918455958366394, + "loss_ce": 0.020100083202123642, + "loss_iou": 1.0386629104614258, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 207234448, + "step": 1199 + }, + { + "epoch": 0.4595940252776714, + "grad_norm": 34.42167959598355, + "learning_rate": 5e-06, + "loss": 0.2061, + "num_input_tokens_seen": 207407368, + "step": 1200 + }, + { + "epoch": 0.4595940252776714, + "loss": 0.2239762842655182, + "loss_ce": 0.019752655178308487, + "loss_iou": 1.047470211982727, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 207407368, + "step": 1200 + }, + { + "epoch": 0.4599770202987361, + "grad_norm": 33.34176773218534, + "learning_rate": 5e-06, + "loss": 0.2566, + "num_input_tokens_seen": 207580336, + "step": 1201 + }, + { + "epoch": 0.4599770202987361, + "loss": 0.25941014289855957, + "loss_ce": 0.019908176735043526, + "loss_iou": 1.0563254356384277, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 207580336, + "step": 1201 + }, + { + "epoch": 0.46036001531980086, + "grad_norm": 29.238023332602307, + "learning_rate": 5e-06, + "loss": 0.2481, + "num_input_tokens_seen": 207753496, + "step": 1202 + }, + { + "epoch": 0.46036001531980086, + "loss": 0.19793447852134705, + "loss_ce": 0.020932529121637344, + "loss_iou": 1.0017145872116089, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 207753496, + "step": 1202 + }, + { + "epoch": 0.46074301034086557, + "grad_norm": 37.123953292975, + "learning_rate": 5e-06, + "loss": 0.3079, + "num_input_tokens_seen": 207926736, + "step": 1203 + }, + { + "epoch": 0.46074301034086557, + "loss": 0.3396281599998474, + "loss_ce": 0.019437748938798904, + "loss_iou": 1.0252783298492432, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 207926736, + "step": 1203 + }, + { + "epoch": 0.46112600536193027, + "grad_norm": 49.708482696429726, + "learning_rate": 5e-06, + "loss": 0.3337, + "num_input_tokens_seen": 208099768, + "step": 1204 + }, + { + "epoch": 0.46112600536193027, + "loss": 0.30779531598091125, + "loss_ce": 0.020075593143701553, + "loss_iou": 1.01540207862854, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 208099768, + "step": 1204 + }, + { + "epoch": 0.461509000382995, + "grad_norm": 33.38269900294354, + "learning_rate": 5e-06, + "loss": 0.3222, + "num_input_tokens_seen": 208273040, + "step": 1205 + }, + { + "epoch": 0.461509000382995, + "loss": 0.32141298055648804, + "loss_ce": 0.020265530794858932, + "loss_iou": 1.094458818435669, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 208273040, + "step": 1205 + }, + { + "epoch": 0.46189199540405973, + "grad_norm": 32.721773895186935, + "learning_rate": 5e-06, + "loss": 0.2943, + "num_input_tokens_seen": 208445712, + "step": 1206 + }, + { + "epoch": 0.46189199540405973, + "loss": 0.30638575553894043, + "loss_ce": 0.020374994724988937, + "loss_iou": 1.0706769227981567, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 208445712, + "step": 1206 + }, + { + "epoch": 0.4622749904251245, + "grad_norm": 62.49189654564097, + "learning_rate": 5e-06, + "loss": 0.3114, + "num_input_tokens_seen": 208618688, + "step": 1207 + }, + { + "epoch": 0.4622749904251245, + "loss": 0.29980647563934326, + "loss_ce": 0.020387519150972366, + "loss_iou": 1.1299349069595337, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 208618688, + "step": 1207 + }, + { + "epoch": 0.4626579854461892, + "grad_norm": 83.33686582974181, + "learning_rate": 5e-06, + "loss": 0.5361, + "num_input_tokens_seen": 208791936, + "step": 1208 + }, + { + "epoch": 0.4626579854461892, + "loss": 0.5459185838699341, + "loss_ce": 0.021748628467321396, + "loss_iou": 1.1664130687713623, + "loss_num": 0.5234375, + "loss_xval": 0.5234375, + "num_input_tokens_seen": 208791936, + "step": 1208 + }, + { + "epoch": 0.46304098046725395, + "grad_norm": 32.498828012614354, + "learning_rate": 5e-06, + "loss": 0.3044, + "num_input_tokens_seen": 208965216, + "step": 1209 + }, + { + "epoch": 0.46304098046725395, + "loss": 0.3312862813472748, + "loss_ce": 0.02061733417212963, + "loss_iou": 1.144300103187561, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 208965216, + "step": 1209 + }, + { + "epoch": 0.46342397548831865, + "grad_norm": 38.343275183035935, + "learning_rate": 5e-06, + "loss": 0.37, + "num_input_tokens_seen": 209137952, + "step": 1210 + }, + { + "epoch": 0.46342397548831865, + "loss": 0.34652167558670044, + "loss_ce": 0.019739460200071335, + "loss_iou": 0.8828321695327759, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 209137952, + "step": 1210 + }, + { + "epoch": 0.46380697050938335, + "grad_norm": 51.65954090616292, + "learning_rate": 5e-06, + "loss": 0.3785, + "num_input_tokens_seen": 209310688, + "step": 1211 + }, + { + "epoch": 0.46380697050938335, + "loss": 0.3275665044784546, + "loss_ce": 0.02055967040359974, + "loss_iou": 1.0090607404708862, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 209310688, + "step": 1211 + }, + { + "epoch": 0.4641899655304481, + "grad_norm": 43.65917138990877, + "learning_rate": 5e-06, + "loss": 0.3131, + "num_input_tokens_seen": 209483632, + "step": 1212 + }, + { + "epoch": 0.4641899655304481, + "loss": 0.319579541683197, + "loss_ce": 0.02014104649424553, + "loss_iou": 1.007224202156067, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 209483632, + "step": 1212 + }, + { + "epoch": 0.4645729605515128, + "grad_norm": 21.208706371704558, + "learning_rate": 5e-06, + "loss": 0.2875, + "num_input_tokens_seen": 209656520, + "step": 1213 + }, + { + "epoch": 0.4645729605515128, + "loss": 0.23015263676643372, + "loss_ce": 0.018482713028788567, + "loss_iou": 1.0930778980255127, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 209656520, + "step": 1213 + }, + { + "epoch": 0.4649559555725776, + "grad_norm": 22.846096589976895, + "learning_rate": 5e-06, + "loss": 0.2401, + "num_input_tokens_seen": 209829808, + "step": 1214 + }, + { + "epoch": 0.4649559555725776, + "loss": 0.2387043833732605, + "loss_ce": 0.021419227123260498, + "loss_iou": 1.1203703880310059, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 209829808, + "step": 1214 + }, + { + "epoch": 0.4653389505936423, + "grad_norm": 21.76714792430826, + "learning_rate": 5e-06, + "loss": 0.2076, + "num_input_tokens_seen": 210002408, + "step": 1215 + }, + { + "epoch": 0.4653389505936423, + "loss": 0.19137930870056152, + "loss_ce": 0.020480871200561523, + "loss_iou": 1.027408480644226, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 210002408, + "step": 1215 + }, + { + "epoch": 0.46572194561470703, + "grad_norm": 42.78407183247811, + "learning_rate": 5e-06, + "loss": 0.2315, + "num_input_tokens_seen": 210175520, + "step": 1216 + }, + { + "epoch": 0.46572194561470703, + "loss": 0.2105085402727127, + "loss_ce": 0.021726813167333603, + "loss_iou": 1.0148282051086426, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 210175520, + "step": 1216 + }, + { + "epoch": 0.46610494063577174, + "grad_norm": 42.92081573795019, + "learning_rate": 5e-06, + "loss": 0.2418, + "num_input_tokens_seen": 210348544, + "step": 1217 + }, + { + "epoch": 0.46610494063577174, + "loss": 0.24414533376693726, + "loss_ce": 0.021000802516937256, + "loss_iou": 1.0263220071792603, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 210348544, + "step": 1217 + }, + { + "epoch": 0.46648793565683644, + "grad_norm": 25.986169050505218, + "learning_rate": 5e-06, + "loss": 0.2528, + "num_input_tokens_seen": 210521272, + "step": 1218 + }, + { + "epoch": 0.46648793565683644, + "loss": 0.30587664246559143, + "loss_ce": 0.02169695310294628, + "loss_iou": 1.0758498907089233, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 210521272, + "step": 1218 + }, + { + "epoch": 0.4668709306779012, + "grad_norm": 26.45532019578287, + "learning_rate": 5e-06, + "loss": 0.2786, + "num_input_tokens_seen": 210694296, + "step": 1219 + }, + { + "epoch": 0.4668709306779012, + "loss": 0.3204804062843323, + "loss_ce": 0.020248491317033768, + "loss_iou": 1.3104071617126465, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 210694296, + "step": 1219 + }, + { + "epoch": 0.4672539256989659, + "grad_norm": 29.044389132790407, + "learning_rate": 5e-06, + "loss": 0.1978, + "num_input_tokens_seen": 210867208, + "step": 1220 + }, + { + "epoch": 0.4672539256989659, + "loss": 0.2194499671459198, + "loss_ce": 0.019559821113944054, + "loss_iou": 0.885863184928894, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 210867208, + "step": 1220 + }, + { + "epoch": 0.46763692072003066, + "grad_norm": 32.577022795819815, + "learning_rate": 5e-06, + "loss": 0.2507, + "num_input_tokens_seen": 211040032, + "step": 1221 + }, + { + "epoch": 0.46763692072003066, + "loss": 0.2515682876110077, + "loss_ce": 0.021221602335572243, + "loss_iou": 1.0183402299880981, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 211040032, + "step": 1221 + }, + { + "epoch": 0.46801991574109536, + "grad_norm": 29.22541175312178, + "learning_rate": 5e-06, + "loss": 0.279, + "num_input_tokens_seen": 211213408, + "step": 1222 + }, + { + "epoch": 0.46801991574109536, + "loss": 0.23782938718795776, + "loss_ce": 0.020239055156707764, + "loss_iou": 1.0102102756500244, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 211213408, + "step": 1222 + }, + { + "epoch": 0.4684029107621601, + "grad_norm": 21.64071661144197, + "learning_rate": 5e-06, + "loss": 0.3053, + "num_input_tokens_seen": 211386344, + "step": 1223 + }, + { + "epoch": 0.4684029107621601, + "loss": 0.27443018555641174, + "loss_ce": 0.02027980238199234, + "loss_iou": 0.9637537598609924, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 211386344, + "step": 1223 + }, + { + "epoch": 0.4687859057832248, + "grad_norm": 28.11459993389933, + "learning_rate": 5e-06, + "loss": 0.2068, + "num_input_tokens_seen": 211559856, + "step": 1224 + }, + { + "epoch": 0.4687859057832248, + "loss": 0.2193112075328827, + "loss_ce": 0.020580727607011795, + "loss_iou": 1.0139524936676025, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 211559856, + "step": 1224 + }, + { + "epoch": 0.4691689008042895, + "grad_norm": 36.428761035536986, + "learning_rate": 5e-06, + "loss": 0.2531, + "num_input_tokens_seen": 211733248, + "step": 1225 + }, + { + "epoch": 0.4691689008042895, + "loss": 0.3093939423561096, + "loss_ce": 0.01972106657922268, + "loss_iou": 1.015163779258728, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 211733248, + "step": 1225 + }, + { + "epoch": 0.4695518958253543, + "grad_norm": 39.01721544263482, + "learning_rate": 5e-06, + "loss": 0.2558, + "num_input_tokens_seen": 211906336, + "step": 1226 + }, + { + "epoch": 0.4695518958253543, + "loss": 0.26606816053390503, + "loss_ce": 0.021134087815880775, + "loss_iou": -1.7795388064727341e+28, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 211906336, + "step": 1226 + }, + { + "epoch": 0.469934890846419, + "grad_norm": 27.60863119072085, + "learning_rate": 5e-06, + "loss": 0.2386, + "num_input_tokens_seen": 212078952, + "step": 1227 + }, + { + "epoch": 0.469934890846419, + "loss": 0.20051205158233643, + "loss_ce": 0.020397303625941277, + "loss_iou": 1.0308964252471924, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 212078952, + "step": 1227 + }, + { + "epoch": 0.47031788586748374, + "grad_norm": 25.995949679468232, + "learning_rate": 5e-06, + "loss": 0.2314, + "num_input_tokens_seen": 212251880, + "step": 1228 + }, + { + "epoch": 0.47031788586748374, + "loss": 0.19911321997642517, + "loss_ce": 0.02076849900186062, + "loss_iou": 1.0025396347045898, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 212251880, + "step": 1228 + }, + { + "epoch": 0.47070088088854845, + "grad_norm": 36.32822038372454, + "learning_rate": 5e-06, + "loss": 0.2005, + "num_input_tokens_seen": 212424800, + "step": 1229 + }, + { + "epoch": 0.47070088088854845, + "loss": 0.2403562068939209, + "loss_ce": 0.0217282734811306, + "loss_iou": 1.041759729385376, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 212424800, + "step": 1229 + }, + { + "epoch": 0.47108387590961315, + "grad_norm": 42.008764834368776, + "learning_rate": 5e-06, + "loss": 0.257, + "num_input_tokens_seen": 212597832, + "step": 1230 + }, + { + "epoch": 0.47108387590961315, + "loss": 0.26775553822517395, + "loss_ce": 0.0201359074562788, + "loss_iou": 1.010007619857788, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 212597832, + "step": 1230 + }, + { + "epoch": 0.4714668709306779, + "grad_norm": 35.65222787113635, + "learning_rate": 5e-06, + "loss": 0.1874, + "num_input_tokens_seen": 212770320, + "step": 1231 + }, + { + "epoch": 0.4714668709306779, + "loss": 0.19671890139579773, + "loss_ce": 0.02063247561454773, + "loss_iou": 1.0493252277374268, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 212770320, + "step": 1231 + }, + { + "epoch": 0.4718498659517426, + "grad_norm": 31.942214418097883, + "learning_rate": 5e-06, + "loss": 0.2551, + "num_input_tokens_seen": 212943368, + "step": 1232 + }, + { + "epoch": 0.4718498659517426, + "loss": 0.3143588602542877, + "loss_ce": 0.022122547030448914, + "loss_iou": 1.219099760055542, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 212943368, + "step": 1232 + }, + { + "epoch": 0.47223286097280737, + "grad_norm": 33.31657235703236, + "learning_rate": 5e-06, + "loss": 0.2829, + "num_input_tokens_seen": 213116424, + "step": 1233 + }, + { + "epoch": 0.47223286097280737, + "loss": 0.30633944272994995, + "loss_ce": 0.021000079810619354, + "loss_iou": 1.0062294006347656, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 213116424, + "step": 1233 + }, + { + "epoch": 0.47261585599387207, + "grad_norm": 33.776484152274676, + "learning_rate": 5e-06, + "loss": 0.2491, + "num_input_tokens_seen": 213289256, + "step": 1234 + }, + { + "epoch": 0.47261585599387207, + "loss": 0.2458813637495041, + "loss_ce": 0.02114991284906864, + "loss_iou": 1.0139302015304565, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 213289256, + "step": 1234 + }, + { + "epoch": 0.4729988510149368, + "grad_norm": 23.591833356456384, + "learning_rate": 5e-06, + "loss": 0.226, + "num_input_tokens_seen": 213462384, + "step": 1235 + }, + { + "epoch": 0.4729988510149368, + "loss": 0.2640923261642456, + "loss_ce": 0.02001270279288292, + "loss_iou": 1.134010910987854, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 213462384, + "step": 1235 + }, + { + "epoch": 0.47338184603600153, + "grad_norm": 23.575302914366794, + "learning_rate": 5e-06, + "loss": 0.2856, + "num_input_tokens_seen": 213635320, + "step": 1236 + }, + { + "epoch": 0.47338184603600153, + "loss": 0.3351038694381714, + "loss_ce": 0.021139021962881088, + "loss_iou": 1.0476080179214478, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 213635320, + "step": 1236 + }, + { + "epoch": 0.47376484105706623, + "grad_norm": 27.247542152040623, + "learning_rate": 5e-06, + "loss": 0.2751, + "num_input_tokens_seen": 213808440, + "step": 1237 + }, + { + "epoch": 0.47376484105706623, + "loss": 0.33616071939468384, + "loss_ce": 0.02085307240486145, + "loss_iou": 1.0902354717254639, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 213808440, + "step": 1237 + }, + { + "epoch": 0.474147836078131, + "grad_norm": 29.056772296303375, + "learning_rate": 5e-06, + "loss": 0.2855, + "num_input_tokens_seen": 213981424, + "step": 1238 + }, + { + "epoch": 0.474147836078131, + "loss": 0.2904357314109802, + "loss_ce": 0.020721357315778732, + "loss_iou": 1.0729703903198242, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 213981424, + "step": 1238 + }, + { + "epoch": 0.4745308310991957, + "grad_norm": 25.19565289524144, + "learning_rate": 5e-06, + "loss": 0.2261, + "num_input_tokens_seen": 214154408, + "step": 1239 + }, + { + "epoch": 0.4745308310991957, + "loss": 0.19168391823768616, + "loss_ce": 0.01980891078710556, + "loss_iou": 1.0037028789520264, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 214154408, + "step": 1239 + }, + { + "epoch": 0.47491382612026045, + "grad_norm": 28.591882792030717, + "learning_rate": 5e-06, + "loss": 0.3062, + "num_input_tokens_seen": 214327432, + "step": 1240 + }, + { + "epoch": 0.47491382612026045, + "loss": 0.2543603777885437, + "loss_ce": 0.022243663668632507, + "loss_iou": 1.098926305770874, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 214327432, + "step": 1240 + }, + { + "epoch": 0.47529682114132515, + "grad_norm": 27.2066659745211, + "learning_rate": 5e-06, + "loss": 0.202, + "num_input_tokens_seen": 214500088, + "step": 1241 + }, + { + "epoch": 0.47529682114132515, + "loss": 0.2134188413619995, + "loss_ce": 0.019815323874354362, + "loss_iou": 0.9447759389877319, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 214500088, + "step": 1241 + }, + { + "epoch": 0.4756798161623899, + "grad_norm": 31.714890019626516, + "learning_rate": 5e-06, + "loss": 0.2822, + "num_input_tokens_seen": 214673072, + "step": 1242 + }, + { + "epoch": 0.4756798161623899, + "loss": 0.3018686771392822, + "loss_ce": 0.020069342106580734, + "loss_iou": 1.040618658065796, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 214673072, + "step": 1242 + }, + { + "epoch": 0.4760628111834546, + "grad_norm": 41.539759519045866, + "learning_rate": 5e-06, + "loss": 0.2599, + "num_input_tokens_seen": 214845760, + "step": 1243 + }, + { + "epoch": 0.4760628111834546, + "loss": 0.2806388735771179, + "loss_ce": 0.019042205065488815, + "loss_iou": 1.0721485614776611, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 214845760, + "step": 1243 + }, + { + "epoch": 0.4764458062045193, + "grad_norm": 26.00145575425882, + "learning_rate": 5e-06, + "loss": 0.2321, + "num_input_tokens_seen": 215018680, + "step": 1244 + }, + { + "epoch": 0.4764458062045193, + "loss": 0.22874602675437927, + "loss_ce": 0.01939544454216957, + "loss_iou": 1.001299500465393, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 215018680, + "step": 1244 + }, + { + "epoch": 0.4768288012255841, + "grad_norm": 41.614299138596365, + "learning_rate": 5e-06, + "loss": 0.2361, + "num_input_tokens_seen": 215191304, + "step": 1245 + }, + { + "epoch": 0.4768288012255841, + "loss": 0.2315431535243988, + "loss_ce": 0.0205446258187294, + "loss_iou": 1.0208760499954224, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 215191304, + "step": 1245 + }, + { + "epoch": 0.4772117962466488, + "grad_norm": 47.040980481672904, + "learning_rate": 5e-06, + "loss": 0.2758, + "num_input_tokens_seen": 215364248, + "step": 1246 + }, + { + "epoch": 0.4772117962466488, + "loss": 0.28870445489883423, + "loss_ce": 0.020393896847963333, + "loss_iou": 1.137253999710083, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 215364248, + "step": 1246 + }, + { + "epoch": 0.47759479126771354, + "grad_norm": 36.53723029226407, + "learning_rate": 5e-06, + "loss": 0.2395, + "num_input_tokens_seen": 215537144, + "step": 1247 + }, + { + "epoch": 0.47759479126771354, + "loss": 0.24715638160705566, + "loss_ce": 0.020166628062725067, + "loss_iou": 1.118346929550171, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 215537144, + "step": 1247 + }, + { + "epoch": 0.47797778628877824, + "grad_norm": 28.875781579919764, + "learning_rate": 5e-06, + "loss": 0.1856, + "num_input_tokens_seen": 215710080, + "step": 1248 + }, + { + "epoch": 0.47797778628877824, + "loss": 0.18068018555641174, + "loss_ce": 0.02168361470103264, + "loss_iou": 1.0261390209197998, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 215710080, + "step": 1248 + }, + { + "epoch": 0.478360781309843, + "grad_norm": 34.72003795966805, + "learning_rate": 5e-06, + "loss": 0.205, + "num_input_tokens_seen": 215883128, + "step": 1249 + }, + { + "epoch": 0.478360781309843, + "loss": 0.16687241196632385, + "loss_ce": 0.021730797365307808, + "loss_iou": 1.0021653175354004, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 215883128, + "step": 1249 + }, + { + "epoch": 0.4787437763309077, + "grad_norm": 55.81362605658789, + "learning_rate": 5e-06, + "loss": 0.2416, + "num_input_tokens_seen": 216056208, + "step": 1250 + }, + { + "epoch": 0.4787437763309077, + "eval_websight_new_CIoU": 0.8993282616138458, + "eval_websight_new_GIoU": 0.8991754651069641, + "eval_websight_new_IoU": 0.8996633589267731, + "eval_websight_new_MAE_all": 0.011344741564244032, + "eval_websight_new_MAE_h": 0.013371880631893873, + "eval_websight_new_MAE_w": 0.01601374614983797, + "eval_websight_new_MAE_x": 0.009378236252814531, + "eval_websight_new_MAE_y": 0.006615103920921683, + "eval_websight_new_NUM_probability": 9.162682181340642e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.16499315202236176, + "eval_websight_new_loss_ce": 0.0413089245557785, + "eval_websight_new_loss_iou": 1.0006011128425598, + "eval_websight_new_loss_num": 0.125, + "eval_websight_new_loss_xval": 0.125, + "eval_websight_new_runtime": 56.5573, + "eval_websight_new_samples_per_second": 0.884, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 216056208, + "step": 1250 + }, + { + "epoch": 0.4787437763309077, + "eval_seeclick_CIoU": 0.625552624464035, + "eval_seeclick_GIoU": 0.6292989253997803, + "eval_seeclick_IoU": 0.6577684879302979, + "eval_seeclick_MAE_all": 0.06135014072060585, + "eval_seeclick_MAE_h": 0.05675686150789261, + "eval_seeclick_MAE_w": 0.07418486848473549, + "eval_seeclick_MAE_x": 0.06162244640290737, + "eval_seeclick_MAE_y": 0.05283636972308159, + "eval_seeclick_NUM_probability": 0.0001246245992660988, + "eval_seeclick_inside_bbox": 0.8975694477558136, + "eval_seeclick_loss": 0.5222324132919312, + "eval_seeclick_loss_ce": 0.032219236716628075, + "eval_seeclick_loss_iou": 1.1460830569267273, + "eval_seeclick_loss_num": 0.46142578125, + "eval_seeclick_loss_xval": 0.46142578125, + "eval_seeclick_runtime": 85.2578, + "eval_seeclick_samples_per_second": 0.586, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 216056208, + "step": 1250 + }, + { + "epoch": 0.4787437763309077, + "eval_icons_CIoU": 0.8609585464000702, + "eval_icons_GIoU": 0.8591577708721161, + "eval_icons_IoU": 0.8646650612354279, + "eval_icons_MAE_all": 0.01873237080872059, + "eval_icons_MAE_h": 0.02262669149786234, + "eval_icons_MAE_w": 0.022232558578252792, + "eval_icons_MAE_x": 0.014843590091913939, + "eval_icons_MAE_y": 0.015226639807224274, + "eval_icons_NUM_probability": 9.073268302017823e-05, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.17036914825439453, + "eval_icons_loss_ce": 0.027757744304835796, + "eval_icons_loss_iou": 1.0137335062026978, + "eval_icons_loss_num": 0.133453369140625, + "eval_icons_loss_xval": 0.133453369140625, + "eval_icons_runtime": 86.4594, + "eval_icons_samples_per_second": 0.578, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 216056208, + "step": 1250 + }, + { + "epoch": 0.4787437763309077, + "loss": 0.17677251994609833, + "loss_ce": 0.02778569608926773, + "loss_iou": 1.0268021821975708, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 216056208, + "step": 1250 + }, + { + "epoch": 0.4791267713519724, + "grad_norm": 40.001223857015134, + "learning_rate": 5e-06, + "loss": 0.1946, + "num_input_tokens_seen": 216229520, + "step": 1251 + }, + { + "epoch": 0.4791267713519724, + "loss": 0.18243470788002014, + "loss_ce": 0.02111878991127014, + "loss_iou": 1.01613187789917, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 216229520, + "step": 1251 + }, + { + "epoch": 0.47950976637303716, + "grad_norm": 31.521366519005536, + "learning_rate": 5e-06, + "loss": 0.2673, + "num_input_tokens_seen": 216402744, + "step": 1252 + }, + { + "epoch": 0.47950976637303716, + "loss": 0.26666757464408875, + "loss_ce": 0.021550390869379044, + "loss_iou": 1.0124704837799072, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 216402744, + "step": 1252 + }, + { + "epoch": 0.47989276139410186, + "grad_norm": 28.036570071899565, + "learning_rate": 5e-06, + "loss": 0.2371, + "num_input_tokens_seen": 216575744, + "step": 1253 + }, + { + "epoch": 0.47989276139410186, + "loss": 0.3153555393218994, + "loss_ce": 0.020647302269935608, + "loss_iou": 1.0578529834747314, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 216575744, + "step": 1253 + }, + { + "epoch": 0.4802757564151666, + "grad_norm": 33.72330941098161, + "learning_rate": 5e-06, + "loss": 0.2868, + "num_input_tokens_seen": 216748136, + "step": 1254 + }, + { + "epoch": 0.4802757564151666, + "loss": 0.28596433997154236, + "loss_ce": 0.020095188170671463, + "loss_iou": 1.083143711090088, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 216748136, + "step": 1254 + }, + { + "epoch": 0.4806587514362313, + "grad_norm": 28.545714072290114, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 216921392, + "step": 1255 + }, + { + "epoch": 0.4806587514362313, + "loss": 0.25406983494758606, + "loss_ce": 0.020671386271715164, + "loss_iou": 1.2555147409439087, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 216921392, + "step": 1255 + }, + { + "epoch": 0.4810417464572961, + "grad_norm": 25.958929048140725, + "learning_rate": 5e-06, + "loss": 0.2512, + "num_input_tokens_seen": 217094440, + "step": 1256 + }, + { + "epoch": 0.4810417464572961, + "loss": 0.29601573944091797, + "loss_ce": 0.020197857171297073, + "loss_iou": 1.0392547845840454, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 217094440, + "step": 1256 + }, + { + "epoch": 0.4814247414783608, + "grad_norm": 29.56119118045122, + "learning_rate": 5e-06, + "loss": 0.3023, + "num_input_tokens_seen": 217266896, + "step": 1257 + }, + { + "epoch": 0.4814247414783608, + "loss": 0.37968093156814575, + "loss_ce": 0.019634529948234558, + "loss_iou": 1.1065092086791992, + "loss_num": 0.359375, + "loss_xval": 0.359375, + "num_input_tokens_seen": 217266896, + "step": 1257 + }, + { + "epoch": 0.4818077364994255, + "grad_norm": 28.549049276922037, + "learning_rate": 5e-06, + "loss": 0.2112, + "num_input_tokens_seen": 217439728, + "step": 1258 + }, + { + "epoch": 0.4818077364994255, + "loss": 0.20042143762111664, + "loss_ce": 0.020856007933616638, + "loss_iou": 1.0481743812561035, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 217439728, + "step": 1258 + }, + { + "epoch": 0.48219073152049025, + "grad_norm": 39.25289950745477, + "learning_rate": 5e-06, + "loss": 0.2395, + "num_input_tokens_seen": 217612664, + "step": 1259 + }, + { + "epoch": 0.48219073152049025, + "loss": 0.26584044098854065, + "loss_ce": 0.020723257213830948, + "loss_iou": 0.9868621826171875, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 217612664, + "step": 1259 + }, + { + "epoch": 0.48257372654155495, + "grad_norm": 43.435645856671705, + "learning_rate": 5e-06, + "loss": 0.2626, + "num_input_tokens_seen": 217785856, + "step": 1260 + }, + { + "epoch": 0.48257372654155495, + "loss": 0.21399447321891785, + "loss_ce": 0.02173374593257904, + "loss_iou": 1.0139672756195068, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 217785856, + "step": 1260 + }, + { + "epoch": 0.4829567215626197, + "grad_norm": 37.75141670203048, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 217958800, + "step": 1261 + }, + { + "epoch": 0.4829567215626197, + "loss": 0.21330128610134125, + "loss_ce": 0.02012501284480095, + "loss_iou": 1.0445476770401, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 217958800, + "step": 1261 + }, + { + "epoch": 0.4833397165836844, + "grad_norm": 26.454020869883134, + "learning_rate": 5e-06, + "loss": 0.2543, + "num_input_tokens_seen": 218131784, + "step": 1262 + }, + { + "epoch": 0.4833397165836844, + "loss": 0.2772575914859772, + "loss_ce": 0.02182544767856598, + "loss_iou": 1.0503876209259033, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 218131784, + "step": 1262 + }, + { + "epoch": 0.4837227116047491, + "grad_norm": 26.083432961672667, + "learning_rate": 5e-06, + "loss": 0.1786, + "num_input_tokens_seen": 218304288, + "step": 1263 + }, + { + "epoch": 0.4837227116047491, + "loss": 0.1882094442844391, + "loss_ce": 0.02054586447775364, + "loss_iou": 1.0622601509094238, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 218304288, + "step": 1263 + }, + { + "epoch": 0.48410570662581387, + "grad_norm": 30.387732450393234, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 218477696, + "step": 1264 + }, + { + "epoch": 0.48410570662581387, + "loss": 0.23629167675971985, + "loss_ce": 0.021631035953760147, + "loss_iou": 1.0289371013641357, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 218477696, + "step": 1264 + }, + { + "epoch": 0.4844887016468786, + "grad_norm": 26.831065114890695, + "learning_rate": 5e-06, + "loss": 0.2217, + "num_input_tokens_seen": 218650680, + "step": 1265 + }, + { + "epoch": 0.4844887016468786, + "loss": 0.2116309255361557, + "loss_ce": 0.02126227878034115, + "loss_iou": 1.0325360298156738, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 218650680, + "step": 1265 + }, + { + "epoch": 0.48487169666794333, + "grad_norm": 26.661143836911545, + "learning_rate": 5e-06, + "loss": 0.2495, + "num_input_tokens_seen": 218823744, + "step": 1266 + }, + { + "epoch": 0.48487169666794333, + "loss": 0.2849521040916443, + "loss_ce": 0.021524369716644287, + "loss_iou": 1.0682734251022339, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 218823744, + "step": 1266 + }, + { + "epoch": 0.48525469168900803, + "grad_norm": 33.55121540611247, + "learning_rate": 5e-06, + "loss": 0.2134, + "num_input_tokens_seen": 218996744, + "step": 1267 + }, + { + "epoch": 0.48525469168900803, + "loss": 0.21019728481769562, + "loss_ce": 0.02031690999865532, + "loss_iou": 1.0088659524917603, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 218996744, + "step": 1267 + }, + { + "epoch": 0.4856376867100728, + "grad_norm": 38.76012561855069, + "learning_rate": 5e-06, + "loss": 0.2625, + "num_input_tokens_seen": 219169520, + "step": 1268 + }, + { + "epoch": 0.4856376867100728, + "loss": 0.266610324382782, + "loss_ce": 0.021493151783943176, + "loss_iou": 1.0532801151275635, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 219169520, + "step": 1268 + }, + { + "epoch": 0.4860206817311375, + "grad_norm": 25.69733505577472, + "learning_rate": 5e-06, + "loss": 0.2445, + "num_input_tokens_seen": 219342160, + "step": 1269 + }, + { + "epoch": 0.4860206817311375, + "loss": 0.27903541922569275, + "loss_ce": 0.01908668503165245, + "loss_iou": 0.9650876522064209, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 219342160, + "step": 1269 + }, + { + "epoch": 0.4864036767522022, + "grad_norm": 28.647249426243246, + "learning_rate": 5e-06, + "loss": 0.2317, + "num_input_tokens_seen": 219514504, + "step": 1270 + }, + { + "epoch": 0.4864036767522022, + "loss": 0.23824116587638855, + "loss_ce": 0.0194911677390337, + "loss_iou": 1.0316154956817627, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 219514504, + "step": 1270 + }, + { + "epoch": 0.48678667177326695, + "grad_norm": 24.09040725347035, + "learning_rate": 5e-06, + "loss": 0.3011, + "num_input_tokens_seen": 219686880, + "step": 1271 + }, + { + "epoch": 0.48678667177326695, + "loss": 0.3540859520435333, + "loss_ce": 0.020101584494113922, + "loss_iou": 1.0527961254119873, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 219686880, + "step": 1271 + }, + { + "epoch": 0.48716966679433166, + "grad_norm": 41.66327530287003, + "learning_rate": 5e-06, + "loss": 0.2688, + "num_input_tokens_seen": 219859928, + "step": 1272 + }, + { + "epoch": 0.48716966679433166, + "loss": 0.34175288677215576, + "loss_ce": 0.02064690738916397, + "loss_iou": 1.2458839416503906, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 219859928, + "step": 1272 + }, + { + "epoch": 0.4875526618153964, + "grad_norm": 43.8863915282584, + "learning_rate": 5e-06, + "loss": 0.3065, + "num_input_tokens_seen": 220032792, + "step": 1273 + }, + { + "epoch": 0.4875526618153964, + "loss": 0.2963223457336426, + "loss_ce": 0.021297920495271683, + "loss_iou": 1.021531105041504, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 220032792, + "step": 1273 + }, + { + "epoch": 0.4879356568364611, + "grad_norm": 31.15985261756482, + "learning_rate": 5e-06, + "loss": 0.2837, + "num_input_tokens_seen": 220205480, + "step": 1274 + }, + { + "epoch": 0.4879356568364611, + "loss": 0.2977232038974762, + "loss_ce": 0.02135601080954075, + "loss_iou": 1.0700514316558838, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 220205480, + "step": 1274 + }, + { + "epoch": 0.4883186518575259, + "grad_norm": 27.083749619057436, + "learning_rate": 5e-06, + "loss": 0.289, + "num_input_tokens_seen": 220378616, + "step": 1275 + }, + { + "epoch": 0.4883186518575259, + "loss": 0.3561253845691681, + "loss_ce": 0.020248912274837494, + "loss_iou": 1.0839688777923584, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 220378616, + "step": 1275 + }, + { + "epoch": 0.4887016468785906, + "grad_norm": 37.52991517794262, + "learning_rate": 5e-06, + "loss": 0.2556, + "num_input_tokens_seen": 220551680, + "step": 1276 + }, + { + "epoch": 0.4887016468785906, + "loss": 0.23188307881355286, + "loss_ce": 0.021922152489423752, + "loss_iou": 1.0254385471343994, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 220551680, + "step": 1276 + }, + { + "epoch": 0.4890846418996553, + "grad_norm": 31.922314748751266, + "learning_rate": 5e-06, + "loss": 0.2336, + "num_input_tokens_seen": 220724960, + "step": 1277 + }, + { + "epoch": 0.4890846418996553, + "loss": 0.19547098875045776, + "loss_ce": 0.020055953413248062, + "loss_iou": 1.0054305791854858, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 220724960, + "step": 1277 + }, + { + "epoch": 0.48946763692072004, + "grad_norm": 20.772756783892586, + "learning_rate": 5e-06, + "loss": 0.3165, + "num_input_tokens_seen": 220897696, + "step": 1278 + }, + { + "epoch": 0.48946763692072004, + "loss": 0.2660837769508362, + "loss_ce": 0.021210746839642525, + "loss_iou": 1.0261434316635132, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 220897696, + "step": 1278 + }, + { + "epoch": 0.48985063194178474, + "grad_norm": 21.80453036032498, + "learning_rate": 5e-06, + "loss": 0.1961, + "num_input_tokens_seen": 221070320, + "step": 1279 + }, + { + "epoch": 0.48985063194178474, + "loss": 0.20578120648860931, + "loss_ce": 0.019807081669569016, + "loss_iou": 1.0507761240005493, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 221070320, + "step": 1279 + }, + { + "epoch": 0.4902336269628495, + "grad_norm": 34.31817030457487, + "learning_rate": 5e-06, + "loss": 0.2552, + "num_input_tokens_seen": 221243296, + "step": 1280 + }, + { + "epoch": 0.4902336269628495, + "loss": 0.3230183720588684, + "loss_ce": 0.01949053257703781, + "loss_iou": 1.6514267921447754, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 221243296, + "step": 1280 + }, + { + "epoch": 0.4906166219839142, + "grad_norm": 29.44297168945806, + "learning_rate": 5e-06, + "loss": 0.2868, + "num_input_tokens_seen": 221416328, + "step": 1281 + }, + { + "epoch": 0.4906166219839142, + "loss": 0.31145864725112915, + "loss_ce": 0.02044302597641945, + "loss_iou": 1.084681749343872, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 221416328, + "step": 1281 + }, + { + "epoch": 0.49099961700497896, + "grad_norm": 24.28100217361298, + "learning_rate": 5e-06, + "loss": 0.241, + "num_input_tokens_seen": 221589264, + "step": 1282 + }, + { + "epoch": 0.49099961700497896, + "loss": 0.27191275358200073, + "loss_ce": 0.02069203555583954, + "loss_iou": 1.0331673622131348, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 221589264, + "step": 1282 + }, + { + "epoch": 0.49138261202604366, + "grad_norm": 30.994349085547384, + "learning_rate": 5e-06, + "loss": 0.2326, + "num_input_tokens_seen": 221762320, + "step": 1283 + }, + { + "epoch": 0.49138261202604366, + "loss": 0.2575764060020447, + "loss_ce": 0.01978345587849617, + "loss_iou": 1.0269644260406494, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 221762320, + "step": 1283 + }, + { + "epoch": 0.49176560704710837, + "grad_norm": 38.6801635000788, + "learning_rate": 5e-06, + "loss": 0.201, + "num_input_tokens_seen": 221935392, + "step": 1284 + }, + { + "epoch": 0.49176560704710837, + "loss": 0.18782469630241394, + "loss_ce": 0.021137692034244537, + "loss_iou": 1.0069403648376465, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 221935392, + "step": 1284 + }, + { + "epoch": 0.4921486020681731, + "grad_norm": 27.639627105321036, + "learning_rate": 5e-06, + "loss": 0.2377, + "num_input_tokens_seen": 222108128, + "step": 1285 + }, + { + "epoch": 0.4921486020681731, + "loss": 0.2548784017562866, + "loss_ce": 0.019404759630560875, + "loss_iou": 1.0391649007797241, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 222108128, + "step": 1285 + }, + { + "epoch": 0.4925315970892378, + "grad_norm": 22.161921926812532, + "learning_rate": 5e-06, + "loss": 0.1802, + "num_input_tokens_seen": 222281120, + "step": 1286 + }, + { + "epoch": 0.4925315970892378, + "loss": 0.17910084128379822, + "loss_ce": 0.01979907602071762, + "loss_iou": 1.028035044670105, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 222281120, + "step": 1286 + }, + { + "epoch": 0.4929145921103026, + "grad_norm": 26.32633461607658, + "learning_rate": 5e-06, + "loss": 0.2956, + "num_input_tokens_seen": 222453960, + "step": 1287 + }, + { + "epoch": 0.4929145921103026, + "loss": 0.3282597064971924, + "loss_ce": 0.021008752286434174, + "loss_iou": 1.0230724811553955, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 222453960, + "step": 1287 + }, + { + "epoch": 0.4932975871313673, + "grad_norm": 50.046689203103455, + "learning_rate": 5e-06, + "loss": 0.2674, + "num_input_tokens_seen": 222626888, + "step": 1288 + }, + { + "epoch": 0.4932975871313673, + "loss": 0.22985592484474182, + "loss_ce": 0.02111569046974182, + "loss_iou": 1.0050923824310303, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 222626888, + "step": 1288 + }, + { + "epoch": 0.49368058215243205, + "grad_norm": 34.9913963459762, + "learning_rate": 5e-06, + "loss": 0.2121, + "num_input_tokens_seen": 222799736, + "step": 1289 + }, + { + "epoch": 0.49368058215243205, + "loss": 0.2531659007072449, + "loss_ce": 0.020255738869309425, + "loss_iou": 1.0212188959121704, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 222799736, + "step": 1289 + }, + { + "epoch": 0.49406357717349675, + "grad_norm": 32.45673806117689, + "learning_rate": 5e-06, + "loss": 0.2929, + "num_input_tokens_seen": 222972904, + "step": 1290 + }, + { + "epoch": 0.49406357717349675, + "loss": 0.2759690284729004, + "loss_ce": 0.02120828628540039, + "loss_iou": 1.0240442752838135, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 222972904, + "step": 1290 + }, + { + "epoch": 0.49444657219456145, + "grad_norm": 47.547632682028144, + "learning_rate": 5e-06, + "loss": 0.3203, + "num_input_tokens_seen": 223145920, + "step": 1291 + }, + { + "epoch": 0.49444657219456145, + "loss": 0.2650194466114044, + "loss_ce": 0.020512614399194717, + "loss_iou": 1.0113552808761597, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 223145920, + "step": 1291 + }, + { + "epoch": 0.4948295672156262, + "grad_norm": 44.28677941576457, + "learning_rate": 5e-06, + "loss": 0.2589, + "num_input_tokens_seen": 223318880, + "step": 1292 + }, + { + "epoch": 0.4948295672156262, + "loss": 0.2196100503206253, + "loss_ce": 0.021734073758125305, + "loss_iou": 0.9984222650527954, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 223318880, + "step": 1292 + }, + { + "epoch": 0.4952125622366909, + "grad_norm": 36.16609817287782, + "learning_rate": 5e-06, + "loss": 0.3403, + "num_input_tokens_seen": 223491800, + "step": 1293 + }, + { + "epoch": 0.4952125622366909, + "loss": 0.3044971525669098, + "loss_ce": 0.022392667829990387, + "loss_iou": 1.1346598863601685, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 223491800, + "step": 1293 + }, + { + "epoch": 0.49559555725775567, + "grad_norm": 21.054867054897002, + "learning_rate": 5e-06, + "loss": 0.2747, + "num_input_tokens_seen": 223664648, + "step": 1294 + }, + { + "epoch": 0.49559555725775567, + "loss": 0.29920294880867004, + "loss_ce": 0.019051577895879745, + "loss_iou": 1.1052517890930176, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 223664648, + "step": 1294 + }, + { + "epoch": 0.4959785522788204, + "grad_norm": 16.948986363038664, + "learning_rate": 5e-06, + "loss": 0.3082, + "num_input_tokens_seen": 223837328, + "step": 1295 + }, + { + "epoch": 0.4959785522788204, + "loss": 0.3230059742927551, + "loss_ce": 0.02002747543156147, + "loss_iou": 1.1161134243011475, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 223837328, + "step": 1295 + }, + { + "epoch": 0.4963615472998851, + "grad_norm": 16.96679685389391, + "learning_rate": 5e-06, + "loss": 0.2246, + "num_input_tokens_seen": 224010416, + "step": 1296 + }, + { + "epoch": 0.4963615472998851, + "loss": 0.23388779163360596, + "loss_ce": 0.01770125702023506, + "loss_iou": 0.8875874876976013, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 224010416, + "step": 1296 + }, + { + "epoch": 0.49674454232094983, + "grad_norm": 31.47918902702664, + "learning_rate": 5e-06, + "loss": 0.3196, + "num_input_tokens_seen": 224183672, + "step": 1297 + }, + { + "epoch": 0.49674454232094983, + "loss": 0.34980836510658264, + "loss_ce": 0.021195102483034134, + "loss_iou": 1.0460903644561768, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 224183672, + "step": 1297 + }, + { + "epoch": 0.49712753734201454, + "grad_norm": 41.42384157620713, + "learning_rate": 5e-06, + "loss": 0.2303, + "num_input_tokens_seen": 224357024, + "step": 1298 + }, + { + "epoch": 0.49712753734201454, + "loss": 0.21717743575572968, + "loss_ce": 0.022414255887269974, + "loss_iou": 1.001346230506897, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 224357024, + "step": 1298 + }, + { + "epoch": 0.4975105323630793, + "grad_norm": 36.173696628924326, + "learning_rate": 5e-06, + "loss": 0.3034, + "num_input_tokens_seen": 224529872, + "step": 1299 + }, + { + "epoch": 0.4975105323630793, + "loss": 0.23724958300590515, + "loss_ce": 0.02020856738090515, + "loss_iou": 1.0051367282867432, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 224529872, + "step": 1299 + }, + { + "epoch": 0.497893527384144, + "grad_norm": 22.15717286074569, + "learning_rate": 5e-06, + "loss": 0.2266, + "num_input_tokens_seen": 224702944, + "step": 1300 + }, + { + "epoch": 0.497893527384144, + "loss": 0.22423772513866425, + "loss_ce": 0.02129582315683365, + "loss_iou": 1.030242919921875, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 224702944, + "step": 1300 + }, + { + "epoch": 0.49827652240520875, + "grad_norm": 35.5021631522705, + "learning_rate": 5e-06, + "loss": 0.2559, + "num_input_tokens_seen": 224875800, + "step": 1301 + }, + { + "epoch": 0.49827652240520875, + "loss": 0.29484647512435913, + "loss_ce": 0.021775171160697937, + "loss_iou": 1.0023797750473022, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 224875800, + "step": 1301 + }, + { + "epoch": 0.49865951742627346, + "grad_norm": 46.35512650727594, + "learning_rate": 5e-06, + "loss": 0.2939, + "num_input_tokens_seen": 225048824, + "step": 1302 + }, + { + "epoch": 0.49865951742627346, + "loss": 0.32741639018058777, + "loss_ce": 0.02126404270529747, + "loss_iou": 1.0482133626937866, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 225048824, + "step": 1302 + }, + { + "epoch": 0.49904251244733816, + "grad_norm": 34.31661781023796, + "learning_rate": 5e-06, + "loss": 0.1727, + "num_input_tokens_seen": 225221712, + "step": 1303 + }, + { + "epoch": 0.49904251244733816, + "loss": 0.16094151139259338, + "loss_ce": 0.018607541918754578, + "loss_iou": 1.000558614730835, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 225221712, + "step": 1303 + }, + { + "epoch": 0.4994255074684029, + "grad_norm": 29.129981132600637, + "learning_rate": 5e-06, + "loss": 0.2064, + "num_input_tokens_seen": 225394392, + "step": 1304 + }, + { + "epoch": 0.4994255074684029, + "loss": 0.19164007902145386, + "loss_ce": 0.019520942121744156, + "loss_iou": 1.0027861595153809, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 225394392, + "step": 1304 + }, + { + "epoch": 0.4998085024894676, + "grad_norm": 39.55374574961186, + "learning_rate": 5e-06, + "loss": 0.248, + "num_input_tokens_seen": 225567568, + "step": 1305 + }, + { + "epoch": 0.4998085024894676, + "loss": 0.2537155747413635, + "loss_ce": 0.021904051303863525, + "loss_iou": 1.28212308883667, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 225567568, + "step": 1305 + }, + { + "epoch": 0.5001914975105324, + "grad_norm": 40.71644853830682, + "learning_rate": 5e-06, + "loss": 0.2348, + "num_input_tokens_seen": 225740720, + "step": 1306 + }, + { + "epoch": 0.5001914975105324, + "loss": 0.19909319281578064, + "loss_ce": 0.02093156985938549, + "loss_iou": 0.9498213529586792, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 225740720, + "step": 1306 + }, + { + "epoch": 0.5005744925315971, + "grad_norm": 29.06221694350055, + "learning_rate": 5e-06, + "loss": 0.1784, + "num_input_tokens_seen": 225913584, + "step": 1307 + }, + { + "epoch": 0.5005744925315971, + "loss": 0.1767686903476715, + "loss_ce": 0.018321428447961807, + "loss_iou": 1.0008810758590698, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 225913584, + "step": 1307 + }, + { + "epoch": 0.5009574875526618, + "grad_norm": 50.6076976981039, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 226086816, + "step": 1308 + }, + { + "epoch": 0.5009574875526618, + "loss": 0.19694334268569946, + "loss_ce": 0.020795879885554314, + "loss_iou": 1.0267889499664307, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 226086816, + "step": 1308 + }, + { + "epoch": 0.5013404825737265, + "grad_norm": 35.17112276616945, + "learning_rate": 5e-06, + "loss": 0.2929, + "num_input_tokens_seen": 226259584, + "step": 1309 + }, + { + "epoch": 0.5013404825737265, + "loss": 0.2977929413318634, + "loss_ce": 0.0208764486014843, + "loss_iou": 1.084275484085083, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 226259584, + "step": 1309 + }, + { + "epoch": 0.5017234775947913, + "grad_norm": 28.65337481435919, + "learning_rate": 5e-06, + "loss": 0.2208, + "num_input_tokens_seen": 226432240, + "step": 1310 + }, + { + "epoch": 0.5017234775947913, + "loss": 0.2865043878555298, + "loss_ce": 0.019536608830094337, + "loss_iou": 1.0236896276474, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 226432240, + "step": 1310 + }, + { + "epoch": 0.502106472615856, + "grad_norm": 31.646856094478498, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 226601512, + "step": 1311 + }, + { + "epoch": 0.502106472615856, + "loss": 0.24922175705432892, + "loss_ce": 0.01890559494495392, + "loss_iou": 0.88271164894104, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 226601512, + "step": 1311 + }, + { + "epoch": 0.5024894676369207, + "grad_norm": 36.226073173748745, + "learning_rate": 5e-06, + "loss": 0.2344, + "num_input_tokens_seen": 226774608, + "step": 1312 + }, + { + "epoch": 0.5024894676369207, + "loss": 0.2274923473596573, + "loss_ce": 0.02155972458422184, + "loss_iou": 1.152855634689331, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 226774608, + "step": 1312 + }, + { + "epoch": 0.5028724626579855, + "grad_norm": 38.4014820846328, + "learning_rate": 5e-06, + "loss": 0.2594, + "num_input_tokens_seen": 226947576, + "step": 1313 + }, + { + "epoch": 0.5028724626579855, + "loss": 0.32784831523895264, + "loss_ce": 0.01901041530072689, + "loss_iou": 1.092888593673706, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 226947576, + "step": 1313 + }, + { + "epoch": 0.5032554576790502, + "grad_norm": 38.89804778530378, + "learning_rate": 5e-06, + "loss": 0.2806, + "num_input_tokens_seen": 227120832, + "step": 1314 + }, + { + "epoch": 0.5032554576790502, + "loss": 0.28596794605255127, + "loss_ce": 0.02144157886505127, + "loss_iou": 1.021527886390686, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 227120832, + "step": 1314 + }, + { + "epoch": 0.5036384527001149, + "grad_norm": 31.807060485433798, + "learning_rate": 5e-06, + "loss": 0.2344, + "num_input_tokens_seen": 227293688, + "step": 1315 + }, + { + "epoch": 0.5036384527001149, + "loss": 0.21565531194210052, + "loss_ce": 0.020037636160850525, + "loss_iou": 1.0489392280578613, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 227293688, + "step": 1315 + }, + { + "epoch": 0.5040214477211796, + "grad_norm": 23.146613469858394, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 227466600, + "step": 1316 + }, + { + "epoch": 0.5040214477211796, + "loss": 0.2014552652835846, + "loss_ce": 0.02018084190785885, + "loss_iou": 1.0091123580932617, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 227466600, + "step": 1316 + }, + { + "epoch": 0.5044044427422444, + "grad_norm": 22.32627210915022, + "learning_rate": 5e-06, + "loss": 0.21, + "num_input_tokens_seen": 227639752, + "step": 1317 + }, + { + "epoch": 0.5044044427422444, + "loss": 0.21035532653331757, + "loss_ce": 0.019437363371253014, + "loss_iou": 1.0013794898986816, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 227639752, + "step": 1317 + }, + { + "epoch": 0.504787437763309, + "grad_norm": 24.765486554199338, + "learning_rate": 5e-06, + "loss": 0.2454, + "num_input_tokens_seen": 227812776, + "step": 1318 + }, + { + "epoch": 0.504787437763309, + "loss": 0.2919178009033203, + "loss_ce": 0.021043788641691208, + "loss_iou": 1.0157856941223145, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 227812776, + "step": 1318 + }, + { + "epoch": 0.5051704327843738, + "grad_norm": 36.61425807120079, + "learning_rate": 5e-06, + "loss": 0.1963, + "num_input_tokens_seen": 227985864, + "step": 1319 + }, + { + "epoch": 0.5051704327843738, + "loss": 0.18542779982089996, + "loss_ce": 0.018618717789649963, + "loss_iou": 1.0022015571594238, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 227985864, + "step": 1319 + }, + { + "epoch": 0.5055534278054385, + "grad_norm": 33.75917521038577, + "learning_rate": 5e-06, + "loss": 0.2716, + "num_input_tokens_seen": 228158688, + "step": 1320 + }, + { + "epoch": 0.5055534278054385, + "loss": 0.23160414397716522, + "loss_ce": 0.020117323845624924, + "loss_iou": 1.0108810663223267, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 228158688, + "step": 1320 + }, + { + "epoch": 0.5059364228265033, + "grad_norm": 34.89146044779558, + "learning_rate": 5e-06, + "loss": 0.2391, + "num_input_tokens_seen": 228331520, + "step": 1321 + }, + { + "epoch": 0.5059364228265033, + "loss": 0.23049719631671906, + "loss_ce": 0.020047983154654503, + "loss_iou": 1.028801441192627, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 228331520, + "step": 1321 + }, + { + "epoch": 0.506319417847568, + "grad_norm": 29.487383714282466, + "learning_rate": 5e-06, + "loss": 0.2963, + "num_input_tokens_seen": 228504720, + "step": 1322 + }, + { + "epoch": 0.506319417847568, + "loss": 0.24549441039562225, + "loss_ce": 0.02247193455696106, + "loss_iou": 1.0049664974212646, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 228504720, + "step": 1322 + }, + { + "epoch": 0.5067024128686327, + "grad_norm": 24.43148361606767, + "learning_rate": 5e-06, + "loss": 0.2366, + "num_input_tokens_seen": 228677624, + "step": 1323 + }, + { + "epoch": 0.5067024128686327, + "loss": 0.17037566006183624, + "loss_ce": 0.01998504251241684, + "loss_iou": 1.0022523403167725, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 228677624, + "step": 1323 + }, + { + "epoch": 0.5070854078896975, + "grad_norm": 25.358514927064693, + "learning_rate": 5e-06, + "loss": 0.2574, + "num_input_tokens_seen": 228850800, + "step": 1324 + }, + { + "epoch": 0.5070854078896975, + "loss": 0.2549811005592346, + "loss_ce": 0.021094385534524918, + "loss_iou": 1.0110515356063843, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 228850800, + "step": 1324 + }, + { + "epoch": 0.5074684029107621, + "grad_norm": 26.514133382129888, + "learning_rate": 5e-06, + "loss": 0.2547, + "num_input_tokens_seen": 229023664, + "step": 1325 + }, + { + "epoch": 0.5074684029107621, + "loss": 0.2352285385131836, + "loss_ce": 0.021727576851844788, + "loss_iou": 1.0286118984222412, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 229023664, + "step": 1325 + }, + { + "epoch": 0.5078513979318269, + "grad_norm": 38.798782986456516, + "learning_rate": 5e-06, + "loss": 0.264, + "num_input_tokens_seen": 229196784, + "step": 1326 + }, + { + "epoch": 0.5078513979318269, + "loss": 0.3162633180618286, + "loss_ce": 0.022195931524038315, + "loss_iou": 1.3013497591018677, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 229196784, + "step": 1326 + }, + { + "epoch": 0.5082343929528916, + "grad_norm": 34.50600779885732, + "learning_rate": 5e-06, + "loss": 0.2081, + "num_input_tokens_seen": 229369408, + "step": 1327 + }, + { + "epoch": 0.5082343929528916, + "loss": 0.1770828366279602, + "loss_ce": 0.018391430377960205, + "loss_iou": 0.8795397877693176, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 229369408, + "step": 1327 + }, + { + "epoch": 0.5086173879739564, + "grad_norm": 28.688164962242777, + "learning_rate": 5e-06, + "loss": 0.2512, + "num_input_tokens_seen": 229542488, + "step": 1328 + }, + { + "epoch": 0.5086173879739564, + "loss": 0.2442503720521927, + "loss_ce": 0.020800668746232986, + "loss_iou": 1.0248935222625732, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 229542488, + "step": 1328 + }, + { + "epoch": 0.509000382995021, + "grad_norm": 33.64137325261358, + "learning_rate": 5e-06, + "loss": 0.2543, + "num_input_tokens_seen": 229715656, + "step": 1329 + }, + { + "epoch": 0.509000382995021, + "loss": 0.2702787518501282, + "loss_ce": 0.020400822162628174, + "loss_iou": 1.170792818069458, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 229715656, + "step": 1329 + }, + { + "epoch": 0.5093833780160858, + "grad_norm": 36.39428144567596, + "learning_rate": 5e-06, + "loss": 0.2902, + "num_input_tokens_seen": 229888712, + "step": 1330 + }, + { + "epoch": 0.5093833780160858, + "loss": 0.2342301458120346, + "loss_ce": 0.021461594849824905, + "loss_iou": 1.0727958679199219, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 229888712, + "step": 1330 + }, + { + "epoch": 0.5097663730371506, + "grad_norm": 25.070009992914553, + "learning_rate": 5e-06, + "loss": 0.2314, + "num_input_tokens_seen": 230061896, + "step": 1331 + }, + { + "epoch": 0.5097663730371506, + "loss": 0.2624282240867615, + "loss_ce": 0.021034184843301773, + "loss_iou": 1.0383816957473755, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 230061896, + "step": 1331 + }, + { + "epoch": 0.5101493680582152, + "grad_norm": 35.76388904354218, + "learning_rate": 5e-06, + "loss": 0.2072, + "num_input_tokens_seen": 230234944, + "step": 1332 + }, + { + "epoch": 0.5101493680582152, + "loss": 0.2063194364309311, + "loss_ce": 0.019979100674390793, + "loss_iou": 1.0199110507965088, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 230234944, + "step": 1332 + }, + { + "epoch": 0.51053236307928, + "grad_norm": 20.77696301352915, + "learning_rate": 5e-06, + "loss": 0.2245, + "num_input_tokens_seen": 230407960, + "step": 1333 + }, + { + "epoch": 0.51053236307928, + "loss": 0.23807215690612793, + "loss_ce": 0.021946672350168228, + "loss_iou": 1.0557270050048828, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 230407960, + "step": 1333 + }, + { + "epoch": 0.5109153581003447, + "grad_norm": 43.40236449501893, + "learning_rate": 5e-06, + "loss": 0.2583, + "num_input_tokens_seen": 230581128, + "step": 1334 + }, + { + "epoch": 0.5109153581003447, + "loss": 0.26795485615730286, + "loss_ce": 0.021372821182012558, + "loss_iou": 1.0251628160476685, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 230581128, + "step": 1334 + }, + { + "epoch": 0.5112983531214095, + "grad_norm": 38.36400782556495, + "learning_rate": 5e-06, + "loss": 0.2806, + "num_input_tokens_seen": 230753384, + "step": 1335 + }, + { + "epoch": 0.5112983531214095, + "loss": 0.2859666645526886, + "loss_ce": 0.017717158421874046, + "loss_iou": 0.9628697633743286, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 230753384, + "step": 1335 + }, + { + "epoch": 0.5116813481424741, + "grad_norm": 22.614666083386055, + "learning_rate": 5e-06, + "loss": 0.2495, + "num_input_tokens_seen": 230926144, + "step": 1336 + }, + { + "epoch": 0.5116813481424741, + "loss": 0.2618767023086548, + "loss_ce": 0.02017749845981598, + "loss_iou": 1.1013388633728027, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 230926144, + "step": 1336 + }, + { + "epoch": 0.5120643431635389, + "grad_norm": 31.241732332051885, + "learning_rate": 5e-06, + "loss": 0.2089, + "num_input_tokens_seen": 231098912, + "step": 1337 + }, + { + "epoch": 0.5120643431635389, + "loss": 0.22717610001564026, + "loss_ce": 0.020266912877559662, + "loss_iou": 1.0211718082427979, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 231098912, + "step": 1337 + }, + { + "epoch": 0.5124473381846036, + "grad_norm": 40.09849691785989, + "learning_rate": 5e-06, + "loss": 0.2688, + "num_input_tokens_seen": 231271840, + "step": 1338 + }, + { + "epoch": 0.5124473381846036, + "loss": 0.26360654830932617, + "loss_ce": 0.01983211562037468, + "loss_iou": 1.0164717435836792, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 231271840, + "step": 1338 + }, + { + "epoch": 0.5128303332056683, + "grad_norm": 32.845511557733886, + "learning_rate": 5e-06, + "loss": 0.273, + "num_input_tokens_seen": 231444504, + "step": 1339 + }, + { + "epoch": 0.5128303332056683, + "loss": 0.27383947372436523, + "loss_ce": 0.01956700161099434, + "loss_iou": 1.0331006050109863, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 231444504, + "step": 1339 + }, + { + "epoch": 0.513213328226733, + "grad_norm": 36.65452646587695, + "learning_rate": 5e-06, + "loss": 0.2347, + "num_input_tokens_seen": 231617464, + "step": 1340 + }, + { + "epoch": 0.513213328226733, + "loss": 0.21411117911338806, + "loss_ce": 0.022338712587952614, + "loss_iou": 1.02278733253479, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 231617464, + "step": 1340 + }, + { + "epoch": 0.5135963232477978, + "grad_norm": 42.29017363996495, + "learning_rate": 5e-06, + "loss": 0.2139, + "num_input_tokens_seen": 231790368, + "step": 1341 + }, + { + "epoch": 0.5135963232477978, + "loss": 0.22402310371398926, + "loss_ce": 0.021752599626779556, + "loss_iou": 1.0306389331817627, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 231790368, + "step": 1341 + }, + { + "epoch": 0.5139793182688625, + "grad_norm": 47.29629926879741, + "learning_rate": 5e-06, + "loss": 0.2668, + "num_input_tokens_seen": 231962936, + "step": 1342 + }, + { + "epoch": 0.5139793182688625, + "loss": 0.28897830843925476, + "loss_ce": 0.02097294107079506, + "loss_iou": 1.057763695716858, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 231962936, + "step": 1342 + }, + { + "epoch": 0.5143623132899272, + "grad_norm": 24.648160299512856, + "learning_rate": 5e-06, + "loss": 0.2407, + "num_input_tokens_seen": 232135720, + "step": 1343 + }, + { + "epoch": 0.5143623132899272, + "loss": 0.25257134437561035, + "loss_ce": 0.01844049245119095, + "loss_iou": 0.9149987101554871, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 232135720, + "step": 1343 + }, + { + "epoch": 0.514745308310992, + "grad_norm": 23.948393528127145, + "learning_rate": 5e-06, + "loss": 0.1945, + "num_input_tokens_seen": 232308168, + "step": 1344 + }, + { + "epoch": 0.514745308310992, + "loss": 0.15678608417510986, + "loss_ce": 0.018785586580634117, + "loss_iou": 0.8803042769432068, + "loss_num": 0.1376953125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 232308168, + "step": 1344 + }, + { + "epoch": 0.5151283033320567, + "grad_norm": 16.1456748348062, + "learning_rate": 5e-06, + "loss": 0.2041, + "num_input_tokens_seen": 232480952, + "step": 1345 + }, + { + "epoch": 0.5151283033320567, + "loss": 0.17203077673912048, + "loss_ce": 0.020480498671531677, + "loss_iou": 1.002892255783081, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 232480952, + "step": 1345 + }, + { + "epoch": 0.5155112983531214, + "grad_norm": 10.864190384455942, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 232654008, + "step": 1346 + }, + { + "epoch": 0.5155112983531214, + "loss": 0.203628808259964, + "loss_ce": 0.022781629115343094, + "loss_iou": 1.1514043807983398, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 232654008, + "step": 1346 + }, + { + "epoch": 0.5158942933741861, + "grad_norm": 16.390795807993168, + "learning_rate": 5e-06, + "loss": 0.1773, + "num_input_tokens_seen": 232826856, + "step": 1347 + }, + { + "epoch": 0.5158942933741861, + "loss": 0.1825709044933319, + "loss_ce": 0.02021739073097706, + "loss_iou": 1.0611228942871094, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 232826856, + "step": 1347 + }, + { + "epoch": 0.5162772883952509, + "grad_norm": 54.976880532330874, + "learning_rate": 5e-06, + "loss": 0.2725, + "num_input_tokens_seen": 232999616, + "step": 1348 + }, + { + "epoch": 0.5162772883952509, + "loss": 0.29398179054260254, + "loss_ce": 0.020727401599287987, + "loss_iou": 1.1835370063781738, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 232999616, + "step": 1348 + }, + { + "epoch": 0.5166602834163155, + "grad_norm": 32.01183554460165, + "learning_rate": 5e-06, + "loss": 0.2167, + "num_input_tokens_seen": 233172416, + "step": 1349 + }, + { + "epoch": 0.5166602834163155, + "loss": 0.19804415106773376, + "loss_ce": 0.020859088748693466, + "loss_iou": 1.0127989053726196, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 233172416, + "step": 1349 + }, + { + "epoch": 0.5170432784373803, + "grad_norm": 28.674047907310722, + "learning_rate": 5e-06, + "loss": 0.2378, + "num_input_tokens_seen": 233345096, + "step": 1350 + }, + { + "epoch": 0.5170432784373803, + "loss": 0.2748212218284607, + "loss_ce": 0.019999457523226738, + "loss_iou": 1.0336179733276367, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 233345096, + "step": 1350 + }, + { + "epoch": 0.517426273458445, + "grad_norm": 35.15392862620632, + "learning_rate": 5e-06, + "loss": 0.1947, + "num_input_tokens_seen": 233517904, + "step": 1351 + }, + { + "epoch": 0.517426273458445, + "loss": 0.1975768804550171, + "loss_ce": 0.01978147029876709, + "loss_iou": 1.0122634172439575, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 233517904, + "step": 1351 + }, + { + "epoch": 0.5178092684795098, + "grad_norm": 33.684261942202, + "learning_rate": 5e-06, + "loss": 0.2148, + "num_input_tokens_seen": 233690808, + "step": 1352 + }, + { + "epoch": 0.5178092684795098, + "loss": 0.240241140127182, + "loss_ce": 0.022589776664972305, + "loss_iou": 1.0366296768188477, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 233690808, + "step": 1352 + }, + { + "epoch": 0.5181922635005745, + "grad_norm": 31.301514112753125, + "learning_rate": 5e-06, + "loss": 0.2582, + "num_input_tokens_seen": 233863688, + "step": 1353 + }, + { + "epoch": 0.5181922635005745, + "loss": 0.20045393705368042, + "loss_ce": 0.01960677094757557, + "loss_iou": 1.0051530599594116, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 233863688, + "step": 1353 + }, + { + "epoch": 0.5185752585216392, + "grad_norm": 33.153027066248825, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 234036264, + "step": 1354 + }, + { + "epoch": 0.5185752585216392, + "loss": 0.20700308680534363, + "loss_ce": 0.021090004593133926, + "loss_iou": 1.0467784404754639, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 234036264, + "step": 1354 + }, + { + "epoch": 0.518958253542704, + "grad_norm": 38.335258783336045, + "learning_rate": 5e-06, + "loss": 0.2545, + "num_input_tokens_seen": 234209368, + "step": 1355 + }, + { + "epoch": 0.518958253542704, + "loss": 0.2540934085845947, + "loss_ce": 0.02008461207151413, + "loss_iou": 1.0373358726501465, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 234209368, + "step": 1355 + }, + { + "epoch": 0.5193412485637686, + "grad_norm": 33.080406690400075, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 234382384, + "step": 1356 + }, + { + "epoch": 0.5193412485637686, + "loss": 0.19170862436294556, + "loss_ce": 0.02196984738111496, + "loss_iou": 1.003777027130127, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 234382384, + "step": 1356 + }, + { + "epoch": 0.5197242435848334, + "grad_norm": 29.393792328760647, + "learning_rate": 5e-06, + "loss": 0.2662, + "num_input_tokens_seen": 234555088, + "step": 1357 + }, + { + "epoch": 0.5197242435848334, + "loss": 0.3434233069419861, + "loss_ce": 0.020547322928905487, + "loss_iou": 1.283623456954956, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 234555088, + "step": 1357 + }, + { + "epoch": 0.5201072386058981, + "grad_norm": 29.68426568876046, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 234728136, + "step": 1358 + }, + { + "epoch": 0.5201072386058981, + "loss": 0.1551058292388916, + "loss_ce": 0.02223229594528675, + "loss_iou": 1.0033645629882812, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 234728136, + "step": 1358 + }, + { + "epoch": 0.5204902336269629, + "grad_norm": 41.63303639801133, + "learning_rate": 5e-06, + "loss": 0.3478, + "num_input_tokens_seen": 234900752, + "step": 1359 + }, + { + "epoch": 0.5204902336269629, + "loss": 0.2942076325416565, + "loss_ce": 0.020770136266946793, + "loss_iou": 1.0545668601989746, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 234900752, + "step": 1359 + }, + { + "epoch": 0.5208732286480275, + "grad_norm": 38.08271812322465, + "learning_rate": 5e-06, + "loss": 0.2165, + "num_input_tokens_seen": 235073496, + "step": 1360 + }, + { + "epoch": 0.5208732286480275, + "loss": 0.2514550983905792, + "loss_ce": 0.020253915339708328, + "loss_iou": 1.036755084991455, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 235073496, + "step": 1360 + }, + { + "epoch": 0.5212562236690923, + "grad_norm": 48.7808133245496, + "learning_rate": 5e-06, + "loss": 0.3216, + "num_input_tokens_seen": 235246760, + "step": 1361 + }, + { + "epoch": 0.5212562236690923, + "loss": 0.2707056403160095, + "loss_ce": 0.02033940888941288, + "loss_iou": 1.0714623928070068, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 235246760, + "step": 1361 + }, + { + "epoch": 0.5216392186901571, + "grad_norm": 43.49549644385844, + "learning_rate": 5e-06, + "loss": 0.2786, + "num_input_tokens_seen": 235419320, + "step": 1362 + }, + { + "epoch": 0.5216392186901571, + "loss": 0.26690515875816345, + "loss_ce": 0.022093143314123154, + "loss_iou": 1.0660336017608643, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 235419320, + "step": 1362 + }, + { + "epoch": 0.5220222137112217, + "grad_norm": 29.674097574665602, + "learning_rate": 5e-06, + "loss": 0.3186, + "num_input_tokens_seen": 235589272, + "step": 1363 + }, + { + "epoch": 0.5220222137112217, + "loss": 0.3059327006340027, + "loss_ce": 0.019799869507551193, + "loss_iou": 1.036852478981018, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 235589272, + "step": 1363 + }, + { + "epoch": 0.5224052087322865, + "grad_norm": 20.00999596008343, + "learning_rate": 5e-06, + "loss": 0.1837, + "num_input_tokens_seen": 235762120, + "step": 1364 + }, + { + "epoch": 0.5224052087322865, + "loss": 0.1387934386730194, + "loss_ce": 0.02075144834816456, + "loss_iou": 1.0177533626556396, + "loss_num": 0.1181640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 235762120, + "step": 1364 + }, + { + "epoch": 0.5227882037533512, + "grad_norm": 35.35985928537426, + "learning_rate": 5e-06, + "loss": 0.2153, + "num_input_tokens_seen": 235934800, + "step": 1365 + }, + { + "epoch": 0.5227882037533512, + "loss": 0.24349024891853333, + "loss_ce": 0.021322278305888176, + "loss_iou": 1.007620096206665, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 235934800, + "step": 1365 + }, + { + "epoch": 0.523171198774416, + "grad_norm": 37.38076587715216, + "learning_rate": 5e-06, + "loss": 0.2603, + "num_input_tokens_seen": 236107896, + "step": 1366 + }, + { + "epoch": 0.523171198774416, + "loss": 0.2549581229686737, + "loss_ce": 0.021681763231754303, + "loss_iou": 1.0397226810455322, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 236107896, + "step": 1366 + }, + { + "epoch": 0.5235541937954806, + "grad_norm": 28.733650081221064, + "learning_rate": 5e-06, + "loss": 0.2669, + "num_input_tokens_seen": 236280776, + "step": 1367 + }, + { + "epoch": 0.5235541937954806, + "loss": 0.26827141642570496, + "loss_ce": 0.022421810775995255, + "loss_iou": 1.0088081359863281, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 236280776, + "step": 1367 + }, + { + "epoch": 0.5239371888165454, + "grad_norm": 36.763076262248894, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 236453704, + "step": 1368 + }, + { + "epoch": 0.5239371888165454, + "loss": 0.24433454871177673, + "loss_ce": 0.022349681705236435, + "loss_iou": 1.0200729370117188, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 236453704, + "step": 1368 + }, + { + "epoch": 0.5243201838376101, + "grad_norm": 54.352311727641876, + "learning_rate": 5e-06, + "loss": 0.2858, + "num_input_tokens_seen": 236626752, + "step": 1369 + }, + { + "epoch": 0.5243201838376101, + "loss": 0.2584747076034546, + "loss_ce": 0.021780386567115784, + "loss_iou": 1.018236756324768, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 236626752, + "step": 1369 + }, + { + "epoch": 0.5247031788586748, + "grad_norm": 39.28697472365648, + "learning_rate": 5e-06, + "loss": 0.2362, + "num_input_tokens_seen": 236799888, + "step": 1370 + }, + { + "epoch": 0.5247031788586748, + "loss": 0.24799107015132904, + "loss_ce": 0.022649260237812996, + "loss_iou": 1.01529860496521, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 236799888, + "step": 1370 + }, + { + "epoch": 0.5250861738797395, + "grad_norm": 39.85297075105549, + "learning_rate": 5e-06, + "loss": 0.2601, + "num_input_tokens_seen": 236972848, + "step": 1371 + }, + { + "epoch": 0.5250861738797395, + "loss": 0.2791779339313507, + "loss_ce": 0.020510949194431305, + "loss_iou": 1.0840009450912476, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 236972848, + "step": 1371 + }, + { + "epoch": 0.5254691689008043, + "grad_norm": 23.883892009766925, + "learning_rate": 5e-06, + "loss": 0.2045, + "num_input_tokens_seen": 237145664, + "step": 1372 + }, + { + "epoch": 0.5254691689008043, + "loss": 0.21139544248580933, + "loss_ce": 0.020050212740898132, + "loss_iou": 1.0639290809631348, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 237145664, + "step": 1372 + }, + { + "epoch": 0.5258521639218691, + "grad_norm": 48.69033583444745, + "learning_rate": 5e-06, + "loss": 0.2613, + "num_input_tokens_seen": 237318480, + "step": 1373 + }, + { + "epoch": 0.5258521639218691, + "loss": 0.23347464203834534, + "loss_ce": 0.019729536026716232, + "loss_iou": 1.0228080749511719, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 237318480, + "step": 1373 + }, + { + "epoch": 0.5262351589429337, + "grad_norm": 33.13288312093415, + "learning_rate": 5e-06, + "loss": 0.2333, + "num_input_tokens_seen": 237491280, + "step": 1374 + }, + { + "epoch": 0.5262351589429337, + "loss": 0.1887282282114029, + "loss_ce": 0.020820513367652893, + "loss_iou": 1.0077931880950928, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 237491280, + "step": 1374 + }, + { + "epoch": 0.5266181539639985, + "grad_norm": 21.103176966025885, + "learning_rate": 5e-06, + "loss": 0.2045, + "num_input_tokens_seen": 237664680, + "step": 1375 + }, + { + "epoch": 0.5266181539639985, + "loss": 0.2112509310245514, + "loss_ce": 0.02008882910013199, + "loss_iou": 1.010251760482788, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 237664680, + "step": 1375 + }, + { + "epoch": 0.5270011489850632, + "grad_norm": 34.49932971770472, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 237837416, + "step": 1376 + }, + { + "epoch": 0.5270011489850632, + "loss": 0.16933594644069672, + "loss_ce": 0.020043957978487015, + "loss_iou": 1.0145107507705688, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 237837416, + "step": 1376 + }, + { + "epoch": 0.5273841440061279, + "grad_norm": 47.41067228375037, + "learning_rate": 5e-06, + "loss": 0.2119, + "num_input_tokens_seen": 238010416, + "step": 1377 + }, + { + "epoch": 0.5273841440061279, + "loss": 0.2054481953382492, + "loss_ce": 0.020755819976329803, + "loss_iou": 1.0072085857391357, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 238010416, + "step": 1377 + }, + { + "epoch": 0.5277671390271926, + "grad_norm": 39.93611250459804, + "learning_rate": 5e-06, + "loss": 0.2428, + "num_input_tokens_seen": 238183624, + "step": 1378 + }, + { + "epoch": 0.5277671390271926, + "loss": 0.22903317213058472, + "loss_ce": 0.02218502014875412, + "loss_iou": 1.0179922580718994, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 238183624, + "step": 1378 + }, + { + "epoch": 0.5281501340482574, + "grad_norm": 27.4562870797303, + "learning_rate": 5e-06, + "loss": 0.2511, + "num_input_tokens_seen": 238356984, + "step": 1379 + }, + { + "epoch": 0.5281501340482574, + "loss": 0.2706752419471741, + "loss_ce": 0.021285587921738625, + "loss_iou": 1.0386700630187988, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 238356984, + "step": 1379 + }, + { + "epoch": 0.5285331290693221, + "grad_norm": 27.974109028105044, + "learning_rate": 5e-06, + "loss": 0.2066, + "num_input_tokens_seen": 238530016, + "step": 1380 + }, + { + "epoch": 0.5285331290693221, + "loss": 0.2735742926597595, + "loss_ce": 0.020034248009324074, + "loss_iou": 1.0495893955230713, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 238530016, + "step": 1380 + }, + { + "epoch": 0.5289161240903868, + "grad_norm": 45.21358194144243, + "learning_rate": 5e-06, + "loss": 0.1966, + "num_input_tokens_seen": 238703256, + "step": 1381 + }, + { + "epoch": 0.5289161240903868, + "loss": 0.20240136981010437, + "loss_ce": 0.021065907552838326, + "loss_iou": 1.013717770576477, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 238703256, + "step": 1381 + }, + { + "epoch": 0.5292991191114516, + "grad_norm": 42.50025687762121, + "learning_rate": 5e-06, + "loss": 0.3085, + "num_input_tokens_seen": 238876048, + "step": 1382 + }, + { + "epoch": 0.5292991191114516, + "loss": 0.29392093420028687, + "loss_ce": 0.021826203912496567, + "loss_iou": 1.034827709197998, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 238876048, + "step": 1382 + }, + { + "epoch": 0.5296821141325163, + "grad_norm": 30.425252298057856, + "learning_rate": 5e-06, + "loss": 0.2059, + "num_input_tokens_seen": 239049176, + "step": 1383 + }, + { + "epoch": 0.5296821141325163, + "loss": 0.2247311770915985, + "loss_ce": 0.02093479037284851, + "loss_iou": 1.028517723083496, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 239049176, + "step": 1383 + }, + { + "epoch": 0.530065109153581, + "grad_norm": 33.61457679214779, + "learning_rate": 5e-06, + "loss": 0.2666, + "num_input_tokens_seen": 239222136, + "step": 1384 + }, + { + "epoch": 0.530065109153581, + "loss": 0.2373058795928955, + "loss_ce": 0.022034896537661552, + "loss_iou": 1.0021536350250244, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 239222136, + "step": 1384 + }, + { + "epoch": 0.5304481041746457, + "grad_norm": 39.7211041095127, + "learning_rate": 5e-06, + "loss": 0.2564, + "num_input_tokens_seen": 239395320, + "step": 1385 + }, + { + "epoch": 0.5304481041746457, + "loss": 0.2629600167274475, + "loss_ce": 0.01991802640259266, + "loss_iou": 1.1126071214675903, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 239395320, + "step": 1385 + }, + { + "epoch": 0.5308310991957105, + "grad_norm": 46.416829273045806, + "learning_rate": 5e-06, + "loss": 0.33, + "num_input_tokens_seen": 239568768, + "step": 1386 + }, + { + "epoch": 0.5308310991957105, + "loss": 0.27796030044555664, + "loss_ce": 0.022345073521137238, + "loss_iou": 1.0250229835510254, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 239568768, + "step": 1386 + }, + { + "epoch": 0.5312140942167752, + "grad_norm": 31.069103071591833, + "learning_rate": 5e-06, + "loss": 0.2698, + "num_input_tokens_seen": 239741832, + "step": 1387 + }, + { + "epoch": 0.5312140942167752, + "loss": 0.3004428446292877, + "loss_ce": 0.02065768837928772, + "loss_iou": 1.1097912788391113, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 239741832, + "step": 1387 + }, + { + "epoch": 0.5315970892378399, + "grad_norm": 26.256177961290657, + "learning_rate": 5e-06, + "loss": 0.2724, + "num_input_tokens_seen": 239914976, + "step": 1388 + }, + { + "epoch": 0.5315970892378399, + "loss": 0.2281334102153778, + "loss_ce": 0.021834567189216614, + "loss_iou": 1.005028486251831, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 239914976, + "step": 1388 + }, + { + "epoch": 0.5319800842589046, + "grad_norm": 28.352365425330024, + "learning_rate": 5e-06, + "loss": 0.3002, + "num_input_tokens_seen": 240087576, + "step": 1389 + }, + { + "epoch": 0.5319800842589046, + "loss": 0.24115517735481262, + "loss_ce": 0.020146869122982025, + "loss_iou": 1.0026452541351318, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 240087576, + "step": 1389 + }, + { + "epoch": 0.5323630792799694, + "grad_norm": 33.1114051606871, + "learning_rate": 5e-06, + "loss": 0.2046, + "num_input_tokens_seen": 240260296, + "step": 1390 + }, + { + "epoch": 0.5323630792799694, + "loss": 0.237400084733963, + "loss_ce": 0.02023700624704361, + "loss_iou": 1.0121262073516846, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 240260296, + "step": 1390 + }, + { + "epoch": 0.532746074301034, + "grad_norm": 43.21014613584952, + "learning_rate": 5e-06, + "loss": 0.271, + "num_input_tokens_seen": 240433432, + "step": 1391 + }, + { + "epoch": 0.532746074301034, + "loss": 0.3160042464733124, + "loss_ce": 0.02034994773566723, + "loss_iou": 1.059490442276001, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 240433432, + "step": 1391 + }, + { + "epoch": 0.5331290693220988, + "grad_norm": 28.60585062243727, + "learning_rate": 5e-06, + "loss": 0.2541, + "num_input_tokens_seen": 240606648, + "step": 1392 + }, + { + "epoch": 0.5331290693220988, + "loss": 0.2064480185508728, + "loss_ce": 0.021145276725292206, + "loss_iou": 1.0031088590621948, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 240606648, + "step": 1392 + }, + { + "epoch": 0.5335120643431636, + "grad_norm": 24.29340048658995, + "learning_rate": 5e-06, + "loss": 0.2703, + "num_input_tokens_seen": 240779056, + "step": 1393 + }, + { + "epoch": 0.5335120643431636, + "loss": 0.24477875232696533, + "loss_ce": 0.021756291389465332, + "loss_iou": 1.0061001777648926, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 240779056, + "step": 1393 + }, + { + "epoch": 0.5338950593642283, + "grad_norm": 34.78546575491859, + "learning_rate": 5e-06, + "loss": 0.2491, + "num_input_tokens_seen": 240952344, + "step": 1394 + }, + { + "epoch": 0.5338950593642283, + "loss": 0.23100616037845612, + "loss_ce": 0.021655568853020668, + "loss_iou": 1.0422629117965698, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 240952344, + "step": 1394 + }, + { + "epoch": 0.534278054385293, + "grad_norm": 42.96399669803133, + "learning_rate": 5e-06, + "loss": 0.3026, + "num_input_tokens_seen": 241125424, + "step": 1395 + }, + { + "epoch": 0.534278054385293, + "loss": 0.3512245714664459, + "loss_ce": 0.022367149591445923, + "loss_iou": 1.081865668296814, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 241125424, + "step": 1395 + }, + { + "epoch": 0.5346610494063577, + "grad_norm": 31.602337959979078, + "learning_rate": 5e-06, + "loss": 0.2021, + "num_input_tokens_seen": 241298240, + "step": 1396 + }, + { + "epoch": 0.5346610494063577, + "loss": 0.20335182547569275, + "loss_ce": 0.020673586055636406, + "loss_iou": 1.04007089138031, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 241298240, + "step": 1396 + }, + { + "epoch": 0.5350440444274225, + "grad_norm": 35.38432134211127, + "learning_rate": 5e-06, + "loss": 0.2385, + "num_input_tokens_seen": 241471400, + "step": 1397 + }, + { + "epoch": 0.5350440444274225, + "loss": 0.26554152369499207, + "loss_ce": 0.021034665405750275, + "loss_iou": 1.047926664352417, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 241471400, + "step": 1397 + }, + { + "epoch": 0.5354270394484871, + "grad_norm": 37.54265941196159, + "learning_rate": 5e-06, + "loss": 0.2632, + "num_input_tokens_seen": 241644472, + "step": 1398 + }, + { + "epoch": 0.5354270394484871, + "loss": 0.230668842792511, + "loss_ce": 0.020768944174051285, + "loss_iou": 1.0046364068984985, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 241644472, + "step": 1398 + }, + { + "epoch": 0.5358100344695519, + "grad_norm": 36.60518748131619, + "learning_rate": 5e-06, + "loss": 0.2057, + "num_input_tokens_seen": 241817608, + "step": 1399 + }, + { + "epoch": 0.5358100344695519, + "loss": 0.20266515016555786, + "loss_ce": 0.020231062546372414, + "loss_iou": 1.0437780618667603, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 241817608, + "step": 1399 + }, + { + "epoch": 0.5361930294906166, + "grad_norm": 30.71190131618502, + "learning_rate": 5e-06, + "loss": 0.1983, + "num_input_tokens_seen": 241990056, + "step": 1400 + }, + { + "epoch": 0.5361930294906166, + "loss": 0.24845024943351746, + "loss_ce": 0.019843079149723053, + "loss_iou": 1.0894261598587036, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 241990056, + "step": 1400 + }, + { + "epoch": 0.5365760245116813, + "grad_norm": 34.43627927450469, + "learning_rate": 5e-06, + "loss": 0.1957, + "num_input_tokens_seen": 242163112, + "step": 1401 + }, + { + "epoch": 0.5365760245116813, + "loss": 0.222532257437706, + "loss_ce": 0.022520046681165695, + "loss_iou": 1.0313280820846558, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 242163112, + "step": 1401 + }, + { + "epoch": 0.536959019532746, + "grad_norm": 38.91070144458712, + "learning_rate": 5e-06, + "loss": 0.2387, + "num_input_tokens_seen": 242336248, + "step": 1402 + }, + { + "epoch": 0.536959019532746, + "loss": 0.24522612988948822, + "loss_ce": 0.020555716007947922, + "loss_iou": 1.0357367992401123, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 242336248, + "step": 1402 + }, + { + "epoch": 0.5373420145538108, + "grad_norm": 42.22580467490461, + "learning_rate": 5e-06, + "loss": 0.2548, + "num_input_tokens_seen": 242509344, + "step": 1403 + }, + { + "epoch": 0.5373420145538108, + "loss": 0.2751389145851135, + "loss_ce": 0.021232664585113525, + "loss_iou": 1.1418867111206055, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 242509344, + "step": 1403 + }, + { + "epoch": 0.5377250095748756, + "grad_norm": 25.80160854409078, + "learning_rate": 5e-06, + "loss": 0.2568, + "num_input_tokens_seen": 242682240, + "step": 1404 + }, + { + "epoch": 0.5377250095748756, + "loss": 0.21290144324302673, + "loss_ce": 0.01978621445596218, + "loss_iou": 1.0695327520370483, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 242682240, + "step": 1404 + }, + { + "epoch": 0.5381080045959402, + "grad_norm": 29.013355076490416, + "learning_rate": 5e-06, + "loss": 0.1944, + "num_input_tokens_seen": 242855424, + "step": 1405 + }, + { + "epoch": 0.5381080045959402, + "loss": 0.18262693285942078, + "loss_ce": 0.021372050046920776, + "loss_iou": 1.0200414657592773, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 242855424, + "step": 1405 + }, + { + "epoch": 0.538490999617005, + "grad_norm": 34.489133825620655, + "learning_rate": 5e-06, + "loss": 0.2304, + "num_input_tokens_seen": 243024776, + "step": 1406 + }, + { + "epoch": 0.538490999617005, + "loss": 0.21656718850135803, + "loss_ce": 0.01978985220193863, + "loss_iou": 1.0040063858032227, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 243024776, + "step": 1406 + }, + { + "epoch": 0.5388739946380697, + "grad_norm": 40.11347155892702, + "learning_rate": 5e-06, + "loss": 0.2875, + "num_input_tokens_seen": 243197944, + "step": 1407 + }, + { + "epoch": 0.5388739946380697, + "loss": 0.24362048506736755, + "loss_ce": 0.020109744742512703, + "loss_iou": 1.0843185186386108, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 243197944, + "step": 1407 + }, + { + "epoch": 0.5392569896591344, + "grad_norm": 29.6068668671716, + "learning_rate": 5e-06, + "loss": 0.2132, + "num_input_tokens_seen": 243371016, + "step": 1408 + }, + { + "epoch": 0.5392569896591344, + "loss": 0.25290000438690186, + "loss_ce": 0.0213326346129179, + "loss_iou": 1.2228739261627197, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 243371016, + "step": 1408 + }, + { + "epoch": 0.5396399846801991, + "grad_norm": 25.28544123695245, + "learning_rate": 5e-06, + "loss": 0.2863, + "num_input_tokens_seen": 243544064, + "step": 1409 + }, + { + "epoch": 0.5396399846801991, + "loss": 0.24945750832557678, + "loss_ce": 0.019232889637351036, + "loss_iou": 1.1065994501113892, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 243544064, + "step": 1409 + }, + { + "epoch": 0.5400229797012639, + "grad_norm": 35.31818721965278, + "learning_rate": 5e-06, + "loss": 0.281, + "num_input_tokens_seen": 243717064, + "step": 1410 + }, + { + "epoch": 0.5400229797012639, + "loss": 0.29192155599594116, + "loss_ce": 0.020437180995941162, + "loss_iou": 1.0520268678665161, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 243717064, + "step": 1410 + }, + { + "epoch": 0.5404059747223287, + "grad_norm": 36.389040810693295, + "learning_rate": 5e-06, + "loss": 0.2786, + "num_input_tokens_seen": 243889960, + "step": 1411 + }, + { + "epoch": 0.5404059747223287, + "loss": 0.2415573000907898, + "loss_ce": 0.021037276834249496, + "loss_iou": 1.1709904670715332, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 243889960, + "step": 1411 + }, + { + "epoch": 0.5407889697433933, + "grad_norm": 24.93525037623869, + "learning_rate": 5e-06, + "loss": 0.2767, + "num_input_tokens_seen": 244062832, + "step": 1412 + }, + { + "epoch": 0.5407889697433933, + "loss": 0.3619447946548462, + "loss_ce": 0.021856890991330147, + "loss_iou": 1.0857049226760864, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 244062832, + "step": 1412 + }, + { + "epoch": 0.5411719647644581, + "grad_norm": 34.52752507359736, + "learning_rate": 5e-06, + "loss": 0.2621, + "num_input_tokens_seen": 244235680, + "step": 1413 + }, + { + "epoch": 0.5411719647644581, + "loss": 0.2576747536659241, + "loss_ce": 0.01963762938976288, + "loss_iou": 1.0238568782806396, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 244235680, + "step": 1413 + }, + { + "epoch": 0.5415549597855228, + "grad_norm": 22.679668038653407, + "learning_rate": 5e-06, + "loss": 0.2084, + "num_input_tokens_seen": 244408328, + "step": 1414 + }, + { + "epoch": 0.5415549597855228, + "loss": 0.1996358335018158, + "loss_ce": 0.018605569377541542, + "loss_iou": 1.0567339658737183, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 244408328, + "step": 1414 + }, + { + "epoch": 0.5419379548065875, + "grad_norm": 31.1917342671544, + "learning_rate": 5e-06, + "loss": 0.2115, + "num_input_tokens_seen": 244581744, + "step": 1415 + }, + { + "epoch": 0.5419379548065875, + "loss": 0.21254712343215942, + "loss_ce": 0.02150709182024002, + "loss_iou": 1.0056824684143066, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 244581744, + "step": 1415 + }, + { + "epoch": 0.5423209498276522, + "grad_norm": 26.335178729234826, + "learning_rate": 5e-06, + "loss": 0.2533, + "num_input_tokens_seen": 244754712, + "step": 1416 + }, + { + "epoch": 0.5423209498276522, + "loss": 0.27759096026420593, + "loss_ce": 0.020693987607955933, + "loss_iou": 1.1383533477783203, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 244754712, + "step": 1416 + }, + { + "epoch": 0.542703944848717, + "grad_norm": 25.083193859683952, + "learning_rate": 5e-06, + "loss": 0.2966, + "num_input_tokens_seen": 244927200, + "step": 1417 + }, + { + "epoch": 0.542703944848717, + "loss": 0.18403224647045135, + "loss_ce": 0.020763196051120758, + "loss_iou": -3.9631676720860365e+17, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 244927200, + "step": 1417 + }, + { + "epoch": 0.5430869398697817, + "grad_norm": 26.016439424328823, + "learning_rate": 5e-06, + "loss": 0.2138, + "num_input_tokens_seen": 245100368, + "step": 1418 + }, + { + "epoch": 0.5430869398697817, + "loss": 0.1996343731880188, + "loss_ce": 0.018909268081188202, + "loss_iou": 1.039953351020813, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 245100368, + "step": 1418 + }, + { + "epoch": 0.5434699348908464, + "grad_norm": 28.21882526564885, + "learning_rate": 5e-06, + "loss": 0.2513, + "num_input_tokens_seen": 245272672, + "step": 1419 + }, + { + "epoch": 0.5434699348908464, + "loss": 0.22828856110572815, + "loss_ce": 0.021013177931308746, + "loss_iou": 1.0036940574645996, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 245272672, + "step": 1419 + }, + { + "epoch": 0.5438529299119111, + "grad_norm": 26.678920714651735, + "learning_rate": 5e-06, + "loss": 0.2133, + "num_input_tokens_seen": 245445504, + "step": 1420 + }, + { + "epoch": 0.5438529299119111, + "loss": 0.24139827489852905, + "loss_ce": 0.019413433969020844, + "loss_iou": 1.034382700920105, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 245445504, + "step": 1420 + }, + { + "epoch": 0.5442359249329759, + "grad_norm": 41.33102167850886, + "learning_rate": 5e-06, + "loss": 0.2339, + "num_input_tokens_seen": 245618696, + "step": 1421 + }, + { + "epoch": 0.5442359249329759, + "loss": 0.23862941563129425, + "loss_ce": 0.019513199105858803, + "loss_iou": 1.044738531112671, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 245618696, + "step": 1421 + }, + { + "epoch": 0.5446189199540405, + "grad_norm": 37.35848943267274, + "learning_rate": 5e-06, + "loss": 0.2347, + "num_input_tokens_seen": 245791336, + "step": 1422 + }, + { + "epoch": 0.5446189199540405, + "loss": 0.2276022732257843, + "loss_ce": 0.020326875150203705, + "loss_iou": 1.112799048423767, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 245791336, + "step": 1422 + }, + { + "epoch": 0.5450019149751053, + "grad_norm": 34.4538962509511, + "learning_rate": 5e-06, + "loss": 0.2323, + "num_input_tokens_seen": 245964384, + "step": 1423 + }, + { + "epoch": 0.5450019149751053, + "loss": 0.2902798652648926, + "loss_ce": 0.02306796796619892, + "loss_iou": 1.0727440118789673, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 245964384, + "step": 1423 + }, + { + "epoch": 0.5453849099961701, + "grad_norm": 29.81776513589979, + "learning_rate": 5e-06, + "loss": 0.1872, + "num_input_tokens_seen": 246137000, + "step": 1424 + }, + { + "epoch": 0.5453849099961701, + "loss": 0.1663355976343155, + "loss_ce": 0.019851218909025192, + "loss_iou": 1.002077579498291, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 246137000, + "step": 1424 + }, + { + "epoch": 0.5457679050172348, + "grad_norm": 47.50351566884468, + "learning_rate": 5e-06, + "loss": 0.2471, + "num_input_tokens_seen": 246309744, + "step": 1425 + }, + { + "epoch": 0.5457679050172348, + "loss": 0.26415950059890747, + "loss_ce": 0.021972009912133217, + "loss_iou": 1.1133008003234863, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 246309744, + "step": 1425 + }, + { + "epoch": 0.5461509000382995, + "grad_norm": 35.42252092555776, + "learning_rate": 5e-06, + "loss": 0.2452, + "num_input_tokens_seen": 246482224, + "step": 1426 + }, + { + "epoch": 0.5461509000382995, + "loss": 0.22726401686668396, + "loss_ce": 0.01931723766028881, + "loss_iou": 1.0251805782318115, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 246482224, + "step": 1426 + }, + { + "epoch": 0.5465338950593642, + "grad_norm": 26.07618375093433, + "learning_rate": 5e-06, + "loss": 0.2365, + "num_input_tokens_seen": 246655184, + "step": 1427 + }, + { + "epoch": 0.5465338950593642, + "loss": 0.22888973355293274, + "loss_ce": 0.020820889621973038, + "loss_iou": 1.027405023574829, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 246655184, + "step": 1427 + }, + { + "epoch": 0.546916890080429, + "grad_norm": 27.74269664167861, + "learning_rate": 5e-06, + "loss": 0.1659, + "num_input_tokens_seen": 246828488, + "step": 1428 + }, + { + "epoch": 0.546916890080429, + "loss": 0.1319091022014618, + "loss_ce": 0.02119133621454239, + "loss_iou": 1.0033646821975708, + "loss_num": 0.11083984375, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 246828488, + "step": 1428 + }, + { + "epoch": 0.5472998851014936, + "grad_norm": 47.618605413464145, + "learning_rate": 5e-06, + "loss": 0.28, + "num_input_tokens_seen": 247001456, + "step": 1429 + }, + { + "epoch": 0.5472998851014936, + "loss": 0.24275851249694824, + "loss_ce": 0.0203463826328516, + "loss_iou": 1.0035829544067383, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 247001456, + "step": 1429 + }, + { + "epoch": 0.5476828801225584, + "grad_norm": 95.07469045236165, + "learning_rate": 5e-06, + "loss": 0.2775, + "num_input_tokens_seen": 247174344, + "step": 1430 + }, + { + "epoch": 0.5476828801225584, + "loss": 0.275626540184021, + "loss_ce": 0.019523024559020996, + "loss_iou": 1.013465166091919, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 247174344, + "step": 1430 + }, + { + "epoch": 0.5480658751436231, + "grad_norm": 39.65837448926188, + "learning_rate": 5e-06, + "loss": 0.2261, + "num_input_tokens_seen": 247347136, + "step": 1431 + }, + { + "epoch": 0.5480658751436231, + "loss": 0.1636388599872589, + "loss_ce": 0.019656913354992867, + "loss_iou": 1.0006221532821655, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 247347136, + "step": 1431 + }, + { + "epoch": 0.5484488701646879, + "grad_norm": 26.931540948457975, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 247520504, + "step": 1432 + }, + { + "epoch": 0.5484488701646879, + "loss": 0.20612883567810059, + "loss_ce": 0.02143644355237484, + "loss_iou": 1.0078787803649902, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 247520504, + "step": 1432 + }, + { + "epoch": 0.5488318651857526, + "grad_norm": 28.208258312522524, + "learning_rate": 5e-06, + "loss": 0.2821, + "num_input_tokens_seen": 247693176, + "step": 1433 + }, + { + "epoch": 0.5488318651857526, + "loss": 0.3653810918331146, + "loss_ce": 0.019983142614364624, + "loss_iou": 1.0598523616790771, + "loss_num": 0.345703125, + "loss_xval": 0.345703125, + "num_input_tokens_seen": 247693176, + "step": 1433 + }, + { + "epoch": 0.5492148602068173, + "grad_norm": 29.833269339360154, + "learning_rate": 5e-06, + "loss": 0.219, + "num_input_tokens_seen": 247866072, + "step": 1434 + }, + { + "epoch": 0.5492148602068173, + "loss": 0.1977815181016922, + "loss_ce": 0.019009539857506752, + "loss_iou": 1.0558912754058838, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 247866072, + "step": 1434 + }, + { + "epoch": 0.5495978552278821, + "grad_norm": 33.24822640975801, + "learning_rate": 5e-06, + "loss": 0.2243, + "num_input_tokens_seen": 248038728, + "step": 1435 + }, + { + "epoch": 0.5495978552278821, + "loss": 0.2502945363521576, + "loss_ce": 0.01909336820244789, + "loss_iou": 1.1917321681976318, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 248038728, + "step": 1435 + }, + { + "epoch": 0.5499808502489467, + "grad_norm": 21.834141828576332, + "learning_rate": 5e-06, + "loss": 0.2581, + "num_input_tokens_seen": 248211784, + "step": 1436 + }, + { + "epoch": 0.5499808502489467, + "loss": 0.1583734154701233, + "loss_ce": 0.0202508382499218, + "loss_iou": 1.0127925872802734, + "loss_num": 0.1376953125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 248211784, + "step": 1436 + }, + { + "epoch": 0.5503638452700115, + "grad_norm": 23.653747309977522, + "learning_rate": 5e-06, + "loss": 0.2427, + "num_input_tokens_seen": 248384840, + "step": 1437 + }, + { + "epoch": 0.5503638452700115, + "loss": 0.186224102973938, + "loss_ce": 0.02020847424864769, + "loss_iou": 1.003072738647461, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 248384840, + "step": 1437 + }, + { + "epoch": 0.5507468402910762, + "grad_norm": 26.591948475025305, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 248558136, + "step": 1438 + }, + { + "epoch": 0.5507468402910762, + "loss": 0.1620870679616928, + "loss_ce": 0.02115688845515251, + "loss_iou": 1.0057390928268433, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 248558136, + "step": 1438 + }, + { + "epoch": 0.551129835312141, + "grad_norm": 44.742800345656015, + "learning_rate": 5e-06, + "loss": 0.245, + "num_input_tokens_seen": 248730896, + "step": 1439 + }, + { + "epoch": 0.551129835312141, + "loss": 0.2673144042491913, + "loss_ce": 0.020488237962126732, + "loss_iou": 1.0043540000915527, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 248730896, + "step": 1439 + }, + { + "epoch": 0.5515128303332056, + "grad_norm": 37.738405534998805, + "learning_rate": 5e-06, + "loss": 0.2234, + "num_input_tokens_seen": 248900032, + "step": 1440 + }, + { + "epoch": 0.5515128303332056, + "loss": 0.2288774847984314, + "loss_ce": 0.0203813835978508, + "loss_iou": 1.0389633178710938, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 248900032, + "step": 1440 + }, + { + "epoch": 0.5518958253542704, + "grad_norm": 28.895011673028538, + "learning_rate": 5e-06, + "loss": 0.253, + "num_input_tokens_seen": 249073008, + "step": 1441 + }, + { + "epoch": 0.5518958253542704, + "loss": 0.18834465742111206, + "loss_ce": 0.019765563309192657, + "loss_iou": 0.974697470664978, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 249073008, + "step": 1441 + }, + { + "epoch": 0.5522788203753352, + "grad_norm": 18.81039078862195, + "learning_rate": 5e-06, + "loss": 0.244, + "num_input_tokens_seen": 249246200, + "step": 1442 + }, + { + "epoch": 0.5522788203753352, + "loss": 0.2613523006439209, + "loss_ce": 0.018371347337961197, + "loss_iou": 1.048504114151001, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 249246200, + "step": 1442 + }, + { + "epoch": 0.5526618153963998, + "grad_norm": 30.81313041119159, + "learning_rate": 5e-06, + "loss": 0.2541, + "num_input_tokens_seen": 249418984, + "step": 1443 + }, + { + "epoch": 0.5526618153963998, + "loss": 0.3366760313510895, + "loss_ce": 0.020269788801670074, + "loss_iou": 1.0565412044525146, + "loss_num": 0.31640625, + "loss_xval": 0.31640625, + "num_input_tokens_seen": 249418984, + "step": 1443 + }, + { + "epoch": 0.5530448104174646, + "grad_norm": 20.92797494782328, + "learning_rate": 5e-06, + "loss": 0.2445, + "num_input_tokens_seen": 249591752, + "step": 1444 + }, + { + "epoch": 0.5530448104174646, + "loss": 0.25018686056137085, + "loss_ce": 0.020450517535209656, + "loss_iou": 1.016252040863037, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 249591752, + "step": 1444 + }, + { + "epoch": 0.5534278054385293, + "grad_norm": 33.838335380949694, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 249764864, + "step": 1445 + }, + { + "epoch": 0.5534278054385293, + "loss": 0.22452612221240997, + "loss_ce": 0.022499756887555122, + "loss_iou": 1.0481488704681396, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 249764864, + "step": 1445 + }, + { + "epoch": 0.5538108004595941, + "grad_norm": 28.991763308037577, + "learning_rate": 5e-06, + "loss": 0.2625, + "num_input_tokens_seen": 249937608, + "step": 1446 + }, + { + "epoch": 0.5538108004595941, + "loss": 0.2906506061553955, + "loss_ce": 0.020142812281847, + "loss_iou": 1.282198429107666, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 249937608, + "step": 1446 + }, + { + "epoch": 0.5541937954806587, + "grad_norm": 29.089797794294615, + "learning_rate": 5e-06, + "loss": 0.26, + "num_input_tokens_seen": 250110232, + "step": 1447 + }, + { + "epoch": 0.5541937954806587, + "loss": 0.3229195773601532, + "loss_ce": 0.020551415160298347, + "loss_iou": 1.019213080406189, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 250110232, + "step": 1447 + }, + { + "epoch": 0.5545767905017235, + "grad_norm": 26.8909519813235, + "learning_rate": 5e-06, + "loss": 0.1862, + "num_input_tokens_seen": 250283600, + "step": 1448 + }, + { + "epoch": 0.5545767905017235, + "loss": 0.16345219314098358, + "loss_ce": 0.02044682390987873, + "loss_iou": 1.0251049995422363, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 250283600, + "step": 1448 + }, + { + "epoch": 0.5549597855227882, + "grad_norm": 30.580572657257935, + "learning_rate": 5e-06, + "loss": 0.2356, + "num_input_tokens_seen": 250455976, + "step": 1449 + }, + { + "epoch": 0.5549597855227882, + "loss": 0.26306387782096863, + "loss_ce": 0.020815353840589523, + "loss_iou": 1.0646202564239502, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 250455976, + "step": 1449 + }, + { + "epoch": 0.5553427805438529, + "grad_norm": 27.359346762967068, + "learning_rate": 5e-06, + "loss": 0.2284, + "num_input_tokens_seen": 250628712, + "step": 1450 + }, + { + "epoch": 0.5553427805438529, + "loss": 0.2822895050048828, + "loss_ce": 0.019166937097907066, + "loss_iou": 1.0252337455749512, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 250628712, + "step": 1450 + }, + { + "epoch": 0.5557257755649176, + "grad_norm": 24.65924185646727, + "learning_rate": 5e-06, + "loss": 0.2387, + "num_input_tokens_seen": 250801504, + "step": 1451 + }, + { + "epoch": 0.5557257755649176, + "loss": 0.28496530652046204, + "loss_ce": 0.020194800570607185, + "loss_iou": 1.07538640499115, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 250801504, + "step": 1451 + }, + { + "epoch": 0.5561087705859824, + "grad_norm": 16.90062559129393, + "learning_rate": 5e-06, + "loss": 0.1497, + "num_input_tokens_seen": 250974672, + "step": 1452 + }, + { + "epoch": 0.5561087705859824, + "loss": 0.13314805924892426, + "loss_ce": 0.021819941699504852, + "loss_iou": 1.0009329319000244, + "loss_num": 0.111328125, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 250974672, + "step": 1452 + }, + { + "epoch": 0.5564917656070472, + "grad_norm": 27.94039267193011, + "learning_rate": 5e-06, + "loss": 0.2496, + "num_input_tokens_seen": 251148072, + "step": 1453 + }, + { + "epoch": 0.5564917656070472, + "loss": 0.2612951397895813, + "loss_ce": 0.019901107996702194, + "loss_iou": 1.042182445526123, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 251148072, + "step": 1453 + }, + { + "epoch": 0.5568747606281118, + "grad_norm": 36.678368035743375, + "learning_rate": 5e-06, + "loss": 0.243, + "num_input_tokens_seen": 251321304, + "step": 1454 + }, + { + "epoch": 0.5568747606281118, + "loss": 0.2739933729171753, + "loss_ce": 0.02124677784740925, + "loss_iou": 1.0768338441848755, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 251321304, + "step": 1454 + }, + { + "epoch": 0.5572577556491766, + "grad_norm": 24.70238774410247, + "learning_rate": 5e-06, + "loss": 0.2202, + "num_input_tokens_seen": 251494048, + "step": 1455 + }, + { + "epoch": 0.5572577556491766, + "loss": 0.23047927021980286, + "loss_ce": 0.017466576769948006, + "loss_iou": 1.013255000114441, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 251494048, + "step": 1455 + }, + { + "epoch": 0.5576407506702413, + "grad_norm": 23.99901276583434, + "learning_rate": 5e-06, + "loss": 0.1982, + "num_input_tokens_seen": 251667488, + "step": 1456 + }, + { + "epoch": 0.5576407506702413, + "loss": 0.22782182693481445, + "loss_ce": 0.020729543641209602, + "loss_iou": 1.0163581371307373, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 251667488, + "step": 1456 + }, + { + "epoch": 0.558023745691306, + "grad_norm": 20.10364549368622, + "learning_rate": 5e-06, + "loss": 0.2347, + "num_input_tokens_seen": 251840240, + "step": 1457 + }, + { + "epoch": 0.558023745691306, + "loss": 0.3191094994544983, + "loss_ce": 0.0212579183280468, + "loss_iou": 0.8945208787918091, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 251840240, + "step": 1457 + }, + { + "epoch": 0.5584067407123707, + "grad_norm": 22.94158214370783, + "learning_rate": 5e-06, + "loss": 0.1973, + "num_input_tokens_seen": 252013032, + "step": 1458 + }, + { + "epoch": 0.5584067407123707, + "loss": 0.1668839454650879, + "loss_ce": 0.019545067101716995, + "loss_iou": 1.0783774852752686, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 252013032, + "step": 1458 + }, + { + "epoch": 0.5587897357334355, + "grad_norm": 27.846996712471338, + "learning_rate": 5e-06, + "loss": 0.2153, + "num_input_tokens_seen": 252186040, + "step": 1459 + }, + { + "epoch": 0.5587897357334355, + "loss": 0.23262223601341248, + "loss_ce": 0.01912127062678337, + "loss_iou": 1.2774409055709839, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 252186040, + "step": 1459 + }, + { + "epoch": 0.5591727307545002, + "grad_norm": 29.953167089807298, + "learning_rate": 5e-06, + "loss": 0.2447, + "num_input_tokens_seen": 252358920, + "step": 1460 + }, + { + "epoch": 0.5591727307545002, + "loss": 0.2146613597869873, + "loss_ce": 0.0197761133313179, + "loss_iou": 1.064612627029419, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 252358920, + "step": 1460 + }, + { + "epoch": 0.5595557257755649, + "grad_norm": 24.440971150018694, + "learning_rate": 5e-06, + "loss": 0.2266, + "num_input_tokens_seen": 252531824, + "step": 1461 + }, + { + "epoch": 0.5595557257755649, + "loss": 0.2598302960395813, + "loss_ce": 0.021304896101355553, + "loss_iou": 1.0376880168914795, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 252531824, + "step": 1461 + }, + { + "epoch": 0.5599387207966297, + "grad_norm": 27.876724687870354, + "learning_rate": 5e-06, + "loss": 0.2594, + "num_input_tokens_seen": 252704720, + "step": 1462 + }, + { + "epoch": 0.5599387207966297, + "loss": 0.28872719407081604, + "loss_ce": 0.019318006932735443, + "loss_iou": 1.0152583122253418, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 252704720, + "step": 1462 + }, + { + "epoch": 0.5603217158176944, + "grad_norm": 32.41463048543971, + "learning_rate": 5e-06, + "loss": 0.2042, + "num_input_tokens_seen": 252877360, + "step": 1463 + }, + { + "epoch": 0.5603217158176944, + "loss": 0.19869321584701538, + "loss_ce": 0.02071470394730568, + "loss_iou": 0.9860658049583435, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 252877360, + "step": 1463 + }, + { + "epoch": 0.5607047108387591, + "grad_norm": 31.18462843405718, + "learning_rate": 5e-06, + "loss": 0.2878, + "num_input_tokens_seen": 253050392, + "step": 1464 + }, + { + "epoch": 0.5607047108387591, + "loss": 0.29182857275009155, + "loss_ce": 0.020771419629454613, + "loss_iou": 1.0111753940582275, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 253050392, + "step": 1464 + }, + { + "epoch": 0.5610877058598238, + "grad_norm": 23.093980086732667, + "learning_rate": 5e-06, + "loss": 0.2851, + "num_input_tokens_seen": 253223208, + "step": 1465 + }, + { + "epoch": 0.5610877058598238, + "loss": 0.29920488595962524, + "loss_ce": 0.019663862884044647, + "loss_iou": 2.007282257080078, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 253223208, + "step": 1465 + }, + { + "epoch": 0.5614707008808886, + "grad_norm": 21.495583545383287, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 253396032, + "step": 1466 + }, + { + "epoch": 0.5614707008808886, + "loss": 0.16312876343727112, + "loss_ce": 0.019818205386400223, + "loss_iou": 1.0369665622711182, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 253396032, + "step": 1466 + }, + { + "epoch": 0.5618536959019532, + "grad_norm": 32.0839754181178, + "learning_rate": 5e-06, + "loss": 0.223, + "num_input_tokens_seen": 253568952, + "step": 1467 + }, + { + "epoch": 0.5618536959019532, + "loss": 0.24454674124717712, + "loss_ce": 0.020608752965927124, + "loss_iou": 1.0019643306732178, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 253568952, + "step": 1467 + }, + { + "epoch": 0.562236690923018, + "grad_norm": 30.212997661988872, + "learning_rate": 5e-06, + "loss": 0.1683, + "num_input_tokens_seen": 253741792, + "step": 1468 + }, + { + "epoch": 0.562236690923018, + "loss": 0.17628955841064453, + "loss_ce": 0.02058887481689453, + "loss_iou": 1.0021042823791504, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 253741792, + "step": 1468 + }, + { + "epoch": 0.5626196859440827, + "grad_norm": 37.497116361590386, + "learning_rate": 5e-06, + "loss": 0.2354, + "num_input_tokens_seen": 253914840, + "step": 1469 + }, + { + "epoch": 0.5626196859440827, + "loss": 0.24797192215919495, + "loss_ce": 0.020432859659194946, + "loss_iou": 0.9836539626121521, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 253914840, + "step": 1469 + }, + { + "epoch": 0.5630026809651475, + "grad_norm": 47.64589918729428, + "learning_rate": 5e-06, + "loss": 0.2739, + "num_input_tokens_seen": 254088168, + "step": 1470 + }, + { + "epoch": 0.5630026809651475, + "loss": 0.22957342863082886, + "loss_ce": 0.021443545818328857, + "loss_iou": 1.033742904663086, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 254088168, + "step": 1470 + }, + { + "epoch": 0.5633856759862121, + "grad_norm": 52.447713593223874, + "learning_rate": 5e-06, + "loss": 0.2729, + "num_input_tokens_seen": 254260984, + "step": 1471 + }, + { + "epoch": 0.5633856759862121, + "loss": 0.3072156012058258, + "loss_ce": 0.020716574043035507, + "loss_iou": 1.0662565231323242, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 254260984, + "step": 1471 + }, + { + "epoch": 0.5637686710072769, + "grad_norm": 27.162718678453754, + "learning_rate": 5e-06, + "loss": 0.208, + "num_input_tokens_seen": 254434440, + "step": 1472 + }, + { + "epoch": 0.5637686710072769, + "loss": 0.21323244273662567, + "loss_ce": 0.02030031383037567, + "loss_iou": 1.198883295059204, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 254434440, + "step": 1472 + }, + { + "epoch": 0.5641516660283417, + "grad_norm": 30.729533469850466, + "learning_rate": 5e-06, + "loss": 0.2193, + "num_input_tokens_seen": 254607128, + "step": 1473 + }, + { + "epoch": 0.5641516660283417, + "loss": 0.2197684943675995, + "loss_ce": 0.021160103380680084, + "loss_iou": 1.0076149702072144, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 254607128, + "step": 1473 + }, + { + "epoch": 0.5645346610494063, + "grad_norm": 41.532002399408285, + "learning_rate": 5e-06, + "loss": 0.2282, + "num_input_tokens_seen": 254780352, + "step": 1474 + }, + { + "epoch": 0.5645346610494063, + "loss": 0.21799220144748688, + "loss_ce": 0.020238302648067474, + "loss_iou": 1.0031582117080688, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 254780352, + "step": 1474 + }, + { + "epoch": 0.5649176560704711, + "grad_norm": 40.10343286688188, + "learning_rate": 5e-06, + "loss": 0.2617, + "num_input_tokens_seen": 254953736, + "step": 1475 + }, + { + "epoch": 0.5649176560704711, + "loss": 0.30094754695892334, + "loss_ce": 0.019758574664592743, + "loss_iou": 1.1032297611236572, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 254953736, + "step": 1475 + }, + { + "epoch": 0.5653006510915358, + "grad_norm": 29.5335538702408, + "learning_rate": 5e-06, + "loss": 0.242, + "num_input_tokens_seen": 255126792, + "step": 1476 + }, + { + "epoch": 0.5653006510915358, + "loss": 0.23481246829032898, + "loss_ce": 0.018015600740909576, + "loss_iou": -0.00921630859375, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 255126792, + "step": 1476 + }, + { + "epoch": 0.5656836461126006, + "grad_norm": 30.262688256639134, + "learning_rate": 5e-06, + "loss": 0.2554, + "num_input_tokens_seen": 255299688, + "step": 1477 + }, + { + "epoch": 0.5656836461126006, + "loss": 0.2591654360294342, + "loss_ce": 0.01978554204106331, + "loss_iou": 1.044213056564331, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 255299688, + "step": 1477 + }, + { + "epoch": 0.5660666411336652, + "grad_norm": 31.018493474825984, + "learning_rate": 5e-06, + "loss": 0.2725, + "num_input_tokens_seen": 255472976, + "step": 1478 + }, + { + "epoch": 0.5660666411336652, + "loss": 0.29889142513275146, + "loss_ce": 0.01965557411313057, + "loss_iou": 1.062753677368164, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 255472976, + "step": 1478 + }, + { + "epoch": 0.56644963615473, + "grad_norm": 51.46796253515065, + "learning_rate": 5e-06, + "loss": 0.2663, + "num_input_tokens_seen": 255645936, + "step": 1479 + }, + { + "epoch": 0.56644963615473, + "loss": 0.24959470331668854, + "loss_ce": 0.02156735584139824, + "loss_iou": 1.0347603559494019, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 255645936, + "step": 1479 + }, + { + "epoch": 0.5668326311757947, + "grad_norm": 30.522244174594228, + "learning_rate": 5e-06, + "loss": 0.1875, + "num_input_tokens_seen": 255818808, + "step": 1480 + }, + { + "epoch": 0.5668326311757947, + "loss": 0.2001829445362091, + "loss_ce": 0.020800607278943062, + "loss_iou": 1.0021522045135498, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 255818808, + "step": 1480 + }, + { + "epoch": 0.5672156261968594, + "grad_norm": 30.645199803497867, + "learning_rate": 5e-06, + "loss": 0.2606, + "num_input_tokens_seen": 255991800, + "step": 1481 + }, + { + "epoch": 0.5672156261968594, + "loss": 0.2134249210357666, + "loss_ce": 0.018966909497976303, + "loss_iou": 0.8776056170463562, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 255991800, + "step": 1481 + }, + { + "epoch": 0.5675986212179241, + "grad_norm": 30.114094340279998, + "learning_rate": 5e-06, + "loss": 0.2152, + "num_input_tokens_seen": 256164736, + "step": 1482 + }, + { + "epoch": 0.5675986212179241, + "loss": 0.24286949634552002, + "loss_ce": 0.021311860531568527, + "loss_iou": 1.0650312900543213, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 256164736, + "step": 1482 + }, + { + "epoch": 0.5679816162389889, + "grad_norm": 60.050880997864404, + "learning_rate": 5e-06, + "loss": 0.2796, + "num_input_tokens_seen": 256337552, + "step": 1483 + }, + { + "epoch": 0.5679816162389889, + "loss": 0.23949606716632843, + "loss_ce": 0.02001364901661873, + "loss_iou": 1.0076258182525635, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 256337552, + "step": 1483 + }, + { + "epoch": 0.5683646112600537, + "grad_norm": 41.05984639796678, + "learning_rate": 5e-06, + "loss": 0.286, + "num_input_tokens_seen": 256510528, + "step": 1484 + }, + { + "epoch": 0.5683646112600537, + "loss": 0.3509371280670166, + "loss_ce": 0.019516244530677795, + "loss_iou": 1.0642974376678467, + "loss_num": 0.33203125, + "loss_xval": 0.33203125, + "num_input_tokens_seen": 256510528, + "step": 1484 + }, + { + "epoch": 0.5687476062811183, + "grad_norm": 46.347360230691486, + "learning_rate": 5e-06, + "loss": 0.371, + "num_input_tokens_seen": 256683304, + "step": 1485 + }, + { + "epoch": 0.5687476062811183, + "loss": 0.3466867208480835, + "loss_ce": 0.019294165074825287, + "loss_iou": 1.0048203468322754, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 256683304, + "step": 1485 + }, + { + "epoch": 0.5691306013021831, + "grad_norm": 23.912583644311166, + "learning_rate": 5e-06, + "loss": 0.3312, + "num_input_tokens_seen": 256856448, + "step": 1486 + }, + { + "epoch": 0.5691306013021831, + "loss": 0.38610681891441345, + "loss_ce": 0.02038414590060711, + "loss_iou": 1.1153357028961182, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 256856448, + "step": 1486 + }, + { + "epoch": 0.5695135963232478, + "grad_norm": 67.72882364069386, + "learning_rate": 5e-06, + "loss": 0.3698, + "num_input_tokens_seen": 257029184, + "step": 1487 + }, + { + "epoch": 0.5695135963232478, + "loss": 0.35023269057273865, + "loss_ce": 0.021497325971722603, + "loss_iou": 1.0250391960144043, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 257029184, + "step": 1487 + }, + { + "epoch": 0.5698965913443125, + "grad_norm": 48.08334423675242, + "learning_rate": 5e-06, + "loss": 0.4014, + "num_input_tokens_seen": 257202240, + "step": 1488 + }, + { + "epoch": 0.5698965913443125, + "loss": 0.3962937593460083, + "loss_ce": 0.020561344921588898, + "loss_iou": 1.0564351081848145, + "loss_num": 0.375, + "loss_xval": 0.375, + "num_input_tokens_seen": 257202240, + "step": 1488 + }, + { + "epoch": 0.5702795863653772, + "grad_norm": 79.31275399258222, + "learning_rate": 5e-06, + "loss": 0.3377, + "num_input_tokens_seen": 257375232, + "step": 1489 + }, + { + "epoch": 0.5702795863653772, + "loss": 0.3447765111923218, + "loss_ce": 0.021656420081853867, + "loss_iou": 1.2226455211639404, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 257375232, + "step": 1489 + }, + { + "epoch": 0.570662581386442, + "grad_norm": 30.31768962775951, + "learning_rate": 5e-06, + "loss": 0.2937, + "num_input_tokens_seen": 257548264, + "step": 1490 + }, + { + "epoch": 0.570662581386442, + "loss": 0.2713618278503418, + "loss_ce": 0.020995626226067543, + "loss_iou": 1.0252916812896729, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 257548264, + "step": 1490 + }, + { + "epoch": 0.5710455764075067, + "grad_norm": 55.12673938107984, + "learning_rate": 5e-06, + "loss": 0.2587, + "num_input_tokens_seen": 257721768, + "step": 1491 + }, + { + "epoch": 0.5710455764075067, + "loss": 0.2455759048461914, + "loss_ce": 0.022675510495901108, + "loss_iou": 1.016119360923767, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 257721768, + "step": 1491 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 41.4510872847938, + "learning_rate": 5e-06, + "loss": 0.2507, + "num_input_tokens_seen": 257894736, + "step": 1492 + }, + { + "epoch": 0.5714285714285714, + "loss": 0.2653059959411621, + "loss_ce": 0.02165365219116211, + "loss_iou": 1.0070408582687378, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 257894736, + "step": 1492 + }, + { + "epoch": 0.5718115664496362, + "grad_norm": 30.448018451643417, + "learning_rate": 5e-06, + "loss": 0.2485, + "num_input_tokens_seen": 258067632, + "step": 1493 + }, + { + "epoch": 0.5718115664496362, + "loss": 0.23420105874538422, + "loss_ce": 0.02155456691980362, + "loss_iou": 1.0096468925476074, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 258067632, + "step": 1493 + }, + { + "epoch": 0.5721945614707009, + "grad_norm": 22.206726089495916, + "learning_rate": 5e-06, + "loss": 0.244, + "num_input_tokens_seen": 258240832, + "step": 1494 + }, + { + "epoch": 0.5721945614707009, + "loss": 0.23333390057086945, + "loss_ce": 0.02026016265153885, + "loss_iou": 1.0437605381011963, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 258240832, + "step": 1494 + }, + { + "epoch": 0.5725775564917656, + "grad_norm": 33.7110286932408, + "learning_rate": 5e-06, + "loss": 0.1897, + "num_input_tokens_seen": 258413480, + "step": 1495 + }, + { + "epoch": 0.5725775564917656, + "loss": 0.1620618999004364, + "loss_ce": 0.021009644493460655, + "loss_iou": 1.0012726783752441, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 258413480, + "step": 1495 + }, + { + "epoch": 0.5729605515128303, + "grad_norm": 40.76843772553818, + "learning_rate": 5e-06, + "loss": 0.2507, + "num_input_tokens_seen": 258586424, + "step": 1496 + }, + { + "epoch": 0.5729605515128303, + "loss": 0.28173011541366577, + "loss_ce": 0.020560679957270622, + "loss_iou": 1.0176188945770264, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 258586424, + "step": 1496 + }, + { + "epoch": 0.5733435465338951, + "grad_norm": 43.95036831704065, + "learning_rate": 5e-06, + "loss": 0.2553, + "num_input_tokens_seen": 258759152, + "step": 1497 + }, + { + "epoch": 0.5733435465338951, + "loss": 0.26648202538490295, + "loss_ce": 0.019899986684322357, + "loss_iou": 1.0377943515777588, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 258759152, + "step": 1497 + }, + { + "epoch": 0.5737265415549598, + "grad_norm": 33.315325278592105, + "learning_rate": 5e-06, + "loss": 0.2254, + "num_input_tokens_seen": 258932120, + "step": 1498 + }, + { + "epoch": 0.5737265415549598, + "loss": 0.21317031979560852, + "loss_ce": 0.02219131588935852, + "loss_iou": 1.0846459865570068, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 258932120, + "step": 1498 + }, + { + "epoch": 0.5741095365760245, + "grad_norm": 30.609871109394707, + "learning_rate": 5e-06, + "loss": 0.3001, + "num_input_tokens_seen": 259105056, + "step": 1499 + }, + { + "epoch": 0.5741095365760245, + "loss": 0.23890420794487, + "loss_ce": 0.021008696407079697, + "loss_iou": 1.0146489143371582, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 259105056, + "step": 1499 + }, + { + "epoch": 0.5744925315970892, + "grad_norm": 34.31191492677639, + "learning_rate": 5e-06, + "loss": 0.2872, + "num_input_tokens_seen": 259277880, + "step": 1500 + }, + { + "epoch": 0.5744925315970892, + "eval_websight_new_CIoU": 0.8998479247093201, + "eval_websight_new_GIoU": 0.8992944955825806, + "eval_websight_new_IoU": 0.9003492593765259, + "eval_websight_new_MAE_all": 0.009700013790279627, + "eval_websight_new_MAE_h": 0.008959516882896423, + "eval_websight_new_MAE_w": 0.007738122018054128, + "eval_websight_new_MAE_x": 0.009648083243519068, + "eval_websight_new_MAE_y": 0.012454329989850521, + "eval_websight_new_NUM_probability": 7.878957694629207e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.15105408430099487, + "eval_websight_new_loss_ce": 0.04203689843416214, + "eval_websight_new_loss_iou": 1.000507116317749, + "eval_websight_new_loss_num": 0.107513427734375, + "eval_websight_new_loss_xval": 0.107513427734375, + "eval_websight_new_runtime": 55.5311, + "eval_websight_new_samples_per_second": 0.9, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 259277880, + "step": 1500 + }, + { + "epoch": 0.5744925315970892, + "eval_seeclick_CIoU": 0.6475102007389069, + "eval_seeclick_GIoU": 0.6547337472438812, + "eval_seeclick_IoU": 0.6767177283763885, + "eval_seeclick_MAE_all": 0.05796422250568867, + "eval_seeclick_MAE_h": 0.04909752868115902, + "eval_seeclick_MAE_w": 0.07029224187135696, + "eval_seeclick_MAE_x": 0.058849262073636055, + "eval_seeclick_MAE_y": 0.05361786112189293, + "eval_seeclick_NUM_probability": 0.00010826160723809153, + "eval_seeclick_inside_bbox": 0.8975694477558136, + "eval_seeclick_loss": 0.4864910840988159, + "eval_seeclick_loss_ce": 0.032762957736849785, + "eval_seeclick_loss_iou": 1.1455382108688354, + "eval_seeclick_loss_num": 0.43011474609375, + "eval_seeclick_loss_xval": 0.43011474609375, + "eval_seeclick_runtime": 84.9697, + "eval_seeclick_samples_per_second": 0.588, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 259277880, + "step": 1500 + }, + { + "epoch": 0.5744925315970892, + "eval_icons_CIoU": 0.8272820115089417, + "eval_icons_GIoU": 0.8224707245826721, + "eval_icons_IoU": 0.8362298011779785, + "eval_icons_MAE_all": 0.02518743323162198, + "eval_icons_MAE_h": 0.018266789615154266, + "eval_icons_MAE_w": 0.03073503915220499, + "eval_icons_MAE_x": 0.030140070244669914, + "eval_icons_MAE_y": 0.021607825998216867, + "eval_icons_NUM_probability": 7.407073280774057e-05, + "eval_icons_inside_bbox": 0.9722222089767456, + "eval_icons_loss": 0.21765857934951782, + "eval_icons_loss_ce": 0.028413159772753716, + "eval_icons_loss_iou": 1.04171884059906, + "eval_icons_loss_num": 0.178863525390625, + "eval_icons_loss_xval": 0.178863525390625, + "eval_icons_runtime": 84.1218, + "eval_icons_samples_per_second": 0.594, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 259277880, + "step": 1500 + }, + { + "epoch": 0.5744925315970892, + "loss": 0.24968013167381287, + "loss_ce": 0.028427692130208015, + "loss_iou": 1.0830289125442505, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 259277880, + "step": 1500 + }, + { + "epoch": 0.574875526618154, + "grad_norm": 55.80232622645771, + "learning_rate": 5e-06, + "loss": 0.3, + "num_input_tokens_seen": 259450864, + "step": 1501 + }, + { + "epoch": 0.574875526618154, + "loss": 0.2990880012512207, + "loss_ce": 0.020279401913285255, + "loss_iou": 1.3054325580596924, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 259450864, + "step": 1501 + }, + { + "epoch": 0.5752585216392186, + "grad_norm": 24.290580399677125, + "learning_rate": 5e-06, + "loss": 0.2629, + "num_input_tokens_seen": 259623904, + "step": 1502 + }, + { + "epoch": 0.5752585216392186, + "loss": 0.20929402112960815, + "loss_ce": 0.01947469264268875, + "loss_iou": 1.027704119682312, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 259623904, + "step": 1502 + }, + { + "epoch": 0.5756415166602834, + "grad_norm": 30.846707526882263, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 259797160, + "step": 1503 + }, + { + "epoch": 0.5756415166602834, + "loss": 0.22802825272083282, + "loss_ce": 0.021912530064582825, + "loss_iou": 1.0090848207473755, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 259797160, + "step": 1503 + }, + { + "epoch": 0.5760245116813482, + "grad_norm": 34.683843547461564, + "learning_rate": 5e-06, + "loss": 0.3247, + "num_input_tokens_seen": 259970080, + "step": 1504 + }, + { + "epoch": 0.5760245116813482, + "loss": 0.31092390418052673, + "loss_ce": 0.019786197692155838, + "loss_iou": 1.0614619255065918, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 259970080, + "step": 1504 + }, + { + "epoch": 0.5764075067024129, + "grad_norm": 26.00262513867948, + "learning_rate": 5e-06, + "loss": 0.2268, + "num_input_tokens_seen": 260142872, + "step": 1505 + }, + { + "epoch": 0.5764075067024129, + "loss": 0.2198001891374588, + "loss_ce": 0.021130749955773354, + "loss_iou": 2.49227237701416, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 260142872, + "step": 1505 + }, + { + "epoch": 0.5767905017234776, + "grad_norm": 25.905227039404274, + "learning_rate": 5e-06, + "loss": 0.3218, + "num_input_tokens_seen": 260315896, + "step": 1506 + }, + { + "epoch": 0.5767905017234776, + "loss": 0.25244709849357605, + "loss_ce": 0.0195369403809309, + "loss_iou": 1.0567975044250488, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 260315896, + "step": 1506 + }, + { + "epoch": 0.5771734967445423, + "grad_norm": 32.73442958754808, + "learning_rate": 5e-06, + "loss": 0.2985, + "num_input_tokens_seen": 260485488, + "step": 1507 + }, + { + "epoch": 0.5771734967445423, + "loss": 0.2852766215801239, + "loss_ce": 0.021299580112099648, + "loss_iou": 1.0657081604003906, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 260485488, + "step": 1507 + }, + { + "epoch": 0.5775564917656071, + "grad_norm": 27.04774616807621, + "learning_rate": 5e-06, + "loss": 0.2106, + "num_input_tokens_seen": 260658664, + "step": 1508 + }, + { + "epoch": 0.5775564917656071, + "loss": 0.18731904029846191, + "loss_ce": 0.02032686024904251, + "loss_iou": 1.0016627311706543, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 260658664, + "step": 1508 + }, + { + "epoch": 0.5779394867866717, + "grad_norm": 23.776641591909762, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 260831824, + "step": 1509 + }, + { + "epoch": 0.5779394867866717, + "loss": 0.22671952843666077, + "loss_ce": 0.01981034129858017, + "loss_iou": 0.8762431144714355, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 260831824, + "step": 1509 + }, + { + "epoch": 0.5783224818077365, + "grad_norm": 23.614096820361407, + "learning_rate": 5e-06, + "loss": 0.1847, + "num_input_tokens_seen": 261004752, + "step": 1510 + }, + { + "epoch": 0.5783224818077365, + "loss": 0.19045805931091309, + "loss_ce": 0.020597223192453384, + "loss_iou": 1.0042232275009155, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 261004752, + "step": 1510 + }, + { + "epoch": 0.5787054768288012, + "grad_norm": 28.50545548830771, + "learning_rate": 5e-06, + "loss": 0.2731, + "num_input_tokens_seen": 261177704, + "step": 1511 + }, + { + "epoch": 0.5787054768288012, + "loss": 0.31981053948402405, + "loss_ce": 0.021592766046524048, + "loss_iou": 1.026820421218872, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 261177704, + "step": 1511 + }, + { + "epoch": 0.579088471849866, + "grad_norm": 26.1929496348344, + "learning_rate": 5e-06, + "loss": 0.186, + "num_input_tokens_seen": 261351232, + "step": 1512 + }, + { + "epoch": 0.579088471849866, + "loss": 0.20613685250282288, + "loss_ce": 0.02217688038945198, + "loss_iou": 1.0080938339233398, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 261351232, + "step": 1512 + }, + { + "epoch": 0.5794714668709307, + "grad_norm": 36.631132215354285, + "learning_rate": 5e-06, + "loss": 0.2809, + "num_input_tokens_seen": 261524224, + "step": 1513 + }, + { + "epoch": 0.5794714668709307, + "loss": 0.246841698884964, + "loss_ce": 0.01985194906592369, + "loss_iou": 1.3500326871871948, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 261524224, + "step": 1513 + }, + { + "epoch": 0.5798544618919954, + "grad_norm": 29.077264193554647, + "learning_rate": 5e-06, + "loss": 0.2444, + "num_input_tokens_seen": 261696800, + "step": 1514 + }, + { + "epoch": 0.5798544618919954, + "loss": 0.2662728428840637, + "loss_ce": 0.020911507308483124, + "loss_iou": 1.1145825386047363, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 261696800, + "step": 1514 + }, + { + "epoch": 0.5802374569130602, + "grad_norm": 29.768499005814423, + "learning_rate": 5e-06, + "loss": 0.2606, + "num_input_tokens_seen": 261869992, + "step": 1515 + }, + { + "epoch": 0.5802374569130602, + "loss": 0.3129635453224182, + "loss_ce": 0.019567549228668213, + "loss_iou": 1.077012538909912, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 261869992, + "step": 1515 + }, + { + "epoch": 0.5806204519341248, + "grad_norm": 30.278667752168946, + "learning_rate": 5e-06, + "loss": 0.1945, + "num_input_tokens_seen": 262042752, + "step": 1516 + }, + { + "epoch": 0.5806204519341248, + "loss": 0.17716622352600098, + "loss_ce": 0.020122766494750977, + "loss_iou": 1.0018476247787476, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 262042752, + "step": 1516 + }, + { + "epoch": 0.5810034469551896, + "grad_norm": 30.94662522269035, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 262215528, + "step": 1517 + }, + { + "epoch": 0.5810034469551896, + "loss": 0.2343868613243103, + "loss_ce": 0.021069001406431198, + "loss_iou": 1.023953914642334, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 262215528, + "step": 1517 + }, + { + "epoch": 0.5813864419762543, + "grad_norm": 29.744591758048816, + "learning_rate": 5e-06, + "loss": 0.2075, + "num_input_tokens_seen": 262388576, + "step": 1518 + }, + { + "epoch": 0.5813864419762543, + "loss": 0.23691639304161072, + "loss_ce": 0.020119523629546165, + "loss_iou": 1.0106154680252075, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 262388576, + "step": 1518 + }, + { + "epoch": 0.5817694369973191, + "grad_norm": 24.549477449191258, + "learning_rate": 5e-06, + "loss": 0.2543, + "num_input_tokens_seen": 262561072, + "step": 1519 + }, + { + "epoch": 0.5817694369973191, + "loss": 0.28415757417678833, + "loss_ce": 0.01987534761428833, + "loss_iou": 1.010164737701416, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 262561072, + "step": 1519 + }, + { + "epoch": 0.5821524320183837, + "grad_norm": 20.161012045135564, + "learning_rate": 5e-06, + "loss": 0.2069, + "num_input_tokens_seen": 262733624, + "step": 1520 + }, + { + "epoch": 0.5821524320183837, + "loss": 0.2429553121328354, + "loss_ce": 0.019627690315246582, + "loss_iou": 1.100738525390625, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 262733624, + "step": 1520 + }, + { + "epoch": 0.5825354270394485, + "grad_norm": 25.1038676233111, + "learning_rate": 5e-06, + "loss": 0.1574, + "num_input_tokens_seen": 262906864, + "step": 1521 + }, + { + "epoch": 0.5825354270394485, + "loss": 0.15586310625076294, + "loss_ce": 0.021036438643932343, + "loss_iou": 1.0012270212173462, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 262906864, + "step": 1521 + }, + { + "epoch": 0.5829184220605133, + "grad_norm": 23.49886140963644, + "learning_rate": 5e-06, + "loss": 0.2296, + "num_input_tokens_seen": 263079752, + "step": 1522 + }, + { + "epoch": 0.5829184220605133, + "loss": 0.26926282048225403, + "loss_ce": 0.02030041441321373, + "loss_iou": 1.013770341873169, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 263079752, + "step": 1522 + }, + { + "epoch": 0.5833014170815779, + "grad_norm": 26.60289452188516, + "learning_rate": 5e-06, + "loss": 0.2097, + "num_input_tokens_seen": 263252944, + "step": 1523 + }, + { + "epoch": 0.5833014170815779, + "loss": 0.15148669481277466, + "loss_ce": 0.022763557732105255, + "loss_iou": 1.0016040802001953, + "loss_num": 0.12890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 263252944, + "step": 1523 + }, + { + "epoch": 0.5836844121026427, + "grad_norm": 34.65592953030426, + "learning_rate": 5e-06, + "loss": 0.2001, + "num_input_tokens_seen": 263425840, + "step": 1524 + }, + { + "epoch": 0.5836844121026427, + "loss": 0.20139773190021515, + "loss_ce": 0.020306427031755447, + "loss_iou": 1.0493004322052002, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 263425840, + "step": 1524 + }, + { + "epoch": 0.5840674071237074, + "grad_norm": 46.70347242404038, + "learning_rate": 5e-06, + "loss": 0.2972, + "num_input_tokens_seen": 263598696, + "step": 1525 + }, + { + "epoch": 0.5840674071237074, + "loss": 0.2707063555717468, + "loss_ce": 0.02174396812915802, + "loss_iou": 1.0237774848937988, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 263598696, + "step": 1525 + }, + { + "epoch": 0.5844504021447721, + "grad_norm": 31.70010982744844, + "learning_rate": 5e-06, + "loss": 0.1844, + "num_input_tokens_seen": 263771520, + "step": 1526 + }, + { + "epoch": 0.5844504021447721, + "loss": 0.1498769372701645, + "loss_ce": 0.021458961069583893, + "loss_iou": 1.0089287757873535, + "loss_num": 0.12890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 263771520, + "step": 1526 + }, + { + "epoch": 0.5848333971658368, + "grad_norm": 30.65245007818194, + "learning_rate": 5e-06, + "loss": 0.283, + "num_input_tokens_seen": 263944952, + "step": 1527 + }, + { + "epoch": 0.5848333971658368, + "loss": 0.2526056170463562, + "loss_ce": 0.020794082432985306, + "loss_iou": 1.1226778030395508, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 263944952, + "step": 1527 + }, + { + "epoch": 0.5852163921869016, + "grad_norm": 35.71132886886885, + "learning_rate": 5e-06, + "loss": 0.228, + "num_input_tokens_seen": 264117704, + "step": 1528 + }, + { + "epoch": 0.5852163921869016, + "loss": 0.2943909168243408, + "loss_ce": 0.023089634254574776, + "loss_iou": 1.307476282119751, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 264117704, + "step": 1528 + }, + { + "epoch": 0.5855993872079663, + "grad_norm": 46.6442063736582, + "learning_rate": 5e-06, + "loss": 0.2299, + "num_input_tokens_seen": 264287544, + "step": 1529 + }, + { + "epoch": 0.5855993872079663, + "loss": 0.28918662667274475, + "loss_ce": 0.021120227873325348, + "loss_iou": 1.409267783164978, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 264287544, + "step": 1529 + }, + { + "epoch": 0.585982382229031, + "grad_norm": 33.45443583117367, + "learning_rate": 5e-06, + "loss": 0.2288, + "num_input_tokens_seen": 264460256, + "step": 1530 + }, + { + "epoch": 0.585982382229031, + "loss": 0.23621076345443726, + "loss_ce": 0.019535966217517853, + "loss_iou": 0.8824535608291626, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 264460256, + "step": 1530 + }, + { + "epoch": 0.5863653772500957, + "grad_norm": 25.02279285582714, + "learning_rate": 5e-06, + "loss": 0.2264, + "num_input_tokens_seen": 264633048, + "step": 1531 + }, + { + "epoch": 0.5863653772500957, + "loss": 0.27118995785713196, + "loss_ce": 0.020762711763381958, + "loss_iou": 1.0662559270858765, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 264633048, + "step": 1531 + }, + { + "epoch": 0.5867483722711605, + "grad_norm": 31.053513002202813, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 264806088, + "step": 1532 + }, + { + "epoch": 0.5867483722711605, + "loss": 0.20815600454807281, + "loss_ce": 0.022426022216677666, + "loss_iou": 1.0312124490737915, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 264806088, + "step": 1532 + }, + { + "epoch": 0.5871313672922251, + "grad_norm": 28.228856148742768, + "learning_rate": 5e-06, + "loss": 0.2329, + "num_input_tokens_seen": 264979104, + "step": 1533 + }, + { + "epoch": 0.5871313672922251, + "loss": 0.2584834098815918, + "loss_ce": 0.02032424695789814, + "loss_iou": 1.1087110042572021, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 264979104, + "step": 1533 + }, + { + "epoch": 0.5875143623132899, + "grad_norm": 22.208791524816075, + "learning_rate": 5e-06, + "loss": 0.2332, + "num_input_tokens_seen": 265152016, + "step": 1534 + }, + { + "epoch": 0.5875143623132899, + "loss": 0.27769356966018677, + "loss_ce": 0.019636942073702812, + "loss_iou": 1.3713454008102417, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 265152016, + "step": 1534 + }, + { + "epoch": 0.5878973573343547, + "grad_norm": 26.176153806246376, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 265325016, + "step": 1535 + }, + { + "epoch": 0.5878973573343547, + "loss": 0.2010667324066162, + "loss_ce": 0.021684397011995316, + "loss_iou": 1.0136194229125977, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 265325016, + "step": 1535 + }, + { + "epoch": 0.5882803523554194, + "grad_norm": 28.91329659880314, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 265497768, + "step": 1536 + }, + { + "epoch": 0.5882803523554194, + "loss": 0.21207498013973236, + "loss_ce": 0.019631125032901764, + "loss_iou": 1.0956573486328125, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 265497768, + "step": 1536 + }, + { + "epoch": 0.5886633473764841, + "grad_norm": 29.28604948881464, + "learning_rate": 5e-06, + "loss": 0.2919, + "num_input_tokens_seen": 265670608, + "step": 1537 + }, + { + "epoch": 0.5886633473764841, + "loss": 0.36655059456825256, + "loss_ce": 0.01877225562930107, + "loss_iou": 1.0107009410858154, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 265670608, + "step": 1537 + }, + { + "epoch": 0.5890463423975488, + "grad_norm": 27.81439886334659, + "learning_rate": 5e-06, + "loss": 0.22, + "num_input_tokens_seen": 265842936, + "step": 1538 + }, + { + "epoch": 0.5890463423975488, + "loss": 0.25227534770965576, + "loss_ce": 0.019792431965470314, + "loss_iou": 1.0118613243103027, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 265842936, + "step": 1538 + }, + { + "epoch": 0.5894293374186136, + "grad_norm": 36.55628004442563, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 266015696, + "step": 1539 + }, + { + "epoch": 0.5894293374186136, + "loss": 0.26872682571411133, + "loss_ce": 0.020802026614546776, + "loss_iou": 1.065087914466858, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 266015696, + "step": 1539 + }, + { + "epoch": 0.5898123324396782, + "grad_norm": 39.728022637273845, + "learning_rate": 5e-06, + "loss": 0.2392, + "num_input_tokens_seen": 266189008, + "step": 1540 + }, + { + "epoch": 0.5898123324396782, + "loss": 0.18677133321762085, + "loss_ce": 0.022037452086806297, + "loss_iou": 1.0023694038391113, + "loss_num": 0.1650390625, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 266189008, + "step": 1540 + }, + { + "epoch": 0.590195327460743, + "grad_norm": 30.577603448642417, + "learning_rate": 5e-06, + "loss": 0.2204, + "num_input_tokens_seen": 266362008, + "step": 1541 + }, + { + "epoch": 0.590195327460743, + "loss": 0.20363269746303558, + "loss_ce": 0.020283091813325882, + "loss_iou": 1.0037730932235718, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 266362008, + "step": 1541 + }, + { + "epoch": 0.5905783224818077, + "grad_norm": 31.569422166715896, + "learning_rate": 5e-06, + "loss": 0.1983, + "num_input_tokens_seen": 266535504, + "step": 1542 + }, + { + "epoch": 0.5905783224818077, + "loss": 0.25024089217185974, + "loss_ce": 0.02178630605340004, + "loss_iou": 1.2143375873565674, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 266535504, + "step": 1542 + }, + { + "epoch": 0.5909613175028725, + "grad_norm": 35.75139674065863, + "learning_rate": 5e-06, + "loss": 0.2342, + "num_input_tokens_seen": 266708360, + "step": 1543 + }, + { + "epoch": 0.5909613175028725, + "loss": 0.1854146420955658, + "loss_ce": 0.019337981939315796, + "loss_iou": 1.0077852010726929, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 266708360, + "step": 1543 + }, + { + "epoch": 0.5913443125239372, + "grad_norm": 28.310511466260408, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 266881232, + "step": 1544 + }, + { + "epoch": 0.5913443125239372, + "loss": 0.1748485565185547, + "loss_ce": 0.019269946962594986, + "loss_iou": 1.0812819004058838, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 266881232, + "step": 1544 + }, + { + "epoch": 0.5917273075450019, + "grad_norm": 32.615718350974944, + "learning_rate": 5e-06, + "loss": 0.2253, + "num_input_tokens_seen": 267054064, + "step": 1545 + }, + { + "epoch": 0.5917273075450019, + "loss": 0.2351870834827423, + "loss_ce": 0.020587481558322906, + "loss_iou": 1.2616922855377197, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 267054064, + "step": 1545 + }, + { + "epoch": 0.5921103025660667, + "grad_norm": 33.03801999893034, + "learning_rate": 5e-06, + "loss": 0.2478, + "num_input_tokens_seen": 267226800, + "step": 1546 + }, + { + "epoch": 0.5921103025660667, + "loss": 0.2021588236093521, + "loss_ce": 0.020975960418581963, + "loss_iou": 1.0036303997039795, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 267226800, + "step": 1546 + }, + { + "epoch": 0.5924932975871313, + "grad_norm": 29.573668790736626, + "learning_rate": 5e-06, + "loss": 0.3187, + "num_input_tokens_seen": 267399576, + "step": 1547 + }, + { + "epoch": 0.5924932975871313, + "loss": 0.28870028257369995, + "loss_ce": 0.019474195316433907, + "loss_iou": 1.0450565814971924, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 267399576, + "step": 1547 + }, + { + "epoch": 0.5928762926081961, + "grad_norm": 34.80479538562652, + "learning_rate": 5e-06, + "loss": 0.2033, + "num_input_tokens_seen": 267572288, + "step": 1548 + }, + { + "epoch": 0.5928762926081961, + "loss": 0.240036278963089, + "loss_ce": 0.02067592367529869, + "loss_iou": 1.066023349761963, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 267572288, + "step": 1548 + }, + { + "epoch": 0.5932592876292608, + "grad_norm": 33.705767028333696, + "learning_rate": 5e-06, + "loss": 0.2367, + "num_input_tokens_seen": 267745432, + "step": 1549 + }, + { + "epoch": 0.5932592876292608, + "loss": 0.24160504341125488, + "loss_ce": 0.02181744948029518, + "loss_iou": 1.0746049880981445, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 267745432, + "step": 1549 + }, + { + "epoch": 0.5936422826503256, + "grad_norm": 27.237508864290163, + "learning_rate": 5e-06, + "loss": 0.2539, + "num_input_tokens_seen": 267918424, + "step": 1550 + }, + { + "epoch": 0.5936422826503256, + "loss": 0.22879761457443237, + "loss_ce": 0.020240485668182373, + "loss_iou": 1.034693717956543, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 267918424, + "step": 1550 + }, + { + "epoch": 0.5940252776713902, + "grad_norm": 25.724312141977965, + "learning_rate": 5e-06, + "loss": 0.2716, + "num_input_tokens_seen": 268087952, + "step": 1551 + }, + { + "epoch": 0.5940252776713902, + "loss": 0.2817750573158264, + "loss_ce": 0.02207046002149582, + "loss_iou": 1.0592975616455078, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 268087952, + "step": 1551 + }, + { + "epoch": 0.594408272692455, + "grad_norm": 26.99685174643569, + "learning_rate": 5e-06, + "loss": 0.2457, + "num_input_tokens_seen": 268260736, + "step": 1552 + }, + { + "epoch": 0.594408272692455, + "loss": 0.26946550607681274, + "loss_ce": 0.021540693938732147, + "loss_iou": 1.043034315109253, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 268260736, + "step": 1552 + }, + { + "epoch": 0.5947912677135198, + "grad_norm": 34.15153816987403, + "learning_rate": 5e-06, + "loss": 0.2533, + "num_input_tokens_seen": 268433768, + "step": 1553 + }, + { + "epoch": 0.5947912677135198, + "loss": 0.27357256412506104, + "loss_ce": 0.020703919231891632, + "loss_iou": 1.0922720432281494, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 268433768, + "step": 1553 + }, + { + "epoch": 0.5951742627345844, + "grad_norm": 30.296502570549386, + "learning_rate": 5e-06, + "loss": 0.1694, + "num_input_tokens_seen": 268606672, + "step": 1554 + }, + { + "epoch": 0.5951742627345844, + "loss": 0.1596354842185974, + "loss_ce": 0.01974291354417801, + "loss_iou": 1.0062255859375, + "loss_num": 0.1396484375, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 268606672, + "step": 1554 + }, + { + "epoch": 0.5955572577556492, + "grad_norm": 30.945768386800445, + "learning_rate": 5e-06, + "loss": 0.1955, + "num_input_tokens_seen": 268779456, + "step": 1555 + }, + { + "epoch": 0.5955572577556492, + "loss": 0.19111934304237366, + "loss_ce": 0.01985469087958336, + "loss_iou": 1.0321484804153442, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 268779456, + "step": 1555 + }, + { + "epoch": 0.5959402527767139, + "grad_norm": 29.196716116468462, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 268952560, + "step": 1556 + }, + { + "epoch": 0.5959402527767139, + "loss": 0.24636295437812805, + "loss_ce": 0.020838063210248947, + "loss_iou": 1.1337873935699463, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 268952560, + "step": 1556 + }, + { + "epoch": 0.5963232477977787, + "grad_norm": 33.06575460533335, + "learning_rate": 5e-06, + "loss": 0.271, + "num_input_tokens_seen": 269125648, + "step": 1557 + }, + { + "epoch": 0.5963232477977787, + "loss": 0.2610067129135132, + "loss_ce": 0.022725481539964676, + "loss_iou": 1.0387709140777588, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 269125648, + "step": 1557 + }, + { + "epoch": 0.5967062428188433, + "grad_norm": 36.4405642598575, + "learning_rate": 5e-06, + "loss": 0.2514, + "num_input_tokens_seen": 269298584, + "step": 1558 + }, + { + "epoch": 0.5967062428188433, + "loss": 0.23544277250766754, + "loss_ce": 0.02011074870824814, + "loss_iou": 1.0255565643310547, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 269298584, + "step": 1558 + }, + { + "epoch": 0.5970892378399081, + "grad_norm": 39.24894248601204, + "learning_rate": 5e-06, + "loss": 0.3001, + "num_input_tokens_seen": 269471696, + "step": 1559 + }, + { + "epoch": 0.5970892378399081, + "loss": 0.28608623147010803, + "loss_ce": 0.021193664520978928, + "loss_iou": 1.002530574798584, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 269471696, + "step": 1559 + }, + { + "epoch": 0.5974722328609728, + "grad_norm": 33.06850598554862, + "learning_rate": 5e-06, + "loss": 0.2206, + "num_input_tokens_seen": 269644656, + "step": 1560 + }, + { + "epoch": 0.5974722328609728, + "loss": 0.22993288934230804, + "loss_ce": 0.019605742767453194, + "loss_iou": 1.0346921682357788, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 269644656, + "step": 1560 + }, + { + "epoch": 0.5978552278820375, + "grad_norm": 29.91969492518461, + "learning_rate": 5e-06, + "loss": 0.2925, + "num_input_tokens_seen": 269817520, + "step": 1561 + }, + { + "epoch": 0.5978552278820375, + "loss": 0.2565000057220459, + "loss_ce": 0.019561532884836197, + "loss_iou": 1.0348634719848633, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 269817520, + "step": 1561 + }, + { + "epoch": 0.5982382229031022, + "grad_norm": 21.177720280584925, + "learning_rate": 5e-06, + "loss": 0.2179, + "num_input_tokens_seen": 269990584, + "step": 1562 + }, + { + "epoch": 0.5982382229031022, + "loss": 0.19144877791404724, + "loss_ce": 0.02073344960808754, + "loss_iou": 1.016440987586975, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 269990584, + "step": 1562 + }, + { + "epoch": 0.598621217924167, + "grad_norm": 23.906628156849884, + "learning_rate": 5e-06, + "loss": 0.2542, + "num_input_tokens_seen": 270163040, + "step": 1563 + }, + { + "epoch": 0.598621217924167, + "loss": 0.2534239590167999, + "loss_ce": 0.020025519654154778, + "loss_iou": 1.0246292352676392, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 270163040, + "step": 1563 + }, + { + "epoch": 0.5990042129452318, + "grad_norm": 26.215274489795025, + "learning_rate": 5e-06, + "loss": 0.2049, + "num_input_tokens_seen": 270336280, + "step": 1564 + }, + { + "epoch": 0.5990042129452318, + "loss": 0.1972009241580963, + "loss_ce": 0.02178589627146721, + "loss_iou": 1.0354797840118408, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 270336280, + "step": 1564 + }, + { + "epoch": 0.5993872079662964, + "grad_norm": 36.62401599598024, + "learning_rate": 5e-06, + "loss": 0.2472, + "num_input_tokens_seen": 270509168, + "step": 1565 + }, + { + "epoch": 0.5993872079662964, + "loss": 0.19089263677597046, + "loss_ce": 0.02231353148818016, + "loss_iou": 1.0060560703277588, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 270509168, + "step": 1565 + }, + { + "epoch": 0.5997702029873612, + "grad_norm": 30.118964926716398, + "learning_rate": 5e-06, + "loss": 0.2621, + "num_input_tokens_seen": 270681968, + "step": 1566 + }, + { + "epoch": 0.5997702029873612, + "loss": 0.259109228849411, + "loss_ce": 0.02119421400129795, + "loss_iou": 1.0127711296081543, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 270681968, + "step": 1566 + }, + { + "epoch": 0.6001531980084259, + "grad_norm": 21.00401339010055, + "learning_rate": 5e-06, + "loss": 0.2207, + "num_input_tokens_seen": 270854608, + "step": 1567 + }, + { + "epoch": 0.6001531980084259, + "loss": 0.22783751785755157, + "loss_ce": 0.020623154938220978, + "loss_iou": 1.0646750926971436, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 270854608, + "step": 1567 + }, + { + "epoch": 0.6005361930294906, + "grad_norm": 17.71810463096343, + "learning_rate": 5e-06, + "loss": 0.1758, + "num_input_tokens_seen": 271027576, + "step": 1568 + }, + { + "epoch": 0.6005361930294906, + "loss": 0.12108717858791351, + "loss_ce": 0.021081075072288513, + "loss_iou": 1.0009524822235107, + "loss_num": 0.10009765625, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 271027576, + "step": 1568 + }, + { + "epoch": 0.6009191880505553, + "grad_norm": 41.2732816626539, + "learning_rate": 5e-06, + "loss": 0.2311, + "num_input_tokens_seen": 271200608, + "step": 1569 + }, + { + "epoch": 0.6009191880505553, + "loss": 0.2101738303899765, + "loss_ce": 0.021270014345645905, + "loss_iou": 1.0262513160705566, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 271200608, + "step": 1569 + }, + { + "epoch": 0.6013021830716201, + "grad_norm": 37.32284388122061, + "learning_rate": 5e-06, + "loss": 0.3139, + "num_input_tokens_seen": 271373896, + "step": 1570 + }, + { + "epoch": 0.6013021830716201, + "loss": 0.32823628187179565, + "loss_ce": 0.01970357820391655, + "loss_iou": 1.0195873975753784, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 271373896, + "step": 1570 + }, + { + "epoch": 0.6016851780926848, + "grad_norm": 28.475165993161532, + "learning_rate": 5e-06, + "loss": 0.2955, + "num_input_tokens_seen": 271546480, + "step": 1571 + }, + { + "epoch": 0.6016851780926848, + "loss": 0.2166106402873993, + "loss_ce": 0.021786419674754143, + "loss_iou": 1.0070724487304688, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 271546480, + "step": 1571 + }, + { + "epoch": 0.6020681731137495, + "grad_norm": 25.18692711010138, + "learning_rate": 5e-06, + "loss": 0.2575, + "num_input_tokens_seen": 271718888, + "step": 1572 + }, + { + "epoch": 0.6020681731137495, + "loss": 0.29178929328918457, + "loss_ce": 0.020854221656918526, + "loss_iou": 1.1269915103912354, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 271718888, + "step": 1572 + }, + { + "epoch": 0.6024511681348143, + "grad_norm": 46.49956786920276, + "learning_rate": 5e-06, + "loss": 0.2366, + "num_input_tokens_seen": 271890128, + "step": 1573 + }, + { + "epoch": 0.6024511681348143, + "loss": 0.2369789481163025, + "loss_ce": 0.021891064941883087, + "loss_iou": 1.002115249633789, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 271890128, + "step": 1573 + }, + { + "epoch": 0.602834163155879, + "grad_norm": 32.18644200082336, + "learning_rate": 5e-06, + "loss": 0.309, + "num_input_tokens_seen": 272063224, + "step": 1574 + }, + { + "epoch": 0.602834163155879, + "loss": 0.2830401659011841, + "loss_ce": 0.021748654544353485, + "loss_iou": 1.1032469272613525, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 272063224, + "step": 1574 + }, + { + "epoch": 0.6032171581769437, + "grad_norm": 47.72776838553225, + "learning_rate": 5e-06, + "loss": 0.2869, + "num_input_tokens_seen": 272235864, + "step": 1575 + }, + { + "epoch": 0.6032171581769437, + "loss": 0.3127654194831848, + "loss_ce": 0.019064251333475113, + "loss_iou": 1.054098129272461, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 272235864, + "step": 1575 + }, + { + "epoch": 0.6036001531980084, + "grad_norm": 28.609362106101166, + "learning_rate": 5e-06, + "loss": 0.2425, + "num_input_tokens_seen": 272408776, + "step": 1576 + }, + { + "epoch": 0.6036001531980084, + "loss": 0.1467621922492981, + "loss_ce": 0.019137680530548096, + "loss_iou": 1.0034462213516235, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 272408776, + "step": 1576 + }, + { + "epoch": 0.6039831482190732, + "grad_norm": 28.13433494976978, + "learning_rate": 5e-06, + "loss": 0.2013, + "num_input_tokens_seen": 272581528, + "step": 1577 + }, + { + "epoch": 0.6039831482190732, + "loss": 0.2090604156255722, + "loss_ce": 0.018996933475136757, + "loss_iou": 1.0215201377868652, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 272581528, + "step": 1577 + }, + { + "epoch": 0.6043661432401379, + "grad_norm": 30.14117326051236, + "learning_rate": 5e-06, + "loss": 0.2265, + "num_input_tokens_seen": 272754568, + "step": 1578 + }, + { + "epoch": 0.6043661432401379, + "loss": 0.2674601674079895, + "loss_ce": 0.021793659776449203, + "loss_iou": 1.015618085861206, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 272754568, + "step": 1578 + }, + { + "epoch": 0.6047491382612026, + "grad_norm": 28.68259479834075, + "learning_rate": 5e-06, + "loss": 0.2768, + "num_input_tokens_seen": 272927408, + "step": 1579 + }, + { + "epoch": 0.6047491382612026, + "loss": 0.2753274738788605, + "loss_ce": 0.021055009216070175, + "loss_iou": 0.9962226152420044, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 272927408, + "step": 1579 + }, + { + "epoch": 0.6051321332822673, + "grad_norm": 31.988660693403762, + "learning_rate": 5e-06, + "loss": 0.234, + "num_input_tokens_seen": 273100464, + "step": 1580 + }, + { + "epoch": 0.6051321332822673, + "loss": 0.22599288821220398, + "loss_ce": 0.023051002994179726, + "loss_iou": 1.0454695224761963, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 273100464, + "step": 1580 + }, + { + "epoch": 0.6055151283033321, + "grad_norm": 34.11420750058403, + "learning_rate": 5e-06, + "loss": 0.3195, + "num_input_tokens_seen": 273273432, + "step": 1581 + }, + { + "epoch": 0.6055151283033321, + "loss": 0.34745293855667114, + "loss_ce": 0.022135566920042038, + "loss_iou": 1.0547513961791992, + "loss_num": 0.326171875, + "loss_xval": 0.326171875, + "num_input_tokens_seen": 273273432, + "step": 1581 + }, + { + "epoch": 0.6058981233243967, + "grad_norm": 29.537134166160698, + "learning_rate": 5e-06, + "loss": 0.2829, + "num_input_tokens_seen": 273446384, + "step": 1582 + }, + { + "epoch": 0.6058981233243967, + "loss": 0.26747170090675354, + "loss_ce": 0.02088966965675354, + "loss_iou": 1.044858455657959, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 273446384, + "step": 1582 + }, + { + "epoch": 0.6062811183454615, + "grad_norm": 27.618517631204, + "learning_rate": 5e-06, + "loss": 0.2604, + "num_input_tokens_seen": 273619232, + "step": 1583 + }, + { + "epoch": 0.6062811183454615, + "loss": 0.25198063254356384, + "loss_ce": 0.020962558686733246, + "loss_iou": 1.0346366167068481, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 273619232, + "step": 1583 + }, + { + "epoch": 0.6066641133665263, + "grad_norm": 25.74671603837596, + "learning_rate": 5e-06, + "loss": 0.2338, + "num_input_tokens_seen": 273792080, + "step": 1584 + }, + { + "epoch": 0.6066641133665263, + "loss": 0.26737356185913086, + "loss_ce": 0.021035686135292053, + "loss_iou": 1.031913161277771, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 273792080, + "step": 1584 + }, + { + "epoch": 0.607047108387591, + "grad_norm": 27.69611185926586, + "learning_rate": 5e-06, + "loss": 0.2177, + "num_input_tokens_seen": 273964672, + "step": 1585 + }, + { + "epoch": 0.607047108387591, + "loss": 0.17387646436691284, + "loss_ce": 0.02037305384874344, + "loss_iou": 1.0016062259674072, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 273964672, + "step": 1585 + }, + { + "epoch": 0.6074301034086557, + "grad_norm": 22.44781098462438, + "learning_rate": 5e-06, + "loss": 0.1801, + "num_input_tokens_seen": 274136968, + "step": 1586 + }, + { + "epoch": 0.6074301034086557, + "loss": 0.1811303198337555, + "loss_ce": 0.020180605351924896, + "loss_iou": 1.0062899589538574, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 274136968, + "step": 1586 + }, + { + "epoch": 0.6078130984297204, + "grad_norm": 28.575654203393004, + "learning_rate": 5e-06, + "loss": 0.2037, + "num_input_tokens_seen": 274309800, + "step": 1587 + }, + { + "epoch": 0.6078130984297204, + "loss": 0.19381070137023926, + "loss_ce": 0.021447420120239258, + "loss_iou": 1.0267584323883057, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 274309800, + "step": 1587 + }, + { + "epoch": 0.6081960934507852, + "grad_norm": 29.977417165715824, + "learning_rate": 5e-06, + "loss": 0.234, + "num_input_tokens_seen": 274479440, + "step": 1588 + }, + { + "epoch": 0.6081960934507852, + "loss": 0.30903321504592896, + "loss_ce": 0.021252445876598358, + "loss_iou": 1.0187793970108032, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 274479440, + "step": 1588 + }, + { + "epoch": 0.6085790884718498, + "grad_norm": 27.635098052466525, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 274652112, + "step": 1589 + }, + { + "epoch": 0.6085790884718498, + "loss": 0.21000096201896667, + "loss_ce": 0.021585427224636078, + "loss_iou": 1.0599991083145142, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 274652112, + "step": 1589 + }, + { + "epoch": 0.6089620834929146, + "grad_norm": 33.60852914184709, + "learning_rate": 5e-06, + "loss": 0.2577, + "num_input_tokens_seen": 274825168, + "step": 1590 + }, + { + "epoch": 0.6089620834929146, + "loss": 0.2673984467983246, + "loss_ce": 0.02301366999745369, + "loss_iou": 1.0333936214447021, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 274825168, + "step": 1590 + }, + { + "epoch": 0.6093450785139793, + "grad_norm": 31.5686827262231, + "learning_rate": 5e-06, + "loss": 0.2695, + "num_input_tokens_seen": 274998272, + "step": 1591 + }, + { + "epoch": 0.6093450785139793, + "loss": 0.2690613865852356, + "loss_ce": 0.020587250590324402, + "loss_iou": 1.126404047012329, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 274998272, + "step": 1591 + }, + { + "epoch": 0.609728073535044, + "grad_norm": 24.76210316977389, + "learning_rate": 5e-06, + "loss": 0.2361, + "num_input_tokens_seen": 275171152, + "step": 1592 + }, + { + "epoch": 0.609728073535044, + "loss": 0.18169772624969482, + "loss_ce": 0.02050388790667057, + "loss_iou": 1.0014326572418213, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 275171152, + "step": 1592 + }, + { + "epoch": 0.6101110685561087, + "grad_norm": 32.5769524023531, + "learning_rate": 5e-06, + "loss": 0.2315, + "num_input_tokens_seen": 275344120, + "step": 1593 + }, + { + "epoch": 0.6101110685561087, + "loss": 0.2832563519477844, + "loss_ce": 0.019584478810429573, + "loss_iou": 1.044764518737793, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 275344120, + "step": 1593 + }, + { + "epoch": 0.6104940635771735, + "grad_norm": 39.1097092552448, + "learning_rate": 5e-06, + "loss": 0.2219, + "num_input_tokens_seen": 275517296, + "step": 1594 + }, + { + "epoch": 0.6104940635771735, + "loss": 0.2574824392795563, + "loss_ce": 0.02152051031589508, + "loss_iou": 1.0240764617919922, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 275517296, + "step": 1594 + }, + { + "epoch": 0.6108770585982383, + "grad_norm": 34.747334844192, + "learning_rate": 5e-06, + "loss": 0.1953, + "num_input_tokens_seen": 275689976, + "step": 1595 + }, + { + "epoch": 0.6108770585982383, + "loss": 0.20638251304626465, + "loss_ce": 0.02126288414001465, + "loss_iou": 1.0271966457366943, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 275689976, + "step": 1595 + }, + { + "epoch": 0.6112600536193029, + "grad_norm": 31.4174384960578, + "learning_rate": 5e-06, + "loss": 0.2529, + "num_input_tokens_seen": 275863072, + "step": 1596 + }, + { + "epoch": 0.6112600536193029, + "loss": 0.3017125725746155, + "loss_ce": 0.021439146250486374, + "loss_iou": 1.0911595821380615, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 275863072, + "step": 1596 + }, + { + "epoch": 0.6116430486403677, + "grad_norm": 24.44942504133733, + "learning_rate": 5e-06, + "loss": 0.2421, + "num_input_tokens_seen": 276035896, + "step": 1597 + }, + { + "epoch": 0.6116430486403677, + "loss": 0.2739262282848358, + "loss_ce": 0.02203413099050522, + "loss_iou": 1.0876541137695312, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 276035896, + "step": 1597 + }, + { + "epoch": 0.6120260436614324, + "grad_norm": 32.93735089841536, + "learning_rate": 5e-06, + "loss": 0.1792, + "num_input_tokens_seen": 276209040, + "step": 1598 + }, + { + "epoch": 0.6120260436614324, + "loss": 0.1790980100631714, + "loss_ce": 0.02065073698759079, + "loss_iou": 1.0007457733154297, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 276209040, + "step": 1598 + }, + { + "epoch": 0.6124090386824971, + "grad_norm": 37.411721853665675, + "learning_rate": 5e-06, + "loss": 0.27, + "num_input_tokens_seen": 276378432, + "step": 1599 + }, + { + "epoch": 0.6124090386824971, + "loss": 0.1956443190574646, + "loss_ce": 0.020107213407754898, + "loss_iou": 1.0098230838775635, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 276378432, + "step": 1599 + }, + { + "epoch": 0.6127920337035618, + "grad_norm": 18.335439105215123, + "learning_rate": 5e-06, + "loss": 0.2042, + "num_input_tokens_seen": 276551216, + "step": 1600 + }, + { + "epoch": 0.6127920337035618, + "loss": 0.1819685399532318, + "loss_ce": 0.021262984722852707, + "loss_iou": 1.0139724016189575, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 276551216, + "step": 1600 + }, + { + "epoch": 0.6131750287246266, + "grad_norm": 20.397255184454824, + "learning_rate": 5e-06, + "loss": 0.2506, + "num_input_tokens_seen": 276724144, + "step": 1601 + }, + { + "epoch": 0.6131750287246266, + "loss": 0.29360121488571167, + "loss_ce": 0.02052992209792137, + "loss_iou": 1.0553431510925293, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 276724144, + "step": 1601 + }, + { + "epoch": 0.6135580237456913, + "grad_norm": 29.533116116854934, + "learning_rate": 5e-06, + "loss": 0.2152, + "num_input_tokens_seen": 276897128, + "step": 1602 + }, + { + "epoch": 0.6135580237456913, + "loss": 0.2100106179714203, + "loss_ce": 0.021228883415460587, + "loss_iou": 1.032307744026184, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 276897128, + "step": 1602 + }, + { + "epoch": 0.613941018766756, + "grad_norm": 34.48299475173718, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 277070168, + "step": 1603 + }, + { + "epoch": 0.613941018766756, + "loss": 0.22132205963134766, + "loss_ce": 0.021798141300678253, + "loss_iou": 1.0356637239456177, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 277070168, + "step": 1603 + }, + { + "epoch": 0.6143240137878208, + "grad_norm": 35.99404331891088, + "learning_rate": 5e-06, + "loss": 0.2527, + "num_input_tokens_seen": 277242848, + "step": 1604 + }, + { + "epoch": 0.6143240137878208, + "loss": 0.2681909203529358, + "loss_ce": 0.02179197408258915, + "loss_iou": 1.2864093780517578, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 277242848, + "step": 1604 + }, + { + "epoch": 0.6147070088088855, + "grad_norm": 28.881208952887658, + "learning_rate": 5e-06, + "loss": 0.2144, + "num_input_tokens_seen": 277415664, + "step": 1605 + }, + { + "epoch": 0.6147070088088855, + "loss": 0.22464069724082947, + "loss_ce": 0.020356036722660065, + "loss_iou": 0.9713693857192993, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 277415664, + "step": 1605 + }, + { + "epoch": 0.6150900038299502, + "grad_norm": 27.440955567741156, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 277588912, + "step": 1606 + }, + { + "epoch": 0.6150900038299502, + "loss": 0.14175783097743988, + "loss_ce": 0.022281505167484283, + "loss_iou": 1.0071306228637695, + "loss_num": 0.11962890625, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 277588912, + "step": 1606 + }, + { + "epoch": 0.6154729988510149, + "grad_norm": 37.156480360771, + "learning_rate": 5e-06, + "loss": 0.255, + "num_input_tokens_seen": 277761752, + "step": 1607 + }, + { + "epoch": 0.6154729988510149, + "loss": 0.26891425251960754, + "loss_ce": 0.01909736543893814, + "loss_iou": 1.0075769424438477, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 277761752, + "step": 1607 + }, + { + "epoch": 0.6158559938720797, + "grad_norm": 25.095222555526888, + "learning_rate": 5e-06, + "loss": 0.2492, + "num_input_tokens_seen": 277934936, + "step": 1608 + }, + { + "epoch": 0.6158559938720797, + "loss": 0.3241659998893738, + "loss_ce": 0.02149268612265587, + "loss_iou": 1.0435982942581177, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 277934936, + "step": 1608 + }, + { + "epoch": 0.6162389888931444, + "grad_norm": 27.34854352904686, + "learning_rate": 5e-06, + "loss": 0.1863, + "num_input_tokens_seen": 278107928, + "step": 1609 + }, + { + "epoch": 0.6162389888931444, + "loss": 0.1921370029449463, + "loss_ce": 0.02007889747619629, + "loss_iou": 1.0090808868408203, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 278107928, + "step": 1609 + }, + { + "epoch": 0.6166219839142091, + "grad_norm": 37.360157431991865, + "learning_rate": 5e-06, + "loss": 0.2326, + "num_input_tokens_seen": 278280776, + "step": 1610 + }, + { + "epoch": 0.6166219839142091, + "loss": 0.2784643769264221, + "loss_ce": 0.020224634557962418, + "loss_iou": 1.1624329090118408, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 278280776, + "step": 1610 + }, + { + "epoch": 0.6170049789352738, + "grad_norm": 54.19153033092486, + "learning_rate": 5e-06, + "loss": 0.2161, + "num_input_tokens_seen": 278453728, + "step": 1611 + }, + { + "epoch": 0.6170049789352738, + "loss": 0.22191108763217926, + "loss_ce": 0.022753378376364708, + "loss_iou": 1.0086132287979126, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 278453728, + "step": 1611 + }, + { + "epoch": 0.6173879739563386, + "grad_norm": 33.036250720636794, + "learning_rate": 5e-06, + "loss": 0.266, + "num_input_tokens_seen": 278626800, + "step": 1612 + }, + { + "epoch": 0.6173879739563386, + "loss": 0.26405468583106995, + "loss_ce": 0.02125684544444084, + "loss_iou": 1.0224087238311768, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 278626800, + "step": 1612 + }, + { + "epoch": 0.6177709689774032, + "grad_norm": 75.22629515793881, + "learning_rate": 5e-06, + "loss": 0.3944, + "num_input_tokens_seen": 278799464, + "step": 1613 + }, + { + "epoch": 0.6177709689774032, + "loss": 0.3575478494167328, + "loss_ce": 0.022464849054813385, + "loss_iou": 1.0131324529647827, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 278799464, + "step": 1613 + }, + { + "epoch": 0.618153963998468, + "grad_norm": 38.635798603481994, + "learning_rate": 5e-06, + "loss": 0.2966, + "num_input_tokens_seen": 278972672, + "step": 1614 + }, + { + "epoch": 0.618153963998468, + "loss": 0.30380117893218994, + "loss_ce": 0.021452531218528748, + "loss_iou": 1.0792896747589111, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 278972672, + "step": 1614 + }, + { + "epoch": 0.6185369590195328, + "grad_norm": 60.024768302190616, + "learning_rate": 5e-06, + "loss": 0.3133, + "num_input_tokens_seen": 279145368, + "step": 1615 + }, + { + "epoch": 0.6185369590195328, + "loss": 0.305372953414917, + "loss_ce": 0.020704954862594604, + "loss_iou": 1.1135112047195435, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 279145368, + "step": 1615 + }, + { + "epoch": 0.6189199540405975, + "grad_norm": 76.05768078594984, + "learning_rate": 5e-06, + "loss": 0.2857, + "num_input_tokens_seen": 279318816, + "step": 1616 + }, + { + "epoch": 0.6189199540405975, + "loss": 0.27048641443252563, + "loss_ce": 0.020974673330783844, + "loss_iou": 1.005634069442749, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 279318816, + "step": 1616 + }, + { + "epoch": 0.6193029490616622, + "grad_norm": 29.828337919803086, + "learning_rate": 5e-06, + "loss": 0.3173, + "num_input_tokens_seen": 279488520, + "step": 1617 + }, + { + "epoch": 0.6193029490616622, + "loss": 0.32703596353530884, + "loss_ce": 0.022470548748970032, + "loss_iou": 1.0088353157043457, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 279488520, + "step": 1617 + }, + { + "epoch": 0.6196859440827269, + "grad_norm": 59.639405587106204, + "learning_rate": 5e-06, + "loss": 0.4584, + "num_input_tokens_seen": 279661328, + "step": 1618 + }, + { + "epoch": 0.6196859440827269, + "loss": 0.4526071548461914, + "loss_ce": 0.021454807370901108, + "loss_iou": 1.0371252298355103, + "loss_num": 0.431640625, + "loss_xval": 0.431640625, + "num_input_tokens_seen": 279661328, + "step": 1618 + }, + { + "epoch": 0.6200689391037917, + "grad_norm": 74.61035306623141, + "learning_rate": 5e-06, + "loss": 0.3122, + "num_input_tokens_seen": 279834216, + "step": 1619 + }, + { + "epoch": 0.6200689391037917, + "loss": 0.2902352213859558, + "loss_ce": 0.021314337849617004, + "loss_iou": 1.079186201095581, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 279834216, + "step": 1619 + }, + { + "epoch": 0.6204519341248563, + "grad_norm": 102.86484359917267, + "learning_rate": 5e-06, + "loss": 0.8238, + "num_input_tokens_seen": 280007224, + "step": 1620 + }, + { + "epoch": 0.6204519341248563, + "loss": 0.8232241868972778, + "loss_ce": 0.020001530647277832, + "loss_iou": 1.0411213636398315, + "loss_num": 0.8046875, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 280007224, + "step": 1620 + }, + { + "epoch": 0.6208349291459211, + "grad_norm": 40.756967969013964, + "learning_rate": 5e-06, + "loss": 0.3399, + "num_input_tokens_seen": 280180296, + "step": 1621 + }, + { + "epoch": 0.6208349291459211, + "loss": 0.3413882255554199, + "loss_ce": 0.02095363475382328, + "loss_iou": 1.1663868427276611, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 280180296, + "step": 1621 + }, + { + "epoch": 0.6212179241669858, + "grad_norm": 62.14917524690402, + "learning_rate": 5e-06, + "loss": 0.6053, + "num_input_tokens_seen": 280352808, + "step": 1622 + }, + { + "epoch": 0.6212179241669858, + "loss": 0.592242956161499, + "loss_ce": 0.021442197263240814, + "loss_iou": 1.039393663406372, + "loss_num": 0.5703125, + "loss_xval": 0.5703125, + "num_input_tokens_seen": 280352808, + "step": 1622 + }, + { + "epoch": 0.6216009191880506, + "grad_norm": 37.529966296005945, + "learning_rate": 5e-06, + "loss": 0.3617, + "num_input_tokens_seen": 280525816, + "step": 1623 + }, + { + "epoch": 0.6216009191880506, + "loss": 0.36118265986442566, + "loss_ce": 0.020606480538845062, + "loss_iou": 1.027377963066101, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 280525816, + "step": 1623 + }, + { + "epoch": 0.6219839142091153, + "grad_norm": 24.375650783154086, + "learning_rate": 5e-06, + "loss": 0.3293, + "num_input_tokens_seen": 280698568, + "step": 1624 + }, + { + "epoch": 0.6219839142091153, + "loss": 0.29702889919281006, + "loss_ce": 0.02029551938176155, + "loss_iou": 1.015905499458313, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 280698568, + "step": 1624 + }, + { + "epoch": 0.62236690923018, + "grad_norm": 47.15583885616379, + "learning_rate": 5e-06, + "loss": 0.4098, + "num_input_tokens_seen": 280871560, + "step": 1625 + }, + { + "epoch": 0.62236690923018, + "loss": 0.3579084873199463, + "loss_ce": 0.021116478368639946, + "loss_iou": 1.030195951461792, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 280871560, + "step": 1625 + }, + { + "epoch": 0.6227499042512448, + "grad_norm": 35.927884118207174, + "learning_rate": 5e-06, + "loss": 0.2328, + "num_input_tokens_seen": 281044376, + "step": 1626 + }, + { + "epoch": 0.6227499042512448, + "loss": 0.24369803071022034, + "loss_ce": 0.02079763263463974, + "loss_iou": 1.0101929903030396, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 281044376, + "step": 1626 + }, + { + "epoch": 0.6231328992723094, + "grad_norm": 38.372933248487016, + "learning_rate": 5e-06, + "loss": 0.3057, + "num_input_tokens_seen": 281217632, + "step": 1627 + }, + { + "epoch": 0.6231328992723094, + "loss": 0.34230750799179077, + "loss_ce": 0.022361237555742264, + "loss_iou": 1.1838070154190063, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 281217632, + "step": 1627 + }, + { + "epoch": 0.6235158942933742, + "grad_norm": 36.05030553734535, + "learning_rate": 5e-06, + "loss": 0.2974, + "num_input_tokens_seen": 281390384, + "step": 1628 + }, + { + "epoch": 0.6235158942933742, + "loss": 0.23143544793128967, + "loss_ce": 0.021474510431289673, + "loss_iou": 1.023942470550537, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 281390384, + "step": 1628 + }, + { + "epoch": 0.6238988893144389, + "grad_norm": 33.046635329876665, + "learning_rate": 5e-06, + "loss": 0.2272, + "num_input_tokens_seen": 281563440, + "step": 1629 + }, + { + "epoch": 0.6238988893144389, + "loss": 0.18551652133464813, + "loss_ce": 0.021942298859357834, + "loss_iou": 1.0375853776931763, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 281563440, + "step": 1629 + }, + { + "epoch": 0.6242818843355037, + "grad_norm": 32.33134558376756, + "learning_rate": 5e-06, + "loss": 0.234, + "num_input_tokens_seen": 281736640, + "step": 1630 + }, + { + "epoch": 0.6242818843355037, + "loss": 0.2025236189365387, + "loss_ce": 0.0236295685172081, + "loss_iou": 1.0388567447662354, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 281736640, + "step": 1630 + }, + { + "epoch": 0.6246648793565683, + "grad_norm": 38.36817956911151, + "learning_rate": 5e-06, + "loss": 0.2175, + "num_input_tokens_seen": 281909704, + "step": 1631 + }, + { + "epoch": 0.6246648793565683, + "loss": 0.18636493384838104, + "loss_ce": 0.022302431985735893, + "loss_iou": 1.0021567344665527, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 281909704, + "step": 1631 + }, + { + "epoch": 0.6250478743776331, + "grad_norm": 30.744052702819378, + "learning_rate": 5e-06, + "loss": 0.2626, + "num_input_tokens_seen": 282082792, + "step": 1632 + }, + { + "epoch": 0.6250478743776331, + "loss": 0.254894882440567, + "loss_ce": 0.021313339471817017, + "loss_iou": 1.093733549118042, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 282082792, + "step": 1632 + }, + { + "epoch": 0.6254308693986979, + "grad_norm": 16.26735525242088, + "learning_rate": 5e-06, + "loss": 0.2333, + "num_input_tokens_seen": 282252504, + "step": 1633 + }, + { + "epoch": 0.6254308693986979, + "loss": 0.24430033564567566, + "loss_ce": 0.021766144782304764, + "loss_iou": 1.007523775100708, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 282252504, + "step": 1633 + }, + { + "epoch": 0.6258138644197625, + "grad_norm": 26.344668297926265, + "learning_rate": 5e-06, + "loss": 0.2552, + "num_input_tokens_seen": 282425272, + "step": 1634 + }, + { + "epoch": 0.6258138644197625, + "loss": 0.2294924259185791, + "loss_ce": 0.0196535587310791, + "loss_iou": 1.0868526697158813, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 282425272, + "step": 1634 + }, + { + "epoch": 0.6261968594408273, + "grad_norm": 25.93744116888221, + "learning_rate": 5e-06, + "loss": 0.2068, + "num_input_tokens_seen": 282598016, + "step": 1635 + }, + { + "epoch": 0.6261968594408273, + "loss": 0.17579901218414307, + "loss_ce": 0.021258002147078514, + "loss_iou": 1.0365262031555176, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 282598016, + "step": 1635 + }, + { + "epoch": 0.626579854461892, + "grad_norm": 26.145387506131154, + "learning_rate": 5e-06, + "loss": 0.2339, + "num_input_tokens_seen": 282770952, + "step": 1636 + }, + { + "epoch": 0.626579854461892, + "loss": 0.19785335659980774, + "loss_ce": 0.021827977150678635, + "loss_iou": 1.0252678394317627, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 282770952, + "step": 1636 + }, + { + "epoch": 0.6269628494829568, + "grad_norm": 30.47869796105112, + "learning_rate": 5e-06, + "loss": 0.2366, + "num_input_tokens_seen": 282944160, + "step": 1637 + }, + { + "epoch": 0.6269628494829568, + "loss": 0.21427859365940094, + "loss_ce": 0.02043093368411064, + "loss_iou": 1.0064401626586914, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 282944160, + "step": 1637 + }, + { + "epoch": 0.6273458445040214, + "grad_norm": 32.82926948027278, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 283116960, + "step": 1638 + }, + { + "epoch": 0.6273458445040214, + "loss": 0.2528603971004486, + "loss_ce": 0.02202543243765831, + "loss_iou": 1.0393564701080322, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 283116960, + "step": 1638 + }, + { + "epoch": 0.6277288395250862, + "grad_norm": 25.447593959033043, + "learning_rate": 5e-06, + "loss": 0.2319, + "num_input_tokens_seen": 283290080, + "step": 1639 + }, + { + "epoch": 0.6277288395250862, + "loss": 0.20689032971858978, + "loss_ce": 0.019756542518734932, + "loss_iou": 0.8765753507614136, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 283290080, + "step": 1639 + }, + { + "epoch": 0.6281118345461509, + "grad_norm": 22.74771257097244, + "learning_rate": 5e-06, + "loss": 0.2312, + "num_input_tokens_seen": 283463224, + "step": 1640 + }, + { + "epoch": 0.6281118345461509, + "loss": 0.19958898425102234, + "loss_ce": 0.021183211356401443, + "loss_iou": 1.0254478454589844, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 283463224, + "step": 1640 + }, + { + "epoch": 0.6284948295672156, + "grad_norm": 26.714942902917763, + "learning_rate": 5e-06, + "loss": 0.1812, + "num_input_tokens_seen": 283636248, + "step": 1641 + }, + { + "epoch": 0.6284948295672156, + "loss": 0.1777523159980774, + "loss_ce": 0.021990593522787094, + "loss_iou": 1.0114291906356812, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 283636248, + "step": 1641 + }, + { + "epoch": 0.6288778245882803, + "grad_norm": 22.714348404617184, + "learning_rate": 5e-06, + "loss": 0.2647, + "num_input_tokens_seen": 283809472, + "step": 1642 + }, + { + "epoch": 0.6288778245882803, + "loss": 0.26512426137924194, + "loss_ce": 0.020861556753516197, + "loss_iou": 1.3547765016555786, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 283809472, + "step": 1642 + }, + { + "epoch": 0.6292608196093451, + "grad_norm": 23.311314197616092, + "learning_rate": 5e-06, + "loss": 0.2186, + "num_input_tokens_seen": 283982360, + "step": 1643 + }, + { + "epoch": 0.6292608196093451, + "loss": 0.24859797954559326, + "loss_ce": 0.02255428209900856, + "loss_iou": 1.008880376815796, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 283982360, + "step": 1643 + }, + { + "epoch": 0.6296438146304099, + "grad_norm": 23.892541682634135, + "learning_rate": 5e-06, + "loss": 0.2183, + "num_input_tokens_seen": 284155680, + "step": 1644 + }, + { + "epoch": 0.6296438146304099, + "loss": 0.23071157932281494, + "loss_ce": 0.022337544709444046, + "loss_iou": 3.700438976287842, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 284155680, + "step": 1644 + }, + { + "epoch": 0.6300268096514745, + "grad_norm": 27.523460965028566, + "learning_rate": 5e-06, + "loss": 0.1842, + "num_input_tokens_seen": 284325152, + "step": 1645 + }, + { + "epoch": 0.6300268096514745, + "loss": 0.16071315109729767, + "loss_ce": 0.021614033728837967, + "loss_iou": 1.0021328926086426, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 284325152, + "step": 1645 + }, + { + "epoch": 0.6304098046725393, + "grad_norm": 25.977153537205968, + "learning_rate": 5e-06, + "loss": 0.182, + "num_input_tokens_seen": 284498248, + "step": 1646 + }, + { + "epoch": 0.6304098046725393, + "loss": 0.18294882774353027, + "loss_ce": 0.021693941205739975, + "loss_iou": 0.9968886375427246, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 284498248, + "step": 1646 + }, + { + "epoch": 0.630792799693604, + "grad_norm": 17.89546757301777, + "learning_rate": 5e-06, + "loss": 0.2154, + "num_input_tokens_seen": 284671096, + "step": 1647 + }, + { + "epoch": 0.630792799693604, + "loss": 0.18741793930530548, + "loss_ce": 0.01932712085545063, + "loss_iou": 0.891245424747467, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 284671096, + "step": 1647 + }, + { + "epoch": 0.6311757947146687, + "grad_norm": 24.095337657626278, + "learning_rate": 5e-06, + "loss": 0.1844, + "num_input_tokens_seen": 284844192, + "step": 1648 + }, + { + "epoch": 0.6311757947146687, + "loss": 0.20032504200935364, + "loss_ce": 0.02191927656531334, + "loss_iou": 1.0907624959945679, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 284844192, + "step": 1648 + }, + { + "epoch": 0.6315587897357334, + "grad_norm": 36.76351481502342, + "learning_rate": 5e-06, + "loss": 0.2067, + "num_input_tokens_seen": 285016912, + "step": 1649 + }, + { + "epoch": 0.6315587897357334, + "loss": 0.2016732096672058, + "loss_ce": 0.021680543199181557, + "loss_iou": 4.320743083953857, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 285016912, + "step": 1649 + }, + { + "epoch": 0.6319417847567982, + "grad_norm": 27.485742269959434, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 285189800, + "step": 1650 + }, + { + "epoch": 0.6319417847567982, + "loss": 0.2237074077129364, + "loss_ce": 0.020399296656250954, + "loss_iou": 1.0881264209747314, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 285189800, + "step": 1650 + }, + { + "epoch": 0.6323247797778628, + "grad_norm": 16.255869079745406, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 285363000, + "step": 1651 + }, + { + "epoch": 0.6323247797778628, + "loss": 0.17966032028198242, + "loss_ce": 0.022006506100296974, + "loss_iou": 1.1113789081573486, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 285363000, + "step": 1651 + }, + { + "epoch": 0.6327077747989276, + "grad_norm": 16.03206054029721, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 285536200, + "step": 1652 + }, + { + "epoch": 0.6327077747989276, + "loss": 0.21859735250473022, + "loss_ce": 0.021697930991649628, + "loss_iou": 1.0353161096572876, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 285536200, + "step": 1652 + }, + { + "epoch": 0.6330907698199923, + "grad_norm": 24.418809592359533, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 285708704, + "step": 1653 + }, + { + "epoch": 0.6330907698199923, + "loss": 0.25326675176620483, + "loss_ce": 0.02151626907289028, + "loss_iou": 1.0436711311340332, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 285708704, + "step": 1653 + }, + { + "epoch": 0.6334737648410571, + "grad_norm": 37.11570855501896, + "learning_rate": 5e-06, + "loss": 0.3043, + "num_input_tokens_seen": 285881616, + "step": 1654 + }, + { + "epoch": 0.6334737648410571, + "loss": 0.3383890390396118, + "loss_ce": 0.020762072876095772, + "loss_iou": 1.2734043598175049, + "loss_num": 0.318359375, + "loss_xval": 0.318359375, + "num_input_tokens_seen": 285881616, + "step": 1654 + }, + { + "epoch": 0.6338567598621218, + "grad_norm": 25.86617718679928, + "learning_rate": 5e-06, + "loss": 0.1773, + "num_input_tokens_seen": 286054784, + "step": 1655 + }, + { + "epoch": 0.6338567598621218, + "loss": 0.12519463896751404, + "loss_ce": 0.019359681755304337, + "loss_iou": 1.00177001953125, + "loss_num": 0.10595703125, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 286054784, + "step": 1655 + }, + { + "epoch": 0.6342397548831865, + "grad_norm": 28.462609982513673, + "learning_rate": 5e-06, + "loss": 0.2588, + "num_input_tokens_seen": 286227912, + "step": 1656 + }, + { + "epoch": 0.6342397548831865, + "loss": 0.25157588720321655, + "loss_ce": 0.02220574952661991, + "loss_iou": 1.0223424434661865, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 286227912, + "step": 1656 + }, + { + "epoch": 0.6346227499042513, + "grad_norm": 30.96868167947015, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 286401312, + "step": 1657 + }, + { + "epoch": 0.6346227499042513, + "loss": 0.23401233553886414, + "loss_ce": 0.020938601344823837, + "loss_iou": 1.0035679340362549, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 286401312, + "step": 1657 + }, + { + "epoch": 0.6350057449253159, + "grad_norm": 44.1654473720544, + "learning_rate": 5e-06, + "loss": 0.2216, + "num_input_tokens_seen": 286574152, + "step": 1658 + }, + { + "epoch": 0.6350057449253159, + "loss": 0.26512664556503296, + "loss_ce": 0.018361497670412064, + "loss_iou": 1.0043344497680664, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 286574152, + "step": 1658 + }, + { + "epoch": 0.6353887399463807, + "grad_norm": 16.50700974628968, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 286746752, + "step": 1659 + }, + { + "epoch": 0.6353887399463807, + "loss": 0.1298852562904358, + "loss_ce": 0.02121216617524624, + "loss_iou": 1.0161259174346924, + "loss_num": 0.10888671875, + "loss_xval": 0.10888671875, + "num_input_tokens_seen": 286746752, + "step": 1659 + }, + { + "epoch": 0.6357717349674454, + "grad_norm": 21.526947475498805, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 286919896, + "step": 1660 + }, + { + "epoch": 0.6357717349674454, + "loss": 0.1910654902458191, + "loss_ce": 0.02114361897110939, + "loss_iou": 1.0035732984542847, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 286919896, + "step": 1660 + }, + { + "epoch": 0.6361547299885102, + "grad_norm": 23.07709534576469, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 287092832, + "step": 1661 + }, + { + "epoch": 0.6361547299885102, + "loss": 0.20819546282291412, + "loss_ce": 0.021122703328728676, + "loss_iou": 1.021871566772461, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 287092832, + "step": 1661 + }, + { + "epoch": 0.6365377250095748, + "grad_norm": 31.348648113259728, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 287266000, + "step": 1662 + }, + { + "epoch": 0.6365377250095748, + "loss": 0.1994139552116394, + "loss_ce": 0.020336810499429703, + "loss_iou": 1.0821785926818848, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 287266000, + "step": 1662 + }, + { + "epoch": 0.6369207200306396, + "grad_norm": 39.429898446331514, + "learning_rate": 5e-06, + "loss": 0.1862, + "num_input_tokens_seen": 287438760, + "step": 1663 + }, + { + "epoch": 0.6369207200306396, + "loss": 0.19204115867614746, + "loss_ce": 0.021081678569316864, + "loss_iou": 0.9045433402061462, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 287438760, + "step": 1663 + }, + { + "epoch": 0.6373037150517044, + "grad_norm": 24.579819652640182, + "learning_rate": 5e-06, + "loss": 0.2217, + "num_input_tokens_seen": 287611488, + "step": 1664 + }, + { + "epoch": 0.6373037150517044, + "loss": 0.20055729150772095, + "loss_ce": 0.020930826663970947, + "loss_iou": 1.0237832069396973, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 287611488, + "step": 1664 + }, + { + "epoch": 0.637686710072769, + "grad_norm": 18.916837951616397, + "learning_rate": 5e-06, + "loss": 0.1694, + "num_input_tokens_seen": 287784648, + "step": 1665 + }, + { + "epoch": 0.637686710072769, + "loss": 0.18160784244537354, + "loss_ce": 0.021115906536579132, + "loss_iou": 1.0030603408813477, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 287784648, + "step": 1665 + }, + { + "epoch": 0.6380697050938338, + "grad_norm": 28.85645059721232, + "learning_rate": 5e-06, + "loss": 0.2832, + "num_input_tokens_seen": 287957632, + "step": 1666 + }, + { + "epoch": 0.6380697050938338, + "loss": 0.37453964352607727, + "loss_ce": 0.01998642459511757, + "loss_iou": 2.200084686279297, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 287957632, + "step": 1666 + }, + { + "epoch": 0.6384527001148985, + "grad_norm": 28.787803855370317, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 288130336, + "step": 1667 + }, + { + "epoch": 0.6384527001148985, + "loss": 0.1775747835636139, + "loss_ce": 0.02156892418861389, + "loss_iou": 1.0389750003814697, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 288130336, + "step": 1667 + }, + { + "epoch": 0.6388356951359633, + "grad_norm": 29.94586825692482, + "learning_rate": 5e-06, + "loss": 0.2187, + "num_input_tokens_seen": 288303472, + "step": 1668 + }, + { + "epoch": 0.6388356951359633, + "loss": 0.23204870522022247, + "loss_ce": 0.020561883226037025, + "loss_iou": 0.9820910692214966, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 288303472, + "step": 1668 + }, + { + "epoch": 0.6392186901570279, + "grad_norm": 27.772451317795284, + "learning_rate": 5e-06, + "loss": 0.2079, + "num_input_tokens_seen": 288476312, + "step": 1669 + }, + { + "epoch": 0.6392186901570279, + "loss": 0.1951129138469696, + "loss_ce": 0.020918574184179306, + "loss_iou": 1.0338059663772583, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 288476312, + "step": 1669 + }, + { + "epoch": 0.6396016851780927, + "grad_norm": 27.280457085926844, + "learning_rate": 5e-06, + "loss": 0.2288, + "num_input_tokens_seen": 288649312, + "step": 1670 + }, + { + "epoch": 0.6396016851780927, + "loss": 0.27583760023117065, + "loss_ce": 0.020832736045122147, + "loss_iou": 1.078195333480835, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 288649312, + "step": 1670 + }, + { + "epoch": 0.6399846801991574, + "grad_norm": 32.09006951719356, + "learning_rate": 5e-06, + "loss": 0.2056, + "num_input_tokens_seen": 288821832, + "step": 1671 + }, + { + "epoch": 0.6399846801991574, + "loss": 0.23407715559005737, + "loss_ce": 0.019111350178718567, + "loss_iou": 0.9695178270339966, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 288821832, + "step": 1671 + }, + { + "epoch": 0.6403676752202221, + "grad_norm": 41.748672694407006, + "learning_rate": 5e-06, + "loss": 0.2483, + "num_input_tokens_seen": 288991464, + "step": 1672 + }, + { + "epoch": 0.6403676752202221, + "loss": 0.21558049321174622, + "loss_ce": 0.02185489796102047, + "loss_iou": 1.0301659107208252, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 288991464, + "step": 1672 + }, + { + "epoch": 0.6407506702412868, + "grad_norm": 39.87815093937564, + "learning_rate": 5e-06, + "loss": 0.2109, + "num_input_tokens_seen": 289164728, + "step": 1673 + }, + { + "epoch": 0.6407506702412868, + "loss": 0.2459956407546997, + "loss_ce": 0.021935580298304558, + "loss_iou": 1.057338833808899, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 289164728, + "step": 1673 + }, + { + "epoch": 0.6411336652623516, + "grad_norm": 35.7630799527807, + "learning_rate": 5e-06, + "loss": 0.2092, + "num_input_tokens_seen": 289337528, + "step": 1674 + }, + { + "epoch": 0.6411336652623516, + "loss": 0.22301757335662842, + "loss_ce": 0.020319823175668716, + "loss_iou": 1.0427360534667969, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 289337528, + "step": 1674 + }, + { + "epoch": 0.6415166602834164, + "grad_norm": 28.352158649635488, + "learning_rate": 5e-06, + "loss": 0.191, + "num_input_tokens_seen": 289510168, + "step": 1675 + }, + { + "epoch": 0.6415166602834164, + "loss": 0.18749476969242096, + "loss_ce": 0.021814826875925064, + "loss_iou": 1.0057984590530396, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 289510168, + "step": 1675 + }, + { + "epoch": 0.641899655304481, + "grad_norm": 33.214192015695716, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 289683288, + "step": 1676 + }, + { + "epoch": 0.641899655304481, + "loss": 0.19039714336395264, + "loss_ce": 0.019376643002033234, + "loss_iou": 0.9672365784645081, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 289683288, + "step": 1676 + }, + { + "epoch": 0.6422826503255458, + "grad_norm": 27.11509876909704, + "learning_rate": 5e-06, + "loss": 0.1772, + "num_input_tokens_seen": 289856024, + "step": 1677 + }, + { + "epoch": 0.6422826503255458, + "loss": 0.22653698921203613, + "loss_ce": 0.020909534767270088, + "loss_iou": 1.023066759109497, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 289856024, + "step": 1677 + }, + { + "epoch": 0.6426656453466105, + "grad_norm": 33.94245069034829, + "learning_rate": 5e-06, + "loss": 0.2166, + "num_input_tokens_seen": 290028792, + "step": 1678 + }, + { + "epoch": 0.6426656453466105, + "loss": 0.21347205340862274, + "loss_ce": 0.02047889307141304, + "loss_iou": 1.0252187252044678, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 290028792, + "step": 1678 + }, + { + "epoch": 0.6430486403676752, + "grad_norm": 36.118953789569794, + "learning_rate": 5e-06, + "loss": 0.256, + "num_input_tokens_seen": 290202104, + "step": 1679 + }, + { + "epoch": 0.6430486403676752, + "loss": 0.35787707567214966, + "loss_ce": 0.01974230445921421, + "loss_iou": 1.1241517066955566, + "loss_num": 0.337890625, + "loss_xval": 0.337890625, + "num_input_tokens_seen": 290202104, + "step": 1679 + }, + { + "epoch": 0.6434316353887399, + "grad_norm": 34.75509918571788, + "learning_rate": 5e-06, + "loss": 0.2249, + "num_input_tokens_seen": 290374728, + "step": 1680 + }, + { + "epoch": 0.6434316353887399, + "loss": 0.21840673685073853, + "loss_ce": 0.021049557253718376, + "loss_iou": 1.0462462902069092, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 290374728, + "step": 1680 + }, + { + "epoch": 0.6438146304098047, + "grad_norm": 29.673059916538776, + "learning_rate": 5e-06, + "loss": 0.2113, + "num_input_tokens_seen": 290547696, + "step": 1681 + }, + { + "epoch": 0.6438146304098047, + "loss": 0.23977741599082947, + "loss_ce": 0.023041583597660065, + "loss_iou": 1.0123975276947021, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 290547696, + "step": 1681 + }, + { + "epoch": 0.6441976254308694, + "grad_norm": 29.471975842247414, + "learning_rate": 5e-06, + "loss": 0.2328, + "num_input_tokens_seen": 290717232, + "step": 1682 + }, + { + "epoch": 0.6441976254308694, + "loss": 0.18632182478904724, + "loss_ce": 0.020916547626256943, + "loss_iou": 1.036426305770874, + "loss_num": 0.1650390625, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 290717232, + "step": 1682 + }, + { + "epoch": 0.6445806204519341, + "grad_norm": 22.77480070318721, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 290890248, + "step": 1683 + }, + { + "epoch": 0.6445806204519341, + "loss": 0.13573291897773743, + "loss_ce": 0.022634778171777725, + "loss_iou": 1.00262451171875, + "loss_num": 0.11328125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 290890248, + "step": 1683 + }, + { + "epoch": 0.6449636154729989, + "grad_norm": 35.551238250964886, + "learning_rate": 5e-06, + "loss": 0.2259, + "num_input_tokens_seen": 291063376, + "step": 1684 + }, + { + "epoch": 0.6449636154729989, + "loss": 0.25219807028770447, + "loss_ce": 0.020508628338575363, + "loss_iou": 1.0799319744110107, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 291063376, + "step": 1684 + }, + { + "epoch": 0.6453466104940636, + "grad_norm": 29.99433453945507, + "learning_rate": 5e-06, + "loss": 0.1988, + "num_input_tokens_seen": 291236424, + "step": 1685 + }, + { + "epoch": 0.6453466104940636, + "loss": 0.18930909037590027, + "loss_ce": 0.02018067240715027, + "loss_iou": 1.0057191848754883, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 291236424, + "step": 1685 + }, + { + "epoch": 0.6457296055151283, + "grad_norm": 37.8492513803729, + "learning_rate": 5e-06, + "loss": 0.2212, + "num_input_tokens_seen": 291409312, + "step": 1686 + }, + { + "epoch": 0.6457296055151283, + "loss": 0.24024012684822083, + "loss_ce": 0.020757704973220825, + "loss_iou": 0.892715334892273, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 291409312, + "step": 1686 + }, + { + "epoch": 0.646112600536193, + "grad_norm": 30.779299488203282, + "learning_rate": 5e-06, + "loss": 0.2733, + "num_input_tokens_seen": 291582360, + "step": 1687 + }, + { + "epoch": 0.646112600536193, + "loss": 0.32749754190444946, + "loss_ce": 0.02110106498003006, + "loss_iou": 1.1312453746795654, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 291582360, + "step": 1687 + }, + { + "epoch": 0.6464955955572578, + "grad_norm": 32.70923023423609, + "learning_rate": 5e-06, + "loss": 0.2588, + "num_input_tokens_seen": 291755264, + "step": 1688 + }, + { + "epoch": 0.6464955955572578, + "loss": 0.3143477141857147, + "loss_ce": 0.02040240727365017, + "loss_iou": 1.493055820465088, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 291755264, + "step": 1688 + }, + { + "epoch": 0.6468785905783225, + "grad_norm": 27.306927819272115, + "learning_rate": 5e-06, + "loss": 0.2093, + "num_input_tokens_seen": 291928456, + "step": 1689 + }, + { + "epoch": 0.6468785905783225, + "loss": 0.20340877771377563, + "loss_ce": 0.022073332220315933, + "loss_iou": 1.0076968669891357, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 291928456, + "step": 1689 + }, + { + "epoch": 0.6472615855993872, + "grad_norm": 23.322346668501687, + "learning_rate": 5e-06, + "loss": 0.2521, + "num_input_tokens_seen": 292101520, + "step": 1690 + }, + { + "epoch": 0.6472615855993872, + "loss": 0.20817424356937408, + "loss_ce": 0.02049114555120468, + "loss_iou": 1.0999984741210938, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 292101520, + "step": 1690 + }, + { + "epoch": 0.6476445806204519, + "grad_norm": 23.571239380389898, + "learning_rate": 5e-06, + "loss": 0.201, + "num_input_tokens_seen": 292274584, + "step": 1691 + }, + { + "epoch": 0.6476445806204519, + "loss": 0.2165507972240448, + "loss_ce": 0.022703150287270546, + "loss_iou": 1.0310851335525513, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 292274584, + "step": 1691 + }, + { + "epoch": 0.6480275756415167, + "grad_norm": 28.042909165345502, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 292448056, + "step": 1692 + }, + { + "epoch": 0.6480275756415167, + "loss": 0.16891542077064514, + "loss_ce": 0.02072206139564514, + "loss_iou": 1.0096759796142578, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 292448056, + "step": 1692 + }, + { + "epoch": 0.6484105706625813, + "grad_norm": 26.77857807766684, + "learning_rate": 5e-06, + "loss": 0.2242, + "num_input_tokens_seen": 292621320, + "step": 1693 + }, + { + "epoch": 0.6484105706625813, + "loss": 0.2604060173034668, + "loss_ce": 0.020598899573087692, + "loss_iou": 1.0400781631469727, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 292621320, + "step": 1693 + }, + { + "epoch": 0.6487935656836461, + "grad_norm": 29.361275525562398, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 292794472, + "step": 1694 + }, + { + "epoch": 0.6487935656836461, + "loss": 0.21219977736473083, + "loss_ce": 0.02085457369685173, + "loss_iou": 1.0477708578109741, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 292794472, + "step": 1694 + }, + { + "epoch": 0.6491765607047109, + "grad_norm": 37.07073880078034, + "learning_rate": 5e-06, + "loss": 0.2105, + "num_input_tokens_seen": 292967456, + "step": 1695 + }, + { + "epoch": 0.6491765607047109, + "loss": 0.27898040413856506, + "loss_ce": 0.02061857283115387, + "loss_iou": 1.0436944961547852, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 292967456, + "step": 1695 + }, + { + "epoch": 0.6495595557257756, + "grad_norm": 45.18737609991261, + "learning_rate": 5e-06, + "loss": 0.3145, + "num_input_tokens_seen": 293140272, + "step": 1696 + }, + { + "epoch": 0.6495595557257756, + "loss": 0.38537001609802246, + "loss_ce": 0.020746011286973953, + "loss_iou": 1.120348572731018, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 293140272, + "step": 1696 + }, + { + "epoch": 0.6499425507468403, + "grad_norm": 23.833577214188534, + "learning_rate": 5e-06, + "loss": 0.2291, + "num_input_tokens_seen": 293313096, + "step": 1697 + }, + { + "epoch": 0.6499425507468403, + "loss": 0.1783304065465927, + "loss_ce": 0.021714193746447563, + "loss_iou": 1.012166142463684, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 293313096, + "step": 1697 + }, + { + "epoch": 0.650325545767905, + "grad_norm": 28.94721750502272, + "learning_rate": 5e-06, + "loss": 0.1924, + "num_input_tokens_seen": 293485568, + "step": 1698 + }, + { + "epoch": 0.650325545767905, + "loss": 0.20731818675994873, + "loss_ce": 0.020092826336622238, + "loss_iou": 1.0383963584899902, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 293485568, + "step": 1698 + }, + { + "epoch": 0.6507085407889698, + "grad_norm": 26.39736723406935, + "learning_rate": 5e-06, + "loss": 0.1738, + "num_input_tokens_seen": 293658400, + "step": 1699 + }, + { + "epoch": 0.6507085407889698, + "loss": 0.1380467712879181, + "loss_ce": 0.019028211012482643, + "loss_iou": 1.0051178932189941, + "loss_num": 0.119140625, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 293658400, + "step": 1699 + }, + { + "epoch": 0.6510915358100344, + "grad_norm": 34.994208015125395, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 293831368, + "step": 1700 + }, + { + "epoch": 0.6510915358100344, + "loss": 0.1436193585395813, + "loss_ce": 0.021365948021411896, + "loss_iou": 1.000901222229004, + "loss_num": 0.1220703125, + "loss_xval": 0.1220703125, + "num_input_tokens_seen": 293831368, + "step": 1700 + }, + { + "epoch": 0.6514745308310992, + "grad_norm": 40.207505559738514, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 294004096, + "step": 1701 + }, + { + "epoch": 0.6514745308310992, + "loss": 0.1822366863489151, + "loss_ce": 0.0209817998111248, + "loss_iou": 1.0126309394836426, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 294004096, + "step": 1701 + }, + { + "epoch": 0.6518575258521639, + "grad_norm": 29.243923476544346, + "learning_rate": 5e-06, + "loss": 0.256, + "num_input_tokens_seen": 294176928, + "step": 1702 + }, + { + "epoch": 0.6518575258521639, + "loss": 0.2195555418729782, + "loss_ce": 0.020886098966002464, + "loss_iou": 1.1945915222167969, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 294176928, + "step": 1702 + }, + { + "epoch": 0.6522405208732287, + "grad_norm": 32.061288386345836, + "learning_rate": 5e-06, + "loss": 0.1868, + "num_input_tokens_seen": 294349888, + "step": 1703 + }, + { + "epoch": 0.6522405208732287, + "loss": 0.19357256591320038, + "loss_ce": 0.02139239013195038, + "loss_iou": 1.1044819355010986, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 294349888, + "step": 1703 + }, + { + "epoch": 0.6526235158942933, + "grad_norm": 37.757048849163496, + "learning_rate": 5e-06, + "loss": 0.2269, + "num_input_tokens_seen": 294522608, + "step": 1704 + }, + { + "epoch": 0.6526235158942933, + "loss": 0.21185964345932007, + "loss_ce": 0.019415799528360367, + "loss_iou": 0.9338486194610596, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 294522608, + "step": 1704 + }, + { + "epoch": 0.6530065109153581, + "grad_norm": 38.167257992835175, + "learning_rate": 5e-06, + "loss": 0.172, + "num_input_tokens_seen": 294695712, + "step": 1705 + }, + { + "epoch": 0.6530065109153581, + "loss": 0.17286206781864166, + "loss_ce": 0.02143385075032711, + "loss_iou": 1.0026297569274902, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 294695712, + "step": 1705 + }, + { + "epoch": 0.6533895059364229, + "grad_norm": 36.23637394923736, + "learning_rate": 5e-06, + "loss": 0.2488, + "num_input_tokens_seen": 294868800, + "step": 1706 + }, + { + "epoch": 0.6533895059364229, + "loss": 0.22707700729370117, + "loss_ce": 0.02193785458803177, + "loss_iou": 1.0421545505523682, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 294868800, + "step": 1706 + }, + { + "epoch": 0.6537725009574875, + "grad_norm": 37.870493352470206, + "learning_rate": 5e-06, + "loss": 0.2235, + "num_input_tokens_seen": 295041824, + "step": 1707 + }, + { + "epoch": 0.6537725009574875, + "loss": 0.21987639367580414, + "loss_ce": 0.02016936056315899, + "loss_iou": 0.8914568424224854, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 295041824, + "step": 1707 + }, + { + "epoch": 0.6541554959785523, + "grad_norm": 38.45133141639371, + "learning_rate": 5e-06, + "loss": 0.189, + "num_input_tokens_seen": 295215032, + "step": 1708 + }, + { + "epoch": 0.6541554959785523, + "loss": 0.2147710770368576, + "loss_ce": 0.020496182143688202, + "loss_iou": 1.0865932703018188, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 295215032, + "step": 1708 + }, + { + "epoch": 0.654538490999617, + "grad_norm": 34.235338399320725, + "learning_rate": 5e-06, + "loss": 0.1696, + "num_input_tokens_seen": 295387952, + "step": 1709 + }, + { + "epoch": 0.654538490999617, + "loss": 0.21084356307983398, + "loss_ce": 0.022977357730269432, + "loss_iou": 1.0059623718261719, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 295387952, + "step": 1709 + }, + { + "epoch": 0.6549214860206817, + "grad_norm": 33.04359780642886, + "learning_rate": 5e-06, + "loss": 0.2155, + "num_input_tokens_seen": 295560728, + "step": 1710 + }, + { + "epoch": 0.6549214860206817, + "loss": 0.23250362277030945, + "loss_ce": 0.020650606602430344, + "loss_iou": 1.0279673337936401, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 295560728, + "step": 1710 + }, + { + "epoch": 0.6553044810417464, + "grad_norm": 19.966859329183624, + "learning_rate": 5e-06, + "loss": 0.1738, + "num_input_tokens_seen": 295733856, + "step": 1711 + }, + { + "epoch": 0.6553044810417464, + "loss": 0.1837073266506195, + "loss_ce": 0.021781060844659805, + "loss_iou": 1.016855001449585, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 295733856, + "step": 1711 + }, + { + "epoch": 0.6556874760628112, + "grad_norm": 23.880401350337625, + "learning_rate": 5e-06, + "loss": 0.2026, + "num_input_tokens_seen": 295906992, + "step": 1712 + }, + { + "epoch": 0.6556874760628112, + "loss": 0.21519222855567932, + "loss_ce": 0.022138018161058426, + "loss_iou": 1.0332608222961426, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 295906992, + "step": 1712 + }, + { + "epoch": 0.656070471083876, + "grad_norm": 31.534495763656256, + "learning_rate": 5e-06, + "loss": 0.1444, + "num_input_tokens_seen": 296080216, + "step": 1713 + }, + { + "epoch": 0.656070471083876, + "loss": 0.1463220715522766, + "loss_ce": 0.020345501601696014, + "loss_iou": 1.0005614757537842, + "loss_num": 0.1259765625, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 296080216, + "step": 1713 + }, + { + "epoch": 0.6564534661049406, + "grad_norm": 34.2174501424663, + "learning_rate": 5e-06, + "loss": 0.2318, + "num_input_tokens_seen": 296253312, + "step": 1714 + }, + { + "epoch": 0.6564534661049406, + "loss": 0.23360732197761536, + "loss_ce": 0.020960837602615356, + "loss_iou": 1.0654317140579224, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 296253312, + "step": 1714 + }, + { + "epoch": 0.6568364611260054, + "grad_norm": 31.381640399255122, + "learning_rate": 5e-06, + "loss": 0.1829, + "num_input_tokens_seen": 296426312, + "step": 1715 + }, + { + "epoch": 0.6568364611260054, + "loss": 0.16834989190101624, + "loss_ce": 0.021438272669911385, + "loss_iou": 1.0068178176879883, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 296426312, + "step": 1715 + }, + { + "epoch": 0.6572194561470701, + "grad_norm": 27.4816830684573, + "learning_rate": 5e-06, + "loss": 0.2295, + "num_input_tokens_seen": 296599056, + "step": 1716 + }, + { + "epoch": 0.6572194561470701, + "loss": 0.17428439855575562, + "loss_ce": 0.020780974999070168, + "loss_iou": 1.0212924480438232, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 296599056, + "step": 1716 + }, + { + "epoch": 0.6576024511681348, + "grad_norm": 33.80017547041262, + "learning_rate": 5e-06, + "loss": 0.2593, + "num_input_tokens_seen": 296771928, + "step": 1717 + }, + { + "epoch": 0.6576024511681348, + "loss": 0.2885706126689911, + "loss_ce": 0.022701462730765343, + "loss_iou": 1.0551213026046753, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 296771928, + "step": 1717 + }, + { + "epoch": 0.6579854461891995, + "grad_norm": 31.938425239438647, + "learning_rate": 5e-06, + "loss": 0.2438, + "num_input_tokens_seen": 296945000, + "step": 1718 + }, + { + "epoch": 0.6579854461891995, + "loss": 0.28085488080978394, + "loss_ce": 0.019624410197138786, + "loss_iou": 1.2784092426300049, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 296945000, + "step": 1718 + }, + { + "epoch": 0.6583684412102643, + "grad_norm": 26.634670937491236, + "learning_rate": 5e-06, + "loss": 0.1825, + "num_input_tokens_seen": 297118256, + "step": 1719 + }, + { + "epoch": 0.6583684412102643, + "loss": 0.20411787927150726, + "loss_ce": 0.019486526027321815, + "loss_iou": 1.0216999053955078, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 297118256, + "step": 1719 + }, + { + "epoch": 0.658751436231329, + "grad_norm": 31.39031539117525, + "learning_rate": 5e-06, + "loss": 0.2815, + "num_input_tokens_seen": 297291424, + "step": 1720 + }, + { + "epoch": 0.658751436231329, + "loss": 0.3041594922542572, + "loss_ce": 0.0213225856423378, + "loss_iou": 1.1091408729553223, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 297291424, + "step": 1720 + }, + { + "epoch": 0.6591344312523937, + "grad_norm": 20.24297703046322, + "learning_rate": 5e-06, + "loss": 0.2664, + "num_input_tokens_seen": 297464400, + "step": 1721 + }, + { + "epoch": 0.6591344312523937, + "loss": 0.22056660056114197, + "loss_ce": 0.02061541937291622, + "loss_iou": 3.551558494567871, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 297464400, + "step": 1721 + }, + { + "epoch": 0.6595174262734584, + "grad_norm": 50.324486409063304, + "learning_rate": 5e-06, + "loss": 0.2629, + "num_input_tokens_seen": 297637456, + "step": 1722 + }, + { + "epoch": 0.6595174262734584, + "loss": 0.28015631437301636, + "loss_ce": 0.021733462810516357, + "loss_iou": 1.0348681211471558, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 297637456, + "step": 1722 + }, + { + "epoch": 0.6599004212945232, + "grad_norm": 37.87642873521686, + "learning_rate": 5e-06, + "loss": 0.2542, + "num_input_tokens_seen": 297810136, + "step": 1723 + }, + { + "epoch": 0.6599004212945232, + "loss": 0.25040507316589355, + "loss_ce": 0.018837690353393555, + "loss_iou": 1.0068309307098389, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 297810136, + "step": 1723 + }, + { + "epoch": 0.6602834163155878, + "grad_norm": 31.731085646022194, + "learning_rate": 5e-06, + "loss": 0.2083, + "num_input_tokens_seen": 297983224, + "step": 1724 + }, + { + "epoch": 0.6602834163155878, + "loss": 0.22054725885391235, + "loss_ce": 0.0201078113168478, + "loss_iou": 1.0050134658813477, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 297983224, + "step": 1724 + }, + { + "epoch": 0.6606664113366526, + "grad_norm": 51.012362404574965, + "learning_rate": 5e-06, + "loss": 0.2488, + "num_input_tokens_seen": 298156328, + "step": 1725 + }, + { + "epoch": 0.6606664113366526, + "loss": 0.21641242504119873, + "loss_ce": 0.021832339465618134, + "loss_iou": 1.051340103149414, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 298156328, + "step": 1725 + }, + { + "epoch": 0.6610494063577174, + "grad_norm": 49.488018407958606, + "learning_rate": 5e-06, + "loss": 0.2671, + "num_input_tokens_seen": 298329096, + "step": 1726 + }, + { + "epoch": 0.6610494063577174, + "loss": 0.2786455750465393, + "loss_ce": 0.0202227421104908, + "loss_iou": 1.1039822101593018, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 298329096, + "step": 1726 + }, + { + "epoch": 0.6614324013787821, + "grad_norm": 33.18477592994935, + "learning_rate": 5e-06, + "loss": 0.2186, + "num_input_tokens_seen": 298501624, + "step": 1727 + }, + { + "epoch": 0.6614324013787821, + "loss": 0.19704484939575195, + "loss_ce": 0.020348068326711655, + "loss_iou": 1.369502067565918, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 298501624, + "step": 1727 + }, + { + "epoch": 0.6618153963998468, + "grad_norm": 27.470186172630914, + "learning_rate": 5e-06, + "loss": 0.2499, + "num_input_tokens_seen": 298674488, + "step": 1728 + }, + { + "epoch": 0.6618153963998468, + "loss": 0.29464203119277954, + "loss_ce": 0.020594192668795586, + "loss_iou": 1.1022378206253052, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 298674488, + "step": 1728 + }, + { + "epoch": 0.6621983914209115, + "grad_norm": 26.470082484477828, + "learning_rate": 5e-06, + "loss": 0.1768, + "num_input_tokens_seen": 298847728, + "step": 1729 + }, + { + "epoch": 0.6621983914209115, + "loss": 0.19022804498672485, + "loss_ce": 0.02067238837480545, + "loss_iou": 1.0489208698272705, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 298847728, + "step": 1729 + }, + { + "epoch": 0.6625813864419763, + "grad_norm": 34.58069753353553, + "learning_rate": 5e-06, + "loss": 0.2653, + "num_input_tokens_seen": 299020904, + "step": 1730 + }, + { + "epoch": 0.6625813864419763, + "loss": 0.23365744948387146, + "loss_ce": 0.019424058496952057, + "loss_iou": 1.0863093137741089, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 299020904, + "step": 1730 + }, + { + "epoch": 0.6629643814630409, + "grad_norm": 32.29339221098594, + "learning_rate": 5e-06, + "loss": 0.2302, + "num_input_tokens_seen": 299193568, + "step": 1731 + }, + { + "epoch": 0.6629643814630409, + "loss": 0.24851149320602417, + "loss_ce": 0.021949004381895065, + "loss_iou": 1.064180850982666, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 299193568, + "step": 1731 + }, + { + "epoch": 0.6633473764841057, + "grad_norm": 26.165793245855838, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 299366744, + "step": 1732 + }, + { + "epoch": 0.6633473764841057, + "loss": 0.16233590245246887, + "loss_ce": 0.02000192180275917, + "loss_iou": 1.0067273378372192, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 299366744, + "step": 1732 + }, + { + "epoch": 0.6637303715051704, + "grad_norm": 52.60015171294837, + "learning_rate": 5e-06, + "loss": 0.2458, + "num_input_tokens_seen": 299539784, + "step": 1733 + }, + { + "epoch": 0.6637303715051704, + "loss": 0.20560500025749207, + "loss_ce": 0.020485367625951767, + "loss_iou": 1.0419907569885254, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 299539784, + "step": 1733 + }, + { + "epoch": 0.6641133665262352, + "grad_norm": 41.139530338835335, + "learning_rate": 5e-06, + "loss": 0.239, + "num_input_tokens_seen": 299712544, + "step": 1734 + }, + { + "epoch": 0.6641133665262352, + "loss": 0.2755686044692993, + "loss_ce": 0.020990949124097824, + "loss_iou": 1.0008246898651123, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 299712544, + "step": 1734 + }, + { + "epoch": 0.6644963615472999, + "grad_norm": 29.248925135000487, + "learning_rate": 5e-06, + "loss": 0.2162, + "num_input_tokens_seen": 299885552, + "step": 1735 + }, + { + "epoch": 0.6644963615472999, + "loss": 0.2706708312034607, + "loss_ce": 0.02024359256029129, + "loss_iou": 1.039218544960022, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 299885552, + "step": 1735 + }, + { + "epoch": 0.6648793565683646, + "grad_norm": 40.82120532816002, + "learning_rate": 5e-06, + "loss": 0.2969, + "num_input_tokens_seen": 300058296, + "step": 1736 + }, + { + "epoch": 0.6648793565683646, + "loss": 0.37651216983795166, + "loss_ce": 0.02116549387574196, + "loss_iou": 1.0084853172302246, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 300058296, + "step": 1736 + }, + { + "epoch": 0.6652623515894294, + "grad_norm": 31.57196412613779, + "learning_rate": 5e-06, + "loss": 0.2361, + "num_input_tokens_seen": 300231216, + "step": 1737 + }, + { + "epoch": 0.6652623515894294, + "loss": 0.2580101490020752, + "loss_ce": 0.02064444310963154, + "loss_iou": 1.0133150815963745, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 300231216, + "step": 1737 + }, + { + "epoch": 0.665645346610494, + "grad_norm": 48.78518103902854, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 300404144, + "step": 1738 + }, + { + "epoch": 0.665645346610494, + "loss": 0.19864070415496826, + "loss_ce": 0.02267635054886341, + "loss_iou": 1.0027598142623901, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 300404144, + "step": 1738 + }, + { + "epoch": 0.6660283416315588, + "grad_norm": 39.83128700582122, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 300577064, + "step": 1739 + }, + { + "epoch": 0.6660283416315588, + "loss": 0.21679170429706573, + "loss_ce": 0.022089559584856033, + "loss_iou": 1.00639009475708, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 300577064, + "step": 1739 + }, + { + "epoch": 0.6664113366526235, + "grad_norm": 32.20998987442295, + "learning_rate": 5e-06, + "loss": 0.225, + "num_input_tokens_seen": 300750048, + "step": 1740 + }, + { + "epoch": 0.6664113366526235, + "loss": 0.1972222626209259, + "loss_ce": 0.020952735096216202, + "loss_iou": 1.0016233921051025, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 300750048, + "step": 1740 + }, + { + "epoch": 0.6667943316736883, + "grad_norm": 39.70792162268065, + "learning_rate": 5e-06, + "loss": 0.2469, + "num_input_tokens_seen": 300923072, + "step": 1741 + }, + { + "epoch": 0.6667943316736883, + "loss": 0.25135573744773865, + "loss_ce": 0.020886968821287155, + "loss_iou": 1.003562092781067, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 300923072, + "step": 1741 + }, + { + "epoch": 0.6671773266947529, + "grad_norm": 44.71350835915628, + "learning_rate": 5e-06, + "loss": 0.2468, + "num_input_tokens_seen": 301092496, + "step": 1742 + }, + { + "epoch": 0.6671773266947529, + "loss": 0.2708122730255127, + "loss_ce": 0.02093435451388359, + "loss_iou": 1.0258967876434326, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 301092496, + "step": 1742 + }, + { + "epoch": 0.6675603217158177, + "grad_norm": 33.32851894888501, + "learning_rate": 5e-06, + "loss": 0.2162, + "num_input_tokens_seen": 301265456, + "step": 1743 + }, + { + "epoch": 0.6675603217158177, + "loss": 0.22700703144073486, + "loss_ce": 0.02247823402285576, + "loss_iou": 1.108150601387024, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 301265456, + "step": 1743 + }, + { + "epoch": 0.6679433167368825, + "grad_norm": 31.718458304650298, + "learning_rate": 5e-06, + "loss": 0.1828, + "num_input_tokens_seen": 301438424, + "step": 1744 + }, + { + "epoch": 0.6679433167368825, + "loss": 0.186773881316185, + "loss_ce": 0.019659623503684998, + "loss_iou": 1.0158426761627197, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 301438424, + "step": 1744 + }, + { + "epoch": 0.6683263117579471, + "grad_norm": 26.422891777734012, + "learning_rate": 5e-06, + "loss": 0.2487, + "num_input_tokens_seen": 301610784, + "step": 1745 + }, + { + "epoch": 0.6683263117579471, + "loss": 0.25292688608169556, + "loss_ce": 0.020199844613671303, + "loss_iou": 1.0773735046386719, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 301610784, + "step": 1745 + }, + { + "epoch": 0.6687093067790119, + "grad_norm": 30.388233032232165, + "learning_rate": 5e-06, + "loss": 0.2648, + "num_input_tokens_seen": 301783448, + "step": 1746 + }, + { + "epoch": 0.6687093067790119, + "loss": 0.267360121011734, + "loss_ce": 0.020533941686153412, + "loss_iou": 1.0193747282028198, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 301783448, + "step": 1746 + }, + { + "epoch": 0.6690923018000766, + "grad_norm": 32.88619745615411, + "learning_rate": 5e-06, + "loss": 0.2584, + "num_input_tokens_seen": 301956288, + "step": 1747 + }, + { + "epoch": 0.6690923018000766, + "loss": 0.24807390570640564, + "loss_ce": 0.02029069885611534, + "loss_iou": 1.0906078815460205, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 301956288, + "step": 1747 + }, + { + "epoch": 0.6694752968211414, + "grad_norm": 33.873327785376866, + "learning_rate": 5e-06, + "loss": 0.2666, + "num_input_tokens_seen": 302129344, + "step": 1748 + }, + { + "epoch": 0.6694752968211414, + "loss": 0.30207377672195435, + "loss_ce": 0.021312061697244644, + "loss_iou": 1.071867823600769, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 302129344, + "step": 1748 + }, + { + "epoch": 0.669858291842206, + "grad_norm": 29.252939318589306, + "learning_rate": 5e-06, + "loss": 0.1732, + "num_input_tokens_seen": 302302592, + "step": 1749 + }, + { + "epoch": 0.669858291842206, + "loss": 0.1982949674129486, + "loss_ce": 0.019339896738529205, + "loss_iou": 1.044935703277588, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 302302592, + "step": 1749 + }, + { + "epoch": 0.6702412868632708, + "grad_norm": 36.6035992879886, + "learning_rate": 5e-06, + "loss": 0.2687, + "num_input_tokens_seen": 302475376, + "step": 1750 + }, + { + "epoch": 0.6702412868632708, + "eval_websight_new_CIoU": 0.9130485653877258, + "eval_websight_new_GIoU": 0.9125950634479523, + "eval_websight_new_IoU": 0.9134105741977692, + "eval_websight_new_MAE_all": 0.009155952837318182, + "eval_websight_new_MAE_h": 0.008650638163089752, + "eval_websight_new_MAE_w": 0.009684689342975616, + "eval_websight_new_MAE_x": 0.008301699068397284, + "eval_websight_new_MAE_y": 0.009986785240471363, + "eval_websight_new_NUM_probability": 8.473627895000391e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.1299513578414917, + "eval_websight_new_loss_ce": 0.04177057556807995, + "eval_websight_new_loss_iou": 1.0004342794418335, + "eval_websight_new_loss_num": 0.087677001953125, + "eval_websight_new_loss_xval": 0.087677001953125, + "eval_websight_new_runtime": 55.7981, + "eval_websight_new_samples_per_second": 0.896, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 302475376, + "step": 1750 + }, + { + "epoch": 0.6702412868632708, + "eval_seeclick_CIoU": 0.6328222751617432, + "eval_seeclick_GIoU": 0.637648195028305, + "eval_seeclick_IoU": 0.6658692359924316, + "eval_seeclick_MAE_all": 0.060492176562547684, + "eval_seeclick_MAE_h": 0.038623055443167686, + "eval_seeclick_MAE_w": 0.08360513672232628, + "eval_seeclick_MAE_x": 0.07366372644901276, + "eval_seeclick_MAE_y": 0.046076804399490356, + "eval_seeclick_NUM_probability": 0.00011501446715556085, + "eval_seeclick_inside_bbox": 0.9097222089767456, + "eval_seeclick_loss": 0.5198314189910889, + "eval_seeclick_loss_ce": 0.0325775733217597, + "eval_seeclick_loss_iou": 1.1606248021125793, + "eval_seeclick_loss_num": 0.4901123046875, + "eval_seeclick_loss_xval": 0.4901123046875, + "eval_seeclick_runtime": 82.4764, + "eval_seeclick_samples_per_second": 0.606, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 302475376, + "step": 1750 + }, + { + "epoch": 0.6702412868632708, + "eval_icons_CIoU": 0.8662566840648651, + "eval_icons_GIoU": 0.8614764511585236, + "eval_icons_IoU": 0.8702732026576996, + "eval_icons_MAE_all": 0.01815019128844142, + "eval_icons_MAE_h": 0.018266551662236452, + "eval_icons_MAE_w": 0.018474142998456955, + "eval_icons_MAE_x": 0.016881443560123444, + "eval_icons_MAE_y": 0.018978629261255264, + "eval_icons_NUM_probability": 7.645552977919579e-05, + "eval_icons_inside_bbox": 1.0, + "eval_icons_loss": 0.14464829862117767, + "eval_icons_loss_ce": 0.028360147960484028, + "eval_icons_loss_iou": 1.0153546333312988, + "eval_icons_loss_num": 0.1070709228515625, + "eval_icons_loss_xval": 0.1070709228515625, + "eval_icons_runtime": 82.735, + "eval_icons_samples_per_second": 0.604, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 302475376, + "step": 1750 + }, + { + "epoch": 0.6702412868632708, + "loss": 0.1576666533946991, + "loss_ce": 0.028394190594553947, + "loss_iou": 1.0300023555755615, + "loss_num": 0.12890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 302475376, + "step": 1750 + }, + { + "epoch": 0.6706242818843355, + "grad_norm": 51.95143696163088, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 302648304, + "step": 1751 + }, + { + "epoch": 0.6706242818843355, + "loss": 0.2516476809978485, + "loss_ce": 0.020568573847413063, + "loss_iou": 1.066545009613037, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 302648304, + "step": 1751 + }, + { + "epoch": 0.6710072769054002, + "grad_norm": 34.17640835813798, + "learning_rate": 5e-06, + "loss": 0.2336, + "num_input_tokens_seen": 302821224, + "step": 1752 + }, + { + "epoch": 0.6710072769054002, + "loss": 0.2308710515499115, + "loss_ce": 0.022252880036830902, + "loss_iou": 1.0408929586410522, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 302821224, + "step": 1752 + }, + { + "epoch": 0.6713902719264649, + "grad_norm": 30.280846550618126, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 302994432, + "step": 1753 + }, + { + "epoch": 0.6713902719264649, + "loss": 0.18668514490127563, + "loss_ce": 0.02304989844560623, + "loss_iou": 1.007064700126648, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 302994432, + "step": 1753 + }, + { + "epoch": 0.6717732669475297, + "grad_norm": 24.04276657890047, + "learning_rate": 5e-06, + "loss": 0.2479, + "num_input_tokens_seen": 303167336, + "step": 1754 + }, + { + "epoch": 0.6717732669475297, + "loss": 0.3154633045196533, + "loss_ce": 0.021456940099596977, + "loss_iou": 1.0964784622192383, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 303167336, + "step": 1754 + }, + { + "epoch": 0.6721562619685945, + "grad_norm": 28.63546381930945, + "learning_rate": 5e-06, + "loss": 0.1799, + "num_input_tokens_seen": 303339800, + "step": 1755 + }, + { + "epoch": 0.6721562619685945, + "loss": 0.1940738409757614, + "loss_ce": 0.019940536469221115, + "loss_iou": 1.013685703277588, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 303339800, + "step": 1755 + }, + { + "epoch": 0.6725392569896591, + "grad_norm": 43.70532154112608, + "learning_rate": 5e-06, + "loss": 0.2719, + "num_input_tokens_seen": 303512416, + "step": 1756 + }, + { + "epoch": 0.6725392569896591, + "loss": 0.2490992546081543, + "loss_ce": 0.019240861758589745, + "loss_iou": 1.0142097473144531, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 303512416, + "step": 1756 + }, + { + "epoch": 0.6729222520107239, + "grad_norm": 31.59577451586341, + "learning_rate": 5e-06, + "loss": 0.253, + "num_input_tokens_seen": 303685704, + "step": 1757 + }, + { + "epoch": 0.6729222520107239, + "loss": 0.2875283360481262, + "loss_ce": 0.022086460143327713, + "loss_iou": 1.224463701248169, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 303685704, + "step": 1757 + }, + { + "epoch": 0.6733052470317886, + "grad_norm": 32.15160803956687, + "learning_rate": 5e-06, + "loss": 0.269, + "num_input_tokens_seen": 303858832, + "step": 1758 + }, + { + "epoch": 0.6733052470317886, + "loss": 0.22685261070728302, + "loss_ce": 0.02073688805103302, + "loss_iou": 1.0503966808319092, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 303858832, + "step": 1758 + }, + { + "epoch": 0.6736882420528533, + "grad_norm": 24.766245863577627, + "learning_rate": 5e-06, + "loss": 0.2847, + "num_input_tokens_seen": 304027704, + "step": 1759 + }, + { + "epoch": 0.6736882420528533, + "loss": 0.2911217510700226, + "loss_ce": 0.020736005157232285, + "loss_iou": 1.1352735757827759, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 304027704, + "step": 1759 + }, + { + "epoch": 0.674071237073918, + "grad_norm": 21.91037300207571, + "learning_rate": 5e-06, + "loss": 0.2097, + "num_input_tokens_seen": 304200096, + "step": 1760 + }, + { + "epoch": 0.674071237073918, + "loss": 0.20687098801136017, + "loss_ce": 0.02034755051136017, + "loss_iou": 1.0257363319396973, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 304200096, + "step": 1760 + }, + { + "epoch": 0.6744542320949828, + "grad_norm": 27.580579349103214, + "learning_rate": 5e-06, + "loss": 0.2412, + "num_input_tokens_seen": 304372944, + "step": 1761 + }, + { + "epoch": 0.6744542320949828, + "loss": 0.23300498723983765, + "loss_ce": 0.020236432552337646, + "loss_iou": 1.0436434745788574, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 304372944, + "step": 1761 + }, + { + "epoch": 0.6748372271160475, + "grad_norm": 38.406674583378454, + "learning_rate": 5e-06, + "loss": 0.2576, + "num_input_tokens_seen": 304545992, + "step": 1762 + }, + { + "epoch": 0.6748372271160475, + "loss": 0.27290505170822144, + "loss_ce": 0.02363748662173748, + "loss_iou": 1.0639228820800781, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 304545992, + "step": 1762 + }, + { + "epoch": 0.6752202221371122, + "grad_norm": 21.226288009192686, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 304718952, + "step": 1763 + }, + { + "epoch": 0.6752202221371122, + "loss": 0.17204737663269043, + "loss_ce": 0.018757598474621773, + "loss_iou": 0.8931306600570679, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 304718952, + "step": 1763 + }, + { + "epoch": 0.675603217158177, + "grad_norm": 44.21116227709698, + "learning_rate": 5e-06, + "loss": 0.3058, + "num_input_tokens_seen": 304892248, + "step": 1764 + }, + { + "epoch": 0.675603217158177, + "loss": 0.2746030390262604, + "loss_ce": 0.01990334317088127, + "loss_iou": 1.0796000957489014, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 304892248, + "step": 1764 + }, + { + "epoch": 0.6759862121792417, + "grad_norm": 21.00194825165669, + "learning_rate": 5e-06, + "loss": 0.1898, + "num_input_tokens_seen": 305065088, + "step": 1765 + }, + { + "epoch": 0.6759862121792417, + "loss": 0.15532422065734863, + "loss_ce": 0.02043652907013893, + "loss_iou": 1.0036628246307373, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 305065088, + "step": 1765 + }, + { + "epoch": 0.6763692072003064, + "grad_norm": 25.8206740495571, + "learning_rate": 5e-06, + "loss": 0.1807, + "num_input_tokens_seen": 305238112, + "step": 1766 + }, + { + "epoch": 0.6763692072003064, + "loss": 0.1638648957014084, + "loss_ce": 0.020859526470303535, + "loss_iou": 1.0076106786727905, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 305238112, + "step": 1766 + }, + { + "epoch": 0.6767522022213711, + "grad_norm": 29.409402510935802, + "learning_rate": 5e-06, + "loss": 0.253, + "num_input_tokens_seen": 305411176, + "step": 1767 + }, + { + "epoch": 0.6767522022213711, + "loss": 0.26368898153305054, + "loss_ce": 0.02095215767621994, + "loss_iou": 0.9582514762878418, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 305411176, + "step": 1767 + }, + { + "epoch": 0.6771351972424359, + "grad_norm": 33.04338935083643, + "learning_rate": 5e-06, + "loss": 0.2338, + "num_input_tokens_seen": 305584496, + "step": 1768 + }, + { + "epoch": 0.6771351972424359, + "loss": 0.280699759721756, + "loss_ce": 0.021483462303876877, + "loss_iou": 1.0993882417678833, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 305584496, + "step": 1768 + }, + { + "epoch": 0.6775181922635006, + "grad_norm": 26.930870875600633, + "learning_rate": 5e-06, + "loss": 0.2234, + "num_input_tokens_seen": 305753888, + "step": 1769 + }, + { + "epoch": 0.6775181922635006, + "loss": 0.16383543610572815, + "loss_ce": 0.02211179956793785, + "loss_iou": 1.0060840845108032, + "loss_num": 0.1416015625, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 305753888, + "step": 1769 + }, + { + "epoch": 0.6779011872845653, + "grad_norm": 31.539665686264513, + "learning_rate": 5e-06, + "loss": 0.1786, + "num_input_tokens_seen": 305926968, + "step": 1770 + }, + { + "epoch": 0.6779011872845653, + "loss": 0.14972564578056335, + "loss_ce": 0.021551813930273056, + "loss_iou": 1.006534457206726, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 305926968, + "step": 1770 + }, + { + "epoch": 0.67828418230563, + "grad_norm": 23.749091023528408, + "learning_rate": 5e-06, + "loss": 0.2213, + "num_input_tokens_seen": 306099816, + "step": 1771 + }, + { + "epoch": 0.67828418230563, + "loss": 0.1796785145998001, + "loss_ce": 0.01933915913105011, + "loss_iou": 1.0010827779769897, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 306099816, + "step": 1771 + }, + { + "epoch": 0.6786671773266948, + "grad_norm": 30.60906493905963, + "learning_rate": 5e-06, + "loss": 0.1519, + "num_input_tokens_seen": 306272848, + "step": 1772 + }, + { + "epoch": 0.6786671773266948, + "loss": 0.16486088931560516, + "loss_ce": 0.020329643040895462, + "loss_iou": 1.0240137577056885, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 306272848, + "step": 1772 + }, + { + "epoch": 0.6790501723477594, + "grad_norm": 26.415161692070637, + "learning_rate": 5e-06, + "loss": 0.1976, + "num_input_tokens_seen": 306446264, + "step": 1773 + }, + { + "epoch": 0.6790501723477594, + "loss": 0.2072274088859558, + "loss_ce": 0.02161949872970581, + "loss_iou": 1.0546596050262451, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 306446264, + "step": 1773 + }, + { + "epoch": 0.6794331673688242, + "grad_norm": 25.473193131889023, + "learning_rate": 5e-06, + "loss": 0.2043, + "num_input_tokens_seen": 306619528, + "step": 1774 + }, + { + "epoch": 0.6794331673688242, + "loss": 0.18194493651390076, + "loss_ce": 0.02142249047756195, + "loss_iou": 1.021148681640625, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 306619528, + "step": 1774 + }, + { + "epoch": 0.679816162389889, + "grad_norm": 31.990611083987442, + "learning_rate": 5e-06, + "loss": 0.1896, + "num_input_tokens_seen": 306792296, + "step": 1775 + }, + { + "epoch": 0.679816162389889, + "loss": 0.16622373461723328, + "loss_ce": 0.021021097898483276, + "loss_iou": 1.001260757446289, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 306792296, + "step": 1775 + }, + { + "epoch": 0.6801991574109536, + "grad_norm": 31.886559171456238, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 306965216, + "step": 1776 + }, + { + "epoch": 0.6801991574109536, + "loss": 0.22478890419006348, + "loss_ce": 0.02123665250837803, + "loss_iou": 1.0109344720840454, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 306965216, + "step": 1776 + }, + { + "epoch": 0.6805821524320184, + "grad_norm": 36.18691494698098, + "learning_rate": 5e-06, + "loss": 0.1627, + "num_input_tokens_seen": 307138248, + "step": 1777 + }, + { + "epoch": 0.6805821524320184, + "loss": 0.18779145181179047, + "loss_ce": 0.020921334624290466, + "loss_iou": 1.028134822845459, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 307138248, + "step": 1777 + }, + { + "epoch": 0.6809651474530831, + "grad_norm": 23.832373663803043, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 307311440, + "step": 1778 + }, + { + "epoch": 0.6809651474530831, + "loss": 0.2649584412574768, + "loss_ce": 0.020573675632476807, + "loss_iou": 1.0320253372192383, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 307311440, + "step": 1778 + }, + { + "epoch": 0.6813481424741479, + "grad_norm": 23.33458947613337, + "learning_rate": 5e-06, + "loss": 0.1962, + "num_input_tokens_seen": 307484264, + "step": 1779 + }, + { + "epoch": 0.6813481424741479, + "loss": 0.1672830432653427, + "loss_ce": 0.018998129293322563, + "loss_iou": 1.0017093420028687, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 307484264, + "step": 1779 + }, + { + "epoch": 0.6817311374952125, + "grad_norm": 31.416724601681047, + "learning_rate": 5e-06, + "loss": 0.2722, + "num_input_tokens_seen": 307657208, + "step": 1780 + }, + { + "epoch": 0.6817311374952125, + "loss": 0.23875892162322998, + "loss_ce": 0.020314078778028488, + "loss_iou": 1.0517665147781372, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 307657208, + "step": 1780 + }, + { + "epoch": 0.6821141325162773, + "grad_norm": 33.7553905878189, + "learning_rate": 5e-06, + "loss": 0.2234, + "num_input_tokens_seen": 307830152, + "step": 1781 + }, + { + "epoch": 0.6821141325162773, + "loss": 0.23483659327030182, + "loss_ce": 0.02084733173251152, + "loss_iou": 1.0256426334381104, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 307830152, + "step": 1781 + }, + { + "epoch": 0.682497127537342, + "grad_norm": 21.318674986674704, + "learning_rate": 5e-06, + "loss": 0.1875, + "num_input_tokens_seen": 308003104, + "step": 1782 + }, + { + "epoch": 0.682497127537342, + "loss": 0.1763216257095337, + "loss_ce": 0.021353356540203094, + "loss_iou": 1.34816575050354, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 308003104, + "step": 1782 + }, + { + "epoch": 0.6828801225584067, + "grad_norm": 29.362432741919577, + "learning_rate": 5e-06, + "loss": 0.2344, + "num_input_tokens_seen": 308176144, + "step": 1783 + }, + { + "epoch": 0.6828801225584067, + "loss": 0.2393755167722702, + "loss_ce": 0.021480005234479904, + "loss_iou": 0.979744553565979, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 308176144, + "step": 1783 + }, + { + "epoch": 0.6832631175794714, + "grad_norm": 34.696348663593994, + "learning_rate": 5e-06, + "loss": 0.2452, + "num_input_tokens_seen": 308348824, + "step": 1784 + }, + { + "epoch": 0.6832631175794714, + "loss": 0.2556586265563965, + "loss_ce": 0.02244327962398529, + "loss_iou": 1.0269641876220703, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 308348824, + "step": 1784 + }, + { + "epoch": 0.6836461126005362, + "grad_norm": 20.509339795245733, + "learning_rate": 5e-06, + "loss": 0.237, + "num_input_tokens_seen": 308521984, + "step": 1785 + }, + { + "epoch": 0.6836461126005362, + "loss": 0.15286551415920258, + "loss_ce": 0.019808875396847725, + "loss_iou": 1.003145694732666, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 308521984, + "step": 1785 + }, + { + "epoch": 0.684029107621601, + "grad_norm": 26.791570992031154, + "learning_rate": 5e-06, + "loss": 0.2871, + "num_input_tokens_seen": 308694760, + "step": 1786 + }, + { + "epoch": 0.684029107621601, + "loss": 0.3113164007663727, + "loss_ce": 0.021155260503292084, + "loss_iou": 1.0845476388931274, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 308694760, + "step": 1786 + }, + { + "epoch": 0.6844121026426656, + "grad_norm": 9.353929146451705, + "learning_rate": 5e-06, + "loss": 0.2244, + "num_input_tokens_seen": 308867624, + "step": 1787 + }, + { + "epoch": 0.6844121026426656, + "loss": 0.20658767223358154, + "loss_ce": 0.022017350420355797, + "loss_iou": 1.0860481262207031, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 308867624, + "step": 1787 + }, + { + "epoch": 0.6847950976637304, + "grad_norm": 20.58304051139585, + "learning_rate": 5e-06, + "loss": 0.2084, + "num_input_tokens_seen": 309040664, + "step": 1788 + }, + { + "epoch": 0.6847950976637304, + "loss": 0.1884603202342987, + "loss_ce": 0.02058313600718975, + "loss_iou": 1.0026249885559082, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 309040664, + "step": 1788 + }, + { + "epoch": 0.6851780926847951, + "grad_norm": 23.51325793939438, + "learning_rate": 5e-06, + "loss": 0.2274, + "num_input_tokens_seen": 309214056, + "step": 1789 + }, + { + "epoch": 0.6851780926847951, + "loss": 0.17779070138931274, + "loss_ce": 0.02209002524614334, + "loss_iou": 1.0156054496765137, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 309214056, + "step": 1789 + }, + { + "epoch": 0.6855610877058598, + "grad_norm": 30.523427600375143, + "learning_rate": 5e-06, + "loss": 0.199, + "num_input_tokens_seen": 309386984, + "step": 1790 + }, + { + "epoch": 0.6855610877058598, + "loss": 0.2529047131538391, + "loss_ce": 0.02023870125412941, + "loss_iou": 1.097543478012085, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 309386984, + "step": 1790 + }, + { + "epoch": 0.6859440827269245, + "grad_norm": 29.3815753506915, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 309560096, + "step": 1791 + }, + { + "epoch": 0.6859440827269245, + "loss": 0.18151524662971497, + "loss_ce": 0.019344843924045563, + "loss_iou": 1.019117832183838, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 309560096, + "step": 1791 + }, + { + "epoch": 0.6863270777479893, + "grad_norm": 24.416869287090638, + "learning_rate": 5e-06, + "loss": 0.2331, + "num_input_tokens_seen": 309732576, + "step": 1792 + }, + { + "epoch": 0.6863270777479893, + "loss": 0.22634467482566833, + "loss_ce": 0.019923774525523186, + "loss_iou": 1.1221668720245361, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 309732576, + "step": 1792 + }, + { + "epoch": 0.686710072769054, + "grad_norm": 20.21171192055158, + "learning_rate": 5e-06, + "loss": 0.1818, + "num_input_tokens_seen": 309905544, + "step": 1793 + }, + { + "epoch": 0.686710072769054, + "loss": 0.19531100988388062, + "loss_ce": 0.022032195702195168, + "loss_iou": 1.0145184993743896, + "loss_num": 0.1728515625, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 309905544, + "step": 1793 + }, + { + "epoch": 0.6870930677901187, + "grad_norm": 29.931546959836826, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 310078512, + "step": 1794 + }, + { + "epoch": 0.6870930677901187, + "loss": 0.2022952437400818, + "loss_ce": 0.02041049674153328, + "loss_iou": 1.0460946559906006, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 310078512, + "step": 1794 + }, + { + "epoch": 0.6874760628111835, + "grad_norm": 27.17543643661829, + "learning_rate": 5e-06, + "loss": 0.1886, + "num_input_tokens_seen": 310251408, + "step": 1795 + }, + { + "epoch": 0.6874760628111835, + "loss": 0.17125827074050903, + "loss_ce": 0.02123386040329933, + "loss_iou": 1.0009284019470215, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 310251408, + "step": 1795 + }, + { + "epoch": 0.6878590578322482, + "grad_norm": 25.98153473046399, + "learning_rate": 5e-06, + "loss": 0.2399, + "num_input_tokens_seen": 310424544, + "step": 1796 + }, + { + "epoch": 0.6878590578322482, + "loss": 0.23470944166183472, + "loss_ce": 0.019804662093520164, + "loss_iou": 1.0204904079437256, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 310424544, + "step": 1796 + }, + { + "epoch": 0.6882420528533129, + "grad_norm": 36.81538122837507, + "learning_rate": 5e-06, + "loss": 0.2462, + "num_input_tokens_seen": 310597288, + "step": 1797 + }, + { + "epoch": 0.6882420528533129, + "loss": 0.271190345287323, + "loss_ce": 0.020335841923952103, + "loss_iou": 1.472503900527954, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 310597288, + "step": 1797 + }, + { + "epoch": 0.6886250478743776, + "grad_norm": 47.644600023114414, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 310770488, + "step": 1798 + }, + { + "epoch": 0.6886250478743776, + "loss": 0.2012127935886383, + "loss_ce": 0.0221966914832592, + "loss_iou": 1.0174134969711304, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 310770488, + "step": 1798 + }, + { + "epoch": 0.6890080428954424, + "grad_norm": 39.08401946318235, + "learning_rate": 5e-06, + "loss": 0.174, + "num_input_tokens_seen": 310943440, + "step": 1799 + }, + { + "epoch": 0.6890080428954424, + "loss": 0.15548494458198547, + "loss_ce": 0.020902413874864578, + "loss_iou": 1.0032386779785156, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 310943440, + "step": 1799 + }, + { + "epoch": 0.6893910379165071, + "grad_norm": 42.43240130592587, + "learning_rate": 5e-06, + "loss": 0.25, + "num_input_tokens_seen": 311116304, + "step": 1800 + }, + { + "epoch": 0.6893910379165071, + "loss": 0.2081068456172943, + "loss_ce": 0.018653731793165207, + "loss_iou": 1.005466103553772, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 311116304, + "step": 1800 + }, + { + "epoch": 0.6897740329375718, + "grad_norm": 16.92622312624107, + "learning_rate": 5e-06, + "loss": 0.1629, + "num_input_tokens_seen": 311289352, + "step": 1801 + }, + { + "epoch": 0.6897740329375718, + "loss": 0.15594685077667236, + "loss_ce": 0.022188305854797363, + "loss_iou": 1.0031026601791382, + "loss_num": 0.1337890625, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 311289352, + "step": 1801 + }, + { + "epoch": 0.6901570279586365, + "grad_norm": 43.27051799407796, + "learning_rate": 5e-06, + "loss": 0.2678, + "num_input_tokens_seen": 311462320, + "step": 1802 + }, + { + "epoch": 0.6901570279586365, + "loss": 0.2979928255081177, + "loss_ce": 0.021747712045907974, + "loss_iou": 1.0230367183685303, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 311462320, + "step": 1802 + }, + { + "epoch": 0.6905400229797013, + "grad_norm": 32.77732264245229, + "learning_rate": 5e-06, + "loss": 0.2891, + "num_input_tokens_seen": 311634952, + "step": 1803 + }, + { + "epoch": 0.6905400229797013, + "loss": 0.30508124828338623, + "loss_ce": 0.02175602689385414, + "loss_iou": 1.0113251209259033, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 311634952, + "step": 1803 + }, + { + "epoch": 0.6909230180007659, + "grad_norm": 25.16295713300046, + "learning_rate": 5e-06, + "loss": 0.2448, + "num_input_tokens_seen": 311807592, + "step": 1804 + }, + { + "epoch": 0.6909230180007659, + "loss": 0.2753583490848541, + "loss_ce": 0.02114691585302353, + "loss_iou": 1.1480212211608887, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 311807592, + "step": 1804 + }, + { + "epoch": 0.6913060130218307, + "grad_norm": 33.56581977308756, + "learning_rate": 5e-06, + "loss": 0.2076, + "num_input_tokens_seen": 311980744, + "step": 1805 + }, + { + "epoch": 0.6913060130218307, + "loss": 0.2249014675617218, + "loss_ce": 0.0222647525370121, + "loss_iou": 1.1057695150375366, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 311980744, + "step": 1805 + }, + { + "epoch": 0.6916890080428955, + "grad_norm": 40.95398966890601, + "learning_rate": 5e-06, + "loss": 0.2382, + "num_input_tokens_seen": 312153968, + "step": 1806 + }, + { + "epoch": 0.6916890080428955, + "loss": 0.20139385759830475, + "loss_ce": 0.02146221324801445, + "loss_iou": 1.0483115911483765, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 312153968, + "step": 1806 + }, + { + "epoch": 0.6920720030639602, + "grad_norm": 33.06774960828792, + "learning_rate": 5e-06, + "loss": 0.3074, + "num_input_tokens_seen": 312327064, + "step": 1807 + }, + { + "epoch": 0.6920720030639602, + "loss": 0.2533792555332184, + "loss_ce": 0.020102884620428085, + "loss_iou": 1.029458999633789, + "loss_num": 0.2333984375, + "loss_xval": 0.2333984375, + "num_input_tokens_seen": 312327064, + "step": 1807 + }, + { + "epoch": 0.6924549980850249, + "grad_norm": 20.167685865150624, + "learning_rate": 5e-06, + "loss": 0.2446, + "num_input_tokens_seen": 312499512, + "step": 1808 + }, + { + "epoch": 0.6924549980850249, + "loss": 0.2071533352136612, + "loss_ce": 0.020019549876451492, + "loss_iou": 1.005505084991455, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 312499512, + "step": 1808 + }, + { + "epoch": 0.6928379931060896, + "grad_norm": 24.88942268196023, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 312672160, + "step": 1809 + }, + { + "epoch": 0.6928379931060896, + "loss": 0.2139686644077301, + "loss_ce": 0.020944979041814804, + "loss_iou": 1.0078048706054688, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 312672160, + "step": 1809 + }, + { + "epoch": 0.6932209881271544, + "grad_norm": 54.577292414133005, + "learning_rate": 5e-06, + "loss": 0.2115, + "num_input_tokens_seen": 312845208, + "step": 1810 + }, + { + "epoch": 0.6932209881271544, + "loss": 0.2643311619758606, + "loss_ce": 0.021838489919900894, + "loss_iou": 1.1271297931671143, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 312845208, + "step": 1810 + }, + { + "epoch": 0.693603983148219, + "grad_norm": 34.267886907261975, + "learning_rate": 5e-06, + "loss": 0.2612, + "num_input_tokens_seen": 313018064, + "step": 1811 + }, + { + "epoch": 0.693603983148219, + "loss": 0.26771610975265503, + "loss_ce": 0.020889950916171074, + "loss_iou": 1.0351500511169434, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 313018064, + "step": 1811 + }, + { + "epoch": 0.6939869781692838, + "grad_norm": 27.288532665173932, + "learning_rate": 5e-06, + "loss": 0.2585, + "num_input_tokens_seen": 313191096, + "step": 1812 + }, + { + "epoch": 0.6939869781692838, + "loss": 0.2542651295661926, + "loss_ce": 0.021538063883781433, + "loss_iou": 1.0879662036895752, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 313191096, + "step": 1812 + }, + { + "epoch": 0.6943699731903485, + "grad_norm": 29.010078146165668, + "learning_rate": 5e-06, + "loss": 0.2039, + "num_input_tokens_seen": 313364240, + "step": 1813 + }, + { + "epoch": 0.6943699731903485, + "loss": 0.24573346972465515, + "loss_ce": 0.020941000431776047, + "loss_iou": 1.0150185823440552, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 313364240, + "step": 1813 + }, + { + "epoch": 0.6947529682114133, + "grad_norm": 53.61927297149612, + "learning_rate": 5e-06, + "loss": 0.2497, + "num_input_tokens_seen": 313536896, + "step": 1814 + }, + { + "epoch": 0.6947529682114133, + "loss": 0.25504958629608154, + "loss_ce": 0.02018630877137184, + "loss_iou": 1.1167068481445312, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 313536896, + "step": 1814 + }, + { + "epoch": 0.695135963232478, + "grad_norm": 38.93817465803134, + "learning_rate": 5e-06, + "loss": 0.2548, + "num_input_tokens_seen": 313710240, + "step": 1815 + }, + { + "epoch": 0.695135963232478, + "loss": 0.24499168992042542, + "loss_ce": 0.02245749533176422, + "loss_iou": 1.0315313339233398, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 313710240, + "step": 1815 + }, + { + "epoch": 0.6955189582535427, + "grad_norm": 31.376630992015357, + "learning_rate": 5e-06, + "loss": 0.2778, + "num_input_tokens_seen": 313882832, + "step": 1816 + }, + { + "epoch": 0.6955189582535427, + "loss": 0.3155968189239502, + "loss_ce": 0.018355632200837135, + "loss_iou": NaN, + "loss_num": 0.296875, + "loss_xval": 0.296875, + "num_input_tokens_seen": 313882832, + "step": 1816 + }, + { + "epoch": 0.6959019532746075, + "grad_norm": 25.29873565702416, + "learning_rate": 5e-06, + "loss": 0.2453, + "num_input_tokens_seen": 314055344, + "step": 1817 + }, + { + "epoch": 0.6959019532746075, + "loss": 0.25351792573928833, + "loss_ce": 0.022560909390449524, + "loss_iou": 1.0375151634216309, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 314055344, + "step": 1817 + }, + { + "epoch": 0.6962849482956721, + "grad_norm": 26.80187544021142, + "learning_rate": 5e-06, + "loss": 0.1737, + "num_input_tokens_seen": 314228136, + "step": 1818 + }, + { + "epoch": 0.6962849482956721, + "loss": 0.14994150400161743, + "loss_ce": 0.02188975363969803, + "loss_iou": 1.000547170639038, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 314228136, + "step": 1818 + }, + { + "epoch": 0.6966679433167369, + "grad_norm": 28.61940095339164, + "learning_rate": 5e-06, + "loss": 0.2436, + "num_input_tokens_seen": 314401296, + "step": 1819 + }, + { + "epoch": 0.6966679433167369, + "loss": 0.21377557516098022, + "loss_ce": 0.020782414823770523, + "loss_iou": 1.0035618543624878, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 314401296, + "step": 1819 + }, + { + "epoch": 0.6970509383378016, + "grad_norm": 17.3206459206108, + "learning_rate": 5e-06, + "loss": 0.2735, + "num_input_tokens_seen": 314574384, + "step": 1820 + }, + { + "epoch": 0.6970509383378016, + "loss": 0.2638574540615082, + "loss_ce": 0.02240237034857273, + "loss_iou": 1.1314351558685303, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 314574384, + "step": 1820 + }, + { + "epoch": 0.6974339333588664, + "grad_norm": 27.36915259376651, + "learning_rate": 5e-06, + "loss": 0.2481, + "num_input_tokens_seen": 314747208, + "step": 1821 + }, + { + "epoch": 0.6974339333588664, + "loss": 0.24881714582443237, + "loss_ce": 0.020728763192892075, + "loss_iou": 1.0145410299301147, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 314747208, + "step": 1821 + }, + { + "epoch": 0.697816928379931, + "grad_norm": 22.691723151541613, + "learning_rate": 5e-06, + "loss": 0.1891, + "num_input_tokens_seen": 314920240, + "step": 1822 + }, + { + "epoch": 0.697816928379931, + "loss": 0.16145747900009155, + "loss_ce": 0.0218700859695673, + "loss_iou": 1.024095058441162, + "loss_num": 0.1396484375, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 314920240, + "step": 1822 + }, + { + "epoch": 0.6981999234009958, + "grad_norm": 22.46967825224015, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 315093088, + "step": 1823 + }, + { + "epoch": 0.6981999234009958, + "loss": 0.3342197835445404, + "loss_ce": 0.021292559802532196, + "loss_iou": 1.044093370437622, + "loss_num": 0.3125, + "loss_xval": 0.3125, + "num_input_tokens_seen": 315093088, + "step": 1823 + }, + { + "epoch": 0.6985829184220606, + "grad_norm": 24.523147278668453, + "learning_rate": 5e-06, + "loss": 0.2306, + "num_input_tokens_seen": 315266040, + "step": 1824 + }, + { + "epoch": 0.6985829184220606, + "loss": 0.21536186337471008, + "loss_ce": 0.02139214053750038, + "loss_iou": 1.0824241638183594, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 315266040, + "step": 1824 + }, + { + "epoch": 0.6989659134431252, + "grad_norm": 33.30640836205073, + "learning_rate": 5e-06, + "loss": 0.2435, + "num_input_tokens_seen": 315439136, + "step": 1825 + }, + { + "epoch": 0.6989659134431252, + "loss": 0.28745371103286743, + "loss_ce": 0.02207283303141594, + "loss_iou": 1.0710875988006592, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 315439136, + "step": 1825 + }, + { + "epoch": 0.69934890846419, + "grad_norm": 25.15528350612063, + "learning_rate": 5e-06, + "loss": 0.2096, + "num_input_tokens_seen": 315611704, + "step": 1826 + }, + { + "epoch": 0.69934890846419, + "loss": 0.21007584035396576, + "loss_ce": 0.021233057603240013, + "loss_iou": 1.0091652870178223, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 315611704, + "step": 1826 + }, + { + "epoch": 0.6997319034852547, + "grad_norm": 30.373965426663016, + "learning_rate": 5e-06, + "loss": 0.2322, + "num_input_tokens_seen": 315781056, + "step": 1827 + }, + { + "epoch": 0.6997319034852547, + "loss": 0.2667866349220276, + "loss_ce": 0.022279784083366394, + "loss_iou": 1.012694001197815, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 315781056, + "step": 1827 + }, + { + "epoch": 0.7001148985063195, + "grad_norm": 23.609229512710332, + "learning_rate": 5e-06, + "loss": 0.2367, + "num_input_tokens_seen": 315954008, + "step": 1828 + }, + { + "epoch": 0.7001148985063195, + "loss": 0.20945066213607788, + "loss_ce": 0.020180631428956985, + "loss_iou": 1.009779453277588, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 315954008, + "step": 1828 + }, + { + "epoch": 0.7004978935273841, + "grad_norm": 61.88931626384124, + "learning_rate": 5e-06, + "loss": 0.2644, + "num_input_tokens_seen": 316126888, + "step": 1829 + }, + { + "epoch": 0.7004978935273841, + "loss": 0.2504710257053375, + "loss_ce": 0.020490553230047226, + "loss_iou": 1.0115447044372559, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 316126888, + "step": 1829 + }, + { + "epoch": 0.7008808885484489, + "grad_norm": 33.63699569721759, + "learning_rate": 5e-06, + "loss": 0.2755, + "num_input_tokens_seen": 316299888, + "step": 1830 + }, + { + "epoch": 0.7008808885484489, + "loss": 0.31528377532958984, + "loss_ce": 0.02097224071621895, + "loss_iou": 1.0388249158859253, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 316299888, + "step": 1830 + }, + { + "epoch": 0.7012638835695136, + "grad_norm": 36.98355302940735, + "learning_rate": 5e-06, + "loss": 0.3499, + "num_input_tokens_seen": 316473136, + "step": 1831 + }, + { + "epoch": 0.7012638835695136, + "loss": 0.40260884165763855, + "loss_ce": 0.023702584207057953, + "loss_iou": 1.1157183647155762, + "loss_num": 0.37890625, + "loss_xval": 0.37890625, + "num_input_tokens_seen": 316473136, + "step": 1831 + }, + { + "epoch": 0.7016468785905783, + "grad_norm": 56.72892788926891, + "learning_rate": 5e-06, + "loss": 0.2425, + "num_input_tokens_seen": 316645952, + "step": 1832 + }, + { + "epoch": 0.7016468785905783, + "loss": 0.22687043249607086, + "loss_ce": 0.020205387845635414, + "loss_iou": 1.0851229429244995, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 316645952, + "step": 1832 + }, + { + "epoch": 0.702029873611643, + "grad_norm": 64.42947426499191, + "learning_rate": 5e-06, + "loss": 0.3272, + "num_input_tokens_seen": 316818576, + "step": 1833 + }, + { + "epoch": 0.702029873611643, + "loss": 0.3310290575027466, + "loss_ce": 0.020970463752746582, + "loss_iou": 1.009353518486023, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 316818576, + "step": 1833 + }, + { + "epoch": 0.7024128686327078, + "grad_norm": 33.30955134153118, + "learning_rate": 5e-06, + "loss": 0.2896, + "num_input_tokens_seen": 316991216, + "step": 1834 + }, + { + "epoch": 0.7024128686327078, + "loss": 0.2690102458000183, + "loss_ce": 0.022061984986066818, + "loss_iou": 1.0346126556396484, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 316991216, + "step": 1834 + }, + { + "epoch": 0.7027958636537724, + "grad_norm": 45.28812474396357, + "learning_rate": 5e-06, + "loss": 0.3274, + "num_input_tokens_seen": 317164296, + "step": 1835 + }, + { + "epoch": 0.7027958636537724, + "loss": 0.3138014078140259, + "loss_ce": 0.021076807752251625, + "loss_iou": 1.0114707946777344, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 317164296, + "step": 1835 + }, + { + "epoch": 0.7031788586748372, + "grad_norm": 16.852578736279813, + "learning_rate": 5e-06, + "loss": 0.2851, + "num_input_tokens_seen": 317337200, + "step": 1836 + }, + { + "epoch": 0.7031788586748372, + "loss": 0.29935815930366516, + "loss_ce": 0.020427484065294266, + "loss_iou": 1.0910439491271973, + "loss_num": 0.279296875, + "loss_xval": 0.279296875, + "num_input_tokens_seen": 317337200, + "step": 1836 + }, + { + "epoch": 0.703561853695902, + "grad_norm": 41.01459174752276, + "learning_rate": 5e-06, + "loss": 0.1961, + "num_input_tokens_seen": 317510032, + "step": 1837 + }, + { + "epoch": 0.703561853695902, + "loss": 0.2029310017824173, + "loss_ce": 0.021595552563667297, + "loss_iou": 1.0131685733795166, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 317510032, + "step": 1837 + }, + { + "epoch": 0.7039448487169667, + "grad_norm": 41.26333970809996, + "learning_rate": 5e-06, + "loss": 0.2812, + "num_input_tokens_seen": 317683112, + "step": 1838 + }, + { + "epoch": 0.7039448487169667, + "loss": 0.2858147621154785, + "loss_ce": 0.021288391202688217, + "loss_iou": 1.279576063156128, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 317683112, + "step": 1838 + }, + { + "epoch": 0.7043278437380314, + "grad_norm": 32.12157863098567, + "learning_rate": 5e-06, + "loss": 0.2464, + "num_input_tokens_seen": 317856480, + "step": 1839 + }, + { + "epoch": 0.7043278437380314, + "loss": 0.1840820014476776, + "loss_ce": 0.020568817853927612, + "loss_iou": 0.8792381882667542, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 317856480, + "step": 1839 + }, + { + "epoch": 0.7047108387590961, + "grad_norm": 24.574644804661407, + "learning_rate": 5e-06, + "loss": 0.2033, + "num_input_tokens_seen": 318029536, + "step": 1840 + }, + { + "epoch": 0.7047108387590961, + "loss": 0.1571667194366455, + "loss_ce": 0.020936254411935806, + "loss_iou": 1.0036258697509766, + "loss_num": 0.13671875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 318029536, + "step": 1840 + }, + { + "epoch": 0.7050938337801609, + "grad_norm": 26.059741678743233, + "learning_rate": 5e-06, + "loss": 0.1947, + "num_input_tokens_seen": 318202800, + "step": 1841 + }, + { + "epoch": 0.7050938337801609, + "loss": 0.18011049926280975, + "loss_ce": 0.022456692531704903, + "loss_iou": 1.0436346530914307, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 318202800, + "step": 1841 + }, + { + "epoch": 0.7054768288012255, + "grad_norm": 33.16696966465949, + "learning_rate": 5e-06, + "loss": 0.17, + "num_input_tokens_seen": 318375920, + "step": 1842 + }, + { + "epoch": 0.7054768288012255, + "loss": 0.15799319744110107, + "loss_ce": 0.021640656515955925, + "loss_iou": 1.0010097026824951, + "loss_num": 0.13671875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 318375920, + "step": 1842 + }, + { + "epoch": 0.7058598238222903, + "grad_norm": 37.9307528576101, + "learning_rate": 5e-06, + "loss": 0.2035, + "num_input_tokens_seen": 318548752, + "step": 1843 + }, + { + "epoch": 0.7058598238222903, + "loss": 0.20799294114112854, + "loss_ce": 0.020737089216709137, + "loss_iou": 1.0322823524475098, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 318548752, + "step": 1843 + }, + { + "epoch": 0.706242818843355, + "grad_norm": 28.351593090408336, + "learning_rate": 5e-06, + "loss": 0.2193, + "num_input_tokens_seen": 318722048, + "step": 1844 + }, + { + "epoch": 0.706242818843355, + "loss": 0.24400952458381653, + "loss_ce": 0.020498784258961678, + "loss_iou": 1.0954004526138306, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 318722048, + "step": 1844 + }, + { + "epoch": 0.7066258138644198, + "grad_norm": 16.71906040148047, + "learning_rate": 5e-06, + "loss": 0.2042, + "num_input_tokens_seen": 318895464, + "step": 1845 + }, + { + "epoch": 0.7066258138644198, + "loss": 0.18017390370368958, + "loss_ce": 0.01830865815281868, + "loss_iou": 1.0080845355987549, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 318895464, + "step": 1845 + }, + { + "epoch": 0.7070088088854845, + "grad_norm": 25.977744715712003, + "learning_rate": 5e-06, + "loss": 0.2046, + "num_input_tokens_seen": 319068416, + "step": 1846 + }, + { + "epoch": 0.7070088088854845, + "loss": 0.25116094946861267, + "loss_ce": 0.020692206919193268, + "loss_iou": 1.0209722518920898, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 319068416, + "step": 1846 + }, + { + "epoch": 0.7073918039065492, + "grad_norm": 34.94968383525336, + "learning_rate": 5e-06, + "loss": 0.2244, + "num_input_tokens_seen": 319241712, + "step": 1847 + }, + { + "epoch": 0.7073918039065492, + "loss": 0.2732456922531128, + "loss_ce": 0.022208085283637047, + "loss_iou": 1.0894484519958496, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 319241712, + "step": 1847 + }, + { + "epoch": 0.707774798927614, + "grad_norm": 38.78415559673452, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 319414632, + "step": 1848 + }, + { + "epoch": 0.707774798927614, + "loss": 0.22200219333171844, + "loss_ce": 0.021074455231428146, + "loss_iou": 1.0549986362457275, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 319414632, + "step": 1848 + }, + { + "epoch": 0.7081577939486786, + "grad_norm": 37.47710799659906, + "learning_rate": 5e-06, + "loss": 0.19, + "num_input_tokens_seen": 319587448, + "step": 1849 + }, + { + "epoch": 0.7081577939486786, + "loss": 0.1949232965707779, + "loss_ce": 0.01975240930914879, + "loss_iou": 1.0029751062393188, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 319587448, + "step": 1849 + }, + { + "epoch": 0.7085407889697434, + "grad_norm": 24.402401517247196, + "learning_rate": 5e-06, + "loss": 0.2155, + "num_input_tokens_seen": 319760048, + "step": 1850 + }, + { + "epoch": 0.7085407889697434, + "loss": 0.17069105803966522, + "loss_ce": 0.02188735269010067, + "loss_iou": 1.0251952409744263, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 319760048, + "step": 1850 + }, + { + "epoch": 0.7089237839908081, + "grad_norm": 29.130151725933946, + "learning_rate": 5e-06, + "loss": 0.2035, + "num_input_tokens_seen": 319932864, + "step": 1851 + }, + { + "epoch": 0.7089237839908081, + "loss": 0.18228460848331451, + "loss_ce": 0.022067327052354813, + "loss_iou": 1.0114346742630005, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 319932864, + "step": 1851 + }, + { + "epoch": 0.7093067790118729, + "grad_norm": 45.23779309242795, + "learning_rate": 5e-06, + "loss": 0.2543, + "num_input_tokens_seen": 320105680, + "step": 1852 + }, + { + "epoch": 0.7093067790118729, + "loss": 0.24232217669487, + "loss_ce": 0.02186320349574089, + "loss_iou": 1.1184134483337402, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 320105680, + "step": 1852 + }, + { + "epoch": 0.7096897740329375, + "grad_norm": 33.19348338625306, + "learning_rate": 5e-06, + "loss": 0.2521, + "num_input_tokens_seen": 320278488, + "step": 1853 + }, + { + "epoch": 0.7096897740329375, + "loss": 0.2258143424987793, + "loss_ce": 0.021102434024214745, + "loss_iou": 1.023998737335205, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 320278488, + "step": 1853 + }, + { + "epoch": 0.7100727690540023, + "grad_norm": 30.516140167944577, + "learning_rate": 5e-06, + "loss": 0.1968, + "num_input_tokens_seen": 320451320, + "step": 1854 + }, + { + "epoch": 0.7100727690540023, + "loss": 0.23307326436042786, + "loss_ce": 0.020854037255048752, + "loss_iou": 1.1666905879974365, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 320451320, + "step": 1854 + }, + { + "epoch": 0.710455764075067, + "grad_norm": 29.262022286823285, + "learning_rate": 5e-06, + "loss": 0.2492, + "num_input_tokens_seen": 320624088, + "step": 1855 + }, + { + "epoch": 0.710455764075067, + "loss": 0.21036887168884277, + "loss_ce": 0.01987816020846367, + "loss_iou": 1.0528923273086548, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 320624088, + "step": 1855 + }, + { + "epoch": 0.7108387590961317, + "grad_norm": 22.6309814105481, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 320796304, + "step": 1856 + }, + { + "epoch": 0.7108387590961317, + "loss": 0.1989680528640747, + "loss_ce": 0.019402626901865005, + "loss_iou": 1.003929615020752, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 320796304, + "step": 1856 + }, + { + "epoch": 0.7112217541171965, + "grad_norm": 27.431224869874097, + "learning_rate": 5e-06, + "loss": 0.1652, + "num_input_tokens_seen": 320969272, + "step": 1857 + }, + { + "epoch": 0.7112217541171965, + "loss": 0.1824040412902832, + "loss_ce": 0.0225529782474041, + "loss_iou": 1.026893973350525, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 320969272, + "step": 1857 + }, + { + "epoch": 0.7116047491382612, + "grad_norm": 33.80062001300053, + "learning_rate": 5e-06, + "loss": 0.2347, + "num_input_tokens_seen": 321142200, + "step": 1858 + }, + { + "epoch": 0.7116047491382612, + "loss": 0.20400670170783997, + "loss_ce": 0.021511578932404518, + "loss_iou": 1.0098191499710083, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 321142200, + "step": 1858 + }, + { + "epoch": 0.711987744159326, + "grad_norm": 28.330625937192586, + "learning_rate": 5e-06, + "loss": 0.2163, + "num_input_tokens_seen": 321314720, + "step": 1859 + }, + { + "epoch": 0.711987744159326, + "loss": 0.2437838613986969, + "loss_ce": 0.020700372755527496, + "loss_iou": 1.153388500213623, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 321314720, + "step": 1859 + }, + { + "epoch": 0.7123707391803906, + "grad_norm": 40.836253157253, + "learning_rate": 5e-06, + "loss": 0.1979, + "num_input_tokens_seen": 321487200, + "step": 1860 + }, + { + "epoch": 0.7123707391803906, + "loss": 0.22483478486537933, + "loss_ce": 0.02140461653470993, + "loss_iou": 1.0260980129241943, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 321487200, + "step": 1860 + }, + { + "epoch": 0.7127537342014554, + "grad_norm": 30.395535199677283, + "learning_rate": 5e-06, + "loss": 0.2491, + "num_input_tokens_seen": 321659920, + "step": 1861 + }, + { + "epoch": 0.7127537342014554, + "loss": 0.2886390686035156, + "loss_ce": 0.02032851055264473, + "loss_iou": 1.3753141164779663, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 321659920, + "step": 1861 + }, + { + "epoch": 0.7131367292225201, + "grad_norm": 22.248818786729604, + "learning_rate": 5e-06, + "loss": 0.2034, + "num_input_tokens_seen": 321832856, + "step": 1862 + }, + { + "epoch": 0.7131367292225201, + "loss": 0.15091675519943237, + "loss_ce": 0.021400148048996925, + "loss_iou": 1.0252573490142822, + "loss_num": 0.1298828125, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 321832856, + "step": 1862 + }, + { + "epoch": 0.7135197242435848, + "grad_norm": 28.365412603846075, + "learning_rate": 5e-06, + "loss": 0.2122, + "num_input_tokens_seen": 322005928, + "step": 1863 + }, + { + "epoch": 0.7135197242435848, + "loss": 0.2460293173789978, + "loss_ce": 0.021419944241642952, + "loss_iou": 1.1003503799438477, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 322005928, + "step": 1863 + }, + { + "epoch": 0.7139027192646495, + "grad_norm": 28.21811543717518, + "learning_rate": 5e-06, + "loss": 0.2566, + "num_input_tokens_seen": 322179088, + "step": 1864 + }, + { + "epoch": 0.7139027192646495, + "loss": 0.2528541684150696, + "loss_ce": 0.02250749245285988, + "loss_iou": 3.2350127696990967, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 322179088, + "step": 1864 + }, + { + "epoch": 0.7142857142857143, + "grad_norm": 31.58919824994602, + "learning_rate": 5e-06, + "loss": 0.3049, + "num_input_tokens_seen": 322352536, + "step": 1865 + }, + { + "epoch": 0.7142857142857143, + "loss": 0.2377358078956604, + "loss_ce": 0.021976524963974953, + "loss_iou": 1.0187640190124512, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 322352536, + "step": 1865 + }, + { + "epoch": 0.7146687093067791, + "grad_norm": 23.940216788480484, + "learning_rate": 5e-06, + "loss": 0.1936, + "num_input_tokens_seen": 322525600, + "step": 1866 + }, + { + "epoch": 0.7146687093067791, + "loss": 0.17934811115264893, + "loss_ce": 0.02083982154726982, + "loss_iou": 1.0212841033935547, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 322525600, + "step": 1866 + }, + { + "epoch": 0.7150517043278437, + "grad_norm": 23.618919057048874, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 322698872, + "step": 1867 + }, + { + "epoch": 0.7150517043278437, + "loss": 0.20185860991477966, + "loss_ce": 0.021988004446029663, + "loss_iou": 1.0116420984268188, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 322698872, + "step": 1867 + }, + { + "epoch": 0.7154346993489085, + "grad_norm": 26.95212945883852, + "learning_rate": 5e-06, + "loss": 0.2307, + "num_input_tokens_seen": 322871712, + "step": 1868 + }, + { + "epoch": 0.7154346993489085, + "loss": 0.19863513112068176, + "loss_ce": 0.021999388933181763, + "loss_iou": 1.0071381330490112, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 322871712, + "step": 1868 + }, + { + "epoch": 0.7158176943699732, + "grad_norm": 36.59885840226958, + "learning_rate": 5e-06, + "loss": 0.1883, + "num_input_tokens_seen": 323044928, + "step": 1869 + }, + { + "epoch": 0.7158176943699732, + "loss": 0.17399346828460693, + "loss_ce": 0.020306944847106934, + "loss_iou": 1.0215654373168945, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 323044928, + "step": 1869 + }, + { + "epoch": 0.7162006893910379, + "grad_norm": 40.02371575368256, + "learning_rate": 5e-06, + "loss": 0.2284, + "num_input_tokens_seen": 323217896, + "step": 1870 + }, + { + "epoch": 0.7162006893910379, + "loss": 0.2157425433397293, + "loss_ce": 0.02000279538333416, + "loss_iou": 1.035560131072998, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 323217896, + "step": 1870 + }, + { + "epoch": 0.7165836844121026, + "grad_norm": 21.78368644754905, + "learning_rate": 5e-06, + "loss": 0.1449, + "num_input_tokens_seen": 323390552, + "step": 1871 + }, + { + "epoch": 0.7165836844121026, + "loss": 0.1488848626613617, + "loss_ce": 0.020039651542901993, + "loss_iou": 1.0301177501678467, + "loss_num": 0.12890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 323390552, + "step": 1871 + }, + { + "epoch": 0.7169666794331674, + "grad_norm": 30.65881465222365, + "learning_rate": 5e-06, + "loss": 0.2183, + "num_input_tokens_seen": 323563552, + "step": 1872 + }, + { + "epoch": 0.7169666794331674, + "loss": 0.17817732691764832, + "loss_ce": 0.021622147411108017, + "loss_iou": 1.0330085754394531, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 323563552, + "step": 1872 + }, + { + "epoch": 0.7173496744542321, + "grad_norm": 37.830110764306, + "learning_rate": 5e-06, + "loss": 0.2421, + "num_input_tokens_seen": 323736632, + "step": 1873 + }, + { + "epoch": 0.7173496744542321, + "loss": 0.2265729159116745, + "loss_ce": 0.021250661462545395, + "loss_iou": 0.9831060171127319, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 323736632, + "step": 1873 + }, + { + "epoch": 0.7177326694752968, + "grad_norm": 30.63227223544062, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 323909224, + "step": 1874 + }, + { + "epoch": 0.7177326694752968, + "loss": 0.2136291265487671, + "loss_ce": 0.021734602749347687, + "loss_iou": 1.0560487508773804, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 323909224, + "step": 1874 + }, + { + "epoch": 0.7181156644963615, + "grad_norm": 27.650254576505585, + "learning_rate": 5e-06, + "loss": 0.3444, + "num_input_tokens_seen": 324082240, + "step": 1875 + }, + { + "epoch": 0.7181156644963615, + "loss": 0.4103952646255493, + "loss_ce": 0.02208959311246872, + "loss_iou": 1.0925495624542236, + "loss_num": 0.388671875, + "loss_xval": 0.388671875, + "num_input_tokens_seen": 324082240, + "step": 1875 + }, + { + "epoch": 0.7184986595174263, + "grad_norm": 21.903677973358782, + "learning_rate": 5e-06, + "loss": 0.2315, + "num_input_tokens_seen": 324252064, + "step": 1876 + }, + { + "epoch": 0.7184986595174263, + "loss": 0.24588297307491302, + "loss_ce": 0.022738447412848473, + "loss_iou": 1.1731048822402954, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 324252064, + "step": 1876 + }, + { + "epoch": 0.718881654538491, + "grad_norm": 42.13102796256304, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 324425096, + "step": 1877 + }, + { + "epoch": 0.718881654538491, + "loss": 0.2502211928367615, + "loss_ce": 0.019874490797519684, + "loss_iou": 1.0776121616363525, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 324425096, + "step": 1877 + }, + { + "epoch": 0.7192646495595557, + "grad_norm": 34.698582698143234, + "learning_rate": 5e-06, + "loss": 0.1942, + "num_input_tokens_seen": 324597704, + "step": 1878 + }, + { + "epoch": 0.7192646495595557, + "loss": 0.2114652544260025, + "loss_ce": 0.019265543669462204, + "loss_iou": 1.0166583061218262, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 324597704, + "step": 1878 + }, + { + "epoch": 0.7196476445806205, + "grad_norm": 27.7722452880696, + "learning_rate": 5e-06, + "loss": 0.2278, + "num_input_tokens_seen": 324770808, + "step": 1879 + }, + { + "epoch": 0.7196476445806205, + "loss": 0.2010706663131714, + "loss_ce": 0.02144422009587288, + "loss_iou": 1.0378485918045044, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 324770808, + "step": 1879 + }, + { + "epoch": 0.7200306396016852, + "grad_norm": 25.019436539960555, + "learning_rate": 5e-06, + "loss": 0.1939, + "num_input_tokens_seen": 324943704, + "step": 1880 + }, + { + "epoch": 0.7200306396016852, + "loss": 0.14633317291736603, + "loss_ce": 0.021089037880301476, + "loss_iou": 1.0011621713638306, + "loss_num": 0.125, + "loss_xval": 0.125, + "num_input_tokens_seen": 324943704, + "step": 1880 + }, + { + "epoch": 0.7204136346227499, + "grad_norm": 39.26587788359916, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 325116648, + "step": 1881 + }, + { + "epoch": 0.7204136346227499, + "loss": 0.244556725025177, + "loss_ce": 0.020496664568781853, + "loss_iou": 1.0621919631958008, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 325116648, + "step": 1881 + }, + { + "epoch": 0.7207966296438146, + "grad_norm": 21.890345170337966, + "learning_rate": 5e-06, + "loss": 0.2042, + "num_input_tokens_seen": 325289192, + "step": 1882 + }, + { + "epoch": 0.7207966296438146, + "loss": 0.2093326300382614, + "loss_ce": 0.02018468827009201, + "loss_iou": 1.0757040977478027, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 325289192, + "step": 1882 + }, + { + "epoch": 0.7211796246648794, + "grad_norm": 26.583255122561585, + "learning_rate": 5e-06, + "loss": 0.1908, + "num_input_tokens_seen": 325462360, + "step": 1883 + }, + { + "epoch": 0.7211796246648794, + "loss": 0.21546167135238647, + "loss_ce": 0.022163324058055878, + "loss_iou": 1.0645512342453003, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 325462360, + "step": 1883 + }, + { + "epoch": 0.721562619685944, + "grad_norm": 29.33114335313516, + "learning_rate": 5e-06, + "loss": 0.236, + "num_input_tokens_seen": 325635792, + "step": 1884 + }, + { + "epoch": 0.721562619685944, + "loss": 0.3069278299808502, + "loss_ce": 0.020367780700325966, + "loss_iou": 1.0742062330245972, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 325635792, + "step": 1884 + }, + { + "epoch": 0.7219456147070088, + "grad_norm": 57.63312598078634, + "learning_rate": 5e-06, + "loss": 0.2272, + "num_input_tokens_seen": 325808752, + "step": 1885 + }, + { + "epoch": 0.7219456147070088, + "loss": 0.20381572842597961, + "loss_ce": 0.020405080169439316, + "loss_iou": 1.0015591382980347, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 325808752, + "step": 1885 + }, + { + "epoch": 0.7223286097280736, + "grad_norm": 19.83901232484663, + "learning_rate": 5e-06, + "loss": 0.2271, + "num_input_tokens_seen": 325981624, + "step": 1886 + }, + { + "epoch": 0.7223286097280736, + "loss": 0.17987343668937683, + "loss_ce": 0.01959512010216713, + "loss_iou": 1.0048749446868896, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 325981624, + "step": 1886 + }, + { + "epoch": 0.7227116047491383, + "grad_norm": 23.234290999388904, + "learning_rate": 5e-06, + "loss": 0.2187, + "num_input_tokens_seen": 326154704, + "step": 1887 + }, + { + "epoch": 0.7227116047491383, + "loss": 0.22305038571357727, + "loss_ce": 0.021939542144536972, + "loss_iou": 1.0524241924285889, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 326154704, + "step": 1887 + }, + { + "epoch": 0.723094599770203, + "grad_norm": 36.07468391266884, + "learning_rate": 5e-06, + "loss": 0.2834, + "num_input_tokens_seen": 326327992, + "step": 1888 + }, + { + "epoch": 0.723094599770203, + "loss": 0.3624667525291443, + "loss_ce": 0.020059533417224884, + "loss_iou": 2.5894272327423096, + "loss_num": 0.341796875, + "loss_xval": 0.341796875, + "num_input_tokens_seen": 326327992, + "step": 1888 + }, + { + "epoch": 0.7234775947912677, + "grad_norm": 37.85224542460828, + "learning_rate": 5e-06, + "loss": 0.232, + "num_input_tokens_seen": 326501080, + "step": 1889 + }, + { + "epoch": 0.7234775947912677, + "loss": 0.25145411491394043, + "loss_ce": 0.021900873631238937, + "loss_iou": 1.0371770858764648, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 326501080, + "step": 1889 + }, + { + "epoch": 0.7238605898123325, + "grad_norm": 29.64032047382248, + "learning_rate": 5e-06, + "loss": 0.1923, + "num_input_tokens_seen": 326674160, + "step": 1890 + }, + { + "epoch": 0.7238605898123325, + "loss": 0.17930102348327637, + "loss_ce": 0.019877180457115173, + "loss_iou": 1.0070887804031372, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 326674160, + "step": 1890 + }, + { + "epoch": 0.7242435848333971, + "grad_norm": 22.305280094687475, + "learning_rate": 5e-06, + "loss": 0.2314, + "num_input_tokens_seen": 326847016, + "step": 1891 + }, + { + "epoch": 0.7242435848333971, + "loss": 0.2490444779396057, + "loss_ce": 0.01973540522158146, + "loss_iou": 1.1225407123565674, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 326847016, + "step": 1891 + }, + { + "epoch": 0.7246265798544619, + "grad_norm": 13.35396760968828, + "learning_rate": 5e-06, + "loss": 0.2064, + "num_input_tokens_seen": 327019808, + "step": 1892 + }, + { + "epoch": 0.7246265798544619, + "loss": 0.2213289737701416, + "loss_ce": 0.0208895206451416, + "loss_iou": 1.1386830806732178, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 327019808, + "step": 1892 + }, + { + "epoch": 0.7250095748755266, + "grad_norm": 17.63824320302357, + "learning_rate": 5e-06, + "loss": 0.1918, + "num_input_tokens_seen": 327192560, + "step": 1893 + }, + { + "epoch": 0.7250095748755266, + "loss": 0.15080958604812622, + "loss_ce": 0.01982813887298107, + "loss_iou": 1.00714111328125, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 327192560, + "step": 1893 + }, + { + "epoch": 0.7253925698965914, + "grad_norm": 14.81202091255723, + "learning_rate": 5e-06, + "loss": 0.1812, + "num_input_tokens_seen": 327365376, + "step": 1894 + }, + { + "epoch": 0.7253925698965914, + "loss": 0.14532124996185303, + "loss_ce": 0.020229686051607132, + "loss_iou": 1.0008666515350342, + "loss_num": 0.125, + "loss_xval": 0.125, + "num_input_tokens_seen": 327365376, + "step": 1894 + }, + { + "epoch": 0.725775564917656, + "grad_norm": 32.98841455217743, + "learning_rate": 5e-06, + "loss": 0.1703, + "num_input_tokens_seen": 327538480, + "step": 1895 + }, + { + "epoch": 0.725775564917656, + "loss": 0.17655575275421143, + "loss_ce": 0.020641446113586426, + "loss_iou": 1.0238258838653564, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 327538480, + "step": 1895 + }, + { + "epoch": 0.7261585599387208, + "grad_norm": 27.088272969106512, + "learning_rate": 5e-06, + "loss": 0.2131, + "num_input_tokens_seen": 327711344, + "step": 1896 + }, + { + "epoch": 0.7261585599387208, + "loss": 0.2655426263809204, + "loss_ce": 0.019876133650541306, + "loss_iou": 1.0179781913757324, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 327711344, + "step": 1896 + }, + { + "epoch": 0.7265415549597856, + "grad_norm": 24.266018201396527, + "learning_rate": 5e-06, + "loss": 0.2471, + "num_input_tokens_seen": 327884464, + "step": 1897 + }, + { + "epoch": 0.7265415549597856, + "loss": 0.23851393163204193, + "loss_ce": 0.01976393163204193, + "loss_iou": 1.0578659772872925, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 327884464, + "step": 1897 + }, + { + "epoch": 0.7269245499808502, + "grad_norm": 32.69504654197707, + "learning_rate": 5e-06, + "loss": 0.2307, + "num_input_tokens_seen": 328057616, + "step": 1898 + }, + { + "epoch": 0.7269245499808502, + "loss": 0.28195953369140625, + "loss_ce": 0.02188875898718834, + "loss_iou": 1.0293887853622437, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 328057616, + "step": 1898 + }, + { + "epoch": 0.727307545001915, + "grad_norm": 31.398790680254006, + "learning_rate": 5e-06, + "loss": 0.2314, + "num_input_tokens_seen": 328230576, + "step": 1899 + }, + { + "epoch": 0.727307545001915, + "loss": 0.25293177366256714, + "loss_ce": 0.020387839525938034, + "loss_iou": 1.1067075729370117, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 328230576, + "step": 1899 + }, + { + "epoch": 0.7276905400229797, + "grad_norm": 31.319907783873976, + "learning_rate": 5e-06, + "loss": 0.2518, + "num_input_tokens_seen": 328403432, + "step": 1900 + }, + { + "epoch": 0.7276905400229797, + "loss": 0.23616424202919006, + "loss_ce": 0.02168671041727066, + "loss_iou": 1.0616157054901123, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 328403432, + "step": 1900 + }, + { + "epoch": 0.7280735350440444, + "grad_norm": 22.96159105155981, + "learning_rate": 5e-06, + "loss": 0.2045, + "num_input_tokens_seen": 328576248, + "step": 1901 + }, + { + "epoch": 0.7280735350440444, + "loss": 0.1811392605304718, + "loss_ce": 0.0214712955057621, + "loss_iou": 1.0072622299194336, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 328576248, + "step": 1901 + }, + { + "epoch": 0.7284565300651091, + "grad_norm": 20.793484682660253, + "learning_rate": 5e-06, + "loss": 0.2069, + "num_input_tokens_seen": 328748744, + "step": 1902 + }, + { + "epoch": 0.7284565300651091, + "loss": 0.22401505708694458, + "loss_ce": 0.019913481548428535, + "loss_iou": 1.0367412567138672, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 328748744, + "step": 1902 + }, + { + "epoch": 0.7288395250861739, + "grad_norm": 24.684177600987404, + "learning_rate": 5e-06, + "loss": 0.2624, + "num_input_tokens_seen": 328921688, + "step": 1903 + }, + { + "epoch": 0.7288395250861739, + "loss": 0.298135906457901, + "loss_ce": 0.022501137107610703, + "loss_iou": 1.0930315256118774, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 328921688, + "step": 1903 + }, + { + "epoch": 0.7292225201072386, + "grad_norm": 33.94493969385388, + "learning_rate": 5e-06, + "loss": 0.1772, + "num_input_tokens_seen": 329094792, + "step": 1904 + }, + { + "epoch": 0.7292225201072386, + "loss": 0.18944339454174042, + "loss_ce": 0.021596720442175865, + "loss_iou": 1.0274012088775635, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 329094792, + "step": 1904 + }, + { + "epoch": 0.7296055151283033, + "grad_norm": 39.59546038720797, + "learning_rate": 5e-06, + "loss": 0.2316, + "num_input_tokens_seen": 329267496, + "step": 1905 + }, + { + "epoch": 0.7296055151283033, + "loss": 0.21068190038204193, + "loss_ce": 0.02177809551358223, + "loss_iou": 1.0081746578216553, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 329267496, + "step": 1905 + }, + { + "epoch": 0.729988510149368, + "grad_norm": 43.32140474351385, + "learning_rate": 5e-06, + "loss": 0.284, + "num_input_tokens_seen": 329440488, + "step": 1906 + }, + { + "epoch": 0.729988510149368, + "loss": 0.23827320337295532, + "loss_ce": 0.02019459754228592, + "loss_iou": 1.0973848104476929, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 329440488, + "step": 1906 + }, + { + "epoch": 0.7303715051704328, + "grad_norm": 30.18629047529966, + "learning_rate": 5e-06, + "loss": 0.2078, + "num_input_tokens_seen": 329613352, + "step": 1907 + }, + { + "epoch": 0.7303715051704328, + "loss": 0.20912249386310577, + "loss_ce": 0.020279716700315475, + "loss_iou": 1.0031514167785645, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 329613352, + "step": 1907 + }, + { + "epoch": 0.7307545001914975, + "grad_norm": 18.667252988879593, + "learning_rate": 5e-06, + "loss": 0.2115, + "num_input_tokens_seen": 329786320, + "step": 1908 + }, + { + "epoch": 0.7307545001914975, + "loss": 0.19140136241912842, + "loss_ce": 0.020319821313023567, + "loss_iou": 1.0130813121795654, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 329786320, + "step": 1908 + }, + { + "epoch": 0.7311374952125622, + "grad_norm": 18.289709185667753, + "learning_rate": 5e-06, + "loss": 0.1699, + "num_input_tokens_seen": 329958960, + "step": 1909 + }, + { + "epoch": 0.7311374952125622, + "loss": 0.21079006791114807, + "loss_ce": 0.019933123141527176, + "loss_iou": 1.0034202337265015, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 329958960, + "step": 1909 + }, + { + "epoch": 0.731520490233627, + "grad_norm": 35.93743772055923, + "learning_rate": 5e-06, + "loss": 0.1935, + "num_input_tokens_seen": 330131696, + "step": 1910 + }, + { + "epoch": 0.731520490233627, + "loss": 0.1739504188299179, + "loss_ce": 0.01861594431102276, + "loss_iou": 0.8773469924926758, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 330131696, + "step": 1910 + }, + { + "epoch": 0.7319034852546917, + "grad_norm": 48.448821400523876, + "learning_rate": 5e-06, + "loss": 0.2859, + "num_input_tokens_seen": 330303048, + "step": 1911 + }, + { + "epoch": 0.7319034852546917, + "loss": 0.23737116158008575, + "loss_ce": 0.020085997879505157, + "loss_iou": 1.020316243171692, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 330303048, + "step": 1911 + }, + { + "epoch": 0.7322864802757564, + "grad_norm": 23.65524386702177, + "learning_rate": 5e-06, + "loss": 0.1487, + "num_input_tokens_seen": 330476160, + "step": 1912 + }, + { + "epoch": 0.7322864802757564, + "loss": 0.14482766389846802, + "loss_ce": 0.021628201007843018, + "loss_iou": 1.0011117458343506, + "loss_num": 0.123046875, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 330476160, + "step": 1912 + }, + { + "epoch": 0.7326694752968211, + "grad_norm": 50.27152919345598, + "learning_rate": 5e-06, + "loss": 0.2478, + "num_input_tokens_seen": 330649200, + "step": 1913 + }, + { + "epoch": 0.7326694752968211, + "loss": 0.28704240918159485, + "loss_ce": 0.022027764469385147, + "loss_iou": 1.0086801052093506, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 330649200, + "step": 1913 + }, + { + "epoch": 0.7330524703178859, + "grad_norm": 28.28467450339639, + "learning_rate": 5e-06, + "loss": 0.1824, + "num_input_tokens_seen": 330822152, + "step": 1914 + }, + { + "epoch": 0.7330524703178859, + "loss": 0.18633687496185303, + "loss_ce": 0.020565396174788475, + "loss_iou": 1.0290091037750244, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 330822152, + "step": 1914 + }, + { + "epoch": 0.7334354653389505, + "grad_norm": 74.82632620370264, + "learning_rate": 5e-06, + "loss": 0.3198, + "num_input_tokens_seen": 330995384, + "step": 1915 + }, + { + "epoch": 0.7334354653389505, + "loss": 0.3004084825515747, + "loss_ce": 0.01976882293820381, + "loss_iou": 1.0028400421142578, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 330995384, + "step": 1915 + }, + { + "epoch": 0.7338184603600153, + "grad_norm": 36.30834955459456, + "learning_rate": 5e-06, + "loss": 0.2722, + "num_input_tokens_seen": 331168296, + "step": 1916 + }, + { + "epoch": 0.7338184603600153, + "loss": 0.24254938960075378, + "loss_ce": 0.022334547713398933, + "loss_iou": 1.0026772022247314, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 331168296, + "step": 1916 + }, + { + "epoch": 0.7342014553810801, + "grad_norm": 29.075128758724365, + "learning_rate": 5e-06, + "loss": 0.2522, + "num_input_tokens_seen": 331341696, + "step": 1917 + }, + { + "epoch": 0.7342014553810801, + "loss": 0.25283104181289673, + "loss_ce": 0.021019525825977325, + "loss_iou": 1.015965461730957, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 331341696, + "step": 1917 + }, + { + "epoch": 0.7345844504021448, + "grad_norm": 51.62003870462262, + "learning_rate": 5e-06, + "loss": 0.2467, + "num_input_tokens_seen": 331514488, + "step": 1918 + }, + { + "epoch": 0.7345844504021448, + "loss": 0.23413214087486267, + "loss_ce": 0.02258429303765297, + "loss_iou": 1.0972702503204346, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 331514488, + "step": 1918 + }, + { + "epoch": 0.7349674454232095, + "grad_norm": 68.56112628782104, + "learning_rate": 5e-06, + "loss": 0.419, + "num_input_tokens_seen": 331687288, + "step": 1919 + }, + { + "epoch": 0.7349674454232095, + "loss": 0.40661534667015076, + "loss_ce": 0.01916416361927986, + "loss_iou": 0.9524319171905518, + "loss_num": 0.38671875, + "loss_xval": 0.38671875, + "num_input_tokens_seen": 331687288, + "step": 1919 + }, + { + "epoch": 0.7353504404442742, + "grad_norm": 30.10748045309126, + "learning_rate": 5e-06, + "loss": 0.2931, + "num_input_tokens_seen": 331860520, + "step": 1920 + }, + { + "epoch": 0.7353504404442742, + "loss": 0.3277454078197479, + "loss_ce": 0.022325461730360985, + "loss_iou": 1.275111436843872, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 331860520, + "step": 1920 + }, + { + "epoch": 0.735733435465339, + "grad_norm": 51.05229778968739, + "learning_rate": 5e-06, + "loss": 0.4039, + "num_input_tokens_seen": 332033792, + "step": 1921 + }, + { + "epoch": 0.735733435465339, + "loss": 0.37663334608078003, + "loss_ce": 0.022629449144005775, + "loss_iou": 1.0174347162246704, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 332033792, + "step": 1921 + }, + { + "epoch": 0.7361164304864036, + "grad_norm": 56.819231729650525, + "learning_rate": 5e-06, + "loss": 0.3581, + "num_input_tokens_seen": 332206664, + "step": 1922 + }, + { + "epoch": 0.7361164304864036, + "loss": 0.37505754828453064, + "loss_ce": 0.020443283021450043, + "loss_iou": 1.0403156280517578, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 332206664, + "step": 1922 + }, + { + "epoch": 0.7364994255074684, + "grad_norm": 67.59315787006065, + "learning_rate": 5e-06, + "loss": 0.5422, + "num_input_tokens_seen": 332379312, + "step": 1923 + }, + { + "epoch": 0.7364994255074684, + "loss": 0.4992562234401703, + "loss_ce": 0.02025231346487999, + "loss_iou": 1.0131621360778809, + "loss_num": 0.478515625, + "loss_xval": 0.478515625, + "num_input_tokens_seen": 332379312, + "step": 1923 + }, + { + "epoch": 0.7368824205285331, + "grad_norm": 41.83252474139575, + "learning_rate": 5e-06, + "loss": 0.3726, + "num_input_tokens_seen": 332552456, + "step": 1924 + }, + { + "epoch": 0.7368824205285331, + "loss": 0.3864128291606903, + "loss_ce": 0.021666740998625755, + "loss_iou": 0.998197615146637, + "loss_num": 0.365234375, + "loss_xval": 0.365234375, + "num_input_tokens_seen": 332552456, + "step": 1924 + }, + { + "epoch": 0.7372654155495979, + "grad_norm": 43.10657188034638, + "learning_rate": 5e-06, + "loss": 0.3381, + "num_input_tokens_seen": 332725344, + "step": 1925 + }, + { + "epoch": 0.7372654155495979, + "loss": 0.3305189609527588, + "loss_ce": 0.022779714316129684, + "loss_iou": 1.039806842803955, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 332725344, + "step": 1925 + }, + { + "epoch": 0.7376484105706625, + "grad_norm": 180.95190202755572, + "learning_rate": 5e-06, + "loss": 1.355, + "num_input_tokens_seen": 332898656, + "step": 1926 + }, + { + "epoch": 0.7376484105706625, + "loss": 1.3647723197937012, + "loss_ce": 0.021022357046604156, + "loss_iou": 1.2992215156555176, + "loss_num": 1.34375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 332898656, + "step": 1926 + }, + { + "epoch": 0.7380314055917273, + "grad_norm": 35.60054618008287, + "learning_rate": 5e-06, + "loss": 0.2432, + "num_input_tokens_seen": 333071768, + "step": 1927 + }, + { + "epoch": 0.7380314055917273, + "loss": 0.2595188021659851, + "loss_ce": 0.01904027909040451, + "loss_iou": 0.883613109588623, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 333071768, + "step": 1927 + }, + { + "epoch": 0.7384144006127921, + "grad_norm": 109.72271360750891, + "learning_rate": 5e-06, + "loss": 0.6054, + "num_input_tokens_seen": 333245000, + "step": 1928 + }, + { + "epoch": 0.7384144006127921, + "loss": 0.6654654741287231, + "loss_ce": 0.020445970818400383, + "loss_iou": 1.1310943365097046, + "loss_num": 0.64453125, + "loss_xval": 0.64453125, + "num_input_tokens_seen": 333245000, + "step": 1928 + }, + { + "epoch": 0.7387973956338567, + "grad_norm": 82.77122485843303, + "learning_rate": 5e-06, + "loss": 0.7423, + "num_input_tokens_seen": 333418072, + "step": 1929 + }, + { + "epoch": 0.7387973956338567, + "loss": 0.7344993948936462, + "loss_ce": 0.021120460703969002, + "loss_iou": 1.038887619972229, + "loss_num": 0.71484375, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 333418072, + "step": 1929 + }, + { + "epoch": 0.7391803906549215, + "grad_norm": 23.992720364107736, + "learning_rate": 5e-06, + "loss": 0.3149, + "num_input_tokens_seen": 333591104, + "step": 1930 + }, + { + "epoch": 0.7391803906549215, + "loss": 0.3694052994251251, + "loss_ce": 0.021016621962189674, + "loss_iou": 1.0462157726287842, + "loss_num": 0.34765625, + "loss_xval": 0.34765625, + "num_input_tokens_seen": 333591104, + "step": 1930 + }, + { + "epoch": 0.7395633856759862, + "grad_norm": 33.174950142789754, + "learning_rate": 5e-06, + "loss": 0.3172, + "num_input_tokens_seen": 333763832, + "step": 1931 + }, + { + "epoch": 0.7395633856759862, + "loss": 0.3304111957550049, + "loss_ce": 0.02010846883058548, + "loss_iou": 1.093118667602539, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 333763832, + "step": 1931 + }, + { + "epoch": 0.739946380697051, + "grad_norm": 27.50218950073205, + "learning_rate": 5e-06, + "loss": 0.2231, + "num_input_tokens_seen": 333936968, + "step": 1932 + }, + { + "epoch": 0.739946380697051, + "loss": 0.25011956691741943, + "loss_ce": 0.022702565416693687, + "loss_iou": 1.086707592010498, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 333936968, + "step": 1932 + }, + { + "epoch": 0.7403293757181156, + "grad_norm": 26.609634414458018, + "learning_rate": 5e-06, + "loss": 0.2598, + "num_input_tokens_seen": 334110008, + "step": 1933 + }, + { + "epoch": 0.7403293757181156, + "loss": 0.19299322366714478, + "loss_ce": 0.022277887910604477, + "loss_iou": 1.0121299028396606, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 334110008, + "step": 1933 + }, + { + "epoch": 0.7407123707391804, + "grad_norm": 36.71801216282503, + "learning_rate": 5e-06, + "loss": 0.2352, + "num_input_tokens_seen": 334283192, + "step": 1934 + }, + { + "epoch": 0.7407123707391804, + "loss": 0.29141005873680115, + "loss_ce": 0.021146384999155998, + "loss_iou": 1.1183552742004395, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 334283192, + "step": 1934 + }, + { + "epoch": 0.7410953657602452, + "grad_norm": 48.04796533209979, + "learning_rate": 5e-06, + "loss": 0.2181, + "num_input_tokens_seen": 334456104, + "step": 1935 + }, + { + "epoch": 0.7410953657602452, + "loss": 0.199800044298172, + "loss_ce": 0.022004637867212296, + "loss_iou": 1.0481548309326172, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 334456104, + "step": 1935 + }, + { + "epoch": 0.7414783607813098, + "grad_norm": 28.96683163057016, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 334629256, + "step": 1936 + }, + { + "epoch": 0.7414783607813098, + "loss": 0.2581450939178467, + "loss_ce": 0.021939020603895187, + "loss_iou": 1.0524083375930786, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 334629256, + "step": 1936 + }, + { + "epoch": 0.7418613558023746, + "grad_norm": 23.239962749653266, + "learning_rate": 5e-06, + "loss": 0.1837, + "num_input_tokens_seen": 334802328, + "step": 1937 + }, + { + "epoch": 0.7418613558023746, + "loss": 0.1810568869113922, + "loss_ce": 0.02321997843682766, + "loss_iou": 1.0080535411834717, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 334802328, + "step": 1937 + }, + { + "epoch": 0.7422443508234393, + "grad_norm": 22.8872095408136, + "learning_rate": 5e-06, + "loss": 0.1751, + "num_input_tokens_seen": 334975168, + "step": 1938 + }, + { + "epoch": 0.7422443508234393, + "loss": 0.17818833887577057, + "loss_ce": 0.02047349140048027, + "loss_iou": 1.0368616580963135, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 334975168, + "step": 1938 + }, + { + "epoch": 0.7426273458445041, + "grad_norm": 27.716447217111558, + "learning_rate": 5e-06, + "loss": 0.1666, + "num_input_tokens_seen": 335144456, + "step": 1939 + }, + { + "epoch": 0.7426273458445041, + "loss": 0.17497143149375916, + "loss_ce": 0.020552489906549454, + "loss_iou": 1.0021817684173584, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 335144456, + "step": 1939 + }, + { + "epoch": 0.7430103408655687, + "grad_norm": 25.457089685695703, + "learning_rate": 5e-06, + "loss": 0.2083, + "num_input_tokens_seen": 335317368, + "step": 1940 + }, + { + "epoch": 0.7430103408655687, + "loss": 0.18655753135681152, + "loss_ce": 0.02017568051815033, + "loss_iou": 1.0007985830307007, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 335317368, + "step": 1940 + }, + { + "epoch": 0.7433933358866335, + "grad_norm": 29.218416777853236, + "learning_rate": 5e-06, + "loss": 0.3297, + "num_input_tokens_seen": 335490392, + "step": 1941 + }, + { + "epoch": 0.7433933358866335, + "loss": 0.3510556221008301, + "loss_ce": 0.021038515493273735, + "loss_iou": 1.0548796653747559, + "loss_num": 0.330078125, + "loss_xval": 0.330078125, + "num_input_tokens_seen": 335490392, + "step": 1941 + }, + { + "epoch": 0.7437763309076982, + "grad_norm": 26.701328298012317, + "learning_rate": 5e-06, + "loss": 0.1655, + "num_input_tokens_seen": 335663552, + "step": 1942 + }, + { + "epoch": 0.7437763309076982, + "loss": 0.13223397731781006, + "loss_ce": 0.02029549330472946, + "loss_iou": 1.0102285146713257, + "loss_num": 0.11181640625, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 335663552, + "step": 1942 + }, + { + "epoch": 0.7441593259287629, + "grad_norm": 32.80138127728905, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 335836368, + "step": 1943 + }, + { + "epoch": 0.7441593259287629, + "loss": 0.2144605815410614, + "loss_ce": 0.021040182560682297, + "loss_iou": 1.0209829807281494, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 335836368, + "step": 1943 + }, + { + "epoch": 0.7445423209498276, + "grad_norm": 25.188772203269707, + "learning_rate": 5e-06, + "loss": 0.2231, + "num_input_tokens_seen": 336009616, + "step": 1944 + }, + { + "epoch": 0.7445423209498276, + "loss": 0.2847317159175873, + "loss_ce": 0.021670211106538773, + "loss_iou": 1.0225014686584473, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 336009616, + "step": 1944 + }, + { + "epoch": 0.7449253159708924, + "grad_norm": 27.33898594322862, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 336182800, + "step": 1945 + }, + { + "epoch": 0.7449253159708924, + "loss": 0.18002618849277496, + "loss_ce": 0.022982735186815262, + "loss_iou": 1.0188391208648682, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 336182800, + "step": 1945 + }, + { + "epoch": 0.7453083109919572, + "grad_norm": 22.219886110299534, + "learning_rate": 5e-06, + "loss": 0.2346, + "num_input_tokens_seen": 336355760, + "step": 1946 + }, + { + "epoch": 0.7453083109919572, + "loss": 0.25230154395103455, + "loss_ce": 0.020367953926324844, + "loss_iou": 1.0885792970657349, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 336355760, + "step": 1946 + }, + { + "epoch": 0.7456913060130218, + "grad_norm": 19.908992415199794, + "learning_rate": 5e-06, + "loss": 0.1841, + "num_input_tokens_seen": 336528496, + "step": 1947 + }, + { + "epoch": 0.7456913060130218, + "loss": 0.22517076134681702, + "loss_ce": 0.021404892206192017, + "loss_iou": 1.009037971496582, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 336528496, + "step": 1947 + }, + { + "epoch": 0.7460743010340866, + "grad_norm": 19.781945656679195, + "learning_rate": 5e-06, + "loss": 0.1946, + "num_input_tokens_seen": 336701264, + "step": 1948 + }, + { + "epoch": 0.7460743010340866, + "loss": 0.2301415503025055, + "loss_ce": 0.021096128970384598, + "loss_iou": 3.1631762981414795, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 336701264, + "step": 1948 + }, + { + "epoch": 0.7464572960551513, + "grad_norm": 29.355093963100664, + "learning_rate": 5e-06, + "loss": 0.2014, + "num_input_tokens_seen": 336874368, + "step": 1949 + }, + { + "epoch": 0.7464572960551513, + "loss": 0.1448083519935608, + "loss_ce": 0.02145630121231079, + "loss_iou": 1.012711524963379, + "loss_num": 0.12353515625, + "loss_xval": 0.12353515625, + "num_input_tokens_seen": 336874368, + "step": 1949 + }, + { + "epoch": 0.746840291076216, + "grad_norm": 26.855786936722488, + "learning_rate": 5e-06, + "loss": 0.2183, + "num_input_tokens_seen": 337047448, + "step": 1950 + }, + { + "epoch": 0.746840291076216, + "loss": 0.21372899413108826, + "loss_ce": 0.021102052181959152, + "loss_iou": 1.0027873516082764, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 337047448, + "step": 1950 + }, + { + "epoch": 0.7472232860972807, + "grad_norm": 29.78256819336044, + "learning_rate": 5e-06, + "loss": 0.2189, + "num_input_tokens_seen": 337220720, + "step": 1951 + }, + { + "epoch": 0.7472232860972807, + "loss": 0.22373650968074799, + "loss_ce": 0.02109978348016739, + "loss_iou": 1.032132863998413, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 337220720, + "step": 1951 + }, + { + "epoch": 0.7476062811183455, + "grad_norm": 29.64222327377733, + "learning_rate": 5e-06, + "loss": 0.2249, + "num_input_tokens_seen": 337394200, + "step": 1952 + }, + { + "epoch": 0.7476062811183455, + "loss": 0.2393241822719574, + "loss_ce": 0.019597632810473442, + "loss_iou": 1.1529085636138916, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 337394200, + "step": 1952 + }, + { + "epoch": 0.7479892761394102, + "grad_norm": 29.486507672186317, + "learning_rate": 5e-06, + "loss": 0.3001, + "num_input_tokens_seen": 337567312, + "step": 1953 + }, + { + "epoch": 0.7479892761394102, + "loss": 0.2638113796710968, + "loss_ce": 0.02296665497124195, + "loss_iou": 3.5116450786590576, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 337567312, + "step": 1953 + }, + { + "epoch": 0.7483722711604749, + "grad_norm": 23.535065054104823, + "learning_rate": 5e-06, + "loss": 0.1708, + "num_input_tokens_seen": 337740152, + "step": 1954 + }, + { + "epoch": 0.7483722711604749, + "loss": 0.1761426478624344, + "loss_ce": 0.021296454593539238, + "loss_iou": 1.1424171924591064, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 337740152, + "step": 1954 + }, + { + "epoch": 0.7487552661815396, + "grad_norm": 21.769023134524247, + "learning_rate": 5e-06, + "loss": 0.1987, + "num_input_tokens_seen": 337912816, + "step": 1955 + }, + { + "epoch": 0.7487552661815396, + "loss": 0.23964551091194153, + "loss_ce": 0.01961377263069153, + "loss_iou": 2.957237720489502, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 337912816, + "step": 1955 + }, + { + "epoch": 0.7491382612026044, + "grad_norm": 14.491886493471908, + "learning_rate": 5e-06, + "loss": 0.208, + "num_input_tokens_seen": 338082056, + "step": 1956 + }, + { + "epoch": 0.7491382612026044, + "loss": 0.2334989309310913, + "loss_ce": 0.023110752925276756, + "loss_iou": 1.1820414066314697, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 338082056, + "step": 1956 + }, + { + "epoch": 0.749521256223669, + "grad_norm": 22.076057577851937, + "learning_rate": 5e-06, + "loss": 0.1821, + "num_input_tokens_seen": 338255256, + "step": 1957 + }, + { + "epoch": 0.749521256223669, + "loss": 0.1340140700340271, + "loss_ce": 0.020946452394127846, + "loss_iou": 1.0144014358520508, + "loss_num": 0.11328125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 338255256, + "step": 1957 + }, + { + "epoch": 0.7499042512447338, + "grad_norm": 24.821614162565513, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 338428080, + "step": 1958 + }, + { + "epoch": 0.7499042512447338, + "loss": 0.16101813316345215, + "loss_ce": 0.020942451432347298, + "loss_iou": 1.0032565593719482, + "loss_num": 0.1396484375, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 338428080, + "step": 1958 + }, + { + "epoch": 0.7502872462657986, + "grad_norm": 20.563365404112613, + "learning_rate": 5e-06, + "loss": 0.2758, + "num_input_tokens_seen": 338597520, + "step": 1959 + }, + { + "epoch": 0.7502872462657986, + "loss": 0.25606077909469604, + "loss_ce": 0.0215636994689703, + "loss_iou": 1.018204927444458, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 338597520, + "step": 1959 + }, + { + "epoch": 0.7506702412868632, + "grad_norm": 21.817141799337517, + "learning_rate": 5e-06, + "loss": 0.2089, + "num_input_tokens_seen": 338770312, + "step": 1960 + }, + { + "epoch": 0.7506702412868632, + "loss": 0.232526034116745, + "loss_ce": 0.020245764404535294, + "loss_iou": 1.0887852907180786, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 338770312, + "step": 1960 + }, + { + "epoch": 0.751053236307928, + "grad_norm": 29.991984152111403, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 338943232, + "step": 1961 + }, + { + "epoch": 0.751053236307928, + "loss": 0.17735590040683746, + "loss_ce": 0.021227972581982613, + "loss_iou": 1.0765886306762695, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 338943232, + "step": 1961 + }, + { + "epoch": 0.7514362313289927, + "grad_norm": 23.697245978079234, + "learning_rate": 5e-06, + "loss": 0.1825, + "num_input_tokens_seen": 339116448, + "step": 1962 + }, + { + "epoch": 0.7514362313289927, + "loss": 0.15435320138931274, + "loss_ce": 0.021967953070998192, + "loss_iou": 1.001384973526001, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 339116448, + "step": 1962 + }, + { + "epoch": 0.7518192263500575, + "grad_norm": 18.029364515237333, + "learning_rate": 5e-06, + "loss": 0.1804, + "num_input_tokens_seen": 339289328, + "step": 1963 + }, + { + "epoch": 0.7518192263500575, + "loss": 0.13389170169830322, + "loss_ce": 0.01783336140215397, + "loss_iou": 1.0064783096313477, + "loss_num": 0.1162109375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 339289328, + "step": 1963 + }, + { + "epoch": 0.7522022213711221, + "grad_norm": 17.664782598935876, + "learning_rate": 5e-06, + "loss": 0.18, + "num_input_tokens_seen": 339462496, + "step": 1964 + }, + { + "epoch": 0.7522022213711221, + "loss": 0.17858535051345825, + "loss_ce": 0.02080945298075676, + "loss_iou": 1.0110020637512207, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 339462496, + "step": 1964 + }, + { + "epoch": 0.7525852163921869, + "grad_norm": 28.50489486344704, + "learning_rate": 5e-06, + "loss": 0.2123, + "num_input_tokens_seen": 339635312, + "step": 1965 + }, + { + "epoch": 0.7525852163921869, + "loss": 0.2552037537097931, + "loss_ce": 0.020035291090607643, + "loss_iou": 1.068601131439209, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 339635312, + "step": 1965 + }, + { + "epoch": 0.7529682114132517, + "grad_norm": 25.976001639195506, + "learning_rate": 5e-06, + "loss": 0.2451, + "num_input_tokens_seen": 339808200, + "step": 1966 + }, + { + "epoch": 0.7529682114132517, + "loss": 0.2603006064891815, + "loss_ce": 0.019211728125810623, + "loss_iou": 1.0250359773635864, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 339808200, + "step": 1966 + }, + { + "epoch": 0.7533512064343163, + "grad_norm": 37.32805644716445, + "learning_rate": 5e-06, + "loss": 0.2289, + "num_input_tokens_seen": 339981232, + "step": 1967 + }, + { + "epoch": 0.7533512064343163, + "loss": 0.23148807883262634, + "loss_ce": 0.021282989531755447, + "loss_iou": 1.006373643875122, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 339981232, + "step": 1967 + }, + { + "epoch": 0.7537342014553811, + "grad_norm": 26.530237429389697, + "learning_rate": 5e-06, + "loss": 0.2303, + "num_input_tokens_seen": 340154240, + "step": 1968 + }, + { + "epoch": 0.7537342014553811, + "loss": 0.24449728429317474, + "loss_ce": 0.021657921373844147, + "loss_iou": 1.009596347808838, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 340154240, + "step": 1968 + }, + { + "epoch": 0.7541171964764458, + "grad_norm": 24.563041510666604, + "learning_rate": 5e-06, + "loss": 0.2157, + "num_input_tokens_seen": 340327520, + "step": 1969 + }, + { + "epoch": 0.7541171964764458, + "loss": 0.20339317619800568, + "loss_ce": 0.020348750054836273, + "loss_iou": 1.0349680185317993, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 340327520, + "step": 1969 + }, + { + "epoch": 0.7545001914975106, + "grad_norm": 26.82092541960977, + "learning_rate": 5e-06, + "loss": 0.1736, + "num_input_tokens_seen": 340500296, + "step": 1970 + }, + { + "epoch": 0.7545001914975106, + "loss": 0.19258993864059448, + "loss_ce": 0.020592864602804184, + "loss_iou": 1.0155099630355835, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 340500296, + "step": 1970 + }, + { + "epoch": 0.7548831865185752, + "grad_norm": 16.375679650085974, + "learning_rate": 5e-06, + "loss": 0.246, + "num_input_tokens_seen": 340673192, + "step": 1971 + }, + { + "epoch": 0.7548831865185752, + "loss": 0.24143922328948975, + "loss_ce": 0.018477799370884895, + "loss_iou": 1.0040318965911865, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 340673192, + "step": 1971 + }, + { + "epoch": 0.75526618153964, + "grad_norm": 17.621575018072924, + "learning_rate": 5e-06, + "loss": 0.1851, + "num_input_tokens_seen": 340846432, + "step": 1972 + }, + { + "epoch": 0.75526618153964, + "loss": 0.1742793172597885, + "loss_ce": 0.020531758666038513, + "loss_iou": 1.0360504388809204, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 340846432, + "step": 1972 + }, + { + "epoch": 0.7556491765607047, + "grad_norm": 16.324200281366345, + "learning_rate": 5e-06, + "loss": 0.1853, + "num_input_tokens_seen": 341019616, + "step": 1973 + }, + { + "epoch": 0.7556491765607047, + "loss": 0.17437125742435455, + "loss_ce": 0.01989128440618515, + "loss_iou": 1.0057682991027832, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 341019616, + "step": 1973 + }, + { + "epoch": 0.7560321715817694, + "grad_norm": 24.37760011723323, + "learning_rate": 5e-06, + "loss": 0.146, + "num_input_tokens_seen": 341192336, + "step": 1974 + }, + { + "epoch": 0.7560321715817694, + "loss": 0.13626563549041748, + "loss_ce": 0.021305926144123077, + "loss_iou": 1.0007812976837158, + "loss_num": 0.11474609375, + "loss_xval": 0.11474609375, + "num_input_tokens_seen": 341192336, + "step": 1974 + }, + { + "epoch": 0.7564151666028341, + "grad_norm": 32.72219329255361, + "learning_rate": 5e-06, + "loss": 0.2585, + "num_input_tokens_seen": 341365312, + "step": 1975 + }, + { + "epoch": 0.7564151666028341, + "loss": 0.23727071285247803, + "loss_ce": 0.020168669521808624, + "loss_iou": 1.013498306274414, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 341365312, + "step": 1975 + }, + { + "epoch": 0.7567981616238989, + "grad_norm": 31.79542505969276, + "learning_rate": 5e-06, + "loss": 0.2152, + "num_input_tokens_seen": 341538392, + "step": 1976 + }, + { + "epoch": 0.7567981616238989, + "loss": 0.2574518322944641, + "loss_ce": 0.020757492631673813, + "loss_iou": 1.0479531288146973, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 341538392, + "step": 1976 + }, + { + "epoch": 0.7571811566449637, + "grad_norm": 25.08359959058798, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 341711288, + "step": 1977 + }, + { + "epoch": 0.7571811566449637, + "loss": 0.20602668821811676, + "loss_ce": 0.020907070487737656, + "loss_iou": 1.0533559322357178, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 341711288, + "step": 1977 + }, + { + "epoch": 0.7575641516660283, + "grad_norm": 32.50815703718899, + "learning_rate": 5e-06, + "loss": 0.2079, + "num_input_tokens_seen": 341884544, + "step": 1978 + }, + { + "epoch": 0.7575641516660283, + "loss": 0.24934254586696625, + "loss_ce": 0.02253590151667595, + "loss_iou": 1.0637845993041992, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 341884544, + "step": 1978 + }, + { + "epoch": 0.7579471466870931, + "grad_norm": 43.25528112245329, + "learning_rate": 5e-06, + "loss": 0.2784, + "num_input_tokens_seen": 342057504, + "step": 1979 + }, + { + "epoch": 0.7579471466870931, + "loss": 0.3320387005805969, + "loss_ce": 0.022468380630016327, + "loss_iou": 1.1759567260742188, + "loss_num": 0.30859375, + "loss_xval": 0.30859375, + "num_input_tokens_seen": 342057504, + "step": 1979 + }, + { + "epoch": 0.7583301417081578, + "grad_norm": 24.578675392235837, + "learning_rate": 5e-06, + "loss": 0.1512, + "num_input_tokens_seen": 342230272, + "step": 1980 + }, + { + "epoch": 0.7583301417081578, + "loss": 0.14127089083194733, + "loss_ce": 0.021519910544157028, + "loss_iou": 1.013789415359497, + "loss_num": 0.11962890625, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 342230272, + "step": 1980 + }, + { + "epoch": 0.7587131367292225, + "grad_norm": 34.95370956751587, + "learning_rate": 5e-06, + "loss": 0.2554, + "num_input_tokens_seen": 342403064, + "step": 1981 + }, + { + "epoch": 0.7587131367292225, + "loss": 0.2870115637779236, + "loss_ce": 0.021996907889842987, + "loss_iou": 1.2670702934265137, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 342403064, + "step": 1981 + }, + { + "epoch": 0.7590961317502872, + "grad_norm": 33.95058763744049, + "learning_rate": 5e-06, + "loss": 0.1714, + "num_input_tokens_seen": 342576152, + "step": 1982 + }, + { + "epoch": 0.7590961317502872, + "loss": 0.1686820089817047, + "loss_ce": 0.020366588607430458, + "loss_iou": 1.0044770240783691, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 342576152, + "step": 1982 + }, + { + "epoch": 0.759479126771352, + "grad_norm": 37.854718918510926, + "learning_rate": 5e-06, + "loss": 0.1765, + "num_input_tokens_seen": 342748848, + "step": 1983 + }, + { + "epoch": 0.759479126771352, + "loss": 0.18910756707191467, + "loss_ce": 0.020467441529035568, + "loss_iou": 1.018295407295227, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 342748848, + "step": 1983 + }, + { + "epoch": 0.7598621217924167, + "grad_norm": 42.20493584403363, + "learning_rate": 5e-06, + "loss": 0.1897, + "num_input_tokens_seen": 342921960, + "step": 1984 + }, + { + "epoch": 0.7598621217924167, + "loss": 0.17905503511428833, + "loss_ce": 0.02109605446457863, + "loss_iou": 1.0033435821533203, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 342921960, + "step": 1984 + }, + { + "epoch": 0.7602451168134814, + "grad_norm": 34.01835564223233, + "learning_rate": 5e-06, + "loss": 0.2467, + "num_input_tokens_seen": 343094824, + "step": 1985 + }, + { + "epoch": 0.7602451168134814, + "loss": 0.27467986941337585, + "loss_ce": 0.021506035700440407, + "loss_iou": 1.085257649421692, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 343094824, + "step": 1985 + }, + { + "epoch": 0.7606281118345462, + "grad_norm": 35.93022189793799, + "learning_rate": 5e-06, + "loss": 0.2204, + "num_input_tokens_seen": 343268144, + "step": 1986 + }, + { + "epoch": 0.7606281118345462, + "loss": 0.26171666383743286, + "loss_ce": 0.022641940042376518, + "loss_iou": 1.0948108434677124, + "loss_num": 0.2392578125, + "loss_xval": 0.2392578125, + "num_input_tokens_seen": 343268144, + "step": 1986 + }, + { + "epoch": 0.7610111068556109, + "grad_norm": 28.91457837451079, + "learning_rate": 5e-06, + "loss": 0.2297, + "num_input_tokens_seen": 343440912, + "step": 1987 + }, + { + "epoch": 0.7610111068556109, + "loss": 0.22892248630523682, + "loss_ce": 0.021036745980381966, + "loss_iou": 1.0305060148239136, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 343440912, + "step": 1987 + }, + { + "epoch": 0.7613941018766756, + "grad_norm": 24.05486499303163, + "learning_rate": 5e-06, + "loss": 0.1849, + "num_input_tokens_seen": 343613736, + "step": 1988 + }, + { + "epoch": 0.7613941018766756, + "loss": 0.22147849202156067, + "loss_ce": 0.02244284562766552, + "loss_iou": 1.0540740489959717, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 343613736, + "step": 1988 + }, + { + "epoch": 0.7617770968977403, + "grad_norm": 29.733899839657322, + "learning_rate": 5e-06, + "loss": 0.2641, + "num_input_tokens_seen": 343786608, + "step": 1989 + }, + { + "epoch": 0.7617770968977403, + "loss": 0.28331613540649414, + "loss_ce": 0.021353235468268394, + "loss_iou": 1.0573334693908691, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 343786608, + "step": 1989 + }, + { + "epoch": 0.7621600919188051, + "grad_norm": 27.944995791783562, + "learning_rate": 5e-06, + "loss": 0.17, + "num_input_tokens_seen": 343959224, + "step": 1990 + }, + { + "epoch": 0.7621600919188051, + "loss": 0.14480268955230713, + "loss_ce": 0.020901326090097427, + "loss_iou": 1.010115146636963, + "loss_num": 0.1240234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 343959224, + "step": 1990 + }, + { + "epoch": 0.7625430869398698, + "grad_norm": 21.727588820142955, + "learning_rate": 5e-06, + "loss": 0.1742, + "num_input_tokens_seen": 344132128, + "step": 1991 + }, + { + "epoch": 0.7625430869398698, + "loss": 0.16735507547855377, + "loss_ce": 0.02123691514134407, + "loss_iou": 1.010033369064331, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 344132128, + "step": 1991 + }, + { + "epoch": 0.7629260819609345, + "grad_norm": 22.119752668454986, + "learning_rate": 5e-06, + "loss": 0.163, + "num_input_tokens_seen": 344305016, + "step": 1992 + }, + { + "epoch": 0.7629260819609345, + "loss": 0.20107056200504303, + "loss_ce": 0.02193237468600273, + "loss_iou": 1.1044776439666748, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 344305016, + "step": 1992 + }, + { + "epoch": 0.7633090769819992, + "grad_norm": 25.744499024295337, + "learning_rate": 5e-06, + "loss": 0.2258, + "num_input_tokens_seen": 344478160, + "step": 1993 + }, + { + "epoch": 0.7633090769819992, + "loss": 0.23733490705490112, + "loss_ce": 0.02200287953019142, + "loss_iou": 1.0133190155029297, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 344478160, + "step": 1993 + }, + { + "epoch": 0.763692072003064, + "grad_norm": 19.137289495522907, + "learning_rate": 5e-06, + "loss": 0.2101, + "num_input_tokens_seen": 344651520, + "step": 1994 + }, + { + "epoch": 0.763692072003064, + "loss": 0.19120462238788605, + "loss_ce": 0.021893102675676346, + "loss_iou": 1.0239546298980713, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 344651520, + "step": 1994 + }, + { + "epoch": 0.7640750670241286, + "grad_norm": 27.315501159437055, + "learning_rate": 5e-06, + "loss": 0.241, + "num_input_tokens_seen": 344824336, + "step": 1995 + }, + { + "epoch": 0.7640750670241286, + "loss": 0.2901032567024231, + "loss_ce": 0.021060287952423096, + "loss_iou": 1.0676565170288086, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 344824336, + "step": 1995 + }, + { + "epoch": 0.7644580620451934, + "grad_norm": 20.198019155313965, + "learning_rate": 5e-06, + "loss": 0.2246, + "num_input_tokens_seen": 344997088, + "step": 1996 + }, + { + "epoch": 0.7644580620451934, + "loss": 0.2020232379436493, + "loss_ce": 0.020077433437108994, + "loss_iou": 1.0154491662979126, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 344997088, + "step": 1996 + }, + { + "epoch": 0.7648410570662582, + "grad_norm": 28.205076589215945, + "learning_rate": 5e-06, + "loss": 0.1778, + "num_input_tokens_seen": 345170424, + "step": 1997 + }, + { + "epoch": 0.7648410570662582, + "loss": 0.15298880636692047, + "loss_ce": 0.02078666165471077, + "loss_iou": 1.01637601852417, + "loss_num": 0.1318359375, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 345170424, + "step": 1997 + }, + { + "epoch": 0.7652240520873229, + "grad_norm": 30.740484651778416, + "learning_rate": 5e-06, + "loss": 0.2372, + "num_input_tokens_seen": 345343024, + "step": 1998 + }, + { + "epoch": 0.7652240520873229, + "loss": 0.28460055589675903, + "loss_ce": 0.01830417476594448, + "loss_iou": 1.0067775249481201, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 345343024, + "step": 1998 + }, + { + "epoch": 0.7656070471083876, + "grad_norm": 39.224480963224835, + "learning_rate": 5e-06, + "loss": 0.233, + "num_input_tokens_seen": 345515992, + "step": 1999 + }, + { + "epoch": 0.7656070471083876, + "loss": 0.19093447923660278, + "loss_ce": 0.020585356280207634, + "loss_iou": 1.0244207382202148, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 345515992, + "step": 1999 + }, + { + "epoch": 0.7659900421294523, + "grad_norm": 45.45632633343496, + "learning_rate": 5e-06, + "loss": 0.2306, + "num_input_tokens_seen": 345689096, + "step": 2000 + }, + { + "epoch": 0.7659900421294523, + "eval_websight_new_CIoU": 0.9228273034095764, + "eval_websight_new_GIoU": 0.9226952195167542, + "eval_websight_new_IoU": 0.9230187237262726, + "eval_websight_new_MAE_all": 0.009114941116422415, + "eval_websight_new_MAE_h": 0.013225205708295107, + "eval_websight_new_MAE_w": 0.011090578977018595, + "eval_websight_new_MAE_x": 0.007492676377296448, + "eval_websight_new_MAE_y": 0.004651304334402084, + "eval_websight_new_NUM_probability": 7.791836469550617e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.14586272835731506, + "eval_websight_new_loss_ce": 0.04211839847266674, + "eval_websight_new_loss_iou": 1.0003713369369507, + "eval_websight_new_loss_num": 0.101348876953125, + "eval_websight_new_loss_xval": 0.101348876953125, + "eval_websight_new_runtime": 55.7017, + "eval_websight_new_samples_per_second": 0.898, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 345689096, + "step": 2000 + }, + { + "epoch": 0.7659900421294523, + "eval_seeclick_CIoU": 0.6462754011154175, + "eval_seeclick_GIoU": 0.6530387699604034, + "eval_seeclick_IoU": 0.6783847212791443, + "eval_seeclick_MAE_all": 0.06139025278389454, + "eval_seeclick_MAE_h": 0.03869203571230173, + "eval_seeclick_MAE_w": 0.0867950338870287, + "eval_seeclick_MAE_x": 0.08058405667543411, + "eval_seeclick_MAE_y": 0.03948989789932966, + "eval_seeclick_NUM_probability": 0.00010747522537712939, + "eval_seeclick_inside_bbox": 0.9097222089767456, + "eval_seeclick_loss": 0.5110470056533813, + "eval_seeclick_loss_ce": 0.03283202834427357, + "eval_seeclick_loss_iou": 1.152630627155304, + "eval_seeclick_loss_num": 0.4788818359375, + "eval_seeclick_loss_xval": 0.4788818359375, + "eval_seeclick_runtime": 83.792, + "eval_seeclick_samples_per_second": 0.597, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 345689096, + "step": 2000 + }, + { + "epoch": 0.7659900421294523, + "eval_icons_CIoU": 0.8534445464611053, + "eval_icons_GIoU": 0.848601758480072, + "eval_icons_IoU": 0.8605115115642548, + "eval_icons_MAE_all": 0.022490271367132664, + "eval_icons_MAE_h": 0.019543664529919624, + "eval_icons_MAE_w": 0.029191632755100727, + "eval_icons_MAE_x": 0.02440809179097414, + "eval_icons_MAE_y": 0.01681769685819745, + "eval_icons_NUM_probability": 7.248714246088639e-05, + "eval_icons_inside_bbox": 0.9722222089767456, + "eval_icons_loss": 0.1872413605451584, + "eval_icons_loss_ce": 0.028505710884928703, + "eval_icons_loss_iou": 1.0292596817016602, + "eval_icons_loss_num": 0.148284912109375, + "eval_icons_loss_xval": 0.148284912109375, + "eval_icons_runtime": 85.5313, + "eval_icons_samples_per_second": 0.585, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 345689096, + "step": 2000 + }, + { + "epoch": 0.7659900421294523, + "loss": 0.19107291102409363, + "loss_ce": 0.028536278754472733, + "loss_iou": 1.037780523300171, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 345689096, + "step": 2000 + }, + { + "epoch": 0.7663730371505171, + "grad_norm": 37.62851686661492, + "learning_rate": 5e-06, + "loss": 0.2714, + "num_input_tokens_seen": 345862128, + "step": 2001 + }, + { + "epoch": 0.7663730371505171, + "loss": 0.30593255162239075, + "loss_ce": 0.020165957510471344, + "loss_iou": 1.0260435342788696, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 345862128, + "step": 2001 + }, + { + "epoch": 0.7667560321715817, + "grad_norm": 30.529578676536875, + "learning_rate": 5e-06, + "loss": 0.2283, + "num_input_tokens_seen": 346034768, + "step": 2002 + }, + { + "epoch": 0.7667560321715817, + "loss": 0.22363267838954926, + "loss_ce": 0.021179085597395897, + "loss_iou": 1.083114504814148, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 346034768, + "step": 2002 + }, + { + "epoch": 0.7671390271926465, + "grad_norm": 28.619464310192097, + "learning_rate": 5e-06, + "loss": 0.1877, + "num_input_tokens_seen": 346203840, + "step": 2003 + }, + { + "epoch": 0.7671390271926465, + "loss": 0.23254099488258362, + "loss_ce": 0.020932119339704514, + "loss_iou": 1.1524338722229004, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 346203840, + "step": 2003 + }, + { + "epoch": 0.7675220222137112, + "grad_norm": 29.2887391250122, + "learning_rate": 5e-06, + "loss": 0.1851, + "num_input_tokens_seen": 346377152, + "step": 2004 + }, + { + "epoch": 0.7675220222137112, + "loss": 0.1696953922510147, + "loss_ce": 0.02064754255115986, + "loss_iou": 1.009027123451233, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 346377152, + "step": 2004 + }, + { + "epoch": 0.767905017234776, + "grad_norm": 27.633679375149583, + "learning_rate": 5e-06, + "loss": 0.2442, + "num_input_tokens_seen": 346550168, + "step": 2005 + }, + { + "epoch": 0.767905017234776, + "loss": 0.22287316620349884, + "loss_ce": 0.019931267946958542, + "loss_iou": 1.0643115043640137, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 346550168, + "step": 2005 + }, + { + "epoch": 0.7682880122558406, + "grad_norm": 36.47280484065821, + "learning_rate": 5e-06, + "loss": 0.2381, + "num_input_tokens_seen": 346722848, + "step": 2006 + }, + { + "epoch": 0.7682880122558406, + "loss": 0.24743898212909698, + "loss_ce": 0.021273205056786537, + "loss_iou": 1.0317459106445312, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 346722848, + "step": 2006 + }, + { + "epoch": 0.7686710072769054, + "grad_norm": 36.33389931984183, + "learning_rate": 5e-06, + "loss": 0.2083, + "num_input_tokens_seen": 346895904, + "step": 2007 + }, + { + "epoch": 0.7686710072769054, + "loss": 0.14966237545013428, + "loss_ce": 0.02185475081205368, + "loss_iou": 0.8812423348426819, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 346895904, + "step": 2007 + }, + { + "epoch": 0.7690540022979702, + "grad_norm": 28.770279519362564, + "learning_rate": 5e-06, + "loss": 0.2267, + "num_input_tokens_seen": 347068800, + "step": 2008 + }, + { + "epoch": 0.7690540022979702, + "loss": 0.16842623054981232, + "loss_ce": 0.020751673728227615, + "loss_iou": 1.016047716140747, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 347068800, + "step": 2008 + }, + { + "epoch": 0.7694369973190348, + "grad_norm": 25.881096443290257, + "learning_rate": 5e-06, + "loss": 0.1906, + "num_input_tokens_seen": 347241552, + "step": 2009 + }, + { + "epoch": 0.7694369973190348, + "loss": 0.22845833003520966, + "loss_ce": 0.021427083760499954, + "loss_iou": 1.0062979459762573, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 347241552, + "step": 2009 + }, + { + "epoch": 0.7698199923400996, + "grad_norm": 33.24424791559408, + "learning_rate": 5e-06, + "loss": 0.1866, + "num_input_tokens_seen": 347413224, + "step": 2010 + }, + { + "epoch": 0.7698199923400996, + "loss": 0.1563665270805359, + "loss_ce": 0.02102106809616089, + "loss_iou": 1.0179115533828735, + "loss_num": 0.1357421875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 347413224, + "step": 2010 + }, + { + "epoch": 0.7702029873611643, + "grad_norm": 38.63471375316766, + "learning_rate": 5e-06, + "loss": 0.2204, + "num_input_tokens_seen": 347586024, + "step": 2011 + }, + { + "epoch": 0.7702029873611643, + "loss": 0.2395392805337906, + "loss_ce": 0.021460656076669693, + "loss_iou": 1.1907134056091309, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 347586024, + "step": 2011 + }, + { + "epoch": 0.7705859823822291, + "grad_norm": 25.37617773051809, + "learning_rate": 5e-06, + "loss": 0.1709, + "num_input_tokens_seen": 347758936, + "step": 2012 + }, + { + "epoch": 0.7705859823822291, + "loss": 0.17508289217948914, + "loss_ce": 0.02310534566640854, + "loss_iou": 1.030497670173645, + "loss_num": 0.15234375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 347758936, + "step": 2012 + }, + { + "epoch": 0.7709689774032937, + "grad_norm": 29.09347029281265, + "learning_rate": 5e-06, + "loss": 0.1937, + "num_input_tokens_seen": 347931872, + "step": 2013 + }, + { + "epoch": 0.7709689774032937, + "loss": 0.19590593874454498, + "loss_ce": 0.020490895956754684, + "loss_iou": 0.8654123544692993, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 347931872, + "step": 2013 + }, + { + "epoch": 0.7713519724243585, + "grad_norm": 27.7951852319954, + "learning_rate": 5e-06, + "loss": 0.2602, + "num_input_tokens_seen": 348104600, + "step": 2014 + }, + { + "epoch": 0.7713519724243585, + "loss": 0.34026581048965454, + "loss_ce": 0.019648149609565735, + "loss_iou": 1.2110865116119385, + "loss_num": 0.3203125, + "loss_xval": 0.3203125, + "num_input_tokens_seen": 348104600, + "step": 2014 + }, + { + "epoch": 0.7717349674454232, + "grad_norm": 43.72003435417935, + "learning_rate": 5e-06, + "loss": 0.1909, + "num_input_tokens_seen": 348277448, + "step": 2015 + }, + { + "epoch": 0.7717349674454232, + "loss": 0.1730906367301941, + "loss_ce": 0.02184552513062954, + "loss_iou": 1.026153802871704, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 348277448, + "step": 2015 + }, + { + "epoch": 0.7721179624664879, + "grad_norm": 32.331311683787206, + "learning_rate": 5e-06, + "loss": 0.1919, + "num_input_tokens_seen": 348450496, + "step": 2016 + }, + { + "epoch": 0.7721179624664879, + "loss": 0.19085338711738586, + "loss_ce": 0.02056529000401497, + "loss_iou": 1.0118297338485718, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 348450496, + "step": 2016 + }, + { + "epoch": 0.7725009574875527, + "grad_norm": 44.85670015680483, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 348623496, + "step": 2017 + }, + { + "epoch": 0.7725009574875527, + "loss": 0.22206342220306396, + "loss_ce": 0.021196719259023666, + "loss_iou": 1.0615859031677246, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 348623496, + "step": 2017 + }, + { + "epoch": 0.7728839525086174, + "grad_norm": 31.448395185881434, + "learning_rate": 5e-06, + "loss": 0.2297, + "num_input_tokens_seen": 348796480, + "step": 2018 + }, + { + "epoch": 0.7728839525086174, + "loss": 0.2339247763156891, + "loss_ce": 0.01975243166089058, + "loss_iou": 1.0011065006256104, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 348796480, + "step": 2018 + }, + { + "epoch": 0.7732669475296822, + "grad_norm": 58.93911242383194, + "learning_rate": 5e-06, + "loss": 0.2494, + "num_input_tokens_seen": 348968856, + "step": 2019 + }, + { + "epoch": 0.7732669475296822, + "loss": 0.24253229796886444, + "loss_ce": 0.019876059144735336, + "loss_iou": 1.0425891876220703, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 348968856, + "step": 2019 + }, + { + "epoch": 0.7736499425507468, + "grad_norm": 31.521528661475983, + "learning_rate": 5e-06, + "loss": 0.2652, + "num_input_tokens_seen": 349141984, + "step": 2020 + }, + { + "epoch": 0.7736499425507468, + "loss": 0.22655808925628662, + "loss_ce": 0.02221238613128662, + "loss_iou": 1.077653408050537, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 349141984, + "step": 2020 + }, + { + "epoch": 0.7740329375718116, + "grad_norm": 51.802398179186724, + "learning_rate": 5e-06, + "loss": 0.2872, + "num_input_tokens_seen": 349314688, + "step": 2021 + }, + { + "epoch": 0.7740329375718116, + "loss": 0.24481329321861267, + "loss_ce": 0.020692195743322372, + "loss_iou": 1.0158942937850952, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 349314688, + "step": 2021 + }, + { + "epoch": 0.7744159325928763, + "grad_norm": 30.043300193427182, + "learning_rate": 5e-06, + "loss": 0.2316, + "num_input_tokens_seen": 349487840, + "step": 2022 + }, + { + "epoch": 0.7744159325928763, + "loss": 0.28100672364234924, + "loss_ce": 0.020325560122728348, + "loss_iou": 1.015325665473938, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 349487840, + "step": 2022 + }, + { + "epoch": 0.774798927613941, + "grad_norm": 58.21751211807337, + "learning_rate": 5e-06, + "loss": 0.2654, + "num_input_tokens_seen": 349660672, + "step": 2023 + }, + { + "epoch": 0.774798927613941, + "loss": 0.2263191044330597, + "loss_ce": 0.021729253232479095, + "loss_iou": 1.007361888885498, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 349660672, + "step": 2023 + }, + { + "epoch": 0.7751819226350057, + "grad_norm": 26.886955651983698, + "learning_rate": 5e-06, + "loss": 0.2666, + "num_input_tokens_seen": 349833424, + "step": 2024 + }, + { + "epoch": 0.7751819226350057, + "loss": 0.250285267829895, + "loss_ce": 0.021525517106056213, + "loss_iou": 1.1009631156921387, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 349833424, + "step": 2024 + }, + { + "epoch": 0.7755649176560705, + "grad_norm": 22.93500372346212, + "learning_rate": 5e-06, + "loss": 0.2116, + "num_input_tokens_seen": 350006432, + "step": 2025 + }, + { + "epoch": 0.7755649176560705, + "loss": 0.2316955029964447, + "loss_ce": 0.020147655159235, + "loss_iou": 1.4622738361358643, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 350006432, + "step": 2025 + }, + { + "epoch": 0.7759479126771351, + "grad_norm": 15.292739097693845, + "learning_rate": 5e-06, + "loss": 0.1678, + "num_input_tokens_seen": 350179648, + "step": 2026 + }, + { + "epoch": 0.7759479126771351, + "loss": 0.15203899145126343, + "loss_ce": 0.021851008757948875, + "loss_iou": 0.9951585531234741, + "loss_num": 0.1298828125, + "loss_xval": 0.1298828125, + "num_input_tokens_seen": 350179648, + "step": 2026 + }, + { + "epoch": 0.7763309076981999, + "grad_norm": 15.43524855294656, + "learning_rate": 5e-06, + "loss": 0.1686, + "num_input_tokens_seen": 350352496, + "step": 2027 + }, + { + "epoch": 0.7763309076981999, + "loss": 0.17393441498279572, + "loss_ce": 0.020614106208086014, + "loss_iou": 1.019606590270996, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 350352496, + "step": 2027 + }, + { + "epoch": 0.7767139027192647, + "grad_norm": 24.727026774937, + "learning_rate": 5e-06, + "loss": 0.2016, + "num_input_tokens_seen": 350525272, + "step": 2028 + }, + { + "epoch": 0.7767139027192647, + "loss": 0.16336774826049805, + "loss_ce": 0.021461009979248047, + "loss_iou": 1.018656611442566, + "loss_num": 0.1416015625, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 350525272, + "step": 2028 + }, + { + "epoch": 0.7770968977403294, + "grad_norm": 28.55673031986966, + "learning_rate": 5e-06, + "loss": 0.1844, + "num_input_tokens_seen": 350697960, + "step": 2029 + }, + { + "epoch": 0.7770968977403294, + "loss": 0.18055246770381927, + "loss_ce": 0.020335180684924126, + "loss_iou": 1.0059808492660522, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 350697960, + "step": 2029 + }, + { + "epoch": 0.7774798927613941, + "grad_norm": 29.256057572891525, + "learning_rate": 5e-06, + "loss": 0.1845, + "num_input_tokens_seen": 350870944, + "step": 2030 + }, + { + "epoch": 0.7774798927613941, + "loss": 0.21915271878242493, + "loss_ce": 0.018896378576755524, + "loss_iou": 1.0013165473937988, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 350870944, + "step": 2030 + }, + { + "epoch": 0.7778628877824588, + "grad_norm": 31.918065765061225, + "learning_rate": 5e-06, + "loss": 0.1945, + "num_input_tokens_seen": 351040520, + "step": 2031 + }, + { + "epoch": 0.7778628877824588, + "loss": 0.20982369780540466, + "loss_ce": 0.023117145523428917, + "loss_iou": 1.1483569145202637, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 351040520, + "step": 2031 + }, + { + "epoch": 0.7782458828035236, + "grad_norm": 24.643181892068174, + "learning_rate": 5e-06, + "loss": 0.1785, + "num_input_tokens_seen": 351213032, + "step": 2032 + }, + { + "epoch": 0.7782458828035236, + "loss": 0.14677008986473083, + "loss_ce": 0.020976630970835686, + "loss_iou": 1.0016255378723145, + "loss_num": 0.1259765625, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 351213032, + "step": 2032 + }, + { + "epoch": 0.7786288778245882, + "grad_norm": 22.723302450830133, + "learning_rate": 5e-06, + "loss": 0.2351, + "num_input_tokens_seen": 351385744, + "step": 2033 + }, + { + "epoch": 0.7786288778245882, + "loss": 0.2747228741645813, + "loss_ce": 0.022098341956734657, + "loss_iou": 1.1827709674835205, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 351385744, + "step": 2033 + }, + { + "epoch": 0.779011872845653, + "grad_norm": 28.561332086628987, + "learning_rate": 5e-06, + "loss": 0.2361, + "num_input_tokens_seen": 351558224, + "step": 2034 + }, + { + "epoch": 0.779011872845653, + "loss": 0.2604377567768097, + "loss_ce": 0.019959241151809692, + "loss_iou": 1.0186917781829834, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 351558224, + "step": 2034 + }, + { + "epoch": 0.7793948678667177, + "grad_norm": 32.677920741788206, + "learning_rate": 5e-06, + "loss": 0.1881, + "num_input_tokens_seen": 351731232, + "step": 2035 + }, + { + "epoch": 0.7793948678667177, + "loss": 0.1979053020477295, + "loss_ce": 0.021879900246858597, + "loss_iou": 1.046138048171997, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 351731232, + "step": 2035 + }, + { + "epoch": 0.7797778628877825, + "grad_norm": 30.640751263353486, + "learning_rate": 5e-06, + "loss": 0.2185, + "num_input_tokens_seen": 351904312, + "step": 2036 + }, + { + "epoch": 0.7797778628877825, + "loss": 0.1802019327878952, + "loss_ce": 0.0223650224506855, + "loss_iou": 1.0634071826934814, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 351904312, + "step": 2036 + }, + { + "epoch": 0.7801608579088471, + "grad_norm": 27.624165621482252, + "learning_rate": 5e-06, + "loss": 0.2514, + "num_input_tokens_seen": 352077536, + "step": 2037 + }, + { + "epoch": 0.7801608579088471, + "loss": 0.28442516922950745, + "loss_ce": 0.0214246716350317, + "loss_iou": 1.0522079467773438, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 352077536, + "step": 2037 + }, + { + "epoch": 0.7805438529299119, + "grad_norm": 26.965646504091804, + "learning_rate": 5e-06, + "loss": 0.2443, + "num_input_tokens_seen": 352250400, + "step": 2038 + }, + { + "epoch": 0.7805438529299119, + "loss": 0.26357731223106384, + "loss_ce": 0.020199626684188843, + "loss_iou": 1.0834300518035889, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 352250400, + "step": 2038 + }, + { + "epoch": 0.7809268479509767, + "grad_norm": 32.410844358147976, + "learning_rate": 5e-06, + "loss": 0.2331, + "num_input_tokens_seen": 352423376, + "step": 2039 + }, + { + "epoch": 0.7809268479509767, + "loss": 0.2548708915710449, + "loss_ce": 0.022326957434415817, + "loss_iou": 1.0806260108947754, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 352423376, + "step": 2039 + }, + { + "epoch": 0.7813098429720413, + "grad_norm": 25.32244849030144, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 352596808, + "step": 2040 + }, + { + "epoch": 0.7813098429720413, + "loss": 0.1374257504940033, + "loss_ce": 0.019536349922418594, + "loss_iou": 1.0070759057998657, + "loss_num": 0.11767578125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 352596808, + "step": 2040 + }, + { + "epoch": 0.7816928379931061, + "grad_norm": 26.46573959918601, + "learning_rate": 5e-06, + "loss": 0.2183, + "num_input_tokens_seen": 352770024, + "step": 2041 + }, + { + "epoch": 0.7816928379931061, + "loss": 0.22981318831443787, + "loss_ce": 0.02113397791981697, + "loss_iou": 1.1103980541229248, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 352770024, + "step": 2041 + }, + { + "epoch": 0.7820758330141708, + "grad_norm": 32.560681429372664, + "learning_rate": 5e-06, + "loss": 0.2158, + "num_input_tokens_seen": 352943200, + "step": 2042 + }, + { + "epoch": 0.7820758330141708, + "loss": 0.18732693791389465, + "loss_ce": 0.022104769945144653, + "loss_iou": 1.007914423942566, + "loss_num": 0.1650390625, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 352943200, + "step": 2042 + }, + { + "epoch": 0.7824588280352356, + "grad_norm": 43.25860910942051, + "learning_rate": 5e-06, + "loss": 0.2537, + "num_input_tokens_seen": 353116408, + "step": 2043 + }, + { + "epoch": 0.7824588280352356, + "loss": 0.22283262014389038, + "loss_ce": 0.02141660824418068, + "loss_iou": 1.0568044185638428, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 353116408, + "step": 2043 + }, + { + "epoch": 0.7828418230563002, + "grad_norm": 36.265030863036884, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 353289184, + "step": 2044 + }, + { + "epoch": 0.7828418230563002, + "loss": 0.17458541691303253, + "loss_ce": 0.020532675087451935, + "loss_iou": 1.0050618648529053, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 353289184, + "step": 2044 + }, + { + "epoch": 0.783224818077365, + "grad_norm": 25.566315369884663, + "learning_rate": 5e-06, + "loss": 0.2078, + "num_input_tokens_seen": 353462240, + "step": 2045 + }, + { + "epoch": 0.783224818077365, + "loss": 0.1959572583436966, + "loss_ce": 0.021274644881486893, + "loss_iou": 1.0417563915252686, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 353462240, + "step": 2045 + }, + { + "epoch": 0.7836078130984298, + "grad_norm": 24.19159450809671, + "learning_rate": 5e-06, + "loss": 0.1916, + "num_input_tokens_seen": 353634736, + "step": 2046 + }, + { + "epoch": 0.7836078130984298, + "loss": 0.1993541270494461, + "loss_ce": 0.019727662205696106, + "loss_iou": 1.0452462434768677, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 353634736, + "step": 2046 + }, + { + "epoch": 0.7839908081194944, + "grad_norm": 35.112046726772306, + "learning_rate": 5e-06, + "loss": 0.2197, + "num_input_tokens_seen": 353807432, + "step": 2047 + }, + { + "epoch": 0.7839908081194944, + "loss": 0.2643466293811798, + "loss_ce": 0.02258637547492981, + "loss_iou": 1.0596150159835815, + "loss_num": 0.2421875, + "loss_xval": 0.2421875, + "num_input_tokens_seen": 353807432, + "step": 2047 + }, + { + "epoch": 0.7843738031405592, + "grad_norm": 38.27092369525028, + "learning_rate": 5e-06, + "loss": 0.1947, + "num_input_tokens_seen": 353980744, + "step": 2048 + }, + { + "epoch": 0.7843738031405592, + "loss": 0.19919967651367188, + "loss_ce": 0.022136688232421875, + "loss_iou": 1.01801598072052, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 353980744, + "step": 2048 + }, + { + "epoch": 0.7847567981616239, + "grad_norm": 35.303257622853316, + "learning_rate": 5e-06, + "loss": 0.2135, + "num_input_tokens_seen": 354153920, + "step": 2049 + }, + { + "epoch": 0.7847567981616239, + "loss": 0.19922125339508057, + "loss_ce": 0.019594788551330566, + "loss_iou": 1.0008752346038818, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 354153920, + "step": 2049 + }, + { + "epoch": 0.7851397931826887, + "grad_norm": 26.223173743701402, + "learning_rate": 5e-06, + "loss": 0.2507, + "num_input_tokens_seen": 354326824, + "step": 2050 + }, + { + "epoch": 0.7851397931826887, + "loss": 0.2493046373128891, + "loss_ce": 0.019080042839050293, + "loss_iou": 1.091526985168457, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 354326824, + "step": 2050 + }, + { + "epoch": 0.7855227882037533, + "grad_norm": 26.155226545194463, + "learning_rate": 5e-06, + "loss": 0.1696, + "num_input_tokens_seen": 354499760, + "step": 2051 + }, + { + "epoch": 0.7855227882037533, + "loss": 0.13985827565193176, + "loss_ce": 0.021328000351786613, + "loss_iou": 1.001230239868164, + "loss_num": 0.11865234375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 354499760, + "step": 2051 + }, + { + "epoch": 0.7859057832248181, + "grad_norm": 33.22083130224732, + "learning_rate": 5e-06, + "loss": 0.1797, + "num_input_tokens_seen": 354672576, + "step": 2052 + }, + { + "epoch": 0.7859057832248181, + "loss": 0.16594764590263367, + "loss_ce": 0.020195700228214264, + "loss_iou": 1.0120246410369873, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 354672576, + "step": 2052 + }, + { + "epoch": 0.7862887782458828, + "grad_norm": 31.91281088969455, + "learning_rate": 5e-06, + "loss": 0.2004, + "num_input_tokens_seen": 354845680, + "step": 2053 + }, + { + "epoch": 0.7862887782458828, + "loss": 0.13769575953483582, + "loss_ce": 0.020203087478876114, + "loss_iou": 1.0007909536361694, + "loss_num": 0.11767578125, + "loss_xval": 0.11767578125, + "num_input_tokens_seen": 354845680, + "step": 2053 + }, + { + "epoch": 0.7866717732669475, + "grad_norm": 24.824588348523132, + "learning_rate": 5e-06, + "loss": 0.2602, + "num_input_tokens_seen": 355018536, + "step": 2054 + }, + { + "epoch": 0.7866717732669475, + "loss": 0.20731166005134583, + "loss_ce": 0.019506465643644333, + "loss_iou": 1.0803972482681274, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 355018536, + "step": 2054 + }, + { + "epoch": 0.7870547682880122, + "grad_norm": 49.85580010214409, + "learning_rate": 5e-06, + "loss": 0.2189, + "num_input_tokens_seen": 355191824, + "step": 2055 + }, + { + "epoch": 0.7870547682880122, + "loss": 0.18205732107162476, + "loss_ce": 0.021840039640665054, + "loss_iou": 1.0319499969482422, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 355191824, + "step": 2055 + }, + { + "epoch": 0.787437763309077, + "grad_norm": 35.185942298183626, + "learning_rate": 5e-06, + "loss": 0.26, + "num_input_tokens_seen": 355365016, + "step": 2056 + }, + { + "epoch": 0.787437763309077, + "loss": 0.31597667932510376, + "loss_ce": 0.022031398490071297, + "loss_iou": 1.257796287536621, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 355365016, + "step": 2056 + }, + { + "epoch": 0.7878207583301418, + "grad_norm": 34.48133769594558, + "learning_rate": 5e-06, + "loss": 0.2261, + "num_input_tokens_seen": 355538216, + "step": 2057 + }, + { + "epoch": 0.7878207583301418, + "loss": 0.2338372766971588, + "loss_ce": 0.022228389978408813, + "loss_iou": 1.0854668617248535, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 355538216, + "step": 2057 + }, + { + "epoch": 0.7882037533512064, + "grad_norm": 27.905404587663607, + "learning_rate": 5e-06, + "loss": 0.2673, + "num_input_tokens_seen": 355711048, + "step": 2058 + }, + { + "epoch": 0.7882037533512064, + "loss": 0.26376307010650635, + "loss_ce": 0.019805539399385452, + "loss_iou": 1.094059944152832, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 355711048, + "step": 2058 + }, + { + "epoch": 0.7885867483722712, + "grad_norm": 37.08467618361751, + "learning_rate": 5e-06, + "loss": 0.2489, + "num_input_tokens_seen": 355884240, + "step": 2059 + }, + { + "epoch": 0.7885867483722712, + "loss": 0.2613140940666199, + "loss_ce": 0.02144593559205532, + "loss_iou": 1.0572009086608887, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 355884240, + "step": 2059 + }, + { + "epoch": 0.7889697433933359, + "grad_norm": 27.2685601043996, + "learning_rate": 5e-06, + "loss": 0.2062, + "num_input_tokens_seen": 356057592, + "step": 2060 + }, + { + "epoch": 0.7889697433933359, + "loss": 0.17139238119125366, + "loss_ce": 0.021245889365673065, + "loss_iou": 1.0331575870513916, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 356057592, + "step": 2060 + }, + { + "epoch": 0.7893527384144006, + "grad_norm": 40.15787509001266, + "learning_rate": 5e-06, + "loss": 0.2283, + "num_input_tokens_seen": 356230424, + "step": 2061 + }, + { + "epoch": 0.7893527384144006, + "loss": 0.2529626190662384, + "loss_ce": 0.020784892141819, + "loss_iou": 0.8782117366790771, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 356230424, + "step": 2061 + }, + { + "epoch": 0.7897357334354653, + "grad_norm": 27.111634342880546, + "learning_rate": 5e-06, + "loss": 0.1672, + "num_input_tokens_seen": 356403264, + "step": 2062 + }, + { + "epoch": 0.7897357334354653, + "loss": 0.18083465099334717, + "loss_ce": 0.021838072687387466, + "loss_iou": 1.0160695314407349, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 356403264, + "step": 2062 + }, + { + "epoch": 0.7901187284565301, + "grad_norm": 44.99530458943178, + "learning_rate": 5e-06, + "loss": 0.2912, + "num_input_tokens_seen": 356576056, + "step": 2063 + }, + { + "epoch": 0.7901187284565301, + "loss": 0.2838969826698303, + "loss_ce": 0.021445803344249725, + "loss_iou": 1.0098295211791992, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 356576056, + "step": 2063 + }, + { + "epoch": 0.7905017234775948, + "grad_norm": 18.00601333246881, + "learning_rate": 5e-06, + "loss": 0.2703, + "num_input_tokens_seen": 356749432, + "step": 2064 + }, + { + "epoch": 0.7905017234775948, + "loss": 0.22683188319206238, + "loss_ce": 0.021570663899183273, + "loss_iou": 1.008726716041565, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 356749432, + "step": 2064 + }, + { + "epoch": 0.7908847184986595, + "grad_norm": 14.109572408707434, + "learning_rate": 5e-06, + "loss": 0.2188, + "num_input_tokens_seen": 356922256, + "step": 2065 + }, + { + "epoch": 0.7908847184986595, + "loss": 0.20682987570762634, + "loss_ce": 0.02030644193291664, + "loss_iou": 1.1127333641052246, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 356922256, + "step": 2065 + }, + { + "epoch": 0.7912677135197242, + "grad_norm": 14.511777793218755, + "learning_rate": 5e-06, + "loss": 0.1767, + "num_input_tokens_seen": 357095208, + "step": 2066 + }, + { + "epoch": 0.7912677135197242, + "loss": 0.15498453378677368, + "loss_ce": 0.02180583029985428, + "loss_iou": 1.0268007516860962, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 357095208, + "step": 2066 + }, + { + "epoch": 0.791650708540789, + "grad_norm": 34.14061707017323, + "learning_rate": 5e-06, + "loss": 0.2862, + "num_input_tokens_seen": 357268344, + "step": 2067 + }, + { + "epoch": 0.791650708540789, + "loss": 0.31500089168548584, + "loss_ce": 0.0212386604398489, + "loss_iou": 1.1864237785339355, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 357268344, + "step": 2067 + }, + { + "epoch": 0.7920337035618537, + "grad_norm": 47.77593428494504, + "learning_rate": 5e-06, + "loss": 0.2179, + "num_input_tokens_seen": 357441400, + "step": 2068 + }, + { + "epoch": 0.7920337035618537, + "loss": 0.17708002030849457, + "loss_ce": 0.02137933485209942, + "loss_iou": 1.0025368928909302, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 357441400, + "step": 2068 + }, + { + "epoch": 0.7924166985829184, + "grad_norm": 29.402961507585474, + "learning_rate": 5e-06, + "loss": 0.2119, + "num_input_tokens_seen": 357614232, + "step": 2069 + }, + { + "epoch": 0.7924166985829184, + "loss": 0.20056194067001343, + "loss_ce": 0.019470641389489174, + "loss_iou": 1.0144984722137451, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 357614232, + "step": 2069 + }, + { + "epoch": 0.7927996936039832, + "grad_norm": 23.703830648499626, + "learning_rate": 5e-06, + "loss": 0.2828, + "num_input_tokens_seen": 357787064, + "step": 2070 + }, + { + "epoch": 0.7927996936039832, + "loss": 0.2905902862548828, + "loss_ce": 0.02014351263642311, + "loss_iou": 1.168410301208496, + "loss_num": 0.26953125, + "loss_xval": 0.26953125, + "num_input_tokens_seen": 357787064, + "step": 2070 + }, + { + "epoch": 0.7931826886250479, + "grad_norm": 25.10557124225831, + "learning_rate": 5e-06, + "loss": 0.19, + "num_input_tokens_seen": 357960160, + "step": 2071 + }, + { + "epoch": 0.7931826886250479, + "loss": 0.13955967128276825, + "loss_ce": 0.021517682820558548, + "loss_iou": 1.0058672428131104, + "loss_num": 0.1181640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 357960160, + "step": 2071 + }, + { + "epoch": 0.7935656836461126, + "grad_norm": 26.610721080718825, + "learning_rate": 5e-06, + "loss": 0.2413, + "num_input_tokens_seen": 358133040, + "step": 2072 + }, + { + "epoch": 0.7935656836461126, + "loss": 0.19871611893177032, + "loss_ce": 0.02147001028060913, + "loss_iou": 0.9230528473854065, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 358133040, + "step": 2072 + }, + { + "epoch": 0.7939486786671773, + "grad_norm": 30.32934432344583, + "learning_rate": 5e-06, + "loss": 0.2017, + "num_input_tokens_seen": 358305792, + "step": 2073 + }, + { + "epoch": 0.7939486786671773, + "loss": 0.23280559480190277, + "loss_ce": 0.019670821726322174, + "loss_iou": 1.2293424606323242, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 358305792, + "step": 2073 + }, + { + "epoch": 0.7943316736882421, + "grad_norm": 21.668661242135155, + "learning_rate": 5e-06, + "loss": 0.2759, + "num_input_tokens_seen": 358478600, + "step": 2074 + }, + { + "epoch": 0.7943316736882421, + "loss": 0.26689743995666504, + "loss_ce": 0.02068161964416504, + "loss_iou": 1.0439400672912598, + "loss_num": 0.24609375, + "loss_xval": 0.24609375, + "num_input_tokens_seen": 358478600, + "step": 2074 + }, + { + "epoch": 0.7947146687093067, + "grad_norm": 29.057915372282242, + "learning_rate": 5e-06, + "loss": 0.2486, + "num_input_tokens_seen": 358651464, + "step": 2075 + }, + { + "epoch": 0.7947146687093067, + "loss": 0.23087254166603088, + "loss_ce": 0.023658178746700287, + "loss_iou": 1.0160874128341675, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 358651464, + "step": 2075 + }, + { + "epoch": 0.7950976637303715, + "grad_norm": 30.89577680325765, + "learning_rate": 5e-06, + "loss": 0.2869, + "num_input_tokens_seen": 358824696, + "step": 2076 + }, + { + "epoch": 0.7950976637303715, + "loss": 0.2944260835647583, + "loss_ce": 0.0224534310400486, + "loss_iou": 1.1428030729293823, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 358824696, + "step": 2076 + }, + { + "epoch": 0.7954806587514363, + "grad_norm": 41.178257398994205, + "learning_rate": 5e-06, + "loss": 0.1971, + "num_input_tokens_seen": 358997896, + "step": 2077 + }, + { + "epoch": 0.7954806587514363, + "loss": 0.19884341955184937, + "loss_ce": 0.021231111139059067, + "loss_iou": 1.0368378162384033, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 358997896, + "step": 2077 + }, + { + "epoch": 0.795863653772501, + "grad_norm": 37.12009418828327, + "learning_rate": 5e-06, + "loss": 0.306, + "num_input_tokens_seen": 359170800, + "step": 2078 + }, + { + "epoch": 0.795863653772501, + "loss": 0.3145269453525543, + "loss_ce": 0.020886801183223724, + "loss_iou": 1.0644571781158447, + "loss_num": 0.29296875, + "loss_xval": 0.29296875, + "num_input_tokens_seen": 359170800, + "step": 2078 + }, + { + "epoch": 0.7962466487935657, + "grad_norm": 30.776086178949864, + "learning_rate": 5e-06, + "loss": 0.2334, + "num_input_tokens_seen": 359343760, + "step": 2079 + }, + { + "epoch": 0.7962466487935657, + "loss": 0.24104654788970947, + "loss_ce": 0.02095378190279007, + "loss_iou": 1.2850362062454224, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 359343760, + "step": 2079 + }, + { + "epoch": 0.7966296438146304, + "grad_norm": 22.805719872927575, + "learning_rate": 5e-06, + "loss": 0.1952, + "num_input_tokens_seen": 359516672, + "step": 2080 + }, + { + "epoch": 0.7966296438146304, + "loss": 0.19519786536693573, + "loss_ce": 0.019965939223766327, + "loss_iou": 1.0284029245376587, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 359516672, + "step": 2080 + }, + { + "epoch": 0.7970126388356952, + "grad_norm": 44.0592154790448, + "learning_rate": 5e-06, + "loss": 0.2271, + "num_input_tokens_seen": 359689872, + "step": 2081 + }, + { + "epoch": 0.7970126388356952, + "loss": 0.15045644342899323, + "loss_ce": 0.020024321973323822, + "loss_iou": 1.0021027326583862, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 359689872, + "step": 2081 + }, + { + "epoch": 0.7973956338567598, + "grad_norm": 23.506748884147438, + "learning_rate": 5e-06, + "loss": 0.2539, + "num_input_tokens_seen": 359862848, + "step": 2082 + }, + { + "epoch": 0.7973956338567598, + "loss": 0.2538887858390808, + "loss_ce": 0.019818957895040512, + "loss_iou": 1.2948635816574097, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 359862848, + "step": 2082 + }, + { + "epoch": 0.7977786288778246, + "grad_norm": 28.647543722436414, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 360035736, + "step": 2083 + }, + { + "epoch": 0.7977786288778246, + "loss": 0.21334119141101837, + "loss_ce": 0.01985974609851837, + "loss_iou": 1.0197844505310059, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 360035736, + "step": 2083 + }, + { + "epoch": 0.7981616238988893, + "grad_norm": 30.10333488122401, + "learning_rate": 5e-06, + "loss": 0.1708, + "num_input_tokens_seen": 360208664, + "step": 2084 + }, + { + "epoch": 0.7981616238988893, + "loss": 0.15990781784057617, + "loss_ce": 0.021449562162160873, + "loss_iou": 1.0397412776947021, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 360208664, + "step": 2084 + }, + { + "epoch": 0.798544618919954, + "grad_norm": 52.63208100675534, + "learning_rate": 5e-06, + "loss": 0.2085, + "num_input_tokens_seen": 360381704, + "step": 2085 + }, + { + "epoch": 0.798544618919954, + "loss": 0.20577174425125122, + "loss_ce": 0.02199489437043667, + "loss_iou": 1.0019832849502563, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 360381704, + "step": 2085 + }, + { + "epoch": 0.7989276139410187, + "grad_norm": 29.04375222577297, + "learning_rate": 5e-06, + "loss": 0.1741, + "num_input_tokens_seen": 360554776, + "step": 2086 + }, + { + "epoch": 0.7989276139410187, + "loss": 0.2163219153881073, + "loss_ce": 0.0211925208568573, + "loss_iou": 1.025056004524231, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 360554776, + "step": 2086 + }, + { + "epoch": 0.7993106089620835, + "grad_norm": 29.037543914778514, + "learning_rate": 5e-06, + "loss": 0.2268, + "num_input_tokens_seen": 360727624, + "step": 2087 + }, + { + "epoch": 0.7993106089620835, + "loss": 0.2393513023853302, + "loss_ce": 0.021150611340999603, + "loss_iou": 1.039138674736023, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 360727624, + "step": 2087 + }, + { + "epoch": 0.7996936039831483, + "grad_norm": 36.23397125700912, + "learning_rate": 5e-06, + "loss": 0.182, + "num_input_tokens_seen": 360900776, + "step": 2088 + }, + { + "epoch": 0.7996936039831483, + "loss": 0.17068441212177277, + "loss_ce": 0.020965175703167915, + "loss_iou": 1.0142332315444946, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 360900776, + "step": 2088 + }, + { + "epoch": 0.8000765990042129, + "grad_norm": 52.41293020388545, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 361073576, + "step": 2089 + }, + { + "epoch": 0.8000765990042129, + "loss": 0.21941468119621277, + "loss_ce": 0.021477673202753067, + "loss_iou": 1.0017900466918945, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 361073576, + "step": 2089 + }, + { + "epoch": 0.8004595940252777, + "grad_norm": 30.35644681172131, + "learning_rate": 5e-06, + "loss": 0.1564, + "num_input_tokens_seen": 361246336, + "step": 2090 + }, + { + "epoch": 0.8004595940252777, + "loss": 0.16635313630104065, + "loss_ce": 0.020662210881710052, + "loss_iou": 1.0163631439208984, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 361246336, + "step": 2090 + }, + { + "epoch": 0.8008425890463424, + "grad_norm": 33.49175637491388, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 361419024, + "step": 2091 + }, + { + "epoch": 0.8008425890463424, + "loss": 0.23608174920082092, + "loss_ce": 0.021726282313466072, + "loss_iou": 1.0362515449523926, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 361419024, + "step": 2091 + }, + { + "epoch": 0.8012255840674071, + "grad_norm": 38.10898259614945, + "learning_rate": 5e-06, + "loss": 0.2263, + "num_input_tokens_seen": 361591832, + "step": 2092 + }, + { + "epoch": 0.8012255840674071, + "loss": 0.24866808950901031, + "loss_ce": 0.021129027009010315, + "loss_iou": 1.0237829685211182, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 361591832, + "step": 2092 + }, + { + "epoch": 0.8016085790884718, + "grad_norm": 60.87094645779356, + "learning_rate": 5e-06, + "loss": 0.2476, + "num_input_tokens_seen": 361764896, + "step": 2093 + }, + { + "epoch": 0.8016085790884718, + "loss": 0.2902504503726959, + "loss_ce": 0.01876608096063137, + "loss_iou": 1.036728858947754, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 361764896, + "step": 2093 + }, + { + "epoch": 0.8019915741095366, + "grad_norm": 33.85406987960584, + "learning_rate": 5e-06, + "loss": 0.2625, + "num_input_tokens_seen": 361937568, + "step": 2094 + }, + { + "epoch": 0.8019915741095366, + "loss": 0.19497427344322205, + "loss_ce": 0.02126821130514145, + "loss_iou": 1.0377048254013062, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 361937568, + "step": 2094 + }, + { + "epoch": 0.8023745691306013, + "grad_norm": 22.97566587849187, + "learning_rate": 5e-06, + "loss": 0.2191, + "num_input_tokens_seen": 362110456, + "step": 2095 + }, + { + "epoch": 0.8023745691306013, + "loss": 0.2223237156867981, + "loss_ce": 0.02029736153781414, + "loss_iou": 1.0889081954956055, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 362110456, + "step": 2095 + }, + { + "epoch": 0.802757564151666, + "grad_norm": 22.655194333177427, + "learning_rate": 5e-06, + "loss": 0.2478, + "num_input_tokens_seen": 362283680, + "step": 2096 + }, + { + "epoch": 0.802757564151666, + "loss": 0.27080637216567993, + "loss_ce": 0.02065379172563553, + "loss_iou": 1.3040798902511597, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 362283680, + "step": 2096 + }, + { + "epoch": 0.8031405591727308, + "grad_norm": 23.83590037094105, + "learning_rate": 5e-06, + "loss": 0.2786, + "num_input_tokens_seen": 362456224, + "step": 2097 + }, + { + "epoch": 0.8031405591727308, + "loss": 0.2533208727836609, + "loss_ce": 0.020532775670289993, + "loss_iou": 1.056600570678711, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 362456224, + "step": 2097 + }, + { + "epoch": 0.8035235541937955, + "grad_norm": 27.137259830250642, + "learning_rate": 5e-06, + "loss": 0.1775, + "num_input_tokens_seen": 362628992, + "step": 2098 + }, + { + "epoch": 0.8035235541937955, + "loss": 0.15176838636398315, + "loss_ce": 0.020909003913402557, + "loss_iou": 0.9796233773231506, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 362628992, + "step": 2098 + }, + { + "epoch": 0.8039065492148602, + "grad_norm": 28.31381030618961, + "learning_rate": 5e-06, + "loss": 0.1969, + "num_input_tokens_seen": 362798384, + "step": 2099 + }, + { + "epoch": 0.8039065492148602, + "loss": 0.1582416445016861, + "loss_ce": 0.022682560607790947, + "loss_iou": 1.0035731792449951, + "loss_num": 0.1357421875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 362798384, + "step": 2099 + }, + { + "epoch": 0.8042895442359249, + "grad_norm": 33.032148385521666, + "learning_rate": 5e-06, + "loss": 0.2686, + "num_input_tokens_seen": 362971272, + "step": 2100 + }, + { + "epoch": 0.8042895442359249, + "loss": 0.28126421570777893, + "loss_ce": 0.01985062286257744, + "loss_iou": 1.083396553993225, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 362971272, + "step": 2100 + }, + { + "epoch": 0.8046725392569897, + "grad_norm": 22.602817795148876, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 363144400, + "step": 2101 + }, + { + "epoch": 0.8046725392569897, + "loss": 0.1815548539161682, + "loss_ce": 0.02103240415453911, + "loss_iou": 1.030548334121704, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 363144400, + "step": 2101 + }, + { + "epoch": 0.8050555342780544, + "grad_norm": 38.47049504384218, + "learning_rate": 5e-06, + "loss": 0.2397, + "num_input_tokens_seen": 363317320, + "step": 2102 + }, + { + "epoch": 0.8050555342780544, + "loss": 0.18445762991905212, + "loss_ce": 0.021615827456116676, + "loss_iou": 0.9913636445999146, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 363317320, + "step": 2102 + }, + { + "epoch": 0.8054385292991191, + "grad_norm": 25.713987475296403, + "learning_rate": 5e-06, + "loss": 0.1688, + "num_input_tokens_seen": 363490280, + "step": 2103 + }, + { + "epoch": 0.8054385292991191, + "loss": 0.18460124731063843, + "loss_ce": 0.021393220871686935, + "loss_iou": 1.0054594278335571, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 363490280, + "step": 2103 + }, + { + "epoch": 0.8058215243201838, + "grad_norm": 23.378121283979322, + "learning_rate": 5e-06, + "loss": 0.1959, + "num_input_tokens_seen": 363663168, + "step": 2104 + }, + { + "epoch": 0.8058215243201838, + "loss": 0.22951623797416687, + "loss_ce": 0.021142205223441124, + "loss_iou": 1.0498735904693604, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 363663168, + "step": 2104 + }, + { + "epoch": 0.8062045193412486, + "grad_norm": 17.599769831356202, + "learning_rate": 5e-06, + "loss": 0.2367, + "num_input_tokens_seen": 363835856, + "step": 2105 + }, + { + "epoch": 0.8062045193412486, + "loss": 0.17348536849021912, + "loss_ce": 0.02077540010213852, + "loss_iou": 1.0428988933563232, + "loss_num": 0.15234375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 363835856, + "step": 2105 + }, + { + "epoch": 0.8065875143623132, + "grad_norm": 19.769912404207556, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 364009184, + "step": 2106 + }, + { + "epoch": 0.8065875143623132, + "loss": 0.17005111277103424, + "loss_ce": 0.021247392520308495, + "loss_iou": 1.009874701499939, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 364009184, + "step": 2106 + }, + { + "epoch": 0.806970509383378, + "grad_norm": 26.649027915265457, + "learning_rate": 5e-06, + "loss": 0.1644, + "num_input_tokens_seen": 364182736, + "step": 2107 + }, + { + "epoch": 0.806970509383378, + "loss": 0.17446421086788177, + "loss_ce": 0.020228378474712372, + "loss_iou": 0.9851024150848389, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 364182736, + "step": 2107 + }, + { + "epoch": 0.8073535044044428, + "grad_norm": 35.44751979570732, + "learning_rate": 5e-06, + "loss": 0.2026, + "num_input_tokens_seen": 364355976, + "step": 2108 + }, + { + "epoch": 0.8073535044044428, + "loss": 0.20467320084571838, + "loss_ce": 0.022361187264323235, + "loss_iou": 1.039179801940918, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 364355976, + "step": 2108 + }, + { + "epoch": 0.8077364994255075, + "grad_norm": 28.703068016312443, + "learning_rate": 5e-06, + "loss": 0.2332, + "num_input_tokens_seen": 364528720, + "step": 2109 + }, + { + "epoch": 0.8077364994255075, + "loss": 0.21477672457695007, + "loss_ce": 0.019342154264450073, + "loss_iou": 1.0085649490356445, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 364528720, + "step": 2109 + }, + { + "epoch": 0.8081194944465722, + "grad_norm": 28.63109062864066, + "learning_rate": 5e-06, + "loss": 0.1813, + "num_input_tokens_seen": 364701560, + "step": 2110 + }, + { + "epoch": 0.8081194944465722, + "loss": 0.14917662739753723, + "loss_ce": 0.021033326163887978, + "loss_iou": 1.0138494968414307, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 364701560, + "step": 2110 + }, + { + "epoch": 0.8085024894676369, + "grad_norm": 38.70015498466309, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 364874104, + "step": 2111 + }, + { + "epoch": 0.8085024894676369, + "loss": 0.18069684505462646, + "loss_ce": 0.01962505653500557, + "loss_iou": 1.0061640739440918, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 364874104, + "step": 2111 + }, + { + "epoch": 0.8088854844887017, + "grad_norm": 33.512475942128496, + "learning_rate": 5e-06, + "loss": 0.2515, + "num_input_tokens_seen": 365047352, + "step": 2112 + }, + { + "epoch": 0.8088854844887017, + "loss": 0.3174218237400055, + "loss_ce": 0.021706480532884598, + "loss_iou": 1.0606505870819092, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 365047352, + "step": 2112 + }, + { + "epoch": 0.8092684795097663, + "grad_norm": 29.020052069927704, + "learning_rate": 5e-06, + "loss": 0.1748, + "num_input_tokens_seen": 365220232, + "step": 2113 + }, + { + "epoch": 0.8092684795097663, + "loss": 0.15384788811206818, + "loss_ce": 0.02005882002413273, + "loss_iou": 1.007267951965332, + "loss_num": 0.1337890625, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 365220232, + "step": 2113 + }, + { + "epoch": 0.8096514745308311, + "grad_norm": 24.396770407285977, + "learning_rate": 5e-06, + "loss": 0.2347, + "num_input_tokens_seen": 365393064, + "step": 2114 + }, + { + "epoch": 0.8096514745308311, + "loss": 0.2564903795719147, + "loss_ce": 0.020162250846624374, + "loss_iou": 1.3626725673675537, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 365393064, + "step": 2114 + }, + { + "epoch": 0.8100344695518958, + "grad_norm": 40.716933413128686, + "learning_rate": 5e-06, + "loss": 0.3466, + "num_input_tokens_seen": 365566056, + "step": 2115 + }, + { + "epoch": 0.8100344695518958, + "loss": 0.30868810415267944, + "loss_ce": 0.022250106558203697, + "loss_iou": 1.0419776439666748, + "loss_num": 0.287109375, + "loss_xval": 0.287109375, + "num_input_tokens_seen": 365566056, + "step": 2115 + }, + { + "epoch": 0.8104174645729606, + "grad_norm": 28.101673655580367, + "learning_rate": 5e-06, + "loss": 0.2098, + "num_input_tokens_seen": 365739120, + "step": 2116 + }, + { + "epoch": 0.8104174645729606, + "loss": 0.24193474650382996, + "loss_ce": 0.02074335142970085, + "loss_iou": 1.0302741527557373, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 365739120, + "step": 2116 + }, + { + "epoch": 0.8108004595940252, + "grad_norm": 27.215441670805387, + "learning_rate": 5e-06, + "loss": 0.2351, + "num_input_tokens_seen": 365912000, + "step": 2117 + }, + { + "epoch": 0.8108004595940252, + "loss": 0.1826138198375702, + "loss_ce": 0.020473934710025787, + "loss_iou": 1.0102052688598633, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 365912000, + "step": 2117 + }, + { + "epoch": 0.81118345461509, + "grad_norm": 30.17607812198289, + "learning_rate": 5e-06, + "loss": 0.2069, + "num_input_tokens_seen": 366084704, + "step": 2118 + }, + { + "epoch": 0.81118345461509, + "loss": 0.20413601398468018, + "loss_ce": 0.022800570353865623, + "loss_iou": 2.548659324645996, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 366084704, + "step": 2118 + }, + { + "epoch": 0.8115664496361548, + "grad_norm": 35.92505267130526, + "learning_rate": 5e-06, + "loss": 0.196, + "num_input_tokens_seen": 366257608, + "step": 2119 + }, + { + "epoch": 0.8115664496361548, + "loss": 0.20702782273292542, + "loss_ce": 0.020748518407344818, + "loss_iou": 1.0230991840362549, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 366257608, + "step": 2119 + }, + { + "epoch": 0.8119494446572194, + "grad_norm": 29.807727312866955, + "learning_rate": 5e-06, + "loss": 0.2408, + "num_input_tokens_seen": 366430608, + "step": 2120 + }, + { + "epoch": 0.8119494446572194, + "loss": 0.2166225165128708, + "loss_ce": 0.021554159000515938, + "loss_iou": 1.045461654663086, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 366430608, + "step": 2120 + }, + { + "epoch": 0.8123324396782842, + "grad_norm": 27.749053225360697, + "learning_rate": 5e-06, + "loss": 0.2009, + "num_input_tokens_seen": 366603600, + "step": 2121 + }, + { + "epoch": 0.8123324396782842, + "loss": 0.21991193294525146, + "loss_ce": 0.021181467920541763, + "loss_iou": 1.0545415878295898, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 366603600, + "step": 2121 + }, + { + "epoch": 0.8127154346993489, + "grad_norm": 28.99954836164965, + "learning_rate": 5e-06, + "loss": 0.1613, + "num_input_tokens_seen": 366776760, + "step": 2122 + }, + { + "epoch": 0.8127154346993489, + "loss": 0.1873789131641388, + "loss_ce": 0.019562749192118645, + "loss_iou": 1.0477802753448486, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 366776760, + "step": 2122 + }, + { + "epoch": 0.8130984297204137, + "grad_norm": 29.983256245017014, + "learning_rate": 5e-06, + "loss": 0.2394, + "num_input_tokens_seen": 366949696, + "step": 2123 + }, + { + "epoch": 0.8130984297204137, + "loss": 0.28331607580184937, + "loss_ce": 0.02055971696972847, + "loss_iou": 1.051964282989502, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 366949696, + "step": 2123 + }, + { + "epoch": 0.8134814247414783, + "grad_norm": 18.967397198416126, + "learning_rate": 5e-06, + "loss": 0.2025, + "num_input_tokens_seen": 367122664, + "step": 2124 + }, + { + "epoch": 0.8134814247414783, + "loss": 0.1603628695011139, + "loss_ce": 0.02062288671731949, + "loss_iou": 1.014026165008545, + "loss_num": 0.1396484375, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 367122664, + "step": 2124 + }, + { + "epoch": 0.8138644197625431, + "grad_norm": 23.401417353363087, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 367295616, + "step": 2125 + }, + { + "epoch": 0.8138644197625431, + "loss": 0.24524861574172974, + "loss_ce": 0.02088339626789093, + "loss_iou": 1.0667381286621094, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 367295616, + "step": 2125 + }, + { + "epoch": 0.8142474147836078, + "grad_norm": 18.681374620707405, + "learning_rate": 5e-06, + "loss": 0.1829, + "num_input_tokens_seen": 367468840, + "step": 2126 + }, + { + "epoch": 0.8142474147836078, + "loss": 0.13272728025913239, + "loss_ce": 0.020514145493507385, + "loss_iou": 1.0010244846343994, + "loss_num": 0.1123046875, + "loss_xval": 0.1123046875, + "num_input_tokens_seen": 367468840, + "step": 2126 + }, + { + "epoch": 0.8146304098046725, + "grad_norm": 24.468260270885658, + "learning_rate": 5e-06, + "loss": 0.2504, + "num_input_tokens_seen": 367641648, + "step": 2127 + }, + { + "epoch": 0.8146304098046725, + "loss": 0.25432175397872925, + "loss_ce": 0.021655723452568054, + "loss_iou": 1.0185184478759766, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 367641648, + "step": 2127 + }, + { + "epoch": 0.8150134048257373, + "grad_norm": 28.332489760750022, + "learning_rate": 5e-06, + "loss": 0.2322, + "num_input_tokens_seen": 367814864, + "step": 2128 + }, + { + "epoch": 0.8150134048257373, + "loss": 0.23658353090286255, + "loss_ce": 0.021068410947918892, + "loss_iou": 1.0070009231567383, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 367814864, + "step": 2128 + }, + { + "epoch": 0.815396399846802, + "grad_norm": 20.92409368983065, + "learning_rate": 5e-06, + "loss": 0.1697, + "num_input_tokens_seen": 367987808, + "step": 2129 + }, + { + "epoch": 0.815396399846802, + "loss": 0.16304948925971985, + "loss_ce": 0.02010514587163925, + "loss_iou": 1.0108318328857422, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 367987808, + "step": 2129 + }, + { + "epoch": 0.8157793948678668, + "grad_norm": 16.311196285773622, + "learning_rate": 5e-06, + "loss": 0.1442, + "num_input_tokens_seen": 368161048, + "step": 2130 + }, + { + "epoch": 0.8157793948678668, + "loss": 0.1444142460823059, + "loss_ce": 0.020573917776346207, + "loss_iou": 1.0141609907150269, + "loss_num": 0.1240234375, + "loss_xval": 0.1240234375, + "num_input_tokens_seen": 368161048, + "step": 2130 + }, + { + "epoch": 0.8161623898889314, + "grad_norm": 19.817899330667746, + "learning_rate": 5e-06, + "loss": 0.2286, + "num_input_tokens_seen": 368330792, + "step": 2131 + }, + { + "epoch": 0.8161623898889314, + "loss": 0.24902987480163574, + "loss_ce": 0.022406332194805145, + "loss_iou": 1.040596604347229, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 368330792, + "step": 2131 + }, + { + "epoch": 0.8165453849099962, + "grad_norm": 21.139751947844744, + "learning_rate": 5e-06, + "loss": 0.2042, + "num_input_tokens_seen": 368503360, + "step": 2132 + }, + { + "epoch": 0.8165453849099962, + "loss": 0.19341111183166504, + "loss_ce": 0.020437486469745636, + "loss_iou": 1.0012788772583008, + "loss_num": 0.1728515625, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 368503360, + "step": 2132 + }, + { + "epoch": 0.8169283799310609, + "grad_norm": 19.840473264358938, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 368676456, + "step": 2133 + }, + { + "epoch": 0.8169283799310609, + "loss": 0.17164191603660583, + "loss_ce": 0.020824046805500984, + "loss_iou": 1.0003976821899414, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 368676456, + "step": 2133 + }, + { + "epoch": 0.8173113749521256, + "grad_norm": 29.21394391065354, + "learning_rate": 5e-06, + "loss": 0.1859, + "num_input_tokens_seen": 368849576, + "step": 2134 + }, + { + "epoch": 0.8173113749521256, + "loss": 0.22054269909858704, + "loss_ce": 0.021262917667627335, + "loss_iou": 1.0249555110931396, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 368849576, + "step": 2134 + }, + { + "epoch": 0.8176943699731903, + "grad_norm": 40.56123718651517, + "learning_rate": 5e-06, + "loss": 0.2921, + "num_input_tokens_seen": 369022440, + "step": 2135 + }, + { + "epoch": 0.8176943699731903, + "loss": 0.3429931402206421, + "loss_ce": 0.021398894488811493, + "loss_iou": 1.076008915901184, + "loss_num": 0.322265625, + "loss_xval": 0.322265625, + "num_input_tokens_seen": 369022440, + "step": 2135 + }, + { + "epoch": 0.8180773649942551, + "grad_norm": 32.061146496298235, + "learning_rate": 5e-06, + "loss": 0.2334, + "num_input_tokens_seen": 369195448, + "step": 2136 + }, + { + "epoch": 0.8180773649942551, + "loss": 0.21450859308242798, + "loss_ce": 0.020538877695798874, + "loss_iou": 1.0362865924835205, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 369195448, + "step": 2136 + }, + { + "epoch": 0.8184603600153199, + "grad_norm": 28.584843416958208, + "learning_rate": 5e-06, + "loss": 0.2059, + "num_input_tokens_seen": 369368440, + "step": 2137 + }, + { + "epoch": 0.8184603600153199, + "loss": 0.1834711879491806, + "loss_ce": 0.0206293947994709, + "loss_iou": 1.011899471282959, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 369368440, + "step": 2137 + }, + { + "epoch": 0.8188433550363845, + "grad_norm": 22.691707761012573, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 369541216, + "step": 2138 + }, + { + "epoch": 0.8188433550363845, + "loss": 0.15554609894752502, + "loss_ce": 0.020963573828339577, + "loss_iou": 1.0016136169433594, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 369541216, + "step": 2138 + }, + { + "epoch": 0.8192263500574493, + "grad_norm": 36.32095499337774, + "learning_rate": 5e-06, + "loss": 0.2489, + "num_input_tokens_seen": 369714304, + "step": 2139 + }, + { + "epoch": 0.8192263500574493, + "loss": 0.296573281288147, + "loss_ce": 0.020084049552679062, + "loss_iou": 1.2641435861587524, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 369714304, + "step": 2139 + }, + { + "epoch": 0.819609345078514, + "grad_norm": 26.687714119843587, + "learning_rate": 5e-06, + "loss": 0.2286, + "num_input_tokens_seen": 369887256, + "step": 2140 + }, + { + "epoch": 0.819609345078514, + "loss": 0.2310848981142044, + "loss_ce": 0.021062923595309258, + "loss_iou": 1.0150922536849976, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 369887256, + "step": 2140 + }, + { + "epoch": 0.8199923400995787, + "grad_norm": 42.32563606563892, + "learning_rate": 5e-06, + "loss": 0.3101, + "num_input_tokens_seen": 370059944, + "step": 2141 + }, + { + "epoch": 0.8199923400995787, + "loss": 0.2709304392337799, + "loss_ce": 0.021418720483779907, + "loss_iou": 1.113772988319397, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 370059944, + "step": 2141 + }, + { + "epoch": 0.8203753351206434, + "grad_norm": 37.72077567350258, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 370232800, + "step": 2142 + }, + { + "epoch": 0.8203753351206434, + "loss": 0.2032146453857422, + "loss_ce": 0.02212332934141159, + "loss_iou": 1.0222200155258179, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 370232800, + "step": 2142 + }, + { + "epoch": 0.8207583301417082, + "grad_norm": 28.976553942483406, + "learning_rate": 5e-06, + "loss": 0.2758, + "num_input_tokens_seen": 370405640, + "step": 2143 + }, + { + "epoch": 0.8207583301417082, + "loss": 0.31651657819747925, + "loss_ce": 0.022144028916954994, + "loss_iou": 1.1050704717636108, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 370405640, + "step": 2143 + }, + { + "epoch": 0.8211413251627729, + "grad_norm": 29.255901172619154, + "learning_rate": 5e-06, + "loss": 0.1941, + "num_input_tokens_seen": 370578624, + "step": 2144 + }, + { + "epoch": 0.8211413251627729, + "loss": 0.1887333244085312, + "loss_ce": 0.020520437508821487, + "loss_iou": 1.041619062423706, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 370578624, + "step": 2144 + }, + { + "epoch": 0.8215243201838376, + "grad_norm": 46.483402494126544, + "learning_rate": 5e-06, + "loss": 0.2374, + "num_input_tokens_seen": 370751592, + "step": 2145 + }, + { + "epoch": 0.8215243201838376, + "loss": 0.2568110525608063, + "loss_ce": 0.021459471434354782, + "loss_iou": 1.0386011600494385, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 370751592, + "step": 2145 + }, + { + "epoch": 0.8219073152049023, + "grad_norm": 29.947747066676154, + "learning_rate": 5e-06, + "loss": 0.2778, + "num_input_tokens_seen": 370924456, + "step": 2146 + }, + { + "epoch": 0.8219073152049023, + "loss": 0.2346021831035614, + "loss_ce": 0.020063605159521103, + "loss_iou": 1.0243932008743286, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 370924456, + "step": 2146 + }, + { + "epoch": 0.8222903102259671, + "grad_norm": 28.665856752031125, + "learning_rate": 5e-06, + "loss": 0.2505, + "num_input_tokens_seen": 371097616, + "step": 2147 + }, + { + "epoch": 0.8222903102259671, + "loss": 0.2775253653526306, + "loss_ce": 0.022703586146235466, + "loss_iou": 1.0256977081298828, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 371097616, + "step": 2147 + }, + { + "epoch": 0.8226733052470317, + "grad_norm": 42.77345054291103, + "learning_rate": 5e-06, + "loss": 0.204, + "num_input_tokens_seen": 371270672, + "step": 2148 + }, + { + "epoch": 0.8226733052470317, + "loss": 0.20909292995929718, + "loss_ce": 0.02183707244694233, + "loss_iou": 17.05697250366211, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 371270672, + "step": 2148 + }, + { + "epoch": 0.8230563002680965, + "grad_norm": 64.91123199264825, + "learning_rate": 5e-06, + "loss": 0.2677, + "num_input_tokens_seen": 371443352, + "step": 2149 + }, + { + "epoch": 0.8230563002680965, + "loss": 0.24871593713760376, + "loss_ce": 0.019834104925394058, + "loss_iou": 1.0209572315216064, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 371443352, + "step": 2149 + }, + { + "epoch": 0.8234392952891613, + "grad_norm": 26.387438541462288, + "learning_rate": 5e-06, + "loss": 0.2468, + "num_input_tokens_seen": 371616552, + "step": 2150 + }, + { + "epoch": 0.8234392952891613, + "loss": 0.24685382843017578, + "loss_ce": 0.021817198023200035, + "loss_iou": 1.0309360027313232, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 371616552, + "step": 2150 + }, + { + "epoch": 0.8238222903102259, + "grad_norm": 63.38123772802693, + "learning_rate": 5e-06, + "loss": 0.3584, + "num_input_tokens_seen": 371785936, + "step": 2151 + }, + { + "epoch": 0.8238222903102259, + "loss": 0.3480418920516968, + "loss_ce": 0.01955069601535797, + "loss_iou": 1.041642665863037, + "loss_num": 0.328125, + "loss_xval": 0.328125, + "num_input_tokens_seen": 371785936, + "step": 2151 + }, + { + "epoch": 0.8242052853312907, + "grad_norm": 29.862904214490893, + "learning_rate": 5e-06, + "loss": 0.2428, + "num_input_tokens_seen": 371958816, + "step": 2152 + }, + { + "epoch": 0.8242052853312907, + "loss": 0.22665396332740784, + "loss_ce": 0.018768219277262688, + "loss_iou": 0.9489197731018066, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 371958816, + "step": 2152 + }, + { + "epoch": 0.8245882803523554, + "grad_norm": 91.69820226345597, + "learning_rate": 5e-06, + "loss": 0.4184, + "num_input_tokens_seen": 372131608, + "step": 2153 + }, + { + "epoch": 0.8245882803523554, + "loss": 0.3356097936630249, + "loss_ce": 0.021889086812734604, + "loss_iou": 1.030665397644043, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 372131608, + "step": 2153 + }, + { + "epoch": 0.8249712753734202, + "grad_norm": 42.06860118666766, + "learning_rate": 5e-06, + "loss": 0.352, + "num_input_tokens_seen": 372304832, + "step": 2154 + }, + { + "epoch": 0.8249712753734202, + "loss": 0.35705554485321045, + "loss_ce": 0.022827016189694405, + "loss_iou": 1.0636298656463623, + "loss_num": 0.333984375, + "loss_xval": 0.333984375, + "num_input_tokens_seen": 372304832, + "step": 2154 + }, + { + "epoch": 0.8253542703944848, + "grad_norm": 61.00509001393007, + "learning_rate": 5e-06, + "loss": 0.3619, + "num_input_tokens_seen": 372477664, + "step": 2155 + }, + { + "epoch": 0.8253542703944848, + "loss": 0.36045730113983154, + "loss_ce": 0.020613551139831543, + "loss_iou": 1.0060477256774902, + "loss_num": 0.33984375, + "loss_xval": 0.33984375, + "num_input_tokens_seen": 372477664, + "step": 2155 + }, + { + "epoch": 0.8257372654155496, + "grad_norm": 37.30844281452132, + "learning_rate": 5e-06, + "loss": 0.34, + "num_input_tokens_seen": 372650296, + "step": 2156 + }, + { + "epoch": 0.8257372654155496, + "loss": 0.31085580587387085, + "loss_ce": 0.020328452810645103, + "loss_iou": 1.030238389968872, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 372650296, + "step": 2156 + }, + { + "epoch": 0.8261202604366144, + "grad_norm": 82.53127759207469, + "learning_rate": 5e-06, + "loss": 0.4079, + "num_input_tokens_seen": 372822944, + "step": 2157 + }, + { + "epoch": 0.8261202604366144, + "loss": 0.376138836145401, + "loss_ce": 0.02005971595644951, + "loss_iou": 1.04275643825531, + "loss_num": 0.35546875, + "loss_xval": 0.35546875, + "num_input_tokens_seen": 372822944, + "step": 2157 + }, + { + "epoch": 0.826503255457679, + "grad_norm": 60.30255454366365, + "learning_rate": 5e-06, + "loss": 0.4474, + "num_input_tokens_seen": 372995960, + "step": 2158 + }, + { + "epoch": 0.826503255457679, + "loss": 0.41652911901474, + "loss_ce": 0.020288866013288498, + "loss_iou": 1.0174659490585327, + "loss_num": 0.396484375, + "loss_xval": 0.396484375, + "num_input_tokens_seen": 372995960, + "step": 2158 + }, + { + "epoch": 0.8268862504787438, + "grad_norm": 28.334335210002113, + "learning_rate": 5e-06, + "loss": 0.2853, + "num_input_tokens_seen": 373169096, + "step": 2159 + }, + { + "epoch": 0.8268862504787438, + "loss": 0.3576209843158722, + "loss_ce": 0.02180556021630764, + "loss_iou": 19.851102828979492, + "loss_num": 0.3359375, + "loss_xval": 0.3359375, + "num_input_tokens_seen": 373169096, + "step": 2159 + }, + { + "epoch": 0.8272692454998085, + "grad_norm": 49.53509686154801, + "learning_rate": 5e-06, + "loss": 0.4267, + "num_input_tokens_seen": 373341848, + "step": 2160 + }, + { + "epoch": 0.8272692454998085, + "loss": 0.41843438148498535, + "loss_ce": 0.0207293089479208, + "loss_iou": 1.0405924320220947, + "loss_num": 0.3984375, + "loss_xval": 0.3984375, + "num_input_tokens_seen": 373341848, + "step": 2160 + }, + { + "epoch": 0.8276522405208733, + "grad_norm": 20.37068228369542, + "learning_rate": 5e-06, + "loss": 0.2118, + "num_input_tokens_seen": 373514824, + "step": 2161 + }, + { + "epoch": 0.8276522405208733, + "loss": 0.24461804330348969, + "loss_ce": 0.019581424072384834, + "loss_iou": 0.8936472535133362, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 373514824, + "step": 2161 + }, + { + "epoch": 0.8280352355419379, + "grad_norm": 33.944329710520314, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 373687448, + "step": 2162 + }, + { + "epoch": 0.8280352355419379, + "loss": 0.21628016233444214, + "loss_ce": 0.021333884447813034, + "loss_iou": 1.006072998046875, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 373687448, + "step": 2162 + }, + { + "epoch": 0.8284182305630027, + "grad_norm": 27.412374432146066, + "learning_rate": 5e-06, + "loss": 0.2433, + "num_input_tokens_seen": 373860320, + "step": 2163 + }, + { + "epoch": 0.8284182305630027, + "loss": 0.20806829631328583, + "loss_ce": 0.020690374076366425, + "loss_iou": 1.0283517837524414, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 373860320, + "step": 2163 + }, + { + "epoch": 0.8288012255840674, + "grad_norm": 19.06495389278362, + "learning_rate": 5e-06, + "loss": 0.1999, + "num_input_tokens_seen": 374033112, + "step": 2164 + }, + { + "epoch": 0.8288012255840674, + "loss": 0.2409350574016571, + "loss_ce": 0.019987797364592552, + "loss_iou": 1.3699593544006348, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 374033112, + "step": 2164 + }, + { + "epoch": 0.8291842206051321, + "grad_norm": 27.206349337833004, + "learning_rate": 5e-06, + "loss": 0.2062, + "num_input_tokens_seen": 374206168, + "step": 2165 + }, + { + "epoch": 0.8291842206051321, + "loss": 0.18531841039657593, + "loss_ce": 0.02137799561023712, + "loss_iou": 1.0548542737960815, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 374206168, + "step": 2165 + }, + { + "epoch": 0.8295672156261968, + "grad_norm": 25.131795982644185, + "learning_rate": 5e-06, + "loss": 0.1718, + "num_input_tokens_seen": 374379016, + "step": 2166 + }, + { + "epoch": 0.8295672156261968, + "loss": 0.13089054822921753, + "loss_ce": 0.022309008985757828, + "loss_iou": 1.0004377365112305, + "loss_num": 0.1083984375, + "loss_xval": 0.1083984375, + "num_input_tokens_seen": 374379016, + "step": 2166 + }, + { + "epoch": 0.8299502106472616, + "grad_norm": 29.096596528635764, + "learning_rate": 5e-06, + "loss": 0.1633, + "num_input_tokens_seen": 374552040, + "step": 2167 + }, + { + "epoch": 0.8299502106472616, + "loss": 0.16615352034568787, + "loss_ce": 0.021866418421268463, + "loss_iou": 1.0073778629302979, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 374552040, + "step": 2167 + }, + { + "epoch": 0.8303332056683264, + "grad_norm": 27.675018344639614, + "learning_rate": 5e-06, + "loss": 0.2255, + "num_input_tokens_seen": 374724824, + "step": 2168 + }, + { + "epoch": 0.8303332056683264, + "loss": 0.21904276311397552, + "loss_ce": 0.02134990133345127, + "loss_iou": 1.0683300495147705, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 374724824, + "step": 2168 + }, + { + "epoch": 0.830716200689391, + "grad_norm": 28.67479113154888, + "learning_rate": 5e-06, + "loss": 0.1988, + "num_input_tokens_seen": 374897856, + "step": 2169 + }, + { + "epoch": 0.830716200689391, + "loss": 0.2088775783777237, + "loss_ce": 0.020645156502723694, + "loss_iou": 1.0369899272918701, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 374897856, + "step": 2169 + }, + { + "epoch": 0.8310991957104558, + "grad_norm": 20.00694280309562, + "learning_rate": 5e-06, + "loss": 0.191, + "num_input_tokens_seen": 375070864, + "step": 2170 + }, + { + "epoch": 0.8310991957104558, + "loss": 0.16283264756202698, + "loss_ce": 0.02043761871755123, + "loss_iou": 1.0019893646240234, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 375070864, + "step": 2170 + }, + { + "epoch": 0.8314821907315205, + "grad_norm": 20.537005457242522, + "learning_rate": 5e-06, + "loss": 0.2047, + "num_input_tokens_seen": 375243624, + "step": 2171 + }, + { + "epoch": 0.8314821907315205, + "loss": 0.14835287630558014, + "loss_ce": 0.02231527492403984, + "loss_iou": 1.0038883686065674, + "loss_num": 0.1259765625, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 375243624, + "step": 2171 + }, + { + "epoch": 0.8318651857525852, + "grad_norm": 24.981646224644035, + "learning_rate": 5e-06, + "loss": 0.2113, + "num_input_tokens_seen": 375416456, + "step": 2172 + }, + { + "epoch": 0.8318651857525852, + "loss": 0.23956190049648285, + "loss_ce": 0.019530151039361954, + "loss_iou": 1.0428447723388672, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 375416456, + "step": 2172 + }, + { + "epoch": 0.8322481807736499, + "grad_norm": 26.02767440825196, + "learning_rate": 5e-06, + "loss": 0.1984, + "num_input_tokens_seen": 375589656, + "step": 2173 + }, + { + "epoch": 0.8322481807736499, + "loss": 0.19998681545257568, + "loss_ce": 0.021092761307954788, + "loss_iou": 1.0418498516082764, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 375589656, + "step": 2173 + }, + { + "epoch": 0.8326311757947147, + "grad_norm": 22.477332073355125, + "learning_rate": 5e-06, + "loss": 0.1727, + "num_input_tokens_seen": 375762792, + "step": 2174 + }, + { + "epoch": 0.8326311757947147, + "loss": 0.15376584231853485, + "loss_ce": 0.021227996796369553, + "loss_iou": 1.002480387687683, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 375762792, + "step": 2174 + }, + { + "epoch": 0.8330141708157794, + "grad_norm": 26.873458656094986, + "learning_rate": 5e-06, + "loss": 0.2338, + "num_input_tokens_seen": 375935360, + "step": 2175 + }, + { + "epoch": 0.8330141708157794, + "loss": 0.22055332362651825, + "loss_ce": 0.021822858601808548, + "loss_iou": 1.0023974180221558, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 375935360, + "step": 2175 + }, + { + "epoch": 0.8333971658368441, + "grad_norm": 31.37132126335547, + "learning_rate": 5e-06, + "loss": 0.1972, + "num_input_tokens_seen": 376108328, + "step": 2176 + }, + { + "epoch": 0.8333971658368441, + "loss": 0.18834498524665833, + "loss_ce": 0.020925553515553474, + "loss_iou": 1.0201756954193115, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 376108328, + "step": 2176 + }, + { + "epoch": 0.8337801608579088, + "grad_norm": 37.163570600558636, + "learning_rate": 5e-06, + "loss": 0.2749, + "num_input_tokens_seen": 376280864, + "step": 2177 + }, + { + "epoch": 0.8337801608579088, + "loss": 0.2612314820289612, + "loss_ce": 0.020569849759340286, + "loss_iou": 4.367435932159424, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 376280864, + "step": 2177 + }, + { + "epoch": 0.8341631558789736, + "grad_norm": 21.319412688034024, + "learning_rate": 5e-06, + "loss": 0.2065, + "num_input_tokens_seen": 376454120, + "step": 2178 + }, + { + "epoch": 0.8341631558789736, + "loss": 0.22772261500358582, + "loss_ce": 0.021118616685271263, + "loss_iou": 1.0112444162368774, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 376454120, + "step": 2178 + }, + { + "epoch": 0.8345461509000383, + "grad_norm": 21.729132395081642, + "learning_rate": 5e-06, + "loss": 0.2015, + "num_input_tokens_seen": 376626848, + "step": 2179 + }, + { + "epoch": 0.8345461509000383, + "loss": 0.23938342928886414, + "loss_ce": 0.020572395995259285, + "loss_iou": 1.0284411907196045, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 376626848, + "step": 2179 + }, + { + "epoch": 0.834929145921103, + "grad_norm": 20.853724556187558, + "learning_rate": 5e-06, + "loss": 0.2455, + "num_input_tokens_seen": 376799728, + "step": 2180 + }, + { + "epoch": 0.834929145921103, + "loss": 0.30870160460472107, + "loss_ce": 0.019578076899051666, + "loss_iou": 0.8902505040168762, + "loss_num": 0.2890625, + "loss_xval": 0.2890625, + "num_input_tokens_seen": 376799728, + "step": 2180 + }, + { + "epoch": 0.8353121409421678, + "grad_norm": 28.389613043360605, + "learning_rate": 5e-06, + "loss": 0.1318, + "num_input_tokens_seen": 376972560, + "step": 2181 + }, + { + "epoch": 0.8353121409421678, + "loss": 0.13370594382286072, + "loss_ce": 0.020607803016901016, + "loss_iou": 1.0374066829681396, + "loss_num": 0.11328125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 376972560, + "step": 2181 + }, + { + "epoch": 0.8356951359632325, + "grad_norm": 29.161765278793478, + "learning_rate": 5e-06, + "loss": 0.2211, + "num_input_tokens_seen": 377145440, + "step": 2182 + }, + { + "epoch": 0.8356951359632325, + "loss": 0.2618965804576874, + "loss_ce": 0.021784283220767975, + "loss_iou": 1.1681959629058838, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 377145440, + "step": 2182 + }, + { + "epoch": 0.8360781309842972, + "grad_norm": 21.795808593225356, + "learning_rate": 5e-06, + "loss": 0.2082, + "num_input_tokens_seen": 377318280, + "step": 2183 + }, + { + "epoch": 0.8360781309842972, + "loss": 0.21658295392990112, + "loss_ce": 0.02066010981798172, + "loss_iou": 1.0167796611785889, + "loss_num": 0.1962890625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 377318280, + "step": 2183 + }, + { + "epoch": 0.8364611260053619, + "grad_norm": 22.245390840813947, + "learning_rate": 5e-06, + "loss": 0.159, + "num_input_tokens_seen": 377491168, + "step": 2184 + }, + { + "epoch": 0.8364611260053619, + "loss": 0.17089596390724182, + "loss_ce": 0.022031232714653015, + "loss_iou": 1.0310887098312378, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 377491168, + "step": 2184 + }, + { + "epoch": 0.8368441210264267, + "grad_norm": 24.88724443711426, + "learning_rate": 5e-06, + "loss": 0.2646, + "num_input_tokens_seen": 377664032, + "step": 2185 + }, + { + "epoch": 0.8368441210264267, + "loss": 0.23490586876869202, + "loss_ce": 0.021710064262151718, + "loss_iou": 1.0489373207092285, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 377664032, + "step": 2185 + }, + { + "epoch": 0.8372271160474913, + "grad_norm": 22.16338435228188, + "learning_rate": 5e-06, + "loss": 0.1908, + "num_input_tokens_seen": 377837440, + "step": 2186 + }, + { + "epoch": 0.8372271160474913, + "loss": 0.2569584846496582, + "loss_ce": 0.020386239513754845, + "loss_iou": 1.0582001209259033, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 377837440, + "step": 2186 + }, + { + "epoch": 0.8376101110685561, + "grad_norm": 24.7277753271268, + "learning_rate": 5e-06, + "loss": 0.2387, + "num_input_tokens_seen": 378010264, + "step": 2187 + }, + { + "epoch": 0.8376101110685561, + "loss": 0.25835248827934265, + "loss_ce": 0.02092573791742325, + "loss_iou": 1.0623435974121094, + "loss_num": 0.2373046875, + "loss_xval": 0.2373046875, + "num_input_tokens_seen": 378010264, + "step": 2187 + }, + { + "epoch": 0.8379931060896209, + "grad_norm": 24.719697236965832, + "learning_rate": 5e-06, + "loss": 0.1879, + "num_input_tokens_seen": 378183416, + "step": 2188 + }, + { + "epoch": 0.8379931060896209, + "loss": 0.19210229814052582, + "loss_ce": 0.02059350535273552, + "loss_iou": 1.016941785812378, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 378183416, + "step": 2188 + }, + { + "epoch": 0.8383761011106856, + "grad_norm": 24.113227832932065, + "learning_rate": 5e-06, + "loss": 0.26, + "num_input_tokens_seen": 378356272, + "step": 2189 + }, + { + "epoch": 0.8383761011106856, + "loss": 0.3190307915210724, + "loss_ce": 0.020080596208572388, + "loss_iou": 1.0181410312652588, + "loss_num": 0.298828125, + "loss_xval": 0.298828125, + "num_input_tokens_seen": 378356272, + "step": 2189 + }, + { + "epoch": 0.8387590961317503, + "grad_norm": 24.62787157043042, + "learning_rate": 5e-06, + "loss": 0.2192, + "num_input_tokens_seen": 378529560, + "step": 2190 + }, + { + "epoch": 0.8387590961317503, + "loss": 0.1842830926179886, + "loss_ce": 0.021380264312028885, + "loss_iou": 1.012451410293579, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 378529560, + "step": 2190 + }, + { + "epoch": 0.839142091152815, + "grad_norm": 38.905196400234196, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 378702368, + "step": 2191 + }, + { + "epoch": 0.839142091152815, + "loss": 0.2040984332561493, + "loss_ce": 0.022152645513415337, + "loss_iou": 1.0184812545776367, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 378702368, + "step": 2191 + }, + { + "epoch": 0.8395250861738798, + "grad_norm": 26.759518992974776, + "learning_rate": 5e-06, + "loss": 0.1746, + "num_input_tokens_seen": 378875176, + "step": 2192 + }, + { + "epoch": 0.8395250861738798, + "loss": 0.1554000973701477, + "loss_ce": 0.020146194845438004, + "loss_iou": 1.0175584554672241, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 378875176, + "step": 2192 + }, + { + "epoch": 0.8399080811949444, + "grad_norm": 27.126001230207233, + "learning_rate": 5e-06, + "loss": 0.1853, + "num_input_tokens_seen": 379048104, + "step": 2193 + }, + { + "epoch": 0.8399080811949444, + "loss": 0.23117583990097046, + "loss_ce": 0.020116277039051056, + "loss_iou": 0.9910538792610168, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 379048104, + "step": 2193 + }, + { + "epoch": 0.8402910762160092, + "grad_norm": 20.014968760344406, + "learning_rate": 5e-06, + "loss": 0.2103, + "num_input_tokens_seen": 379220888, + "step": 2194 + }, + { + "epoch": 0.8402910762160092, + "loss": 0.20933499932289124, + "loss_ce": 0.021102581173181534, + "loss_iou": 1.0173618793487549, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 379220888, + "step": 2194 + }, + { + "epoch": 0.8406740712370739, + "grad_norm": 31.80461049633313, + "learning_rate": 5e-06, + "loss": 0.1958, + "num_input_tokens_seen": 379394040, + "step": 2195 + }, + { + "epoch": 0.8406740712370739, + "loss": 0.18281367421150208, + "loss_ce": 0.021314652636647224, + "loss_iou": 1.0045641660690308, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 379394040, + "step": 2195 + }, + { + "epoch": 0.8410570662581387, + "grad_norm": 29.73284418557474, + "learning_rate": 5e-06, + "loss": 0.208, + "num_input_tokens_seen": 379567176, + "step": 2196 + }, + { + "epoch": 0.8410570662581387, + "loss": 0.2742595374584198, + "loss_ce": 0.020475370809435844, + "loss_iou": 1.0036565065383911, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 379567176, + "step": 2196 + }, + { + "epoch": 0.8414400612792033, + "grad_norm": 21.963597026004095, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 379740144, + "step": 2197 + }, + { + "epoch": 0.8414400612792033, + "loss": 0.25153568387031555, + "loss_ce": 0.020639687776565552, + "loss_iou": 1.0811197757720947, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 379740144, + "step": 2197 + }, + { + "epoch": 0.8418230563002681, + "grad_norm": 25.053483094314313, + "learning_rate": 5e-06, + "loss": 0.2007, + "num_input_tokens_seen": 379913112, + "step": 2198 + }, + { + "epoch": 0.8418230563002681, + "loss": 0.222274512052536, + "loss_ce": 0.021346762776374817, + "loss_iou": 1.0332086086273193, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 379913112, + "step": 2198 + }, + { + "epoch": 0.8422060513213329, + "grad_norm": 33.65434983095775, + "learning_rate": 5e-06, + "loss": 0.2125, + "num_input_tokens_seen": 380085608, + "step": 2199 + }, + { + "epoch": 0.8422060513213329, + "loss": 0.17855077981948853, + "loss_ce": 0.018943851813673973, + "loss_iou": 1.009749174118042, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 380085608, + "step": 2199 + }, + { + "epoch": 0.8425890463423975, + "grad_norm": 24.803349461339845, + "learning_rate": 5e-06, + "loss": 0.2258, + "num_input_tokens_seen": 380258560, + "step": 2200 + }, + { + "epoch": 0.8425890463423975, + "loss": 0.27116209268569946, + "loss_ce": 0.020246557891368866, + "loss_iou": 1.1479721069335938, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 380258560, + "step": 2200 + }, + { + "epoch": 0.8429720413634623, + "grad_norm": 26.52332924299739, + "learning_rate": 5e-06, + "loss": 0.2184, + "num_input_tokens_seen": 380431568, + "step": 2201 + }, + { + "epoch": 0.8429720413634623, + "loss": 0.1903233826160431, + "loss_ce": 0.02119497023522854, + "loss_iou": 1.0225993394851685, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 380431568, + "step": 2201 + }, + { + "epoch": 0.843355036384527, + "grad_norm": 19.306394250905814, + "learning_rate": 5e-06, + "loss": 0.1937, + "num_input_tokens_seen": 380604168, + "step": 2202 + }, + { + "epoch": 0.843355036384527, + "loss": 0.23338836431503296, + "loss_ce": 0.02245086431503296, + "loss_iou": 1.0257644653320312, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 380604168, + "step": 2202 + }, + { + "epoch": 0.8437380314055918, + "grad_norm": 33.28715800345254, + "learning_rate": 5e-06, + "loss": 0.1573, + "num_input_tokens_seen": 380776720, + "step": 2203 + }, + { + "epoch": 0.8437380314055918, + "loss": 0.15688501298427582, + "loss_ce": 0.020715579390525818, + "loss_iou": 1.0013618469238281, + "loss_num": 0.1357421875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 380776720, + "step": 2203 + }, + { + "epoch": 0.8441210264266564, + "grad_norm": 25.592195810622155, + "learning_rate": 5e-06, + "loss": 0.2243, + "num_input_tokens_seen": 380949600, + "step": 2204 + }, + { + "epoch": 0.8441210264266564, + "loss": 0.20534177124500275, + "loss_ce": 0.019001437351107597, + "loss_iou": 1.033729076385498, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 380949600, + "step": 2204 + }, + { + "epoch": 0.8445040214477212, + "grad_norm": 17.2297216848919, + "learning_rate": 5e-06, + "loss": 0.2054, + "num_input_tokens_seen": 381122752, + "step": 2205 + }, + { + "epoch": 0.8445040214477212, + "loss": 0.18818581104278564, + "loss_ce": 0.018752217292785645, + "loss_iou": 1.0046367645263672, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 381122752, + "step": 2205 + }, + { + "epoch": 0.844887016468786, + "grad_norm": 25.84307296924863, + "learning_rate": 5e-06, + "loss": 0.2294, + "num_input_tokens_seen": 381295568, + "step": 2206 + }, + { + "epoch": 0.844887016468786, + "loss": 0.2742776870727539, + "loss_ce": 0.02098177559673786, + "loss_iou": 1.0310285091400146, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 381295568, + "step": 2206 + }, + { + "epoch": 0.8452700114898506, + "grad_norm": 26.137643475532453, + "learning_rate": 5e-06, + "loss": 0.2608, + "num_input_tokens_seen": 381468568, + "step": 2207 + }, + { + "epoch": 0.8452700114898506, + "loss": 0.2337532341480255, + "loss_ce": 0.020130200311541557, + "loss_iou": 0.9583425521850586, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 381468568, + "step": 2207 + }, + { + "epoch": 0.8456530065109154, + "grad_norm": 22.739127563204576, + "learning_rate": 5e-06, + "loss": 0.1654, + "num_input_tokens_seen": 381641584, + "step": 2208 + }, + { + "epoch": 0.8456530065109154, + "loss": 0.19659042358398438, + "loss_ce": 0.019710546359419823, + "loss_iou": 1.021919846534729, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 381641584, + "step": 2208 + }, + { + "epoch": 0.8460360015319801, + "grad_norm": 24.891123378386958, + "learning_rate": 5e-06, + "loss": 0.2735, + "num_input_tokens_seen": 381814368, + "step": 2209 + }, + { + "epoch": 0.8460360015319801, + "loss": 0.2113550901412964, + "loss_ce": 0.019826766103506088, + "loss_iou": 1.0132403373718262, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 381814368, + "step": 2209 + }, + { + "epoch": 0.8464189965530448, + "grad_norm": 28.400589982922263, + "learning_rate": 5e-06, + "loss": 0.1942, + "num_input_tokens_seen": 381987128, + "step": 2210 + }, + { + "epoch": 0.8464189965530448, + "loss": 0.18612128496170044, + "loss_ce": 0.020105667412281036, + "loss_iou": 1.0098161697387695, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 381987128, + "step": 2210 + }, + { + "epoch": 0.8468019915741095, + "grad_norm": 26.111639458418296, + "learning_rate": 5e-06, + "loss": 0.2345, + "num_input_tokens_seen": 382159824, + "step": 2211 + }, + { + "epoch": 0.8468019915741095, + "loss": 0.25814738869667053, + "loss_ce": 0.020049242302775383, + "loss_iou": 1.0347554683685303, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 382159824, + "step": 2211 + }, + { + "epoch": 0.8471849865951743, + "grad_norm": 26.875641179779645, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 382332736, + "step": 2212 + }, + { + "epoch": 0.8471849865951743, + "loss": 0.20974765717983246, + "loss_ce": 0.02163730189204216, + "loss_iou": 1.0757482051849365, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 382332736, + "step": 2212 + }, + { + "epoch": 0.847567981616239, + "grad_norm": 32.48165760489675, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 382505648, + "step": 2213 + }, + { + "epoch": 0.847567981616239, + "loss": 0.23111853003501892, + "loss_ce": 0.020425166934728622, + "loss_iou": 1.0130873918533325, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 382505648, + "step": 2213 + }, + { + "epoch": 0.8479509766373037, + "grad_norm": 28.07529780996336, + "learning_rate": 5e-06, + "loss": 0.1389, + "num_input_tokens_seen": 382678792, + "step": 2214 + }, + { + "epoch": 0.8479509766373037, + "loss": 0.1350688338279724, + "loss_ce": 0.02206224389374256, + "loss_iou": 1.0005462169647217, + "loss_num": 0.11279296875, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 382678792, + "step": 2214 + }, + { + "epoch": 0.8483339716583684, + "grad_norm": 34.326411712989156, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 382851968, + "step": 2215 + }, + { + "epoch": 0.8483339716583684, + "loss": 0.23377156257629395, + "loss_ce": 0.021491285413503647, + "loss_iou": 1.0419487953186035, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 382851968, + "step": 2215 + }, + { + "epoch": 0.8487169666794332, + "grad_norm": 43.532088051136896, + "learning_rate": 5e-06, + "loss": 0.2152, + "num_input_tokens_seen": 383024904, + "step": 2216 + }, + { + "epoch": 0.8487169666794332, + "loss": 0.27027279138565063, + "loss_ce": 0.023446639999747276, + "loss_iou": 1.0436408519744873, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 383024904, + "step": 2216 + }, + { + "epoch": 0.8490999617004978, + "grad_norm": 36.55397794869057, + "learning_rate": 5e-06, + "loss": 0.2482, + "num_input_tokens_seen": 383197920, + "step": 2217 + }, + { + "epoch": 0.8490999617004978, + "loss": 0.2088879495859146, + "loss_ce": 0.021448995918035507, + "loss_iou": 1.0622910261154175, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 383197920, + "step": 2217 + }, + { + "epoch": 0.8494829567215626, + "grad_norm": 19.570506990689445, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 383370320, + "step": 2218 + }, + { + "epoch": 0.8494829567215626, + "loss": 0.12578538060188293, + "loss_ce": 0.02117113023996353, + "loss_iou": 1.0086634159088135, + "loss_num": 0.1044921875, + "loss_xval": 0.1044921875, + "num_input_tokens_seen": 383370320, + "step": 2218 + }, + { + "epoch": 0.8498659517426274, + "grad_norm": 20.79907884711053, + "learning_rate": 5e-06, + "loss": 0.1775, + "num_input_tokens_seen": 383543840, + "step": 2219 + }, + { + "epoch": 0.8498659517426274, + "loss": 0.14958077669143677, + "loss_ce": 0.021315395832061768, + "loss_iou": 1.0119179487228394, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 383543840, + "step": 2219 + }, + { + "epoch": 0.8502489467636921, + "grad_norm": 41.55804129473687, + "learning_rate": 5e-06, + "loss": 0.1978, + "num_input_tokens_seen": 383716784, + "step": 2220 + }, + { + "epoch": 0.8502489467636921, + "loss": 0.20395150780677795, + "loss_ce": 0.021029146388173103, + "loss_iou": 1.0557193756103516, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 383716784, + "step": 2220 + }, + { + "epoch": 0.8506319417847568, + "grad_norm": 34.48967079210651, + "learning_rate": 5e-06, + "loss": 0.2426, + "num_input_tokens_seen": 383889664, + "step": 2221 + }, + { + "epoch": 0.8506319417847568, + "loss": 0.24308425188064575, + "loss_ce": 0.01981765404343605, + "loss_iou": 1.001485824584961, + "loss_num": 0.2236328125, + "loss_xval": 0.2236328125, + "num_input_tokens_seen": 383889664, + "step": 2221 + }, + { + "epoch": 0.8510149368058215, + "grad_norm": 24.43551228595156, + "learning_rate": 5e-06, + "loss": 0.1599, + "num_input_tokens_seen": 384062600, + "step": 2222 + }, + { + "epoch": 0.8510149368058215, + "loss": 0.17191261053085327, + "loss_ce": 0.02036231942474842, + "loss_iou": 1.019148349761963, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 384062600, + "step": 2222 + }, + { + "epoch": 0.8513979318268863, + "grad_norm": 48.541317174882316, + "learning_rate": 5e-06, + "loss": 0.1604, + "num_input_tokens_seen": 384236016, + "step": 2223 + }, + { + "epoch": 0.8513979318268863, + "loss": 0.14895141124725342, + "loss_ce": 0.021021710708737373, + "loss_iou": 1.0104756355285645, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 384236016, + "step": 2223 + }, + { + "epoch": 0.8517809268479509, + "grad_norm": 32.418865843011815, + "learning_rate": 5e-06, + "loss": 0.2399, + "num_input_tokens_seen": 384409240, + "step": 2224 + }, + { + "epoch": 0.8517809268479509, + "loss": 0.2681843638420105, + "loss_ce": 0.020930945873260498, + "loss_iou": 1.0598926544189453, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 384409240, + "step": 2224 + }, + { + "epoch": 0.8521639218690157, + "grad_norm": 30.691232291627884, + "learning_rate": 5e-06, + "loss": 0.1607, + "num_input_tokens_seen": 384581896, + "step": 2225 + }, + { + "epoch": 0.8521639218690157, + "loss": 0.15713070333003998, + "loss_ce": 0.021480072289705276, + "loss_iou": 1.0184576511383057, + "loss_num": 0.1357421875, + "loss_xval": 0.1357421875, + "num_input_tokens_seen": 384581896, + "step": 2225 + }, + { + "epoch": 0.8525469168900804, + "grad_norm": 26.887317307608107, + "learning_rate": 5e-06, + "loss": 0.1923, + "num_input_tokens_seen": 384754616, + "step": 2226 + }, + { + "epoch": 0.8525469168900804, + "loss": 0.13293921947479248, + "loss_ce": 0.021061770617961884, + "loss_iou": 1.002497673034668, + "loss_num": 0.11181640625, + "loss_xval": 0.11181640625, + "num_input_tokens_seen": 384754616, + "step": 2226 + }, + { + "epoch": 0.8529299119111452, + "grad_norm": 35.72703494227469, + "learning_rate": 5e-06, + "loss": 0.237, + "num_input_tokens_seen": 384927320, + "step": 2227 + }, + { + "epoch": 0.8529299119111452, + "loss": 0.2753600478172302, + "loss_ce": 0.019988957792520523, + "loss_iou": 1.6816809177398682, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 384927320, + "step": 2227 + }, + { + "epoch": 0.8533129069322098, + "grad_norm": 30.365398214819066, + "learning_rate": 5e-06, + "loss": 0.1783, + "num_input_tokens_seen": 385100520, + "step": 2228 + }, + { + "epoch": 0.8533129069322098, + "loss": 0.19887545704841614, + "loss_ce": 0.020225554704666138, + "loss_iou": 1.050445318222046, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 385100520, + "step": 2228 + }, + { + "epoch": 0.8536959019532746, + "grad_norm": 22.968221588790158, + "learning_rate": 5e-06, + "loss": 0.2021, + "num_input_tokens_seen": 385273720, + "step": 2229 + }, + { + "epoch": 0.8536959019532746, + "loss": 0.24257753789424896, + "loss_ce": 0.020287496969103813, + "loss_iou": 1.0931062698364258, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 385273720, + "step": 2229 + }, + { + "epoch": 0.8540788969743394, + "grad_norm": 25.78179269820435, + "learning_rate": 5e-06, + "loss": 0.2538, + "num_input_tokens_seen": 385446512, + "step": 2230 + }, + { + "epoch": 0.8540788969743394, + "loss": 0.1526906043291092, + "loss_ce": 0.02210589125752449, + "loss_iou": 1.0236563682556152, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 385446512, + "step": 2230 + }, + { + "epoch": 0.854461891995404, + "grad_norm": 31.826088139453805, + "learning_rate": 5e-06, + "loss": 0.1534, + "num_input_tokens_seen": 385619712, + "step": 2231 + }, + { + "epoch": 0.854461891995404, + "loss": 0.13914582133293152, + "loss_ce": 0.022385576739907265, + "loss_iou": 0.9842702746391296, + "loss_num": 0.11669921875, + "loss_xval": 0.11669921875, + "num_input_tokens_seen": 385619712, + "step": 2231 + }, + { + "epoch": 0.8548448870164688, + "grad_norm": 31.061881249812, + "learning_rate": 5e-06, + "loss": 0.1978, + "num_input_tokens_seen": 385792480, + "step": 2232 + }, + { + "epoch": 0.8548448870164688, + "loss": 0.21213898062705994, + "loss_ce": 0.021831370890140533, + "loss_iou": 1.017231822013855, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 385792480, + "step": 2232 + }, + { + "epoch": 0.8552278820375335, + "grad_norm": 33.208733578507065, + "learning_rate": 5e-06, + "loss": 0.1814, + "num_input_tokens_seen": 385965608, + "step": 2233 + }, + { + "epoch": 0.8552278820375335, + "loss": 0.1736002117395401, + "loss_ce": 0.02168370969593525, + "loss_iou": 1.0410422086715698, + "loss_num": 0.15234375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 385965608, + "step": 2233 + }, + { + "epoch": 0.8556108770585983, + "grad_norm": 24.36039320694214, + "learning_rate": 5e-06, + "loss": 0.169, + "num_input_tokens_seen": 386138736, + "step": 2234 + }, + { + "epoch": 0.8556108770585983, + "loss": 0.16091981530189514, + "loss_ce": 0.02218690887093544, + "loss_iou": 1.0135810375213623, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 386138736, + "step": 2234 + }, + { + "epoch": 0.8559938720796629, + "grad_norm": 26.630559602629717, + "learning_rate": 5e-06, + "loss": 0.1939, + "num_input_tokens_seen": 386311616, + "step": 2235 + }, + { + "epoch": 0.8559938720796629, + "loss": 0.20099741220474243, + "loss_ce": 0.019295761361718178, + "loss_iou": 1.00544011592865, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 386311616, + "step": 2235 + }, + { + "epoch": 0.8563768671007277, + "grad_norm": 29.432108707921554, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 386485216, + "step": 2236 + }, + { + "epoch": 0.8563768671007277, + "loss": 0.2291969358921051, + "loss_ce": 0.022653959691524506, + "loss_iou": 1.0504539012908936, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 386485216, + "step": 2236 + }, + { + "epoch": 0.8567598621217924, + "grad_norm": 32.27350031667615, + "learning_rate": 5e-06, + "loss": 0.1902, + "num_input_tokens_seen": 386658064, + "step": 2237 + }, + { + "epoch": 0.8567598621217924, + "loss": 0.18089860677719116, + "loss_ce": 0.02165788784623146, + "loss_iou": 1.0156598091125488, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 386658064, + "step": 2237 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 16.848194771374665, + "learning_rate": 5e-06, + "loss": 0.166, + "num_input_tokens_seen": 386830816, + "step": 2238 + }, + { + "epoch": 0.8571428571428571, + "loss": 0.1765880137681961, + "loss_ce": 0.020704224705696106, + "loss_iou": 1.0056991577148438, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 386830816, + "step": 2238 + }, + { + "epoch": 0.8575258521639219, + "grad_norm": 38.74609977662123, + "learning_rate": 5e-06, + "loss": 0.1959, + "num_input_tokens_seen": 387003656, + "step": 2239 + }, + { + "epoch": 0.8575258521639219, + "loss": 0.16399705410003662, + "loss_ce": 0.020076151937246323, + "loss_iou": 1.002182960510254, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 387003656, + "step": 2239 + }, + { + "epoch": 0.8579088471849866, + "grad_norm": 30.62404269175233, + "learning_rate": 5e-06, + "loss": 0.2349, + "num_input_tokens_seen": 387176648, + "step": 2240 + }, + { + "epoch": 0.8579088471849866, + "loss": 0.23139256238937378, + "loss_ce": 0.021614745259284973, + "loss_iou": 1.033926248550415, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 387176648, + "step": 2240 + }, + { + "epoch": 0.8582918422060514, + "grad_norm": 37.479556106391456, + "learning_rate": 5e-06, + "loss": 0.2319, + "num_input_tokens_seen": 387349160, + "step": 2241 + }, + { + "epoch": 0.8582918422060514, + "loss": 0.2569850981235504, + "loss_ce": 0.020168686285614967, + "loss_iou": 1.0369092226028442, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 387349160, + "step": 2241 + }, + { + "epoch": 0.858674837227116, + "grad_norm": 25.513422322811078, + "learning_rate": 5e-06, + "loss": 0.1938, + "num_input_tokens_seen": 387522208, + "step": 2242 + }, + { + "epoch": 0.858674837227116, + "loss": 0.2113761603832245, + "loss_ce": 0.020214062184095383, + "loss_iou": 1.0536246299743652, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 387522208, + "step": 2242 + }, + { + "epoch": 0.8590578322481808, + "grad_norm": 36.624122585785116, + "learning_rate": 5e-06, + "loss": 0.2894, + "num_input_tokens_seen": 387691656, + "step": 2243 + }, + { + "epoch": 0.8590578322481808, + "loss": 0.3449817895889282, + "loss_ce": 0.020396839827299118, + "loss_iou": 1.0083636045455933, + "loss_num": 0.32421875, + "loss_xval": 0.32421875, + "num_input_tokens_seen": 387691656, + "step": 2243 + }, + { + "epoch": 0.8594408272692455, + "grad_norm": 26.62656131057298, + "learning_rate": 5e-06, + "loss": 0.2166, + "num_input_tokens_seen": 387864496, + "step": 2244 + }, + { + "epoch": 0.8594408272692455, + "loss": 0.20909324288368225, + "loss_ce": 0.021288076415657997, + "loss_iou": 1.0143465995788574, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 387864496, + "step": 2244 + }, + { + "epoch": 0.8598238222903102, + "grad_norm": 28.81258415694795, + "learning_rate": 5e-06, + "loss": 0.2048, + "num_input_tokens_seen": 388037376, + "step": 2245 + }, + { + "epoch": 0.8598238222903102, + "loss": 0.22831951081752777, + "loss_ce": 0.02000652253627777, + "loss_iou": 1.0391998291015625, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 388037376, + "step": 2245 + }, + { + "epoch": 0.8602068173113749, + "grad_norm": 26.798224782980558, + "learning_rate": 5e-06, + "loss": 0.1991, + "num_input_tokens_seen": 388210568, + "step": 2246 + }, + { + "epoch": 0.8602068173113749, + "loss": 0.19132068753242493, + "loss_ce": 0.020971566438674927, + "loss_iou": 1.0045247077941895, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 388210568, + "step": 2246 + }, + { + "epoch": 0.8605898123324397, + "grad_norm": 29.27226331767748, + "learning_rate": 5e-06, + "loss": 0.2494, + "num_input_tokens_seen": 388383984, + "step": 2247 + }, + { + "epoch": 0.8605898123324397, + "loss": 0.2672705054283142, + "loss_ce": 0.020383302122354507, + "loss_iou": 1.04952871799469, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 388383984, + "step": 2247 + }, + { + "epoch": 0.8609728073535045, + "grad_norm": 29.59762985020245, + "learning_rate": 5e-06, + "loss": 0.2152, + "num_input_tokens_seen": 388556784, + "step": 2248 + }, + { + "epoch": 0.8609728073535045, + "loss": 0.21293094754219055, + "loss_ce": 0.020059864968061447, + "loss_iou": 1.0007498264312744, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 388556784, + "step": 2248 + }, + { + "epoch": 0.8613558023745691, + "grad_norm": 34.46333613595097, + "learning_rate": 5e-06, + "loss": 0.278, + "num_input_tokens_seen": 388730048, + "step": 2249 + }, + { + "epoch": 0.8613558023745691, + "loss": 0.3135603964328766, + "loss_ce": 0.02254476770758629, + "loss_iou": 1.1369743347167969, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 388730048, + "step": 2249 + }, + { + "epoch": 0.8617387973956339, + "grad_norm": 18.148969965182555, + "learning_rate": 5e-06, + "loss": 0.1675, + "num_input_tokens_seen": 388902864, + "step": 2250 + }, + { + "epoch": 0.8617387973956339, + "eval_websight_new_CIoU": 0.925597608089447, + "eval_websight_new_GIoU": 0.925267904996872, + "eval_websight_new_IoU": 0.9258756935596466, + "eval_websight_new_MAE_all": 0.007574299583211541, + "eval_websight_new_MAE_h": 0.006647913018241525, + "eval_websight_new_MAE_w": 0.007652605883777142, + "eval_websight_new_MAE_x": 0.007741168374195695, + "eval_websight_new_MAE_y": 0.008255511056631804, + "eval_websight_new_NUM_probability": 8.322440044139512e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.11716654151678085, + "eval_websight_new_loss_ce": 0.041889848187565804, + "eval_websight_new_loss_iou": 1.0004693865776062, + "eval_websight_new_loss_num": 0.07281494140625, + "eval_websight_new_loss_xval": 0.07281494140625, + "eval_websight_new_runtime": 56.5385, + "eval_websight_new_samples_per_second": 0.884, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 388902864, + "step": 2250 + }, + { + "epoch": 0.8617387973956339, + "eval_seeclick_CIoU": 0.6681079566478729, + "eval_seeclick_GIoU": 0.6744631826877594, + "eval_seeclick_IoU": 0.6976601779460907, + "eval_seeclick_MAE_all": 0.055332526564598083, + "eval_seeclick_MAE_h": 0.03067066613584757, + "eval_seeclick_MAE_w": 0.08008594438433647, + "eval_seeclick_MAE_x": 0.07529773563146591, + "eval_seeclick_MAE_y": 0.03527577221393585, + "eval_seeclick_NUM_probability": 0.00011169226127094589, + "eval_seeclick_inside_bbox": 0.9097222089767456, + "eval_seeclick_loss": 0.47150173783302307, + "eval_seeclick_loss_ce": 0.03271147049963474, + "eval_seeclick_loss_iou": 1.140836775302887, + "eval_seeclick_loss_num": 0.438751220703125, + "eval_seeclick_loss_xval": 0.438751220703125, + "eval_seeclick_runtime": 85.9892, + "eval_seeclick_samples_per_second": 0.581, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 388902864, + "step": 2250 + }, + { + "epoch": 0.8617387973956339, + "eval_icons_CIoU": 0.8652712106704712, + "eval_icons_GIoU": 0.8643836081027985, + "eval_icons_IoU": 0.8742239475250244, + "eval_icons_MAE_all": 0.020118060521781445, + "eval_icons_MAE_h": 0.01535157673060894, + "eval_icons_MAE_w": 0.023680626414716244, + "eval_icons_MAE_x": 0.024736194871366024, + "eval_icons_MAE_y": 0.016703845001757145, + "eval_icons_NUM_probability": 7.394958811346442e-05, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.17382636666297913, + "eval_icons_loss_ce": 0.02849969081580639, + "eval_icons_loss_iou": 1.042946219444275, + "eval_icons_loss_num": 0.13238525390625, + "eval_icons_loss_xval": 0.13238525390625, + "eval_icons_runtime": 82.6232, + "eval_icons_samples_per_second": 0.605, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 388902864, + "step": 2250 + }, + { + "epoch": 0.8617387973956339, + "loss": 0.1544845551252365, + "loss_ce": 0.02850799635052681, + "loss_iou": 1.0282611846923828, + "loss_num": 0.1259765625, + "loss_xval": 0.1259765625, + "num_input_tokens_seen": 388902864, + "step": 2250 + }, + { + "epoch": 0.8621217924166986, + "grad_norm": 40.262798507762206, + "learning_rate": 5e-06, + "loss": 0.2046, + "num_input_tokens_seen": 389075768, + "step": 2251 + }, + { + "epoch": 0.8621217924166986, + "loss": 0.24195796251296997, + "loss_ce": 0.020766548812389374, + "loss_iou": 1.143406867980957, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 389075768, + "step": 2251 + }, + { + "epoch": 0.8625047874377633, + "grad_norm": 23.771191181194094, + "learning_rate": 5e-06, + "loss": 0.2024, + "num_input_tokens_seen": 389248752, + "step": 2252 + }, + { + "epoch": 0.8625047874377633, + "loss": 0.1764417290687561, + "loss_ce": 0.021778643131256104, + "loss_iou": 1.0457133054733276, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 389248752, + "step": 2252 + }, + { + "epoch": 0.862887782458828, + "grad_norm": 30.254986516485246, + "learning_rate": 5e-06, + "loss": 0.2046, + "num_input_tokens_seen": 389421704, + "step": 2253 + }, + { + "epoch": 0.862887782458828, + "loss": 0.18306171894073486, + "loss_ce": 0.01991475746035576, + "loss_iou": 1.9940533638000488, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 389421704, + "step": 2253 + }, + { + "epoch": 0.8632707774798928, + "grad_norm": 21.778127440440873, + "learning_rate": 5e-06, + "loss": 0.1694, + "num_input_tokens_seen": 389594840, + "step": 2254 + }, + { + "epoch": 0.8632707774798928, + "loss": 0.17349952459335327, + "loss_ce": 0.020484384149312973, + "loss_iou": 1.0874111652374268, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 389594840, + "step": 2254 + }, + { + "epoch": 0.8636537725009575, + "grad_norm": 23.969587805732107, + "learning_rate": 5e-06, + "loss": 0.1503, + "num_input_tokens_seen": 389768176, + "step": 2255 + }, + { + "epoch": 0.8636537725009575, + "loss": 0.1377573311328888, + "loss_ce": 0.022095710039138794, + "loss_iou": 1.0036972761154175, + "loss_num": 0.11572265625, + "loss_xval": 0.11572265625, + "num_input_tokens_seen": 389768176, + "step": 2255 + }, + { + "epoch": 0.8640367675220222, + "grad_norm": 28.174938708153093, + "learning_rate": 5e-06, + "loss": 0.2382, + "num_input_tokens_seen": 389941656, + "step": 2256 + }, + { + "epoch": 0.8640367675220222, + "loss": 0.21343086659908295, + "loss_ce": 0.02281806617975235, + "loss_iou": 1.106879711151123, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 389941656, + "step": 2256 + }, + { + "epoch": 0.864419762543087, + "grad_norm": 26.522715403720657, + "learning_rate": 5e-06, + "loss": 0.2505, + "num_input_tokens_seen": 390114520, + "step": 2257 + }, + { + "epoch": 0.864419762543087, + "loss": 0.2640177607536316, + "loss_ce": 0.020853698253631592, + "loss_iou": 1.0961253643035889, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 390114520, + "step": 2257 + }, + { + "epoch": 0.8648027575641517, + "grad_norm": 28.008023588531298, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 390285824, + "step": 2258 + }, + { + "epoch": 0.8648027575641517, + "loss": 0.2037627100944519, + "loss_ce": 0.021389653906226158, + "loss_iou": 0.9831381440162659, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 390285824, + "step": 2258 + }, + { + "epoch": 0.8651857525852163, + "grad_norm": 34.32974022877522, + "learning_rate": 5e-06, + "loss": 0.2206, + "num_input_tokens_seen": 390458624, + "step": 2259 + }, + { + "epoch": 0.8651857525852163, + "loss": 0.2119344174861908, + "loss_ce": 0.02107749506831169, + "loss_iou": 1.0079381465911865, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 390458624, + "step": 2259 + }, + { + "epoch": 0.8655687476062811, + "grad_norm": 28.77967027761165, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 390631360, + "step": 2260 + }, + { + "epoch": 0.8655687476062811, + "loss": 0.19896094501018524, + "loss_ce": 0.022081052884459496, + "loss_iou": 1.0542672872543335, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 390631360, + "step": 2260 + }, + { + "epoch": 0.8659517426273459, + "grad_norm": 23.3673415650207, + "learning_rate": 5e-06, + "loss": 0.2002, + "num_input_tokens_seen": 390804368, + "step": 2261 + }, + { + "epoch": 0.8659517426273459, + "loss": 0.20097553730010986, + "loss_ce": 0.021043909713625908, + "loss_iou": 1.0051054954528809, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 390804368, + "step": 2261 + }, + { + "epoch": 0.8663347376484106, + "grad_norm": 28.53559371537568, + "learning_rate": 5e-06, + "loss": 0.254, + "num_input_tokens_seen": 390977120, + "step": 2262 + }, + { + "epoch": 0.8663347376484106, + "loss": 0.2656322419643402, + "loss_ce": 0.02045402117073536, + "loss_iou": 1.1557904481887817, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 390977120, + "step": 2262 + }, + { + "epoch": 0.8667177326694753, + "grad_norm": 36.17334970556343, + "learning_rate": 5e-06, + "loss": 0.2581, + "num_input_tokens_seen": 391149976, + "step": 2263 + }, + { + "epoch": 0.8667177326694753, + "loss": 0.25398796796798706, + "loss_ce": 0.02156611531972885, + "loss_iou": 1.130397915840149, + "loss_num": 0.232421875, + "loss_xval": 0.232421875, + "num_input_tokens_seen": 391149976, + "step": 2263 + }, + { + "epoch": 0.86710072769054, + "grad_norm": 31.429505011009915, + "learning_rate": 5e-06, + "loss": 0.2676, + "num_input_tokens_seen": 391323128, + "step": 2264 + }, + { + "epoch": 0.86710072769054, + "loss": 0.27364373207092285, + "loss_ce": 0.021263331174850464, + "loss_iou": 1.0309957265853882, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 391323128, + "step": 2264 + }, + { + "epoch": 0.8674837227116048, + "grad_norm": 31.740600094807085, + "learning_rate": 5e-06, + "loss": 0.209, + "num_input_tokens_seen": 391495984, + "step": 2265 + }, + { + "epoch": 0.8674837227116048, + "loss": 0.23950469493865967, + "loss_ce": 0.020571591332554817, + "loss_iou": 1.0285762548446655, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 391495984, + "step": 2265 + }, + { + "epoch": 0.8678667177326694, + "grad_norm": 26.633176792341132, + "learning_rate": 5e-06, + "loss": 0.232, + "num_input_tokens_seen": 391668984, + "step": 2266 + }, + { + "epoch": 0.8678667177326694, + "loss": 0.22047019004821777, + "loss_ce": 0.023204566910862923, + "loss_iou": 1.0347226858139038, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 391668984, + "step": 2266 + }, + { + "epoch": 0.8682497127537342, + "grad_norm": 34.927207210440685, + "learning_rate": 5e-06, + "loss": 0.1771, + "num_input_tokens_seen": 391841904, + "step": 2267 + }, + { + "epoch": 0.8682497127537342, + "loss": 0.13843293488025665, + "loss_ce": 0.021123360842466354, + "loss_iou": 1.0038037300109863, + "loss_num": 0.1171875, + "loss_xval": 0.1171875, + "num_input_tokens_seen": 391841904, + "step": 2267 + }, + { + "epoch": 0.868632707774799, + "grad_norm": 39.75350681767262, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 392014976, + "step": 2268 + }, + { + "epoch": 0.868632707774799, + "loss": 0.1553298830986023, + "loss_ce": 0.021418750286102295, + "loss_iou": 1.0097993612289429, + "loss_num": 0.1337890625, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 392014976, + "step": 2268 + }, + { + "epoch": 0.8690157027958637, + "grad_norm": 23.521416444063636, + "learning_rate": 5e-06, + "loss": 0.2839, + "num_input_tokens_seen": 392188224, + "step": 2269 + }, + { + "epoch": 0.8690157027958637, + "loss": 0.3249661326408386, + "loss_ce": 0.019973479211330414, + "loss_iou": 1.1880580186843872, + "loss_num": 0.3046875, + "loss_xval": 0.3046875, + "num_input_tokens_seen": 392188224, + "step": 2269 + }, + { + "epoch": 0.8693986978169284, + "grad_norm": 24.155046461733335, + "learning_rate": 5e-06, + "loss": 0.1708, + "num_input_tokens_seen": 392361312, + "step": 2270 + }, + { + "epoch": 0.8693986978169284, + "loss": 0.12677741050720215, + "loss_ce": 0.021003492176532745, + "loss_iou": 1.0012356042861938, + "loss_num": 0.10595703125, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 392361312, + "step": 2270 + }, + { + "epoch": 0.8697816928379931, + "grad_norm": 30.501720723333715, + "learning_rate": 5e-06, + "loss": 0.223, + "num_input_tokens_seen": 392534216, + "step": 2271 + }, + { + "epoch": 0.8697816928379931, + "loss": 0.20977681875228882, + "loss_ce": 0.02099508047103882, + "loss_iou": 1.0238680839538574, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 392534216, + "step": 2271 + }, + { + "epoch": 0.8701646878590579, + "grad_norm": 28.394669787689313, + "learning_rate": 5e-06, + "loss": 0.2039, + "num_input_tokens_seen": 392706952, + "step": 2272 + }, + { + "epoch": 0.8701646878590579, + "loss": 0.2121831327676773, + "loss_ce": 0.021082064136862755, + "loss_iou": 1.0486198663711548, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 392706952, + "step": 2272 + }, + { + "epoch": 0.8705476828801225, + "grad_norm": 30.8297616515793, + "learning_rate": 5e-06, + "loss": 0.2058, + "num_input_tokens_seen": 392880088, + "step": 2273 + }, + { + "epoch": 0.8705476828801225, + "loss": 0.20134717226028442, + "loss_ce": 0.02117139846086502, + "loss_iou": 1.0266752243041992, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 392880088, + "step": 2273 + }, + { + "epoch": 0.8709306779011873, + "grad_norm": 35.38446460366821, + "learning_rate": 5e-06, + "loss": 0.2135, + "num_input_tokens_seen": 393053272, + "step": 2274 + }, + { + "epoch": 0.8709306779011873, + "loss": 0.24780648946762085, + "loss_ce": 0.021793287247419357, + "loss_iou": 1.1017720699310303, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 393053272, + "step": 2274 + }, + { + "epoch": 0.871313672922252, + "grad_norm": 61.66856149553012, + "learning_rate": 5e-06, + "loss": 0.2248, + "num_input_tokens_seen": 393226288, + "step": 2275 + }, + { + "epoch": 0.871313672922252, + "loss": 0.2328328788280487, + "loss_ce": 0.02226157858967781, + "loss_iou": 1.0248265266418457, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 393226288, + "step": 2275 + }, + { + "epoch": 0.8716966679433167, + "grad_norm": 35.51432763180763, + "learning_rate": 5e-06, + "loss": 0.2252, + "num_input_tokens_seen": 393398824, + "step": 2276 + }, + { + "epoch": 0.8716966679433167, + "loss": 0.2406993806362152, + "loss_ce": 0.020484544336795807, + "loss_iou": 1.0057376623153687, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 393398824, + "step": 2276 + }, + { + "epoch": 0.8720796629643814, + "grad_norm": 52.04962328569182, + "learning_rate": 5e-06, + "loss": 0.2097, + "num_input_tokens_seen": 393571680, + "step": 2277 + }, + { + "epoch": 0.8720796629643814, + "loss": 0.19266335666179657, + "loss_ce": 0.02145974338054657, + "loss_iou": 1.0567872524261475, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 393571680, + "step": 2277 + }, + { + "epoch": 0.8724626579854462, + "grad_norm": 29.23762116128551, + "learning_rate": 5e-06, + "loss": 0.2211, + "num_input_tokens_seen": 393744744, + "step": 2278 + }, + { + "epoch": 0.8724626579854462, + "loss": 0.27350762486457825, + "loss_ce": 0.022042784839868546, + "loss_iou": 1.0850646495819092, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 393744744, + "step": 2278 + }, + { + "epoch": 0.872845653006511, + "grad_norm": 78.34644998257741, + "learning_rate": 5e-06, + "loss": 0.2937, + "num_input_tokens_seen": 393918376, + "step": 2279 + }, + { + "epoch": 0.872845653006511, + "loss": 0.30728715658187866, + "loss_ce": 0.021642638370394707, + "loss_iou": 1.0545094013214111, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 393918376, + "step": 2279 + }, + { + "epoch": 0.8732286480275756, + "grad_norm": 31.58669547372777, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 394091272, + "step": 2280 + }, + { + "epoch": 0.8732286480275756, + "loss": 0.21547313034534454, + "loss_ce": 0.020221669226884842, + "loss_iou": 1.0116897821426392, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 394091272, + "step": 2280 + }, + { + "epoch": 0.8736116430486404, + "grad_norm": 24.39006798014416, + "learning_rate": 5e-06, + "loss": 0.1925, + "num_input_tokens_seen": 394264520, + "step": 2281 + }, + { + "epoch": 0.8736116430486404, + "loss": 0.19357803463935852, + "loss_ce": 0.020970605313777924, + "loss_iou": 1.0049718618392944, + "loss_num": 0.1728515625, + "loss_xval": 0.1728515625, + "num_input_tokens_seen": 394264520, + "step": 2281 + }, + { + "epoch": 0.8739946380697051, + "grad_norm": 36.59112125689424, + "learning_rate": 5e-06, + "loss": 0.1889, + "num_input_tokens_seen": 394437224, + "step": 2282 + }, + { + "epoch": 0.8739946380697051, + "loss": 0.16868579387664795, + "loss_ce": 0.0213469248265028, + "loss_iou": 0.8756428956985474, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 394437224, + "step": 2282 + }, + { + "epoch": 0.8743776330907698, + "grad_norm": 42.07793700100101, + "learning_rate": 5e-06, + "loss": 0.2283, + "num_input_tokens_seen": 394610136, + "step": 2283 + }, + { + "epoch": 0.8743776330907698, + "loss": 0.2361423671245575, + "loss_ce": 0.020322054624557495, + "loss_iou": 1.027820110321045, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 394610136, + "step": 2283 + }, + { + "epoch": 0.8747606281118345, + "grad_norm": 30.9570465693495, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 394783072, + "step": 2284 + }, + { + "epoch": 0.8747606281118345, + "loss": 0.21155185997486115, + "loss_ce": 0.019901473075151443, + "loss_iou": 1.0322974920272827, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 394783072, + "step": 2284 + }, + { + "epoch": 0.8751436231328993, + "grad_norm": 22.795918529470963, + "learning_rate": 5e-06, + "loss": 0.2171, + "num_input_tokens_seen": 394955864, + "step": 2285 + }, + { + "epoch": 0.8751436231328993, + "loss": 0.21094149351119995, + "loss_ce": 0.02167147397994995, + "loss_iou": 1.0987880229949951, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 394955864, + "step": 2285 + }, + { + "epoch": 0.875526618153964, + "grad_norm": 21.65343377133573, + "learning_rate": 5e-06, + "loss": 0.1927, + "num_input_tokens_seen": 395128664, + "step": 2286 + }, + { + "epoch": 0.875526618153964, + "loss": 0.21473294496536255, + "loss_ce": 0.020763229578733444, + "loss_iou": 1.014101505279541, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 395128664, + "step": 2286 + }, + { + "epoch": 0.8759096131750287, + "grad_norm": 23.664828712220437, + "learning_rate": 5e-06, + "loss": 0.171, + "num_input_tokens_seen": 395301648, + "step": 2287 + }, + { + "epoch": 0.8759096131750287, + "loss": 0.19143562018871307, + "loss_ce": 0.020109932869672775, + "loss_iou": 1.0008352994918823, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 395301648, + "step": 2287 + }, + { + "epoch": 0.8762926081960934, + "grad_norm": 32.53198796879764, + "learning_rate": 5e-06, + "loss": 0.165, + "num_input_tokens_seen": 395474368, + "step": 2288 + }, + { + "epoch": 0.8762926081960934, + "loss": 0.1494501382112503, + "loss_ce": 0.021642513573169708, + "loss_iou": 1.0112065076828003, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 395474368, + "step": 2288 + }, + { + "epoch": 0.8766756032171582, + "grad_norm": 34.66682965373584, + "learning_rate": 5e-06, + "loss": 0.2111, + "num_input_tokens_seen": 395644016, + "step": 2289 + }, + { + "epoch": 0.8766756032171582, + "loss": 0.23358552157878876, + "loss_ce": 0.021305250003933907, + "loss_iou": 1.059319257736206, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 395644016, + "step": 2289 + }, + { + "epoch": 0.8770585982382229, + "grad_norm": 18.081521143637627, + "learning_rate": 5e-06, + "loss": 0.211, + "num_input_tokens_seen": 395816712, + "step": 2290 + }, + { + "epoch": 0.8770585982382229, + "loss": 0.17363527417182922, + "loss_ce": 0.021596703678369522, + "loss_iou": 1.0326075553894043, + "loss_num": 0.15234375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 395816712, + "step": 2290 + }, + { + "epoch": 0.8774415932592876, + "grad_norm": 18.210939402159955, + "learning_rate": 5e-06, + "loss": 0.2151, + "num_input_tokens_seen": 395989752, + "step": 2291 + }, + { + "epoch": 0.8774415932592876, + "loss": 0.228585883975029, + "loss_ce": 0.021676693111658096, + "loss_iou": 1.062607765197754, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 395989752, + "step": 2291 + }, + { + "epoch": 0.8778245882803524, + "grad_norm": 27.092469139972376, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 396162512, + "step": 2292 + }, + { + "epoch": 0.8778245882803524, + "loss": 0.18751798570156097, + "loss_ce": 0.020647864788770676, + "loss_iou": 8.32317092605775e+22, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 396162512, + "step": 2292 + }, + { + "epoch": 0.8782075833014171, + "grad_norm": 30.001011135359228, + "learning_rate": 5e-06, + "loss": 0.2066, + "num_input_tokens_seen": 396336072, + "step": 2293 + }, + { + "epoch": 0.8782075833014171, + "loss": 0.1969650387763977, + "loss_ce": 0.020817570388317108, + "loss_iou": 1.007326364517212, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 396336072, + "step": 2293 + }, + { + "epoch": 0.8785905783224818, + "grad_norm": 26.083469781572422, + "learning_rate": 5e-06, + "loss": 0.2127, + "num_input_tokens_seen": 396509304, + "step": 2294 + }, + { + "epoch": 0.8785905783224818, + "loss": 0.1951311081647873, + "loss_ce": 0.020448490977287292, + "loss_iou": 1.0216398239135742, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 396509304, + "step": 2294 + }, + { + "epoch": 0.8789735733435465, + "grad_norm": 27.761394867677065, + "learning_rate": 5e-06, + "loss": 0.2432, + "num_input_tokens_seen": 396682400, + "step": 2295 + }, + { + "epoch": 0.8789735733435465, + "loss": 0.2696751356124878, + "loss_ce": 0.021384134888648987, + "loss_iou": 1.0838731527328491, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 396682400, + "step": 2295 + }, + { + "epoch": 0.8793565683646113, + "grad_norm": 20.606424256937945, + "learning_rate": 5e-06, + "loss": 0.16, + "num_input_tokens_seen": 396854928, + "step": 2296 + }, + { + "epoch": 0.8793565683646113, + "loss": 0.1552027463912964, + "loss_ce": 0.020192988216876984, + "loss_iou": 1.001530647277832, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 396854928, + "step": 2296 + }, + { + "epoch": 0.8797395633856759, + "grad_norm": 13.681449188907138, + "learning_rate": 5e-06, + "loss": 0.1166, + "num_input_tokens_seen": 397027488, + "step": 2297 + }, + { + "epoch": 0.8797395633856759, + "loss": 0.12320609390735626, + "loss_ce": 0.020117711275815964, + "loss_iou": 1.0067899227142334, + "loss_num": 0.10302734375, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 397027488, + "step": 2297 + }, + { + "epoch": 0.8801225584067407, + "grad_norm": 20.135722752597573, + "learning_rate": 5e-06, + "loss": 0.1691, + "num_input_tokens_seen": 397200152, + "step": 2298 + }, + { + "epoch": 0.8801225584067407, + "loss": 0.17363953590393066, + "loss_ce": 0.021173711866140366, + "loss_iou": 1.0135451555252075, + "loss_num": 0.15234375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 397200152, + "step": 2298 + }, + { + "epoch": 0.8805055534278055, + "grad_norm": 30.52294808367406, + "learning_rate": 5e-06, + "loss": 0.1874, + "num_input_tokens_seen": 397372912, + "step": 2299 + }, + { + "epoch": 0.8805055534278055, + "loss": 0.2073453664779663, + "loss_ce": 0.02069985494017601, + "loss_iou": 1.1049034595489502, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 397372912, + "step": 2299 + }, + { + "epoch": 0.8808885484488702, + "grad_norm": 32.87993080154758, + "learning_rate": 5e-06, + "loss": 0.1795, + "num_input_tokens_seen": 397546000, + "step": 2300 + }, + { + "epoch": 0.8808885484488702, + "loss": 0.16229727864265442, + "loss_ce": 0.02075674757361412, + "loss_iou": 1.0005006790161133, + "loss_num": 0.1416015625, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 397546000, + "step": 2300 + }, + { + "epoch": 0.8812715434699349, + "grad_norm": 25.04198557838158, + "learning_rate": 5e-06, + "loss": 0.1954, + "num_input_tokens_seen": 397718872, + "step": 2301 + }, + { + "epoch": 0.8812715434699349, + "loss": 0.1789538711309433, + "loss_ce": 0.02154420129954815, + "loss_iou": 1.0006424188613892, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 397718872, + "step": 2301 + }, + { + "epoch": 0.8816545384909996, + "grad_norm": 21.51257930812808, + "learning_rate": 5e-06, + "loss": 0.1907, + "num_input_tokens_seen": 397891928, + "step": 2302 + }, + { + "epoch": 0.8816545384909996, + "loss": 0.20330749452114105, + "loss_ce": 0.021727904677391052, + "loss_iou": 1.0173156261444092, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 397891928, + "step": 2302 + }, + { + "epoch": 0.8820375335120644, + "grad_norm": 23.850911095849863, + "learning_rate": 5e-06, + "loss": 0.2333, + "num_input_tokens_seen": 398064856, + "step": 2303 + }, + { + "epoch": 0.8820375335120644, + "loss": 0.23615139722824097, + "loss_ce": 0.020147982984781265, + "loss_iou": 1.3376621007919312, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 398064856, + "step": 2303 + }, + { + "epoch": 0.882420528533129, + "grad_norm": 28.43514558943183, + "learning_rate": 5e-06, + "loss": 0.1375, + "num_input_tokens_seen": 398237736, + "step": 2304 + }, + { + "epoch": 0.882420528533129, + "loss": 0.1300610601902008, + "loss_ce": 0.019282255321741104, + "loss_iou": 1.0002906322479248, + "loss_num": 0.11083984375, + "loss_xval": 0.11083984375, + "num_input_tokens_seen": 398237736, + "step": 2304 + }, + { + "epoch": 0.8828035235541938, + "grad_norm": 23.604836666199777, + "learning_rate": 5e-06, + "loss": 0.1471, + "num_input_tokens_seen": 398410496, + "step": 2305 + }, + { + "epoch": 0.8828035235541938, + "loss": 0.1526983082294464, + "loss_ce": 0.020496167242527008, + "loss_iou": 1.0016909837722778, + "loss_num": 0.1318359375, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 398410496, + "step": 2305 + }, + { + "epoch": 0.8831865185752585, + "grad_norm": 28.21258043854851, + "learning_rate": 5e-06, + "loss": 0.1752, + "num_input_tokens_seen": 398583592, + "step": 2306 + }, + { + "epoch": 0.8831865185752585, + "loss": 0.19757401943206787, + "loss_ce": 0.020572058856487274, + "loss_iou": 1.0885980129241943, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 398583592, + "step": 2306 + }, + { + "epoch": 0.8835695135963233, + "grad_norm": 24.9937839191693, + "learning_rate": 5e-06, + "loss": 0.1695, + "num_input_tokens_seen": 398756624, + "step": 2307 + }, + { + "epoch": 0.8835695135963233, + "loss": 0.14079371094703674, + "loss_ce": 0.020798582583665848, + "loss_iou": 1.0043283700942993, + "loss_num": 0.1201171875, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 398756624, + "step": 2307 + }, + { + "epoch": 0.8839525086173879, + "grad_norm": 22.69790201692408, + "learning_rate": 5e-06, + "loss": 0.2299, + "num_input_tokens_seen": 398929480, + "step": 2308 + }, + { + "epoch": 0.8839525086173879, + "loss": 0.19839608669281006, + "loss_ce": 0.02188241109251976, + "loss_iou": 1.0025566816329956, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 398929480, + "step": 2308 + }, + { + "epoch": 0.8843355036384527, + "grad_norm": 22.367970040983835, + "learning_rate": 5e-06, + "loss": 0.2076, + "num_input_tokens_seen": 399102480, + "step": 2309 + }, + { + "epoch": 0.8843355036384527, + "loss": 0.24297428131103516, + "loss_ce": 0.021843906491994858, + "loss_iou": 1.0768591165542603, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 399102480, + "step": 2309 + }, + { + "epoch": 0.8847184986595175, + "grad_norm": 23.681641625883703, + "learning_rate": 5e-06, + "loss": 0.2082, + "num_input_tokens_seen": 399275288, + "step": 2310 + }, + { + "epoch": 0.8847184986595175, + "loss": 0.2108621895313263, + "loss_ce": 0.020432502031326294, + "loss_iou": 1.0344903469085693, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 399275288, + "step": 2310 + }, + { + "epoch": 0.8851014936805821, + "grad_norm": 24.10603166601443, + "learning_rate": 5e-06, + "loss": 0.2664, + "num_input_tokens_seen": 399447968, + "step": 2311 + }, + { + "epoch": 0.8851014936805821, + "loss": 0.2505452632904053, + "loss_ce": 0.020808910951018333, + "loss_iou": 1.1432623863220215, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 399447968, + "step": 2311 + }, + { + "epoch": 0.8854844887016469, + "grad_norm": 22.023493830738364, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 399620456, + "step": 2312 + }, + { + "epoch": 0.8854844887016469, + "loss": 0.17484048008918762, + "loss_ce": 0.020421534776687622, + "loss_iou": 1.0182372331619263, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 399620456, + "step": 2312 + }, + { + "epoch": 0.8858674837227116, + "grad_norm": 25.7498391692391, + "learning_rate": 5e-06, + "loss": 0.2168, + "num_input_tokens_seen": 399793472, + "step": 2313 + }, + { + "epoch": 0.8858674837227116, + "loss": 0.1925235092639923, + "loss_ce": 0.02113679237663746, + "loss_iou": 1.0034724473953247, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 399793472, + "step": 2313 + }, + { + "epoch": 0.8862504787437764, + "grad_norm": 25.791077663218175, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 399966424, + "step": 2314 + }, + { + "epoch": 0.8862504787437764, + "loss": 0.17002347111701965, + "loss_ce": 0.019938018172979355, + "loss_iou": 1.0325002670288086, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 399966424, + "step": 2314 + }, + { + "epoch": 0.886633473764841, + "grad_norm": 26.5356666547837, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 400139488, + "step": 2315 + }, + { + "epoch": 0.886633473764841, + "loss": 0.263716459274292, + "loss_ce": 0.020247209817171097, + "loss_iou": 1.0764899253845215, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 400139488, + "step": 2315 + }, + { + "epoch": 0.8870164687859058, + "grad_norm": 23.15099092876043, + "learning_rate": 5e-06, + "loss": 0.2141, + "num_input_tokens_seen": 400312184, + "step": 2316 + }, + { + "epoch": 0.8870164687859058, + "loss": 0.22713661193847656, + "loss_ce": 0.01992225833237171, + "loss_iou": 1.0985429286956787, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 400312184, + "step": 2316 + }, + { + "epoch": 0.8873994638069705, + "grad_norm": 30.484934669020465, + "learning_rate": 5e-06, + "loss": 0.1671, + "num_input_tokens_seen": 400485120, + "step": 2317 + }, + { + "epoch": 0.8873994638069705, + "loss": 0.1689269244670868, + "loss_ce": 0.02097770944237709, + "loss_iou": 1.0121673345565796, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 400485120, + "step": 2317 + }, + { + "epoch": 0.8877824588280352, + "grad_norm": 28.322684395014303, + "learning_rate": 5e-06, + "loss": 0.2812, + "num_input_tokens_seen": 400658064, + "step": 2318 + }, + { + "epoch": 0.8877824588280352, + "loss": 0.3154546916484833, + "loss_ce": 0.019800391048192978, + "loss_iou": 1.0829479694366455, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 400658064, + "step": 2318 + }, + { + "epoch": 0.8881654538491, + "grad_norm": 30.649776439699774, + "learning_rate": 5e-06, + "loss": 0.3124, + "num_input_tokens_seen": 400830888, + "step": 2319 + }, + { + "epoch": 0.8881654538491, + "loss": 0.39187049865722656, + "loss_ce": 0.019617099314928055, + "loss_iou": 1.1072109937667847, + "loss_num": 0.373046875, + "loss_xval": 0.373046875, + "num_input_tokens_seen": 400830888, + "step": 2319 + }, + { + "epoch": 0.8885484488701647, + "grad_norm": 21.581339684611716, + "learning_rate": 5e-06, + "loss": 0.2397, + "num_input_tokens_seen": 401003832, + "step": 2320 + }, + { + "epoch": 0.8885484488701647, + "loss": 0.2565586566925049, + "loss_ce": 0.02132917195558548, + "loss_iou": 1.0717475414276123, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 401003832, + "step": 2320 + }, + { + "epoch": 0.8889314438912295, + "grad_norm": 37.18032737189331, + "learning_rate": 5e-06, + "loss": 0.2298, + "num_input_tokens_seen": 401176816, + "step": 2321 + }, + { + "epoch": 0.8889314438912295, + "loss": 0.24256186187267303, + "loss_ce": 0.02124839276075363, + "loss_iou": 1.0216059684753418, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 401176816, + "step": 2321 + }, + { + "epoch": 0.8893144389122941, + "grad_norm": 28.287794798157343, + "learning_rate": 5e-06, + "loss": 0.1649, + "num_input_tokens_seen": 401349880, + "step": 2322 + }, + { + "epoch": 0.8893144389122941, + "loss": 0.1690763682126999, + "loss_ce": 0.02088300511240959, + "loss_iou": 1.0003900527954102, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 401349880, + "step": 2322 + }, + { + "epoch": 0.8896974339333589, + "grad_norm": 29.561315514949193, + "learning_rate": 5e-06, + "loss": 0.2366, + "num_input_tokens_seen": 401522624, + "step": 2323 + }, + { + "epoch": 0.8896974339333589, + "loss": 0.2799972891807556, + "loss_ce": 0.018339548259973526, + "loss_iou": 0.9488890171051025, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 401522624, + "step": 2323 + }, + { + "epoch": 0.8900804289544236, + "grad_norm": 40.101712454574034, + "learning_rate": 5e-06, + "loss": 0.2302, + "num_input_tokens_seen": 401695440, + "step": 2324 + }, + { + "epoch": 0.8900804289544236, + "loss": 0.23104974627494812, + "loss_ce": 0.021516067907214165, + "loss_iou": 1.0083305835723877, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 401695440, + "step": 2324 + }, + { + "epoch": 0.8904634239754883, + "grad_norm": 38.15009426096198, + "learning_rate": 5e-06, + "loss": 0.3425, + "num_input_tokens_seen": 401864912, + "step": 2325 + }, + { + "epoch": 0.8904634239754883, + "loss": 0.2780781090259552, + "loss_ce": 0.020875953137874603, + "loss_iou": 1.0925370454788208, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 401864912, + "step": 2325 + }, + { + "epoch": 0.890846418996553, + "grad_norm": 25.608395885078057, + "learning_rate": 5e-06, + "loss": 0.2329, + "num_input_tokens_seen": 402037568, + "step": 2326 + }, + { + "epoch": 0.890846418996553, + "loss": 0.2409341037273407, + "loss_ce": 0.022062024101614952, + "loss_iou": 1.021268367767334, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 402037568, + "step": 2326 + }, + { + "epoch": 0.8912294140176178, + "grad_norm": 23.19897108760206, + "learning_rate": 5e-06, + "loss": 0.208, + "num_input_tokens_seen": 402210232, + "step": 2327 + }, + { + "epoch": 0.8912294140176178, + "loss": 0.23976726830005646, + "loss_ce": 0.02107830159366131, + "loss_iou": 1.0104955434799194, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 402210232, + "step": 2327 + }, + { + "epoch": 0.8916124090386826, + "grad_norm": 27.320496113020106, + "learning_rate": 5e-06, + "loss": 0.2069, + "num_input_tokens_seen": 402382840, + "step": 2328 + }, + { + "epoch": 0.8916124090386826, + "loss": 0.2346423864364624, + "loss_ce": 0.020470030605793, + "loss_iou": 1.071311354637146, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 402382840, + "step": 2328 + }, + { + "epoch": 0.8919954040597472, + "grad_norm": 43.879260420265275, + "learning_rate": 5e-06, + "loss": 0.1992, + "num_input_tokens_seen": 402555744, + "step": 2329 + }, + { + "epoch": 0.8919954040597472, + "loss": 0.21297091245651245, + "loss_ce": 0.021320518106222153, + "loss_iou": 1.0636193752288818, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 402555744, + "step": 2329 + }, + { + "epoch": 0.892378399080812, + "grad_norm": 34.839557886600105, + "learning_rate": 5e-06, + "loss": 0.2513, + "num_input_tokens_seen": 402729064, + "step": 2330 + }, + { + "epoch": 0.892378399080812, + "loss": 0.29425114393234253, + "loss_ce": 0.020325375720858574, + "loss_iou": 1.0446100234985352, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 402729064, + "step": 2330 + }, + { + "epoch": 0.8927613941018767, + "grad_norm": 35.37658936273886, + "learning_rate": 5e-06, + "loss": 0.2368, + "num_input_tokens_seen": 402902120, + "step": 2331 + }, + { + "epoch": 0.8927613941018767, + "loss": 0.19274011254310608, + "loss_ce": 0.021658584475517273, + "loss_iou": 1.0351388454437256, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 402902120, + "step": 2331 + }, + { + "epoch": 0.8931443891229414, + "grad_norm": 16.206575345603024, + "learning_rate": 5e-06, + "loss": 0.1766, + "num_input_tokens_seen": 403075048, + "step": 2332 + }, + { + "epoch": 0.8931443891229414, + "loss": 0.15430493652820587, + "loss_ce": 0.020760010927915573, + "loss_iou": 1.0188767910003662, + "loss_num": 0.1337890625, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 403075048, + "step": 2332 + }, + { + "epoch": 0.8935273841440061, + "grad_norm": 51.08128692755289, + "learning_rate": 5e-06, + "loss": 0.2559, + "num_input_tokens_seen": 403247920, + "step": 2333 + }, + { + "epoch": 0.8935273841440061, + "loss": 0.23070073127746582, + "loss_ce": 0.020251505076885223, + "loss_iou": 1.1131281852722168, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 403247920, + "step": 2333 + }, + { + "epoch": 0.8939103791650709, + "grad_norm": 37.256973862167605, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 403420944, + "step": 2334 + }, + { + "epoch": 0.8939103791650709, + "loss": 0.2458370476961136, + "loss_ce": 0.020739397034049034, + "loss_iou": 1.0012136697769165, + "loss_num": 0.224609375, + "loss_xval": 0.224609375, + "num_input_tokens_seen": 403420944, + "step": 2334 + }, + { + "epoch": 0.8942933741861355, + "grad_norm": 41.35793357453431, + "learning_rate": 5e-06, + "loss": 0.1832, + "num_input_tokens_seen": 403593960, + "step": 2335 + }, + { + "epoch": 0.8942933741861355, + "loss": 0.1998976767063141, + "loss_ce": 0.02149190567433834, + "loss_iou": 1.020889163017273, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 403593960, + "step": 2335 + }, + { + "epoch": 0.8946763692072003, + "grad_norm": 28.510224293167948, + "learning_rate": 5e-06, + "loss": 0.2628, + "num_input_tokens_seen": 403766944, + "step": 2336 + }, + { + "epoch": 0.8946763692072003, + "loss": 0.27586835622787476, + "loss_ce": 0.021046578884124756, + "loss_iou": 1.0037611722946167, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 403766944, + "step": 2336 + }, + { + "epoch": 0.895059364228265, + "grad_norm": 52.29913170388032, + "learning_rate": 5e-06, + "loss": 0.2434, + "num_input_tokens_seen": 403939816, + "step": 2337 + }, + { + "epoch": 0.895059364228265, + "loss": 0.19048947095870972, + "loss_ce": 0.021238982677459717, + "loss_iou": 1.0058882236480713, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 403939816, + "step": 2337 + }, + { + "epoch": 0.8954423592493298, + "grad_norm": 28.3177053650063, + "learning_rate": 5e-06, + "loss": 0.2138, + "num_input_tokens_seen": 404112640, + "step": 2338 + }, + { + "epoch": 0.8954423592493298, + "loss": 0.2756827473640442, + "loss_ce": 0.020616818219423294, + "loss_iou": 1.1016333103179932, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 404112640, + "step": 2338 + }, + { + "epoch": 0.8958253542703944, + "grad_norm": 45.586183419404904, + "learning_rate": 5e-06, + "loss": 0.2483, + "num_input_tokens_seen": 404285592, + "step": 2339 + }, + { + "epoch": 0.8958253542703944, + "loss": 0.19825810194015503, + "loss_ce": 0.021988561376929283, + "loss_iou": 0.97584068775177, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 404285592, + "step": 2339 + }, + { + "epoch": 0.8962083492914592, + "grad_norm": 35.55583502603937, + "learning_rate": 5e-06, + "loss": 0.2635, + "num_input_tokens_seen": 404458248, + "step": 2340 + }, + { + "epoch": 0.8962083492914592, + "loss": 0.22127856314182281, + "loss_ce": 0.019374258816242218, + "loss_iou": 1.1606025695800781, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 404458248, + "step": 2340 + }, + { + "epoch": 0.896591344312524, + "grad_norm": 50.96979750171572, + "learning_rate": 5e-06, + "loss": 0.2206, + "num_input_tokens_seen": 404631176, + "step": 2341 + }, + { + "epoch": 0.896591344312524, + "loss": 0.239015594124794, + "loss_ce": 0.021242156624794006, + "loss_iou": 1.0026321411132812, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 404631176, + "step": 2341 + }, + { + "epoch": 0.8969743393335886, + "grad_norm": 34.36341472556792, + "learning_rate": 5e-06, + "loss": 0.2009, + "num_input_tokens_seen": 404804424, + "step": 2342 + }, + { + "epoch": 0.8969743393335886, + "loss": 0.19493183493614197, + "loss_ce": 0.020432332530617714, + "loss_iou": 1.0039664506912231, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 404804424, + "step": 2342 + }, + { + "epoch": 0.8973573343546534, + "grad_norm": 36.966826818188856, + "learning_rate": 5e-06, + "loss": 0.2087, + "num_input_tokens_seen": 404977240, + "step": 2343 + }, + { + "epoch": 0.8973573343546534, + "loss": 0.2385326772928238, + "loss_ce": 0.02033199742436409, + "loss_iou": 1.0842691659927368, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 404977240, + "step": 2343 + }, + { + "epoch": 0.8977403293757181, + "grad_norm": 27.400570672450467, + "learning_rate": 5e-06, + "loss": 0.1975, + "num_input_tokens_seen": 405150168, + "step": 2344 + }, + { + "epoch": 0.8977403293757181, + "loss": 0.16610579192638397, + "loss_ce": 0.01998763158917427, + "loss_iou": 1.0006134510040283, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 405150168, + "step": 2344 + }, + { + "epoch": 0.8981233243967829, + "grad_norm": 45.70484841006972, + "learning_rate": 5e-06, + "loss": 0.1995, + "num_input_tokens_seen": 405323056, + "step": 2345 + }, + { + "epoch": 0.8981233243967829, + "loss": 0.17091411352157593, + "loss_ce": 0.020889708772301674, + "loss_iou": 1.0392924547195435, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 405323056, + "step": 2345 + }, + { + "epoch": 0.8985063194178475, + "grad_norm": 39.637040555052735, + "learning_rate": 5e-06, + "loss": 0.2342, + "num_input_tokens_seen": 405495648, + "step": 2346 + }, + { + "epoch": 0.8985063194178475, + "loss": 0.17984412610530853, + "loss_ce": 0.019687866792082787, + "loss_iou": 1.0187370777130127, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 405495648, + "step": 2346 + }, + { + "epoch": 0.8988893144389123, + "grad_norm": 30.57360473875093, + "learning_rate": 5e-06, + "loss": 0.1576, + "num_input_tokens_seen": 405668616, + "step": 2347 + }, + { + "epoch": 0.8988893144389123, + "loss": 0.1363660991191864, + "loss_ce": 0.0201551616191864, + "loss_iou": 1.0004048347473145, + "loss_num": 0.1162109375, + "loss_xval": 0.1162109375, + "num_input_tokens_seen": 405668616, + "step": 2347 + }, + { + "epoch": 0.899272309459977, + "grad_norm": 32.006914027939956, + "learning_rate": 5e-06, + "loss": 0.2305, + "num_input_tokens_seen": 405841544, + "step": 2348 + }, + { + "epoch": 0.899272309459977, + "loss": 0.26787641644477844, + "loss_ce": 0.020745061337947845, + "loss_iou": 1.089160442352295, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 405841544, + "step": 2348 + }, + { + "epoch": 0.8996553044810417, + "grad_norm": 72.12367727509918, + "learning_rate": 5e-06, + "loss": 0.2419, + "num_input_tokens_seen": 406014736, + "step": 2349 + }, + { + "epoch": 0.8996553044810417, + "loss": 0.2803584635257721, + "loss_ce": 0.022545959800481796, + "loss_iou": 2.920067548751831, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 406014736, + "step": 2349 + }, + { + "epoch": 0.9000382995021065, + "grad_norm": 20.010034228136895, + "learning_rate": 5e-06, + "loss": 0.2155, + "num_input_tokens_seen": 406182336, + "step": 2350 + }, + { + "epoch": 0.9000382995021065, + "loss": 0.187499538064003, + "loss_ce": 0.02062942273914814, + "loss_iou": 1.0197784900665283, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 406182336, + "step": 2350 + }, + { + "epoch": 0.9004212945231712, + "grad_norm": 22.701776072233677, + "learning_rate": 5e-06, + "loss": 0.1878, + "num_input_tokens_seen": 406355256, + "step": 2351 + }, + { + "epoch": 0.9004212945231712, + "loss": 0.20990976691246033, + "loss_ce": 0.021860448643565178, + "loss_iou": 1.080086350440979, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 406355256, + "step": 2351 + }, + { + "epoch": 0.900804289544236, + "grad_norm": 20.61023095429758, + "learning_rate": 5e-06, + "loss": 0.2031, + "num_input_tokens_seen": 406528296, + "step": 2352 + }, + { + "epoch": 0.900804289544236, + "loss": 0.2028670758008957, + "loss_ce": 0.01994471065700054, + "loss_iou": 1.007051706314087, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 406528296, + "step": 2352 + }, + { + "epoch": 0.9011872845653006, + "grad_norm": 21.73740240986204, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 406701456, + "step": 2353 + }, + { + "epoch": 0.9011872845653006, + "loss": 0.16568265855312347, + "loss_ce": 0.02170073240995407, + "loss_iou": 1.048267126083374, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 406701456, + "step": 2353 + }, + { + "epoch": 0.9015702795863654, + "grad_norm": 22.73960217988902, + "learning_rate": 5e-06, + "loss": 0.1836, + "num_input_tokens_seen": 406874688, + "step": 2354 + }, + { + "epoch": 0.9015702795863654, + "loss": 0.18578344583511353, + "loss_ce": 0.02214818075299263, + "loss_iou": 1.0128567218780518, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 406874688, + "step": 2354 + }, + { + "epoch": 0.9019532746074301, + "grad_norm": 29.40987383685999, + "learning_rate": 5e-06, + "loss": 0.1872, + "num_input_tokens_seen": 407047728, + "step": 2355 + }, + { + "epoch": 0.9019532746074301, + "loss": 0.1829497069120407, + "loss_ce": 0.02132861688733101, + "loss_iou": 1.0231342315673828, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 407047728, + "step": 2355 + }, + { + "epoch": 0.9023362696284948, + "grad_norm": 40.44204858238806, + "learning_rate": 5e-06, + "loss": 0.235, + "num_input_tokens_seen": 407220592, + "step": 2356 + }, + { + "epoch": 0.9023362696284948, + "loss": 0.2262561023235321, + "loss_ce": 0.0208728164434433, + "loss_iou": 1.0273866653442383, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 407220592, + "step": 2356 + }, + { + "epoch": 0.9027192646495595, + "grad_norm": 24.87077660065261, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 407393568, + "step": 2357 + }, + { + "epoch": 0.9027192646495595, + "loss": 0.19813472032546997, + "loss_ce": 0.022719688713550568, + "loss_iou": 1.0532939434051514, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 407393568, + "step": 2357 + }, + { + "epoch": 0.9031022596706243, + "grad_norm": 22.827788690630538, + "learning_rate": 5e-06, + "loss": 0.2113, + "num_input_tokens_seen": 407566344, + "step": 2358 + }, + { + "epoch": 0.9031022596706243, + "loss": 0.18151769042015076, + "loss_ce": 0.021544544026255608, + "loss_iou": 1.0150020122528076, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 407566344, + "step": 2358 + }, + { + "epoch": 0.903485254691689, + "grad_norm": 22.379163306053044, + "learning_rate": 5e-06, + "loss": 0.2535, + "num_input_tokens_seen": 407738880, + "step": 2359 + }, + { + "epoch": 0.903485254691689, + "loss": 0.32208019495010376, + "loss_ce": 0.021298956125974655, + "loss_iou": 10.810672760009766, + "loss_num": 0.30078125, + "loss_xval": 0.30078125, + "num_input_tokens_seen": 407738880, + "step": 2359 + }, + { + "epoch": 0.9038682497127537, + "grad_norm": 25.666087321083204, + "learning_rate": 5e-06, + "loss": 0.172, + "num_input_tokens_seen": 407911744, + "step": 2360 + }, + { + "epoch": 0.9038682497127537, + "loss": 0.19929426908493042, + "loss_ce": 0.01826399937272072, + "loss_iou": 1.1633094549179077, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 407911744, + "step": 2360 + }, + { + "epoch": 0.9042512447338185, + "grad_norm": 16.089832093242897, + "learning_rate": 5e-06, + "loss": 0.28, + "num_input_tokens_seen": 408084720, + "step": 2361 + }, + { + "epoch": 0.9042512447338185, + "loss": 0.30379611253738403, + "loss_ce": 0.020470933988690376, + "loss_iou": 1.242593765258789, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 408084720, + "step": 2361 + }, + { + "epoch": 0.9046342397548832, + "grad_norm": 18.99963223087458, + "learning_rate": 5e-06, + "loss": 0.1829, + "num_input_tokens_seen": 408257656, + "step": 2362 + }, + { + "epoch": 0.9046342397548832, + "loss": 0.186909019947052, + "loss_ce": 0.0196116641163826, + "loss_iou": 1.025866150856018, + "loss_num": 0.1669921875, + "loss_xval": 0.1669921875, + "num_input_tokens_seen": 408257656, + "step": 2362 + }, + { + "epoch": 0.9050172347759479, + "grad_norm": 16.0476265285785, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 408431064, + "step": 2363 + }, + { + "epoch": 0.9050172347759479, + "loss": 0.20397067070007324, + "loss_ce": 0.021414516493678093, + "loss_iou": 1.0676087141036987, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 408431064, + "step": 2363 + }, + { + "epoch": 0.9054002297970126, + "grad_norm": 41.37737189647491, + "learning_rate": 5e-06, + "loss": 0.2086, + "num_input_tokens_seen": 408603920, + "step": 2364 + }, + { + "epoch": 0.9054002297970126, + "loss": 0.20587798953056335, + "loss_ce": 0.02161286026239395, + "loss_iou": 1.01041579246521, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 408603920, + "step": 2364 + }, + { + "epoch": 0.9057832248180774, + "grad_norm": 35.201733243723055, + "learning_rate": 5e-06, + "loss": 0.2086, + "num_input_tokens_seen": 408776576, + "step": 2365 + }, + { + "epoch": 0.9057832248180774, + "loss": 0.22596822679042816, + "loss_ce": 0.021927697584033012, + "loss_iou": 1.0464751720428467, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 408776576, + "step": 2365 + }, + { + "epoch": 0.9061662198391421, + "grad_norm": 26.22058701727546, + "learning_rate": 5e-06, + "loss": 0.3156, + "num_input_tokens_seen": 408949328, + "step": 2366 + }, + { + "epoch": 0.9061662198391421, + "loss": 0.2362404614686966, + "loss_ce": 0.021213598549365997, + "loss_iou": 1.0060431957244873, + "loss_num": 0.21484375, + "loss_xval": 0.21484375, + "num_input_tokens_seen": 408949328, + "step": 2366 + }, + { + "epoch": 0.9065492148602068, + "grad_norm": 28.728109473112713, + "learning_rate": 5e-06, + "loss": 0.1718, + "num_input_tokens_seen": 409122152, + "step": 2367 + }, + { + "epoch": 0.9065492148602068, + "loss": 0.18759480118751526, + "loss_ce": 0.019992249086499214, + "loss_iou": 1.0010476112365723, + "loss_num": 0.16796875, + "loss_xval": 0.16796875, + "num_input_tokens_seen": 409122152, + "step": 2367 + }, + { + "epoch": 0.9069322098812715, + "grad_norm": 22.84123618774518, + "learning_rate": 5e-06, + "loss": 0.1925, + "num_input_tokens_seen": 409295160, + "step": 2368 + }, + { + "epoch": 0.9069322098812715, + "loss": 0.16528218984603882, + "loss_ce": 0.01965230703353882, + "loss_iou": 1.0095326900482178, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 409295160, + "step": 2368 + }, + { + "epoch": 0.9073152049023363, + "grad_norm": 38.60956499979633, + "learning_rate": 5e-06, + "loss": 0.2157, + "num_input_tokens_seen": 409468280, + "step": 2369 + }, + { + "epoch": 0.9073152049023363, + "loss": 0.24069184064865112, + "loss_ce": 0.02023286744952202, + "loss_iou": 1.0126674175262451, + "loss_num": 0.220703125, + "loss_xval": 0.220703125, + "num_input_tokens_seen": 409468280, + "step": 2369 + }, + { + "epoch": 0.907698199923401, + "grad_norm": 23.961042618032646, + "learning_rate": 5e-06, + "loss": 0.2062, + "num_input_tokens_seen": 409641272, + "step": 2370 + }, + { + "epoch": 0.907698199923401, + "loss": 0.18946030735969543, + "loss_ce": 0.019965671002864838, + "loss_iou": 0.9818347096443176, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 409641272, + "step": 2370 + }, + { + "epoch": 0.9080811949444657, + "grad_norm": 48.243077347673, + "learning_rate": 5e-06, + "loss": 0.1906, + "num_input_tokens_seen": 409814064, + "step": 2371 + }, + { + "epoch": 0.9080811949444657, + "loss": 0.16905364394187927, + "loss_ce": 0.021836843341588974, + "loss_iou": 1.0045949220657349, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 409814064, + "step": 2371 + }, + { + "epoch": 0.9084641899655305, + "grad_norm": 26.146141822474032, + "learning_rate": 5e-06, + "loss": 0.2315, + "num_input_tokens_seen": 409987160, + "step": 2372 + }, + { + "epoch": 0.9084641899655305, + "loss": 0.25680333375930786, + "loss_ce": 0.022001095116138458, + "loss_iou": 1.0284111499786377, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 409987160, + "step": 2372 + }, + { + "epoch": 0.9088471849865952, + "grad_norm": 22.502885624134297, + "learning_rate": 5e-06, + "loss": 0.2325, + "num_input_tokens_seen": 410160072, + "step": 2373 + }, + { + "epoch": 0.9088471849865952, + "loss": 0.20859001576900482, + "loss_ce": 0.02072381228208542, + "loss_iou": 1.0815236568450928, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 410160072, + "step": 2373 + }, + { + "epoch": 0.9092301800076599, + "grad_norm": 19.128803175523963, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 410332584, + "step": 2374 + }, + { + "epoch": 0.9092301800076599, + "loss": 0.190344899892807, + "loss_ce": 0.020239923149347305, + "loss_iou": 1.0056294202804565, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 410332584, + "step": 2374 + }, + { + "epoch": 0.9096131750287246, + "grad_norm": 25.13174196757647, + "learning_rate": 5e-06, + "loss": 0.1785, + "num_input_tokens_seen": 410505744, + "step": 2375 + }, + { + "epoch": 0.9096131750287246, + "loss": 0.16508866846561432, + "loss_ce": 0.022083301097154617, + "loss_iou": 1.0263667106628418, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 410505744, + "step": 2375 + }, + { + "epoch": 0.9099961700497894, + "grad_norm": 22.44469459578855, + "learning_rate": 5e-06, + "loss": 0.2237, + "num_input_tokens_seen": 410678744, + "step": 2376 + }, + { + "epoch": 0.9099961700497894, + "loss": 0.23425155878067017, + "loss_ce": 0.02056746929883957, + "loss_iou": 1.0533339977264404, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 410678744, + "step": 2376 + }, + { + "epoch": 0.910379165070854, + "grad_norm": 25.35229109462942, + "learning_rate": 5e-06, + "loss": 0.224, + "num_input_tokens_seen": 410851328, + "step": 2377 + }, + { + "epoch": 0.910379165070854, + "loss": 0.222754567861557, + "loss_ce": 0.019995778799057007, + "loss_iou": 1.049330472946167, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 410851328, + "step": 2377 + }, + { + "epoch": 0.9107621600919188, + "grad_norm": 21.17120095168434, + "learning_rate": 5e-06, + "loss": 0.1934, + "num_input_tokens_seen": 411024296, + "step": 2378 + }, + { + "epoch": 0.9107621600919188, + "loss": 0.14764659106731415, + "loss_ce": 0.020937608554959297, + "loss_iou": 1.0029979944229126, + "loss_num": 0.126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 411024296, + "step": 2378 + }, + { + "epoch": 0.9111451551129836, + "grad_norm": 36.91029657137582, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 411197248, + "step": 2379 + }, + { + "epoch": 0.9111451551129836, + "loss": 0.144285649061203, + "loss_ce": 0.021604998037219048, + "loss_iou": 1.0007081031799316, + "loss_num": 0.12255859375, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 411197248, + "step": 2379 + }, + { + "epoch": 0.9115281501340483, + "grad_norm": 27.327345254548007, + "learning_rate": 5e-06, + "loss": 0.2386, + "num_input_tokens_seen": 411370328, + "step": 2380 + }, + { + "epoch": 0.9115281501340483, + "loss": 0.2468884289264679, + "loss_ce": 0.01947145164012909, + "loss_iou": 1.0034431219100952, + "loss_num": 0.2275390625, + "loss_xval": 0.2275390625, + "num_input_tokens_seen": 411370328, + "step": 2380 + }, + { + "epoch": 0.911911145155113, + "grad_norm": 34.977127587219535, + "learning_rate": 5e-06, + "loss": 0.2414, + "num_input_tokens_seen": 411543456, + "step": 2381 + }, + { + "epoch": 0.911911145155113, + "loss": 0.21146827936172485, + "loss_ce": 0.020672377198934555, + "loss_iou": 1.002312183380127, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 411543456, + "step": 2381 + }, + { + "epoch": 0.9122941401761777, + "grad_norm": 15.96437352384054, + "learning_rate": 5e-06, + "loss": 0.1853, + "num_input_tokens_seen": 411716576, + "step": 2382 + }, + { + "epoch": 0.9122941401761777, + "loss": 0.20538511872291565, + "loss_ce": 0.021943964064121246, + "loss_iou": 1.0280776023864746, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 411716576, + "step": 2382 + }, + { + "epoch": 0.9126771351972425, + "grad_norm": 35.51237483478961, + "learning_rate": 5e-06, + "loss": 0.2434, + "num_input_tokens_seen": 411889760, + "step": 2383 + }, + { + "epoch": 0.9126771351972425, + "loss": 0.1973738968372345, + "loss_ce": 0.021104369312524796, + "loss_iou": 1.0060076713562012, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 411889760, + "step": 2383 + }, + { + "epoch": 0.9130601302183071, + "grad_norm": 25.647787431160904, + "learning_rate": 5e-06, + "loss": 0.2108, + "num_input_tokens_seen": 412062928, + "step": 2384 + }, + { + "epoch": 0.9130601302183071, + "loss": 0.17348915338516235, + "loss_ce": 0.021877823397517204, + "loss_iou": 1.005397081375122, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 412062928, + "step": 2384 + }, + { + "epoch": 0.9134431252393719, + "grad_norm": 23.053344913429793, + "learning_rate": 5e-06, + "loss": 0.2619, + "num_input_tokens_seen": 412235584, + "step": 2385 + }, + { + "epoch": 0.9134431252393719, + "loss": 0.22121822834014893, + "loss_ce": 0.02090083621442318, + "loss_iou": 1.191190242767334, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 412235584, + "step": 2385 + }, + { + "epoch": 0.9138261202604366, + "grad_norm": 26.292265105545425, + "learning_rate": 5e-06, + "loss": 0.2143, + "num_input_tokens_seen": 412408272, + "step": 2386 + }, + { + "epoch": 0.9138261202604366, + "loss": 0.1930234730243683, + "loss_ce": 0.018768098205327988, + "loss_iou": 1.0049376487731934, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 412408272, + "step": 2386 + }, + { + "epoch": 0.9142091152815014, + "grad_norm": 24.98979994341704, + "learning_rate": 5e-06, + "loss": 0.1405, + "num_input_tokens_seen": 412581104, + "step": 2387 + }, + { + "epoch": 0.9142091152815014, + "loss": 0.15191525220870972, + "loss_ce": 0.020811742171645164, + "loss_iou": 1.0201034545898438, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 412581104, + "step": 2387 + }, + { + "epoch": 0.914592110302566, + "grad_norm": 23.354223453219895, + "learning_rate": 5e-06, + "loss": 0.2023, + "num_input_tokens_seen": 412754104, + "step": 2388 + }, + { + "epoch": 0.914592110302566, + "loss": 0.16768714785575867, + "loss_ce": 0.020897595211863518, + "loss_iou": 1.000795602798462, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 412754104, + "step": 2388 + }, + { + "epoch": 0.9149751053236308, + "grad_norm": 42.04023037420445, + "learning_rate": 5e-06, + "loss": 0.1797, + "num_input_tokens_seen": 412927000, + "step": 2389 + }, + { + "epoch": 0.9149751053236308, + "loss": 0.1643044352531433, + "loss_ce": 0.020688720047473907, + "loss_iou": 1.00919508934021, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 412927000, + "step": 2389 + }, + { + "epoch": 0.9153581003446956, + "grad_norm": 38.683829128169535, + "learning_rate": 5e-06, + "loss": 0.2661, + "num_input_tokens_seen": 413099632, + "step": 2390 + }, + { + "epoch": 0.9153581003446956, + "loss": 0.25069767236709595, + "loss_ce": 0.021632760763168335, + "loss_iou": 1.0596530437469482, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 413099632, + "step": 2390 + }, + { + "epoch": 0.9157410953657602, + "grad_norm": 17.870399262431235, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 413272616, + "step": 2391 + }, + { + "epoch": 0.9157410953657602, + "loss": 0.1694970577955246, + "loss_ce": 0.023409409448504448, + "loss_iou": 1.0341999530792236, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 413272616, + "step": 2391 + }, + { + "epoch": 0.916124090386825, + "grad_norm": 83.70239355912715, + "learning_rate": 5e-06, + "loss": 0.2372, + "num_input_tokens_seen": 413445360, + "step": 2392 + }, + { + "epoch": 0.916124090386825, + "loss": 0.23829631507396698, + "loss_ce": 0.019668392837047577, + "loss_iou": 1.054909586906433, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 413445360, + "step": 2392 + }, + { + "epoch": 0.9165070854078897, + "grad_norm": 42.05504986560037, + "learning_rate": 5e-06, + "loss": 0.2355, + "num_input_tokens_seen": 413618760, + "step": 2393 + }, + { + "epoch": 0.9165070854078897, + "loss": 0.22386720776557922, + "loss_ce": 0.021657757461071014, + "loss_iou": 1.0026435852050781, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 413618760, + "step": 2393 + }, + { + "epoch": 0.9168900804289544, + "grad_norm": 37.73854328490997, + "learning_rate": 5e-06, + "loss": 0.2432, + "num_input_tokens_seen": 413791664, + "step": 2394 + }, + { + "epoch": 0.9168900804289544, + "loss": 0.22379669547080994, + "loss_ce": 0.02054961770772934, + "loss_iou": 1.001222848892212, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 413791664, + "step": 2394 + }, + { + "epoch": 0.9172730754500191, + "grad_norm": 18.763365736694332, + "learning_rate": 5e-06, + "loss": 0.1778, + "num_input_tokens_seen": 413964616, + "step": 2395 + }, + { + "epoch": 0.9172730754500191, + "loss": 0.20379221439361572, + "loss_ce": 0.020869851112365723, + "loss_iou": 1.0439505577087402, + "loss_num": 0.1826171875, + "loss_xval": 0.1826171875, + "num_input_tokens_seen": 413964616, + "step": 2395 + }, + { + "epoch": 0.9176560704710839, + "grad_norm": 21.442752576039208, + "learning_rate": 5e-06, + "loss": 0.1763, + "num_input_tokens_seen": 414138008, + "step": 2396 + }, + { + "epoch": 0.9176560704710839, + "loss": 0.18055325746536255, + "loss_ce": 0.0207021776586771, + "loss_iou": 1.007684350013733, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 414138008, + "step": 2396 + }, + { + "epoch": 0.9180390654921486, + "grad_norm": 50.31807377112917, + "learning_rate": 5e-06, + "loss": 0.2828, + "num_input_tokens_seen": 414310904, + "step": 2397 + }, + { + "epoch": 0.9180390654921486, + "loss": 0.26484301686286926, + "loss_ce": 0.020580342039465904, + "loss_iou": 1.010157585144043, + "loss_num": 0.244140625, + "loss_xval": 0.244140625, + "num_input_tokens_seen": 414310904, + "step": 2397 + }, + { + "epoch": 0.9184220605132133, + "grad_norm": 48.38373924778628, + "learning_rate": 5e-06, + "loss": 0.2294, + "num_input_tokens_seen": 414483704, + "step": 2398 + }, + { + "epoch": 0.9184220605132133, + "loss": 0.18663525581359863, + "loss_ce": 0.020619627088308334, + "loss_iou": 1.0017452239990234, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 414483704, + "step": 2398 + }, + { + "epoch": 0.918805055534278, + "grad_norm": 21.248214249983533, + "learning_rate": 5e-06, + "loss": 0.2166, + "num_input_tokens_seen": 414656816, + "step": 2399 + }, + { + "epoch": 0.918805055534278, + "loss": 0.2035892903804779, + "loss_ce": 0.021460391581058502, + "loss_iou": 1.0131632089614868, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 414656816, + "step": 2399 + }, + { + "epoch": 0.9191880505553428, + "grad_norm": 31.826929765619607, + "learning_rate": 5e-06, + "loss": 0.3054, + "num_input_tokens_seen": 414829968, + "step": 2400 + }, + { + "epoch": 0.9191880505553428, + "loss": 0.32269665598869324, + "loss_ce": 0.02069469541311264, + "loss_iou": 1.033881425857544, + "loss_num": 0.302734375, + "loss_xval": 0.302734375, + "num_input_tokens_seen": 414829968, + "step": 2400 + }, + { + "epoch": 0.9195710455764075, + "grad_norm": 36.31243766548263, + "learning_rate": 5e-06, + "loss": 0.2791, + "num_input_tokens_seen": 415003472, + "step": 2401 + }, + { + "epoch": 0.9195710455764075, + "loss": 0.1605401337146759, + "loss_ce": 0.022234464064240456, + "loss_iou": 1.016472339630127, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 415003472, + "step": 2401 + }, + { + "epoch": 0.9199540405974722, + "grad_norm": 52.23825924030564, + "learning_rate": 5e-06, + "loss": 0.2401, + "num_input_tokens_seen": 415176336, + "step": 2402 + }, + { + "epoch": 0.9199540405974722, + "loss": 0.24016940593719482, + "loss_ce": 0.020320788025856018, + "loss_iou": 1.043022632598877, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 415176336, + "step": 2402 + }, + { + "epoch": 0.920337035618537, + "grad_norm": 34.65960734586794, + "learning_rate": 5e-06, + "loss": 0.1916, + "num_input_tokens_seen": 415349016, + "step": 2403 + }, + { + "epoch": 0.920337035618537, + "loss": 0.186640664935112, + "loss_ce": 0.022578159347176552, + "loss_iou": 1.025832176208496, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 415349016, + "step": 2403 + }, + { + "epoch": 0.9207200306396017, + "grad_norm": 33.46523262659949, + "learning_rate": 5e-06, + "loss": 0.2845, + "num_input_tokens_seen": 415521760, + "step": 2404 + }, + { + "epoch": 0.9207200306396017, + "loss": 0.25068753957748413, + "loss_ce": 0.02180570550262928, + "loss_iou": 1.0948412418365479, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 415521760, + "step": 2404 + }, + { + "epoch": 0.9211030256606664, + "grad_norm": 20.460913905721025, + "learning_rate": 5e-06, + "loss": 0.1433, + "num_input_tokens_seen": 415694848, + "step": 2405 + }, + { + "epoch": 0.9211030256606664, + "loss": 0.14977923035621643, + "loss_ce": 0.02184954844415188, + "loss_iou": 1.0027718544006348, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 415694848, + "step": 2405 + }, + { + "epoch": 0.9214860206817311, + "grad_norm": 50.72670269751362, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 415867912, + "step": 2406 + }, + { + "epoch": 0.9214860206817311, + "loss": 0.19672946631908417, + "loss_ce": 0.021985813975334167, + "loss_iou": 1.00660240650177, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 415867912, + "step": 2406 + }, + { + "epoch": 0.9218690157027959, + "grad_norm": 29.623117232777613, + "learning_rate": 5e-06, + "loss": 0.2317, + "num_input_tokens_seen": 416040912, + "step": 2407 + }, + { + "epoch": 0.9218690157027959, + "loss": 0.14956334233283997, + "loss_ce": 0.021755734458565712, + "loss_iou": 1.0036413669586182, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 416040912, + "step": 2407 + }, + { + "epoch": 0.9222520107238605, + "grad_norm": 35.23146260409525, + "learning_rate": 5e-06, + "loss": 0.2066, + "num_input_tokens_seen": 416213568, + "step": 2408 + }, + { + "epoch": 0.9222520107238605, + "loss": 0.17996053397655487, + "loss_ce": 0.02139120362699032, + "loss_iou": 1.0081908702850342, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 416213568, + "step": 2408 + }, + { + "epoch": 0.9226350057449253, + "grad_norm": 66.48201776577294, + "learning_rate": 5e-06, + "loss": 0.239, + "num_input_tokens_seen": 416386272, + "step": 2409 + }, + { + "epoch": 0.9226350057449253, + "loss": 0.2576059401035309, + "loss_ce": 0.021521952003240585, + "loss_iou": 1.1739799976348877, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 416386272, + "step": 2409 + }, + { + "epoch": 0.92301800076599, + "grad_norm": 32.69023473660693, + "learning_rate": 5e-06, + "loss": 0.2489, + "num_input_tokens_seen": 416559384, + "step": 2410 + }, + { + "epoch": 0.92301800076599, + "loss": 0.19133315980434418, + "loss_ce": 0.01982436515390873, + "loss_iou": 1.0112589597702026, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 416559384, + "step": 2410 + }, + { + "epoch": 0.9234009957870548, + "grad_norm": 24.341024657896455, + "learning_rate": 5e-06, + "loss": 0.2366, + "num_input_tokens_seen": 416732424, + "step": 2411 + }, + { + "epoch": 0.9234009957870548, + "loss": 0.21062758564949036, + "loss_ce": 0.020319968461990356, + "loss_iou": 1.11098051071167, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 416732424, + "step": 2411 + }, + { + "epoch": 0.9237839908081195, + "grad_norm": 23.874798414372748, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 416905416, + "step": 2412 + }, + { + "epoch": 0.9237839908081195, + "loss": 0.17523883283138275, + "loss_ce": 0.020392633974552155, + "loss_iou": 1.0309886932373047, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 416905416, + "step": 2412 + }, + { + "epoch": 0.9241669858291842, + "grad_norm": 24.93969323539543, + "learning_rate": 5e-06, + "loss": 0.1812, + "num_input_tokens_seen": 417078432, + "step": 2413 + }, + { + "epoch": 0.9241669858291842, + "loss": 0.16211159527301788, + "loss_ce": 0.021120378747582436, + "loss_iou": 1.0268622636795044, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 417078432, + "step": 2413 + }, + { + "epoch": 0.924549980850249, + "grad_norm": 35.228836376996796, + "learning_rate": 5e-06, + "loss": 0.2267, + "num_input_tokens_seen": 417251472, + "step": 2414 + }, + { + "epoch": 0.924549980850249, + "loss": 0.23107093572616577, + "loss_ce": 0.02049964852631092, + "loss_iou": 1.0622409582138062, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 417251472, + "step": 2414 + }, + { + "epoch": 0.9249329758713136, + "grad_norm": 24.71475558465295, + "learning_rate": 5e-06, + "loss": 0.2123, + "num_input_tokens_seen": 417424664, + "step": 2415 + }, + { + "epoch": 0.9249329758713136, + "loss": 0.17129939794540405, + "loss_ce": 0.02045101299881935, + "loss_iou": 1.0016270875930786, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 417424664, + "step": 2415 + }, + { + "epoch": 0.9253159708923784, + "grad_norm": 34.31229272225131, + "learning_rate": 5e-06, + "loss": 0.2047, + "num_input_tokens_seen": 417597624, + "step": 2416 + }, + { + "epoch": 0.9253159708923784, + "loss": 0.20795701444149017, + "loss_ce": 0.021494608372449875, + "loss_iou": 1.0274615287780762, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 417597624, + "step": 2416 + }, + { + "epoch": 0.9256989659134431, + "grad_norm": 25.928366534167534, + "learning_rate": 5e-06, + "loss": 0.2272, + "num_input_tokens_seen": 417770784, + "step": 2417 + }, + { + "epoch": 0.9256989659134431, + "loss": 0.23110869526863098, + "loss_ce": 0.021636024117469788, + "loss_iou": 1.0217278003692627, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 417770784, + "step": 2417 + }, + { + "epoch": 0.9260819609345079, + "grad_norm": 51.19708150594874, + "learning_rate": 5e-06, + "loss": 0.2705, + "num_input_tokens_seen": 417944048, + "step": 2418 + }, + { + "epoch": 0.9260819609345079, + "loss": 0.2860315144062042, + "loss_ce": 0.02101687341928482, + "loss_iou": 1.1807692050933838, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 417944048, + "step": 2418 + }, + { + "epoch": 0.9264649559555725, + "grad_norm": 38.28563578186094, + "learning_rate": 5e-06, + "loss": 0.2517, + "num_input_tokens_seen": 418116832, + "step": 2419 + }, + { + "epoch": 0.9264649559555725, + "loss": 0.21095630526542664, + "loss_ce": 0.020282475277781487, + "loss_iou": 1.0341272354125977, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 418116832, + "step": 2419 + }, + { + "epoch": 0.9268479509766373, + "grad_norm": 36.43215353743023, + "learning_rate": 5e-06, + "loss": 0.2319, + "num_input_tokens_seen": 418289800, + "step": 2420 + }, + { + "epoch": 0.9268479509766373, + "loss": 0.23096629977226257, + "loss_ce": 0.020028796046972275, + "loss_iou": 1.0649144649505615, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 418289800, + "step": 2420 + }, + { + "epoch": 0.9272309459977021, + "grad_norm": 32.00124685256224, + "learning_rate": 5e-06, + "loss": 0.1744, + "num_input_tokens_seen": 418462696, + "step": 2421 + }, + { + "epoch": 0.9272309459977021, + "loss": 0.18664014339447021, + "loss_ce": 0.02172316424548626, + "loss_iou": 1.004173755645752, + "loss_num": 0.1650390625, + "loss_xval": 0.1650390625, + "num_input_tokens_seen": 418462696, + "step": 2421 + }, + { + "epoch": 0.9276139410187667, + "grad_norm": 57.02463023651104, + "learning_rate": 5e-06, + "loss": 0.2429, + "num_input_tokens_seen": 418635648, + "step": 2422 + }, + { + "epoch": 0.9276139410187667, + "loss": 0.23585951328277588, + "loss_ce": 0.02174818143248558, + "loss_iou": 1.022843360900879, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 418635648, + "step": 2422 + }, + { + "epoch": 0.9279969360398315, + "grad_norm": 28.207117397336745, + "learning_rate": 5e-06, + "loss": 0.2404, + "num_input_tokens_seen": 418808968, + "step": 2423 + }, + { + "epoch": 0.9279969360398315, + "loss": 0.26323407888412476, + "loss_ce": 0.02184002473950386, + "loss_iou": 1.1213366985321045, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 418808968, + "step": 2423 + }, + { + "epoch": 0.9283799310608962, + "grad_norm": 44.109485209680216, + "learning_rate": 5e-06, + "loss": 0.2911, + "num_input_tokens_seen": 418982176, + "step": 2424 + }, + { + "epoch": 0.9283799310608962, + "loss": 0.25499510765075684, + "loss_ce": 0.019460441544651985, + "loss_iou": 1.2126247882843018, + "loss_num": 0.2353515625, + "loss_xval": 0.2353515625, + "num_input_tokens_seen": 418982176, + "step": 2424 + }, + { + "epoch": 0.928762926081961, + "grad_norm": 38.387519998801864, + "learning_rate": 5e-06, + "loss": 0.2096, + "num_input_tokens_seen": 419155328, + "step": 2425 + }, + { + "epoch": 0.928762926081961, + "loss": 0.19838780164718628, + "loss_ce": 0.021996185183525085, + "loss_iou": 1.0736583471298218, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 419155328, + "step": 2425 + }, + { + "epoch": 0.9291459211030256, + "grad_norm": 28.7370779052248, + "learning_rate": 5e-06, + "loss": 0.1685, + "num_input_tokens_seen": 419328512, + "step": 2426 + }, + { + "epoch": 0.9291459211030256, + "loss": 0.17336629331111908, + "loss_ce": 0.02138875052332878, + "loss_iou": 0.9574381113052368, + "loss_num": 0.15234375, + "loss_xval": 0.15234375, + "num_input_tokens_seen": 419328512, + "step": 2426 + }, + { + "epoch": 0.9295289161240904, + "grad_norm": 31.995777644262525, + "learning_rate": 5e-06, + "loss": 0.2352, + "num_input_tokens_seen": 419501688, + "step": 2427 + }, + { + "epoch": 0.9295289161240904, + "loss": 0.24376463890075684, + "loss_ce": 0.021840814501047134, + "loss_iou": 1.0403281450271606, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 419501688, + "step": 2427 + }, + { + "epoch": 0.9299119111451551, + "grad_norm": 43.915227363341806, + "learning_rate": 5e-06, + "loss": 0.1724, + "num_input_tokens_seen": 419674408, + "step": 2428 + }, + { + "epoch": 0.9299119111451551, + "loss": 0.16767174005508423, + "loss_ce": 0.019753042608499527, + "loss_iou": 1.0534336566925049, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 419674408, + "step": 2428 + }, + { + "epoch": 0.9302949061662198, + "grad_norm": 34.99340342966463, + "learning_rate": 5e-06, + "loss": 0.2348, + "num_input_tokens_seen": 419847200, + "step": 2429 + }, + { + "epoch": 0.9302949061662198, + "loss": 0.24999958276748657, + "loss_ce": 0.020019114017486572, + "loss_iou": 1.1393516063690186, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 419847200, + "step": 2429 + }, + { + "epoch": 0.9306779011872846, + "grad_norm": 27.957835315109037, + "learning_rate": 5e-06, + "loss": 0.2015, + "num_input_tokens_seen": 420020184, + "step": 2430 + }, + { + "epoch": 0.9306779011872846, + "loss": 0.2168235331773758, + "loss_ce": 0.021633097901940346, + "loss_iou": 1.0590548515319824, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 420020184, + "step": 2430 + }, + { + "epoch": 0.9310608962083493, + "grad_norm": 30.479759737812316, + "learning_rate": 5e-06, + "loss": 0.1696, + "num_input_tokens_seen": 420192984, + "step": 2431 + }, + { + "epoch": 0.9310608962083493, + "loss": 0.2150260955095291, + "loss_ce": 0.01977463811635971, + "loss_iou": 1.0108896493911743, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 420192984, + "step": 2431 + }, + { + "epoch": 0.9314438912294141, + "grad_norm": 29.95663225231501, + "learning_rate": 5e-06, + "loss": 0.2143, + "num_input_tokens_seen": 420365680, + "step": 2432 + }, + { + "epoch": 0.9314438912294141, + "loss": 0.18899086117744446, + "loss_ce": 0.019984519109129906, + "loss_iou": 1.0001256465911865, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 420365680, + "step": 2432 + }, + { + "epoch": 0.9318268862504787, + "grad_norm": 28.49949267539661, + "learning_rate": 5e-06, + "loss": 0.1758, + "num_input_tokens_seen": 420538536, + "step": 2433 + }, + { + "epoch": 0.9318268862504787, + "loss": 0.1715395152568817, + "loss_ce": 0.02011130005121231, + "loss_iou": 1.0529817342758179, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 420538536, + "step": 2433 + }, + { + "epoch": 0.9322098812715435, + "grad_norm": 23.356378197389986, + "learning_rate": 5e-06, + "loss": 0.2315, + "num_input_tokens_seen": 420711392, + "step": 2434 + }, + { + "epoch": 0.9322098812715435, + "loss": 0.2523745894432068, + "loss_ce": 0.02257724106311798, + "loss_iou": 1.0776373147964478, + "loss_num": 0.2294921875, + "loss_xval": 0.2294921875, + "num_input_tokens_seen": 420711392, + "step": 2434 + }, + { + "epoch": 0.9325928762926082, + "grad_norm": 27.878087081766314, + "learning_rate": 5e-06, + "loss": 0.1885, + "num_input_tokens_seen": 420883976, + "step": 2435 + }, + { + "epoch": 0.9325928762926082, + "loss": 0.16960689425468445, + "loss_ce": 0.022023893892765045, + "loss_iou": 90.7451171875, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 420883976, + "step": 2435 + }, + { + "epoch": 0.9329758713136729, + "grad_norm": 21.591229060591598, + "learning_rate": 5e-06, + "loss": 0.1899, + "num_input_tokens_seen": 421056688, + "step": 2436 + }, + { + "epoch": 0.9329758713136729, + "loss": 0.1906215250492096, + "loss_ce": 0.020638613030314445, + "loss_iou": 1.0376107692718506, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 421056688, + "step": 2436 + }, + { + "epoch": 0.9333588663347376, + "grad_norm": 27.294068486612392, + "learning_rate": 5e-06, + "loss": 0.1863, + "num_input_tokens_seen": 421229760, + "step": 2437 + }, + { + "epoch": 0.9333588663347376, + "loss": 0.18855778872966766, + "loss_ce": 0.02214544080197811, + "loss_iou": 1.0555882453918457, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 421229760, + "step": 2437 + }, + { + "epoch": 0.9337418613558024, + "grad_norm": 32.587310319264084, + "learning_rate": 5e-06, + "loss": 0.189, + "num_input_tokens_seen": 421402960, + "step": 2438 + }, + { + "epoch": 0.9337418613558024, + "loss": 0.19774208962917328, + "loss_ce": 0.02147255465388298, + "loss_iou": 1.0686249732971191, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 421402960, + "step": 2438 + }, + { + "epoch": 0.9341248563768672, + "grad_norm": 24.589879187900202, + "learning_rate": 5e-06, + "loss": 0.237, + "num_input_tokens_seen": 421576272, + "step": 2439 + }, + { + "epoch": 0.9341248563768672, + "loss": 0.2968590557575226, + "loss_ce": 0.02152945101261139, + "loss_iou": 1.2773818969726562, + "loss_num": 0.275390625, + "loss_xval": 0.275390625, + "num_input_tokens_seen": 421576272, + "step": 2439 + }, + { + "epoch": 0.9345078513979318, + "grad_norm": 34.9548196804876, + "learning_rate": 5e-06, + "loss": 0.2036, + "num_input_tokens_seen": 421749328, + "step": 2440 + }, + { + "epoch": 0.9345078513979318, + "loss": 0.2245294451713562, + "loss_ce": 0.021831698715686798, + "loss_iou": 1.032029390335083, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 421749328, + "step": 2440 + }, + { + "epoch": 0.9348908464189966, + "grad_norm": 28.641058877782275, + "learning_rate": 5e-06, + "loss": 0.1641, + "num_input_tokens_seen": 421922496, + "step": 2441 + }, + { + "epoch": 0.9348908464189966, + "loss": 0.1931767463684082, + "loss_ce": 0.021057605743408203, + "loss_iou": 1.0118610858917236, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 421922496, + "step": 2441 + }, + { + "epoch": 0.9352738414400613, + "grad_norm": 28.024374780013545, + "learning_rate": 5e-06, + "loss": 0.2141, + "num_input_tokens_seen": 422095488, + "step": 2442 + }, + { + "epoch": 0.9352738414400613, + "loss": 0.18159523606300354, + "loss_ce": 0.01881447806954384, + "loss_iou": 1.083962321281433, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 422095488, + "step": 2442 + }, + { + "epoch": 0.935656836461126, + "grad_norm": 18.739231319484176, + "learning_rate": 5e-06, + "loss": 0.1999, + "num_input_tokens_seen": 422268552, + "step": 2443 + }, + { + "epoch": 0.935656836461126, + "loss": 0.21589979529380798, + "loss_ce": 0.019702300429344177, + "loss_iou": 1.099543571472168, + "loss_num": 0.1962890625, + "loss_xval": 0.1962890625, + "num_input_tokens_seen": 422268552, + "step": 2443 + }, + { + "epoch": 0.9360398314821907, + "grad_norm": 42.80397169834053, + "learning_rate": 5e-06, + "loss": 0.2415, + "num_input_tokens_seen": 422441688, + "step": 2444 + }, + { + "epoch": 0.9360398314821907, + "loss": 0.2545335292816162, + "loss_ce": 0.022660961374640465, + "loss_iou": 1.0548175573349, + "loss_num": 0.2314453125, + "loss_xval": 0.2314453125, + "num_input_tokens_seen": 422441688, + "step": 2444 + }, + { + "epoch": 0.9364228265032555, + "grad_norm": 26.968170587405464, + "learning_rate": 5e-06, + "loss": 0.2625, + "num_input_tokens_seen": 422614744, + "step": 2445 + }, + { + "epoch": 0.9364228265032555, + "loss": 0.2984175682067871, + "loss_ce": 0.0207686685025692, + "loss_iou": 1.228839635848999, + "loss_num": 0.27734375, + "loss_xval": 0.27734375, + "num_input_tokens_seen": 422614744, + "step": 2445 + }, + { + "epoch": 0.9368058215243202, + "grad_norm": 21.29776441160989, + "learning_rate": 5e-06, + "loss": 0.1967, + "num_input_tokens_seen": 422787888, + "step": 2446 + }, + { + "epoch": 0.9368058215243202, + "loss": 0.15890049934387207, + "loss_ce": 0.022547969594597816, + "loss_iou": 1.0457978248596191, + "loss_num": 0.13671875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 422787888, + "step": 2446 + }, + { + "epoch": 0.9371888165453849, + "grad_norm": 27.680384244658374, + "learning_rate": 5e-06, + "loss": 0.1658, + "num_input_tokens_seen": 422961024, + "step": 2447 + }, + { + "epoch": 0.9371888165453849, + "loss": 0.20146608352661133, + "loss_ce": 0.021625995635986328, + "loss_iou": 1.2909531593322754, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 422961024, + "step": 2447 + }, + { + "epoch": 0.9375718115664496, + "grad_norm": 36.61282361352501, + "learning_rate": 5e-06, + "loss": 0.2415, + "num_input_tokens_seen": 423133776, + "step": 2448 + }, + { + "epoch": 0.9375718115664496, + "loss": 0.22485896944999695, + "loss_ce": 0.021245690062642097, + "loss_iou": 1.0258800983428955, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 423133776, + "step": 2448 + }, + { + "epoch": 0.9379548065875144, + "grad_norm": 26.416596468709095, + "learning_rate": 5e-06, + "loss": 0.1476, + "num_input_tokens_seen": 423306536, + "step": 2449 + }, + { + "epoch": 0.9379548065875144, + "loss": 0.14405319094657898, + "loss_ce": 0.02247115597128868, + "loss_iou": 0.9872841835021973, + "loss_num": 0.12158203125, + "loss_xval": 0.12158203125, + "num_input_tokens_seen": 423306536, + "step": 2449 + }, + { + "epoch": 0.938337801608579, + "grad_norm": 27.007111721030263, + "learning_rate": 5e-06, + "loss": 0.1971, + "num_input_tokens_seen": 423479520, + "step": 2450 + }, + { + "epoch": 0.938337801608579, + "loss": 0.2385256439447403, + "loss_ce": 0.021728765219449997, + "loss_iou": 1.0041799545288086, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 423479520, + "step": 2450 + }, + { + "epoch": 0.9387207966296438, + "grad_norm": 31.69896577460898, + "learning_rate": 5e-06, + "loss": 0.2496, + "num_input_tokens_seen": 423652704, + "step": 2451 + }, + { + "epoch": 0.9387207966296438, + "loss": 0.21565771102905273, + "loss_ce": 0.021749012172222137, + "loss_iou": 1.0183502435684204, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 423652704, + "step": 2451 + }, + { + "epoch": 0.9391037916507086, + "grad_norm": 31.989382374706516, + "learning_rate": 5e-06, + "loss": 0.2617, + "num_input_tokens_seen": 423825736, + "step": 2452 + }, + { + "epoch": 0.9391037916507086, + "loss": 0.2783448100090027, + "loss_ce": 0.01998301036655903, + "loss_iou": 1.0539875030517578, + "loss_num": 0.2578125, + "loss_xval": 0.2578125, + "num_input_tokens_seen": 423825736, + "step": 2452 + }, + { + "epoch": 0.9394867866717733, + "grad_norm": 22.154819849787714, + "learning_rate": 5e-06, + "loss": 0.1582, + "num_input_tokens_seen": 423998760, + "step": 2453 + }, + { + "epoch": 0.9394867866717733, + "loss": 0.13484029471874237, + "loss_ce": 0.02216940000653267, + "loss_iou": 1.0007212162017822, + "loss_num": 0.11279296875, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 423998760, + "step": 2453 + }, + { + "epoch": 0.939869781692838, + "grad_norm": 31.694484928270235, + "learning_rate": 5e-06, + "loss": 0.2342, + "num_input_tokens_seen": 424171520, + "step": 2454 + }, + { + "epoch": 0.939869781692838, + "loss": 0.22024646401405334, + "loss_ce": 0.022370483726263046, + "loss_iou": 1.0082926750183105, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 424171520, + "step": 2454 + }, + { + "epoch": 0.9402527767139027, + "grad_norm": 27.88397412282971, + "learning_rate": 5e-06, + "loss": 0.2001, + "num_input_tokens_seen": 424344736, + "step": 2455 + }, + { + "epoch": 0.9402527767139027, + "loss": 0.17761653661727905, + "loss_ce": 0.02136654406785965, + "loss_iou": 1.0251858234405518, + "loss_num": 0.15625, + "loss_xval": 0.15625, + "num_input_tokens_seen": 424344736, + "step": 2455 + }, + { + "epoch": 0.9406357717349675, + "grad_norm": 48.855361931225794, + "learning_rate": 5e-06, + "loss": 0.295, + "num_input_tokens_seen": 424516104, + "step": 2456 + }, + { + "epoch": 0.9406357717349675, + "loss": 0.2889312505722046, + "loss_ce": 0.022146565839648247, + "loss_iou": 1.675811529159546, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 424516104, + "step": 2456 + }, + { + "epoch": 0.9410187667560321, + "grad_norm": 30.33568671434178, + "learning_rate": 5e-06, + "loss": 0.2114, + "num_input_tokens_seen": 424689408, + "step": 2457 + }, + { + "epoch": 0.9410187667560321, + "loss": 0.17778730392456055, + "loss_ce": 0.02098798379302025, + "loss_iou": 1.013716697692871, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 424689408, + "step": 2457 + }, + { + "epoch": 0.9414017617770969, + "grad_norm": 38.34361219398389, + "learning_rate": 5e-06, + "loss": 0.2282, + "num_input_tokens_seen": 424862320, + "step": 2458 + }, + { + "epoch": 0.9414017617770969, + "loss": 0.2448033094406128, + "loss_ce": 0.021658774465322495, + "loss_iou": 1.003727674484253, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 424862320, + "step": 2458 + }, + { + "epoch": 0.9417847567981616, + "grad_norm": 21.74090728512593, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 425035480, + "step": 2459 + }, + { + "epoch": 0.9417847567981616, + "loss": 0.2162160873413086, + "loss_ce": 0.02041531540453434, + "loss_iou": 1.0237390995025635, + "loss_num": 0.1953125, + "loss_xval": 0.1953125, + "num_input_tokens_seen": 425035480, + "step": 2459 + }, + { + "epoch": 0.9421677518192263, + "grad_norm": 36.583530825826195, + "learning_rate": 5e-06, + "loss": 0.2563, + "num_input_tokens_seen": 425208728, + "step": 2460 + }, + { + "epoch": 0.9421677518192263, + "loss": 0.23762497305870056, + "loss_ce": 0.02137741819024086, + "loss_iou": 1.1218187808990479, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 425208728, + "step": 2460 + }, + { + "epoch": 0.942550746840291, + "grad_norm": 22.02133972261915, + "learning_rate": 5e-06, + "loss": 0.1593, + "num_input_tokens_seen": 425381832, + "step": 2461 + }, + { + "epoch": 0.942550746840291, + "loss": 0.1533421277999878, + "loss_ce": 0.021078944206237793, + "loss_iou": 1.0010343790054321, + "loss_num": 0.1318359375, + "loss_xval": 0.1318359375, + "num_input_tokens_seen": 425381832, + "step": 2461 + }, + { + "epoch": 0.9429337418613558, + "grad_norm": 20.812638383616566, + "learning_rate": 5e-06, + "loss": 0.1997, + "num_input_tokens_seen": 425555104, + "step": 2462 + }, + { + "epoch": 0.9429337418613558, + "loss": 0.1622682511806488, + "loss_ce": 0.021368587389588356, + "loss_iou": 1.0101661682128906, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 425555104, + "step": 2462 + }, + { + "epoch": 0.9433167368824206, + "grad_norm": 23.13334762573491, + "learning_rate": 5e-06, + "loss": 0.1648, + "num_input_tokens_seen": 425727848, + "step": 2463 + }, + { + "epoch": 0.9433167368824206, + "loss": 0.214143767952919, + "loss_ce": 0.022371307015419006, + "loss_iou": 1.0665621757507324, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 425727848, + "step": 2463 + }, + { + "epoch": 0.9436997319034852, + "grad_norm": 28.141571096769116, + "learning_rate": 5e-06, + "loss": 0.1967, + "num_input_tokens_seen": 425900896, + "step": 2464 + }, + { + "epoch": 0.9436997319034852, + "loss": 0.17636796832084656, + "loss_ce": 0.021460752934217453, + "loss_iou": 1.0157206058502197, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 425900896, + "step": 2464 + }, + { + "epoch": 0.94408272692455, + "grad_norm": 28.273967629327604, + "learning_rate": 5e-06, + "loss": 0.1526, + "num_input_tokens_seen": 426074000, + "step": 2465 + }, + { + "epoch": 0.94408272692455, + "loss": 0.14048194885253906, + "loss_ce": 0.020334240049123764, + "loss_iou": 0.9708118438720703, + "loss_num": 0.1201171875, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 426074000, + "step": 2465 + }, + { + "epoch": 0.9444657219456147, + "grad_norm": 26.325953505132922, + "learning_rate": 5e-06, + "loss": 0.207, + "num_input_tokens_seen": 426247016, + "step": 2466 + }, + { + "epoch": 0.9444657219456147, + "loss": 0.242500901222229, + "loss_ce": 0.019539479166269302, + "loss_iou": 1.0102567672729492, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 426247016, + "step": 2466 + }, + { + "epoch": 0.9448487169666794, + "grad_norm": 20.862450952264904, + "learning_rate": 5e-06, + "loss": 0.1364, + "num_input_tokens_seen": 426416488, + "step": 2467 + }, + { + "epoch": 0.9448487169666794, + "loss": 0.14106445014476776, + "loss_ce": 0.022198490798473358, + "loss_iou": 1.0052566528320312, + "loss_num": 0.11865234375, + "loss_xval": 0.11865234375, + "num_input_tokens_seen": 426416488, + "step": 2467 + }, + { + "epoch": 0.9452317119877441, + "grad_norm": 38.57116748053257, + "learning_rate": 5e-06, + "loss": 0.2787, + "num_input_tokens_seen": 426589128, + "step": 2468 + }, + { + "epoch": 0.9452317119877441, + "loss": 0.26711416244506836, + "loss_ce": 0.019067276269197464, + "loss_iou": 1.2741436958312988, + "loss_num": 0.248046875, + "loss_xval": 0.248046875, + "num_input_tokens_seen": 426589128, + "step": 2468 + }, + { + "epoch": 0.9456147070088089, + "grad_norm": 37.05523511090258, + "learning_rate": 5e-06, + "loss": 0.2053, + "num_input_tokens_seen": 426762136, + "step": 2469 + }, + { + "epoch": 0.9456147070088089, + "loss": 0.18002773821353912, + "loss_ce": 0.021000638604164124, + "loss_iou": 1.007516622543335, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 426762136, + "step": 2469 + }, + { + "epoch": 0.9459977020298737, + "grad_norm": 22.47467923645663, + "learning_rate": 5e-06, + "loss": 0.2386, + "num_input_tokens_seen": 426934976, + "step": 2470 + }, + { + "epoch": 0.9459977020298737, + "loss": 0.22461026906967163, + "loss_ce": 0.01965421438217163, + "loss_iou": 0.9954475164413452, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 426934976, + "step": 2470 + }, + { + "epoch": 0.9463806970509383, + "grad_norm": 35.96795631207314, + "learning_rate": 5e-06, + "loss": 0.2329, + "num_input_tokens_seen": 427107808, + "step": 2471 + }, + { + "epoch": 0.9463806970509383, + "loss": 0.2343979775905609, + "loss_ce": 0.02199564129114151, + "loss_iou": 1.1027498245239258, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 427107808, + "step": 2471 + }, + { + "epoch": 0.9467636920720031, + "grad_norm": 32.507799383535485, + "learning_rate": 5e-06, + "loss": 0.1832, + "num_input_tokens_seen": 427280736, + "step": 2472 + }, + { + "epoch": 0.9467636920720031, + "loss": 0.17686253786087036, + "loss_ce": 0.01963598094880581, + "loss_iou": 1.0173301696777344, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 427280736, + "step": 2472 + }, + { + "epoch": 0.9471466870930678, + "grad_norm": 24.407314706980245, + "learning_rate": 5e-06, + "loss": 0.227, + "num_input_tokens_seen": 427453840, + "step": 2473 + }, + { + "epoch": 0.9471466870930678, + "loss": 0.18174517154693604, + "loss_ce": 0.022748585790395737, + "loss_iou": 1.0485135316848755, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 427453840, + "step": 2473 + }, + { + "epoch": 0.9475296821141325, + "grad_norm": 26.028871470573005, + "learning_rate": 5e-06, + "loss": 0.2004, + "num_input_tokens_seen": 427627000, + "step": 2474 + }, + { + "epoch": 0.9475296821141325, + "loss": 0.15802933275699615, + "loss_ce": 0.021127481013536453, + "loss_iou": 1.0013928413391113, + "loss_num": 0.13671875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 427627000, + "step": 2474 + }, + { + "epoch": 0.9479126771351972, + "grad_norm": 33.81848883195221, + "learning_rate": 5e-06, + "loss": 0.2085, + "num_input_tokens_seen": 427800448, + "step": 2475 + }, + { + "epoch": 0.9479126771351972, + "loss": 0.16287113726139069, + "loss_ce": 0.021513711661100388, + "loss_iou": 1.0379126071929932, + "loss_num": 0.1416015625, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 427800448, + "step": 2475 + }, + { + "epoch": 0.948295672156262, + "grad_norm": 49.474482420045476, + "learning_rate": 5e-06, + "loss": 0.2371, + "num_input_tokens_seen": 427973656, + "step": 2476 + }, + { + "epoch": 0.948295672156262, + "loss": 0.26268815994262695, + "loss_ce": 0.021416183561086655, + "loss_iou": 1.0379433631896973, + "loss_num": 0.2412109375, + "loss_xval": 0.2412109375, + "num_input_tokens_seen": 427973656, + "step": 2476 + }, + { + "epoch": 0.9486786671773267, + "grad_norm": 28.928450317786854, + "learning_rate": 5e-06, + "loss": 0.2998, + "num_input_tokens_seen": 428146792, + "step": 2477 + }, + { + "epoch": 0.9486786671773267, + "loss": 0.2834222912788391, + "loss_ce": 0.020604893565177917, + "loss_iou": 1.092254400253296, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 428146792, + "step": 2477 + }, + { + "epoch": 0.9490616621983914, + "grad_norm": 23.9767630999348, + "learning_rate": 5e-06, + "loss": 0.2223, + "num_input_tokens_seen": 428319744, + "step": 2478 + }, + { + "epoch": 0.9490616621983914, + "loss": 0.22838380932807922, + "loss_ce": 0.021047383546829224, + "loss_iou": 1.040341854095459, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 428319744, + "step": 2478 + }, + { + "epoch": 0.9494446572194561, + "grad_norm": 27.230865238357527, + "learning_rate": 5e-06, + "loss": 0.2295, + "num_input_tokens_seen": 428492616, + "step": 2479 + }, + { + "epoch": 0.9494446572194561, + "loss": 0.1958887279033661, + "loss_ce": 0.01968022808432579, + "loss_iou": 0.9880644083023071, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 428492616, + "step": 2479 + }, + { + "epoch": 0.9498276522405209, + "grad_norm": 42.87427055455577, + "learning_rate": 5e-06, + "loss": 0.248, + "num_input_tokens_seen": 428666016, + "step": 2480 + }, + { + "epoch": 0.9498276522405209, + "loss": 0.25098004937171936, + "loss_ce": 0.02032819390296936, + "loss_iou": 1.064856767654419, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 428666016, + "step": 2480 + }, + { + "epoch": 0.9502106472615856, + "grad_norm": 32.41868064726956, + "learning_rate": 5e-06, + "loss": 0.2358, + "num_input_tokens_seen": 428838920, + "step": 2481 + }, + { + "epoch": 0.9502106472615856, + "loss": 0.19582206010818481, + "loss_ce": 0.02101735584437847, + "loss_iou": 1.0015753507614136, + "loss_num": 0.1748046875, + "loss_xval": 0.1748046875, + "num_input_tokens_seen": 428838920, + "step": 2481 + }, + { + "epoch": 0.9505936422826503, + "grad_norm": 24.455258994810496, + "learning_rate": 5e-06, + "loss": 0.3003, + "num_input_tokens_seen": 429012136, + "step": 2482 + }, + { + "epoch": 0.9505936422826503, + "loss": 0.31088584661483765, + "loss_ce": 0.020663654431700706, + "loss_iou": 1.1978161334991455, + "loss_num": 0.291015625, + "loss_xval": 0.291015625, + "num_input_tokens_seen": 429012136, + "step": 2482 + }, + { + "epoch": 0.9509766373037151, + "grad_norm": 24.558980927253444, + "learning_rate": 5e-06, + "loss": 0.2565, + "num_input_tokens_seen": 429184968, + "step": 2483 + }, + { + "epoch": 0.9509766373037151, + "loss": 0.22562754154205322, + "loss_ce": 0.020793568342924118, + "loss_iou": 1.0763115882873535, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 429184968, + "step": 2483 + }, + { + "epoch": 0.9513596323247798, + "grad_norm": 57.7302153478801, + "learning_rate": 5e-06, + "loss": 0.2699, + "num_input_tokens_seen": 429358384, + "step": 2484 + }, + { + "epoch": 0.9513596323247798, + "loss": 0.27880072593688965, + "loss_ce": 0.022575143724679947, + "loss_iou": 1.048409342765808, + "loss_num": 0.255859375, + "loss_xval": 0.255859375, + "num_input_tokens_seen": 429358384, + "step": 2484 + }, + { + "epoch": 0.9517426273458445, + "grad_norm": 41.352072095365656, + "learning_rate": 5e-06, + "loss": 0.3364, + "num_input_tokens_seen": 429531624, + "step": 2485 + }, + { + "epoch": 0.9517426273458445, + "loss": 0.25817739963531494, + "loss_ce": 0.02197132632136345, + "loss_iou": 1.0042860507965088, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 429531624, + "step": 2485 + }, + { + "epoch": 0.9521256223669092, + "grad_norm": 127.31234523264732, + "learning_rate": 5e-06, + "loss": 0.5231, + "num_input_tokens_seen": 429704504, + "step": 2486 + }, + { + "epoch": 0.9521256223669092, + "loss": 0.4990769028663635, + "loss_ce": 0.02153782919049263, + "loss_iou": 1.0146496295928955, + "loss_num": 0.4765625, + "loss_xval": 0.4765625, + "num_input_tokens_seen": 429704504, + "step": 2486 + }, + { + "epoch": 0.952508617387974, + "grad_norm": 37.48308836335706, + "learning_rate": 5e-06, + "loss": 0.1841, + "num_input_tokens_seen": 429877656, + "step": 2487 + }, + { + "epoch": 0.952508617387974, + "loss": 0.23241500556468964, + "loss_ce": 0.019768521189689636, + "loss_iou": 1.0113465785980225, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 429877656, + "step": 2487 + }, + { + "epoch": 0.9528916124090386, + "grad_norm": 67.83485410278199, + "learning_rate": 5e-06, + "loss": 0.6158, + "num_input_tokens_seen": 430050504, + "step": 2488 + }, + { + "epoch": 0.9528916124090386, + "loss": 0.6186951398849487, + "loss_ce": 0.019085798412561417, + "loss_iou": 1.0142709016799927, + "loss_num": 0.6015625, + "loss_xval": 0.6015625, + "num_input_tokens_seen": 430050504, + "step": 2488 + }, + { + "epoch": 0.9532746074301034, + "grad_norm": 43.22575450579638, + "learning_rate": 5e-06, + "loss": 0.271, + "num_input_tokens_seen": 430223656, + "step": 2489 + }, + { + "epoch": 0.9532746074301034, + "loss": 0.28194326162338257, + "loss_ce": 0.019858306273818016, + "loss_iou": 1.0584187507629395, + "loss_num": 0.26171875, + "loss_xval": 0.26171875, + "num_input_tokens_seen": 430223656, + "step": 2489 + }, + { + "epoch": 0.9536576024511682, + "grad_norm": 48.527551822606995, + "learning_rate": 5e-06, + "loss": 0.3893, + "num_input_tokens_seen": 430396144, + "step": 2490 + }, + { + "epoch": 0.9536576024511682, + "loss": 0.3745991587638855, + "loss_ce": 0.021083537489175797, + "loss_iou": 1.0061142444610596, + "loss_num": 0.353515625, + "loss_xval": 0.353515625, + "num_input_tokens_seen": 430396144, + "step": 2490 + }, + { + "epoch": 0.9540405974722329, + "grad_norm": 38.51900506222416, + "learning_rate": 5e-06, + "loss": 0.2578, + "num_input_tokens_seen": 430568832, + "step": 2491 + }, + { + "epoch": 0.9540405974722329, + "loss": 0.29192161560058594, + "loss_ce": 0.01946067623794079, + "loss_iou": 1.0029220581054688, + "loss_num": 0.2734375, + "loss_xval": 0.2734375, + "num_input_tokens_seen": 430568832, + "step": 2491 + }, + { + "epoch": 0.9544235924932976, + "grad_norm": 33.67553454531333, + "learning_rate": 5e-06, + "loss": 0.21, + "num_input_tokens_seen": 430741736, + "step": 2492 + }, + { + "epoch": 0.9544235924932976, + "loss": 0.2243577241897583, + "loss_ce": 0.020378228276968002, + "loss_iou": 1.1379162073135376, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 430741736, + "step": 2492 + }, + { + "epoch": 0.9548065875143623, + "grad_norm": 40.622192134653055, + "learning_rate": 5e-06, + "loss": 0.2468, + "num_input_tokens_seen": 430914824, + "step": 2493 + }, + { + "epoch": 0.9548065875143623, + "loss": 0.25958168506622314, + "loss_ce": 0.02160559967160225, + "loss_iou": 1.0875476598739624, + "loss_num": 0.23828125, + "loss_xval": 0.23828125, + "num_input_tokens_seen": 430914824, + "step": 2493 + }, + { + "epoch": 0.9551895825354271, + "grad_norm": 32.91565123938936, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 431087672, + "step": 2494 + }, + { + "epoch": 0.9551895825354271, + "loss": 0.21976381540298462, + "loss_ce": 0.021460603922605515, + "loss_iou": 1.0525672435760498, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 431087672, + "step": 2494 + }, + { + "epoch": 0.9555725775564917, + "grad_norm": 30.327394649264097, + "learning_rate": 5e-06, + "loss": 0.2404, + "num_input_tokens_seen": 431260584, + "step": 2495 + }, + { + "epoch": 0.9555725775564917, + "loss": 0.22192560136318207, + "loss_ce": 0.021242011338472366, + "loss_iou": 1.0752569437026978, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 431260584, + "step": 2495 + }, + { + "epoch": 0.9559555725775565, + "grad_norm": 28.437906400554276, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 431433752, + "step": 2496 + }, + { + "epoch": 0.9559555725775565, + "loss": 0.15172705054283142, + "loss_ce": 0.020867664366960526, + "loss_iou": 1.0025279521942139, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 431433752, + "step": 2496 + }, + { + "epoch": 0.9563385675986212, + "grad_norm": 28.213415813890716, + "learning_rate": 5e-06, + "loss": 0.1673, + "num_input_tokens_seen": 431607072, + "step": 2497 + }, + { + "epoch": 0.9563385675986212, + "loss": 0.17967821657657623, + "loss_ce": 0.020254386588931084, + "loss_iou": 1.0270193815231323, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 431607072, + "step": 2497 + }, + { + "epoch": 0.956721562619686, + "grad_norm": 31.790672881180054, + "learning_rate": 5e-06, + "loss": 0.1963, + "num_input_tokens_seen": 431779568, + "step": 2498 + }, + { + "epoch": 0.956721562619686, + "loss": 0.22004179656505585, + "loss_ce": 0.02082304283976555, + "loss_iou": 1.3927192687988281, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 431779568, + "step": 2498 + }, + { + "epoch": 0.9571045576407506, + "grad_norm": 32.857413340516864, + "learning_rate": 5e-06, + "loss": 0.2231, + "num_input_tokens_seen": 431952488, + "step": 2499 + }, + { + "epoch": 0.9571045576407506, + "loss": 0.18316613137722015, + "loss_ce": 0.02185022458434105, + "loss_iou": 1.0009148120880127, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 431952488, + "step": 2499 + }, + { + "epoch": 0.9574875526618154, + "grad_norm": 29.62708282557836, + "learning_rate": 5e-06, + "loss": 0.2037, + "num_input_tokens_seen": 432125408, + "step": 2500 + }, + { + "epoch": 0.9574875526618154, + "eval_websight_new_CIoU": 0.9313753843307495, + "eval_websight_new_GIoU": 0.9312478005886078, + "eval_websight_new_IoU": 0.9315329194068909, + "eval_websight_new_MAE_all": 0.006993612740188837, + "eval_websight_new_MAE_h": 0.009050879394635558, + "eval_websight_new_MAE_w": 0.007409252226352692, + "eval_websight_new_MAE_x": 0.007410084828734398, + "eval_websight_new_MAE_y": 0.0041042345110327005, + "eval_websight_new_NUM_probability": 8.344057278009132e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.11439123004674911, + "eval_websight_new_loss_ce": 0.04191363602876663, + "eval_websight_new_loss_iou": 1.0002729892730713, + "eval_websight_new_loss_num": 0.06982421875, + "eval_websight_new_loss_xval": 0.06982421875, + "eval_websight_new_runtime": 55.5425, + "eval_websight_new_samples_per_second": 0.9, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 432125408, + "step": 2500 + }, + { + "epoch": 0.9574875526618154, + "eval_seeclick_CIoU": 0.6130296289920807, + "eval_seeclick_GIoU": 0.6226363182067871, + "eval_seeclick_IoU": 0.6598637700080872, + "eval_seeclick_MAE_all": 0.07341223955154419, + "eval_seeclick_MAE_h": 0.02738715149462223, + "eval_seeclick_MAE_w": 0.1214686743915081, + "eval_seeclick_MAE_x": 0.11567392945289612, + "eval_seeclick_MAE_y": 0.029119192622601986, + "eval_seeclick_NUM_probability": 0.00011037691729143262, + "eval_seeclick_inside_bbox": 0.8697916567325592, + "eval_seeclick_loss": 0.5349428653717041, + "eval_seeclick_loss_ce": 0.032741205766797066, + "eval_seeclick_loss_iou": 1.181551992893219, + "eval_seeclick_loss_num": 0.489105224609375, + "eval_seeclick_loss_xval": 0.489105224609375, + "eval_seeclick_runtime": 92.3956, + "eval_seeclick_samples_per_second": 0.541, + "eval_seeclick_steps_per_second": 0.022, + "num_input_tokens_seen": 432125408, + "step": 2500 + }, + { + "epoch": 0.9574875526618154, + "eval_icons_CIoU": 0.8740069270133972, + "eval_icons_GIoU": 0.8716206848621368, + "eval_icons_IoU": 0.8827140927314758, + "eval_icons_MAE_all": 0.02025833074003458, + "eval_icons_MAE_h": 0.01597935613244772, + "eval_icons_MAE_w": 0.027698833495378494, + "eval_icons_MAE_x": 0.023655809462070465, + "eval_icons_MAE_y": 0.013699323870241642, + "eval_icons_NUM_probability": 7.427079981425777e-05, + "eval_icons_inside_bbox": 0.984375, + "eval_icons_loss": 0.16799156367778778, + "eval_icons_loss_ce": 0.028499008156359196, + "eval_icons_loss_iou": 1.042014479637146, + "eval_icons_loss_num": 0.127960205078125, + "eval_icons_loss_xval": 0.127960205078125, + "eval_icons_runtime": 82.8347, + "eval_icons_samples_per_second": 0.604, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 432125408, + "step": 2500 + }, + { + "epoch": 0.9574875526618154, + "loss": 0.15654532611370087, + "loss_ce": 0.028493572026491165, + "loss_iou": 1.0288493633270264, + "loss_num": 0.1279296875, + "loss_xval": 0.1279296875, + "num_input_tokens_seen": 432125408, + "step": 2500 + }, + { + "epoch": 0.9578705476828802, + "grad_norm": 34.23645201056457, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 432298384, + "step": 2501 + }, + { + "epoch": 0.9578705476828802, + "loss": 0.16409295797348022, + "loss_ce": 0.02001948095858097, + "loss_iou": 1.0105680227279663, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 432298384, + "step": 2501 + }, + { + "epoch": 0.9582535427039448, + "grad_norm": 28.206097624638492, + "learning_rate": 5e-06, + "loss": 0.1565, + "num_input_tokens_seen": 432471304, + "step": 2502 + }, + { + "epoch": 0.9582535427039448, + "loss": 0.1427156925201416, + "loss_ce": 0.019851919263601303, + "loss_iou": 1.00319504737854, + "loss_num": 0.123046875, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 432471304, + "step": 2502 + }, + { + "epoch": 0.9586365377250096, + "grad_norm": 23.28794682294048, + "learning_rate": 5e-06, + "loss": 0.1888, + "num_input_tokens_seen": 432644128, + "step": 2503 + }, + { + "epoch": 0.9586365377250096, + "loss": 0.18110212683677673, + "loss_ce": 0.019450515508651733, + "loss_iou": 1.0770268440246582, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 432644128, + "step": 2503 + }, + { + "epoch": 0.9590195327460743, + "grad_norm": 28.321850181258878, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 432817176, + "step": 2504 + }, + { + "epoch": 0.9590195327460743, + "loss": 0.15393656492233276, + "loss_ce": 0.019933875650167465, + "loss_iou": 1.052881121635437, + "loss_num": 0.1337890625, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 432817176, + "step": 2504 + }, + { + "epoch": 0.9594025277671391, + "grad_norm": 21.905521174487735, + "learning_rate": 5e-06, + "loss": 0.1719, + "num_input_tokens_seen": 432990048, + "step": 2505 + }, + { + "epoch": 0.9594025277671391, + "loss": 0.1624247133731842, + "loss_ce": 0.022226953878998756, + "loss_iou": 1.0590568780899048, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 432990048, + "step": 2505 + }, + { + "epoch": 0.9597855227882037, + "grad_norm": 39.579709731044446, + "learning_rate": 5e-06, + "loss": 0.1799, + "num_input_tokens_seen": 433163088, + "step": 2506 + }, + { + "epoch": 0.9597855227882037, + "loss": 0.19605445861816406, + "loss_ce": 0.020212167873978615, + "loss_iou": 1.0139884948730469, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 433163088, + "step": 2506 + }, + { + "epoch": 0.9601685178092685, + "grad_norm": 29.228807115410213, + "learning_rate": 5e-06, + "loss": 0.2188, + "num_input_tokens_seen": 433335904, + "step": 2507 + }, + { + "epoch": 0.9601685178092685, + "loss": 0.15629726648330688, + "loss_ce": 0.02250819280743599, + "loss_iou": 1.0016660690307617, + "loss_num": 0.1337890625, + "loss_xval": 0.1337890625, + "num_input_tokens_seen": 433335904, + "step": 2507 + }, + { + "epoch": 0.9605515128303332, + "grad_norm": 31.417712113405898, + "learning_rate": 5e-06, + "loss": 0.1762, + "num_input_tokens_seen": 433509104, + "step": 2508 + }, + { + "epoch": 0.9605515128303332, + "loss": 0.16565579175949097, + "loss_ce": 0.020636267960071564, + "loss_iou": 1.0083417892456055, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 433509104, + "step": 2508 + }, + { + "epoch": 0.9609345078513979, + "grad_norm": 23.38483090549218, + "learning_rate": 5e-06, + "loss": 0.1705, + "num_input_tokens_seen": 433682216, + "step": 2509 + }, + { + "epoch": 0.9609345078513979, + "loss": 0.16561777889728546, + "loss_ce": 0.02194102294743061, + "loss_iou": 1.0171895027160645, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 433682216, + "step": 2509 + }, + { + "epoch": 0.9613175028724626, + "grad_norm": 29.654963437029014, + "learning_rate": 5e-06, + "loss": 0.2501, + "num_input_tokens_seen": 433854968, + "step": 2510 + }, + { + "epoch": 0.9613175028724626, + "loss": 0.23729385435581207, + "loss_ce": 0.021046288311481476, + "loss_iou": 1.159039855003357, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 433854968, + "step": 2510 + }, + { + "epoch": 0.9617004978935274, + "grad_norm": 23.730611767262584, + "learning_rate": 5e-06, + "loss": 0.1672, + "num_input_tokens_seen": 434027888, + "step": 2511 + }, + { + "epoch": 0.9617004978935274, + "loss": 0.1595851480960846, + "loss_ce": 0.021157419309020042, + "loss_iou": 1.0851759910583496, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 434027888, + "step": 2511 + }, + { + "epoch": 0.9620834929145922, + "grad_norm": 21.111844972999215, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 434200968, + "step": 2512 + }, + { + "epoch": 0.9620834929145922, + "loss": 0.1313307285308838, + "loss_ce": 0.02110123634338379, + "loss_iou": 1.0032182931900024, + "loss_num": 0.1103515625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 434200968, + "step": 2512 + }, + { + "epoch": 0.9624664879356568, + "grad_norm": 22.055065060453778, + "learning_rate": 5e-06, + "loss": 0.1905, + "num_input_tokens_seen": 434373864, + "step": 2513 + }, + { + "epoch": 0.9624664879356568, + "loss": 0.22645601630210876, + "loss_ce": 0.021560989320278168, + "loss_iou": 1.0219660997390747, + "loss_num": 0.205078125, + "loss_xval": 0.205078125, + "num_input_tokens_seen": 434373864, + "step": 2513 + }, + { + "epoch": 0.9628494829567216, + "grad_norm": 23.41958901805516, + "learning_rate": 5e-06, + "loss": 0.1681, + "num_input_tokens_seen": 434547056, + "step": 2514 + }, + { + "epoch": 0.9628494829567216, + "loss": 0.20366087555885315, + "loss_ce": 0.020433329045772552, + "loss_iou": 1.0604270696640015, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 434547056, + "step": 2514 + }, + { + "epoch": 0.9632324779777863, + "grad_norm": 25.817644090311354, + "learning_rate": 5e-06, + "loss": 0.2051, + "num_input_tokens_seen": 434720016, + "step": 2515 + }, + { + "epoch": 0.9632324779777863, + "loss": 0.19977524876594543, + "loss_ce": 0.02118638902902603, + "loss_iou": 1.0252264738082886, + "loss_num": 0.1787109375, + "loss_xval": 0.1787109375, + "num_input_tokens_seen": 434720016, + "step": 2515 + }, + { + "epoch": 0.963615472998851, + "grad_norm": 21.44654044806338, + "learning_rate": 5e-06, + "loss": 0.2181, + "num_input_tokens_seen": 434892784, + "step": 2516 + }, + { + "epoch": 0.963615472998851, + "loss": 0.17512735724449158, + "loss_ce": 0.021501857787370682, + "loss_iou": 1.0227500200271606, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 434892784, + "step": 2516 + }, + { + "epoch": 0.9639984680199157, + "grad_norm": 29.243444115137073, + "learning_rate": 5e-06, + "loss": 0.2127, + "num_input_tokens_seen": 435065904, + "step": 2517 + }, + { + "epoch": 0.9639984680199157, + "loss": 0.16162803769111633, + "loss_ce": 0.020270608365535736, + "loss_iou": 0.9842989444732666, + "loss_num": 0.1416015625, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 435065904, + "step": 2517 + }, + { + "epoch": 0.9643814630409805, + "grad_norm": 19.00245730141102, + "learning_rate": 5e-06, + "loss": 0.1528, + "num_input_tokens_seen": 435239144, + "step": 2518 + }, + { + "epoch": 0.9643814630409805, + "loss": 0.13501927256584167, + "loss_ce": 0.02204321324825287, + "loss_iou": 1.0068590641021729, + "loss_num": 0.11279296875, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 435239144, + "step": 2518 + }, + { + "epoch": 0.9647644580620451, + "grad_norm": 27.28207204256603, + "learning_rate": 5e-06, + "loss": 0.2288, + "num_input_tokens_seen": 435411984, + "step": 2519 + }, + { + "epoch": 0.9647644580620451, + "loss": 0.22544914484024048, + "loss_ce": 0.021469654515385628, + "loss_iou": 1.056739091873169, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 435411984, + "step": 2519 + }, + { + "epoch": 0.9651474530831099, + "grad_norm": 23.159181319312985, + "learning_rate": 5e-06, + "loss": 0.1664, + "num_input_tokens_seen": 435584632, + "step": 2520 + }, + { + "epoch": 0.9651474530831099, + "loss": 0.1685417741537094, + "loss_ce": 0.020409442484378815, + "loss_iou": 1.0068516731262207, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 435584632, + "step": 2520 + }, + { + "epoch": 0.9655304481041747, + "grad_norm": 21.30143045733993, + "learning_rate": 5e-06, + "loss": 0.1843, + "num_input_tokens_seen": 435757240, + "step": 2521 + }, + { + "epoch": 0.9655304481041747, + "loss": 0.24511337280273438, + "loss_ce": 0.019344326108694077, + "loss_iou": 1.0494751930236816, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 435757240, + "step": 2521 + }, + { + "epoch": 0.9659134431252394, + "grad_norm": 25.198745190813224, + "learning_rate": 5e-06, + "loss": 0.1939, + "num_input_tokens_seen": 435930544, + "step": 2522 + }, + { + "epoch": 0.9659134431252394, + "loss": 0.18986818194389343, + "loss_ce": 0.019885260611772537, + "loss_iou": 1.0129027366638184, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 435930544, + "step": 2522 + }, + { + "epoch": 0.9662964381463041, + "grad_norm": 29.056411759244, + "learning_rate": 5e-06, + "loss": 0.1893, + "num_input_tokens_seen": 436103592, + "step": 2523 + }, + { + "epoch": 0.9662964381463041, + "loss": 0.21499845385551453, + "loss_ce": 0.02170010842382908, + "loss_iou": 1.0486135482788086, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 436103592, + "step": 2523 + }, + { + "epoch": 0.9666794331673688, + "grad_norm": 30.64720355993145, + "learning_rate": 5e-06, + "loss": 0.2418, + "num_input_tokens_seen": 436276536, + "step": 2524 + }, + { + "epoch": 0.9666794331673688, + "loss": 0.2639557719230652, + "loss_ce": 0.02103586122393608, + "loss_iou": 1.0420210361480713, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 436276536, + "step": 2524 + }, + { + "epoch": 0.9670624281884336, + "grad_norm": 25.76626566408051, + "learning_rate": 5e-06, + "loss": 0.216, + "num_input_tokens_seen": 436449248, + "step": 2525 + }, + { + "epoch": 0.9670624281884336, + "loss": 0.24285364151000977, + "loss_ce": 0.021357063204050064, + "loss_iou": 1.0765326023101807, + "loss_num": 0.2216796875, + "loss_xval": 0.2216796875, + "num_input_tokens_seen": 436449248, + "step": 2525 + }, + { + "epoch": 0.9674454232094982, + "grad_norm": 27.500392147356273, + "learning_rate": 5e-06, + "loss": 0.1725, + "num_input_tokens_seen": 436622304, + "step": 2526 + }, + { + "epoch": 0.9674454232094982, + "loss": 0.16017383337020874, + "loss_ce": 0.02189869061112404, + "loss_iou": 1.0175732374191284, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 436622304, + "step": 2526 + }, + { + "epoch": 0.967828418230563, + "grad_norm": 25.47773672192388, + "learning_rate": 5e-06, + "loss": 0.1636, + "num_input_tokens_seen": 436795032, + "step": 2527 + }, + { + "epoch": 0.967828418230563, + "loss": 0.12045948952436447, + "loss_ce": 0.02060597762465477, + "loss_iou": 1.0010665655136108, + "loss_num": 0.099609375, + "loss_xval": 0.099609375, + "num_input_tokens_seen": 436795032, + "step": 2527 + }, + { + "epoch": 0.9682114132516277, + "grad_norm": 22.648055947928828, + "learning_rate": 5e-06, + "loss": 0.1987, + "num_input_tokens_seen": 436968056, + "step": 2528 + }, + { + "epoch": 0.9682114132516277, + "loss": 0.23438477516174316, + "loss_ce": 0.02057860977947712, + "loss_iou": 1.0746012926101685, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 436968056, + "step": 2528 + }, + { + "epoch": 0.9685944082726925, + "grad_norm": 23.70877903619858, + "learning_rate": 5e-06, + "loss": 0.2398, + "num_input_tokens_seen": 437141232, + "step": 2529 + }, + { + "epoch": 0.9685944082726925, + "loss": 0.2138303965330124, + "loss_ce": 0.02138654887676239, + "loss_iou": 1.0094234943389893, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 437141232, + "step": 2529 + }, + { + "epoch": 0.9689774032937571, + "grad_norm": 22.86932490243878, + "learning_rate": 5e-06, + "loss": 0.1436, + "num_input_tokens_seen": 437314296, + "step": 2530 + }, + { + "epoch": 0.9689774032937571, + "loss": 0.1513497680425644, + "loss_ce": 0.018689867109060287, + "loss_iou": 1.0209299325942993, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 437314296, + "step": 2530 + }, + { + "epoch": 0.9693603983148219, + "grad_norm": 26.33529692364274, + "learning_rate": 5e-06, + "loss": 0.2707, + "num_input_tokens_seen": 437486992, + "step": 2531 + }, + { + "epoch": 0.9693603983148219, + "loss": 0.3272448182106018, + "loss_ce": 0.021214541047811508, + "loss_iou": 1.067396640777588, + "loss_num": 0.306640625, + "loss_xval": 0.306640625, + "num_input_tokens_seen": 437486992, + "step": 2531 + }, + { + "epoch": 0.9697433933358867, + "grad_norm": 27.425904108762616, + "learning_rate": 5e-06, + "loss": 0.18, + "num_input_tokens_seen": 437659840, + "step": 2532 + }, + { + "epoch": 0.9697433933358867, + "loss": 0.17225991189479828, + "loss_ce": 0.021686187013983727, + "loss_iou": 1.0260432958602905, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 437659840, + "step": 2532 + }, + { + "epoch": 0.9701263883569513, + "grad_norm": 21.04805762064608, + "learning_rate": 5e-06, + "loss": 0.1602, + "num_input_tokens_seen": 437832872, + "step": 2533 + }, + { + "epoch": 0.9701263883569513, + "loss": 0.1586833894252777, + "loss_ce": 0.020072538405656815, + "loss_iou": 1.0158920288085938, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 437832872, + "step": 2533 + }, + { + "epoch": 0.9705093833780161, + "grad_norm": 54.9970946542518, + "learning_rate": 5e-06, + "loss": 0.2388, + "num_input_tokens_seen": 438005496, + "step": 2534 + }, + { + "epoch": 0.9705093833780161, + "loss": 0.22775228321552277, + "loss_ce": 0.020507417619228363, + "loss_iou": 1.063530445098877, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 438005496, + "step": 2534 + }, + { + "epoch": 0.9708923783990808, + "grad_norm": 27.47018740555721, + "learning_rate": 5e-06, + "loss": 0.1288, + "num_input_tokens_seen": 438178304, + "step": 2535 + }, + { + "epoch": 0.9708923783990808, + "loss": 0.12139128148555756, + "loss_ce": 0.021446216851472855, + "loss_iou": 1.0063189268112183, + "loss_num": 0.10009765625, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 438178304, + "step": 2535 + }, + { + "epoch": 0.9712753734201456, + "grad_norm": 24.763260067649266, + "learning_rate": 5e-06, + "loss": 0.151, + "num_input_tokens_seen": 438351224, + "step": 2536 + }, + { + "epoch": 0.9712753734201456, + "loss": 0.1290130615234375, + "loss_ce": 0.019638072699308395, + "loss_iou": 1.0127249956130981, + "loss_num": 0.109375, + "loss_xval": 0.109375, + "num_input_tokens_seen": 438351224, + "step": 2536 + }, + { + "epoch": 0.9716583684412102, + "grad_norm": 20.584487121470104, + "learning_rate": 5e-06, + "loss": 0.1623, + "num_input_tokens_seen": 438524552, + "step": 2537 + }, + { + "epoch": 0.9716583684412102, + "loss": 0.17038914561271667, + "loss_ce": 0.020730936899781227, + "loss_iou": 1.0084894895553589, + "loss_num": 0.1494140625, + "loss_xval": 0.1494140625, + "num_input_tokens_seen": 438524552, + "step": 2537 + }, + { + "epoch": 0.972041363462275, + "grad_norm": 22.849692926258793, + "learning_rate": 5e-06, + "loss": 0.1942, + "num_input_tokens_seen": 438698088, + "step": 2538 + }, + { + "epoch": 0.972041363462275, + "loss": 0.19734948873519897, + "loss_ce": 0.02098839357495308, + "loss_iou": 1.0326783657073975, + "loss_num": 0.1767578125, + "loss_xval": 0.1767578125, + "num_input_tokens_seen": 438698088, + "step": 2538 + }, + { + "epoch": 0.9724243584833397, + "grad_norm": 42.844511246710226, + "learning_rate": 5e-06, + "loss": 0.2173, + "num_input_tokens_seen": 438870816, + "step": 2539 + }, + { + "epoch": 0.9724243584833397, + "loss": 0.27259647846221924, + "loss_ce": 0.01948370225727558, + "loss_iou": 1.029143214225769, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 438870816, + "step": 2539 + }, + { + "epoch": 0.9728073535044044, + "grad_norm": 21.005623578912854, + "learning_rate": 5e-06, + "loss": 0.1626, + "num_input_tokens_seen": 439043440, + "step": 2540 + }, + { + "epoch": 0.9728073535044044, + "loss": 0.1620333343744278, + "loss_ce": 0.019912976771593094, + "loss_iou": 1.0526223182678223, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 439043440, + "step": 2540 + }, + { + "epoch": 0.9731903485254692, + "grad_norm": 17.165563583152803, + "learning_rate": 5e-06, + "loss": 0.1666, + "num_input_tokens_seen": 439216064, + "step": 2541 + }, + { + "epoch": 0.9731903485254692, + "loss": 0.18346142768859863, + "loss_ce": 0.020924817770719528, + "loss_iou": 1.0263832807540894, + "loss_num": 0.162109375, + "loss_xval": 0.162109375, + "num_input_tokens_seen": 439216064, + "step": 2541 + }, + { + "epoch": 0.9735733435465339, + "grad_norm": 13.940829930362852, + "learning_rate": 5e-06, + "loss": 0.1615, + "num_input_tokens_seen": 439389288, + "step": 2542 + }, + { + "epoch": 0.9735733435465339, + "loss": 0.15158751606941223, + "loss_ce": 0.020789172500371933, + "loss_iou": 1.0165517330169678, + "loss_num": 0.130859375, + "loss_xval": 0.130859375, + "num_input_tokens_seen": 439389288, + "step": 2542 + }, + { + "epoch": 0.9739563385675987, + "grad_norm": 28.670148954166926, + "learning_rate": 5e-06, + "loss": 0.1747, + "num_input_tokens_seen": 439562032, + "step": 2543 + }, + { + "epoch": 0.9739563385675987, + "loss": 0.18583689630031586, + "loss_ce": 0.02183542773127556, + "loss_iou": 1.0023903846740723, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 439562032, + "step": 2543 + }, + { + "epoch": 0.9743393335886633, + "grad_norm": 25.67718614600306, + "learning_rate": 5e-06, + "loss": 0.1972, + "num_input_tokens_seen": 439735200, + "step": 2544 + }, + { + "epoch": 0.9743393335886633, + "loss": 0.2517070770263672, + "loss_ce": 0.02160453237593174, + "loss_iou": 1.0602805614471436, + "loss_num": 0.23046875, + "loss_xval": 0.23046875, + "num_input_tokens_seen": 439735200, + "step": 2544 + }, + { + "epoch": 0.9747223286097281, + "grad_norm": 35.183277933179696, + "learning_rate": 5e-06, + "loss": 0.1793, + "num_input_tokens_seen": 439908008, + "step": 2545 + }, + { + "epoch": 0.9747223286097281, + "loss": 0.19800016283988953, + "loss_ce": 0.02038784883916378, + "loss_iou": 1.0084583759307861, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 439908008, + "step": 2545 + }, + { + "epoch": 0.9751053236307928, + "grad_norm": 25.51645935562071, + "learning_rate": 5e-06, + "loss": 0.2091, + "num_input_tokens_seen": 440080880, + "step": 2546 + }, + { + "epoch": 0.9751053236307928, + "loss": 0.23107486963272095, + "loss_ce": 0.02038150653243065, + "loss_iou": 1.071962594985962, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 440080880, + "step": 2546 + }, + { + "epoch": 0.9754883186518575, + "grad_norm": 27.49943928004506, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 440253680, + "step": 2547 + }, + { + "epoch": 0.9754883186518575, + "loss": 0.16668814420700073, + "loss_ce": 0.021180346608161926, + "loss_iou": 1.0026423931121826, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 440253680, + "step": 2547 + }, + { + "epoch": 0.9758713136729222, + "grad_norm": 31.990871381822163, + "learning_rate": 5e-06, + "loss": 0.2257, + "num_input_tokens_seen": 440426568, + "step": 2548 + }, + { + "epoch": 0.9758713136729222, + "loss": 0.19652478396892548, + "loss_ce": 0.020896129310131073, + "loss_iou": 1.0123958587646484, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 440426568, + "step": 2548 + }, + { + "epoch": 0.976254308693987, + "grad_norm": 29.020982825762303, + "learning_rate": 5e-06, + "loss": 0.2053, + "num_input_tokens_seen": 440599144, + "step": 2549 + }, + { + "epoch": 0.976254308693987, + "loss": 0.20318135619163513, + "loss_ce": 0.01955709606409073, + "loss_iou": 1.0952755212783813, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 440599144, + "step": 2549 + }, + { + "epoch": 0.9766373037150518, + "grad_norm": 24.228724393713808, + "learning_rate": 5e-06, + "loss": 0.1567, + "num_input_tokens_seen": 440772152, + "step": 2550 + }, + { + "epoch": 0.9766373037150518, + "loss": 0.17433422803878784, + "loss_ce": 0.019182849675416946, + "loss_iou": 1.0140941143035889, + "loss_num": 0.1552734375, + "loss_xval": 0.1552734375, + "num_input_tokens_seen": 440772152, + "step": 2550 + }, + { + "epoch": 0.9770202987361164, + "grad_norm": 25.817141077281935, + "learning_rate": 5e-06, + "loss": 0.2186, + "num_input_tokens_seen": 440945232, + "step": 2551 + }, + { + "epoch": 0.9770202987361164, + "loss": 0.1617930382490158, + "loss_ce": 0.02104596421122551, + "loss_iou": 1.002140998840332, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 440945232, + "step": 2551 + }, + { + "epoch": 0.9774032937571812, + "grad_norm": 17.31820511238446, + "learning_rate": 5e-06, + "loss": 0.2124, + "num_input_tokens_seen": 441116728, + "step": 2552 + }, + { + "epoch": 0.9774032937571812, + "loss": 0.18396136164665222, + "loss_ce": 0.020295601338148117, + "loss_iou": 1.076249361038208, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 441116728, + "step": 2552 + }, + { + "epoch": 0.9777862887782459, + "grad_norm": 42.41528006791648, + "learning_rate": 5e-06, + "loss": 0.2129, + "num_input_tokens_seen": 441289624, + "step": 2553 + }, + { + "epoch": 0.9777862887782459, + "loss": 0.23856016993522644, + "loss_ce": 0.021702256053686142, + "loss_iou": 1.062626600265503, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 441289624, + "step": 2553 + }, + { + "epoch": 0.9781692837993106, + "grad_norm": 31.993248906786572, + "learning_rate": 5e-06, + "loss": 0.2483, + "num_input_tokens_seen": 441462816, + "step": 2554 + }, + { + "epoch": 0.9781692837993106, + "loss": 0.26399481296539307, + "loss_ce": 0.021196939051151276, + "loss_iou": 1.0644161701202393, + "loss_num": 0.2431640625, + "loss_xval": 0.2431640625, + "num_input_tokens_seen": 441462816, + "step": 2554 + }, + { + "epoch": 0.9785522788203753, + "grad_norm": 24.620900857402145, + "learning_rate": 5e-06, + "loss": 0.2296, + "num_input_tokens_seen": 441635728, + "step": 2555 + }, + { + "epoch": 0.9785522788203753, + "loss": 0.20849530398845673, + "loss_ce": 0.021178405731916428, + "loss_iou": 1.034769892692566, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 441635728, + "step": 2555 + }, + { + "epoch": 0.9789352738414401, + "grad_norm": 33.034510362091545, + "learning_rate": 5e-06, + "loss": 0.18, + "num_input_tokens_seen": 441808936, + "step": 2556 + }, + { + "epoch": 0.9789352738414401, + "loss": 0.1863858699798584, + "loss_ce": 0.022323377430438995, + "loss_iou": 1.0175793170928955, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 441808936, + "step": 2556 + }, + { + "epoch": 0.9793182688625048, + "grad_norm": 41.240118072591805, + "learning_rate": 5e-06, + "loss": 0.2394, + "num_input_tokens_seen": 441981856, + "step": 2557 + }, + { + "epoch": 0.9793182688625048, + "loss": 0.28613021969795227, + "loss_ce": 0.020566251128911972, + "loss_iou": 1.1313062906265259, + "loss_num": 0.265625, + "loss_xval": 0.265625, + "num_input_tokens_seen": 441981856, + "step": 2557 + }, + { + "epoch": 0.9797012638835695, + "grad_norm": 22.57894844857494, + "learning_rate": 5e-06, + "loss": 0.1881, + "num_input_tokens_seen": 442154616, + "step": 2558 + }, + { + "epoch": 0.9797012638835695, + "loss": 0.2461327314376831, + "loss_ce": 0.019692298024892807, + "loss_iou": 1.016852617263794, + "loss_num": 0.2265625, + "loss_xval": 0.2265625, + "num_input_tokens_seen": 442154616, + "step": 2558 + }, + { + "epoch": 0.9800842589046342, + "grad_norm": 28.10806253106997, + "learning_rate": 5e-06, + "loss": 0.2214, + "num_input_tokens_seen": 442327432, + "step": 2559 + }, + { + "epoch": 0.9800842589046342, + "loss": 0.22265824675559998, + "loss_ce": 0.02301226556301117, + "loss_iou": 1.0936853885650635, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 442327432, + "step": 2559 + }, + { + "epoch": 0.980467253925699, + "grad_norm": 25.40437390070875, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 442500600, + "step": 2560 + }, + { + "epoch": 0.980467253925699, + "loss": 0.1724667251110077, + "loss_ce": 0.020977461710572243, + "loss_iou": 1.0222909450531006, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 442500600, + "step": 2560 + }, + { + "epoch": 0.9808502489467636, + "grad_norm": 28.458831937750002, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 442673584, + "step": 2561 + }, + { + "epoch": 0.9808502489467636, + "loss": 0.20998351275920868, + "loss_ce": 0.02034728042781353, + "loss_iou": 1.0627766847610474, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 442673584, + "step": 2561 + }, + { + "epoch": 0.9812332439678284, + "grad_norm": 25.891144878268932, + "learning_rate": 5e-06, + "loss": 0.1874, + "num_input_tokens_seen": 442846120, + "step": 2562 + }, + { + "epoch": 0.9812332439678284, + "loss": 0.1612968146800995, + "loss_ce": 0.02061077207326889, + "loss_iou": 1.0302972793579102, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 442846120, + "step": 2562 + }, + { + "epoch": 0.9816162389888932, + "grad_norm": 27.943424729093966, + "learning_rate": 5e-06, + "loss": 0.1868, + "num_input_tokens_seen": 443019264, + "step": 2563 + }, + { + "epoch": 0.9816162389888932, + "loss": 0.18382543325424194, + "loss_ce": 0.020617440342903137, + "loss_iou": 1.006981372833252, + "loss_num": 0.1630859375, + "loss_xval": 0.1630859375, + "num_input_tokens_seen": 443019264, + "step": 2563 + }, + { + "epoch": 0.9819992340099579, + "grad_norm": 24.980205216177644, + "learning_rate": 5e-06, + "loss": 0.2163, + "num_input_tokens_seen": 443192032, + "step": 2564 + }, + { + "epoch": 0.9819992340099579, + "loss": 0.16928371787071228, + "loss_ce": 0.021151382476091385, + "loss_iou": 1.0022011995315552, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 443192032, + "step": 2564 + }, + { + "epoch": 0.9823822290310226, + "grad_norm": 33.59013075971121, + "learning_rate": 5e-06, + "loss": 0.138, + "num_input_tokens_seen": 443365096, + "step": 2565 + }, + { + "epoch": 0.9823822290310226, + "loss": 0.12297128140926361, + "loss_ce": 0.020798426121473312, + "loss_iou": 1.00077486038208, + "loss_num": 0.10205078125, + "loss_xval": 0.10205078125, + "num_input_tokens_seen": 443365096, + "step": 2565 + }, + { + "epoch": 0.9827652240520873, + "grad_norm": 41.872255241259964, + "learning_rate": 5e-06, + "loss": 0.2303, + "num_input_tokens_seen": 443538264, + "step": 2566 + }, + { + "epoch": 0.9827652240520873, + "loss": 0.2431665062904358, + "loss_ce": 0.02093750610947609, + "loss_iou": 1.0195742845535278, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 443538264, + "step": 2566 + }, + { + "epoch": 0.9831482190731521, + "grad_norm": 39.45379360719268, + "learning_rate": 5e-06, + "loss": 0.1935, + "num_input_tokens_seen": 443711328, + "step": 2567 + }, + { + "epoch": 0.9831482190731521, + "loss": 0.22139954566955566, + "loss_ce": 0.021143201738595963, + "loss_iou": 1.073280692100525, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 443711328, + "step": 2567 + }, + { + "epoch": 0.9835312140942167, + "grad_norm": 23.73753841797015, + "learning_rate": 5e-06, + "loss": 0.1913, + "num_input_tokens_seen": 443884392, + "step": 2568 + }, + { + "epoch": 0.9835312140942167, + "loss": 0.19663631916046143, + "loss_ce": 0.02070249244570732, + "loss_iou": 1.0560352802276611, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 443884392, + "step": 2568 + }, + { + "epoch": 0.9839142091152815, + "grad_norm": 24.083009439187595, + "learning_rate": 5e-06, + "loss": 0.2446, + "num_input_tokens_seen": 444057472, + "step": 2569 + }, + { + "epoch": 0.9839142091152815, + "loss": 0.2394472062587738, + "loss_ce": 0.019598571583628654, + "loss_iou": 1.0479086637496948, + "loss_num": 0.2197265625, + "loss_xval": 0.2197265625, + "num_input_tokens_seen": 444057472, + "step": 2569 + }, + { + "epoch": 0.9842972041363462, + "grad_norm": 28.394723621777164, + "learning_rate": 5e-06, + "loss": 0.1834, + "num_input_tokens_seen": 444230328, + "step": 2570 + }, + { + "epoch": 0.9842972041363462, + "loss": 0.14444415271282196, + "loss_ce": 0.02176349051296711, + "loss_iou": 1.0061496496200562, + "loss_num": 0.12255859375, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 444230328, + "step": 2570 + }, + { + "epoch": 0.984680199157411, + "grad_norm": 34.516951239161614, + "learning_rate": 5e-06, + "loss": 0.2691, + "num_input_tokens_seen": 444403144, + "step": 2571 + }, + { + "epoch": 0.984680199157411, + "loss": 0.2842923104763031, + "loss_ce": 0.021047698333859444, + "loss_iou": 1.0349102020263672, + "loss_num": 0.263671875, + "loss_xval": 0.263671875, + "num_input_tokens_seen": 444403144, + "step": 2571 + }, + { + "epoch": 0.9850631941784757, + "grad_norm": 22.553912274036357, + "learning_rate": 5e-06, + "loss": 0.1653, + "num_input_tokens_seen": 444576072, + "step": 2572 + }, + { + "epoch": 0.9850631941784757, + "loss": 0.1860370934009552, + "loss_ce": 0.022279776632785797, + "loss_iou": 1.0185402631759644, + "loss_num": 0.1640625, + "loss_xval": 0.1640625, + "num_input_tokens_seen": 444576072, + "step": 2572 + }, + { + "epoch": 0.9854461891995404, + "grad_norm": 28.51811051258652, + "learning_rate": 5e-06, + "loss": 0.18, + "num_input_tokens_seen": 444748992, + "step": 2573 + }, + { + "epoch": 0.9854461891995404, + "loss": 0.18080393970012665, + "loss_ce": 0.019671138375997543, + "loss_iou": 1.0438616275787354, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 444748992, + "step": 2573 + }, + { + "epoch": 0.9858291842206052, + "grad_norm": 21.75087601397511, + "learning_rate": 5e-06, + "loss": 0.2008, + "num_input_tokens_seen": 444922000, + "step": 2574 + }, + { + "epoch": 0.9858291842206052, + "loss": 0.21475309133529663, + "loss_ce": 0.02060026302933693, + "loss_iou": 1.039602518081665, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 444922000, + "step": 2574 + }, + { + "epoch": 0.9862121792416698, + "grad_norm": 31.05637531059788, + "learning_rate": 5e-06, + "loss": 0.2755, + "num_input_tokens_seen": 445095000, + "step": 2575 + }, + { + "epoch": 0.9862121792416698, + "loss": 0.21724437177181244, + "loss_ce": 0.01833079382777214, + "loss_iou": 4.5821070671081543e-07, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 445095000, + "step": 2575 + }, + { + "epoch": 0.9865951742627346, + "grad_norm": 31.039617786281315, + "learning_rate": 5e-06, + "loss": 0.1922, + "num_input_tokens_seen": 445267944, + "step": 2576 + }, + { + "epoch": 0.9865951742627346, + "loss": 0.20234927535057068, + "loss_ce": 0.018694482743740082, + "loss_iou": 1.0001885890960693, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 445267944, + "step": 2576 + }, + { + "epoch": 0.9869781692837993, + "grad_norm": 27.339199641718203, + "learning_rate": 5e-06, + "loss": 0.2087, + "num_input_tokens_seen": 445440944, + "step": 2577 + }, + { + "epoch": 0.9869781692837993, + "loss": 0.17758116126060486, + "loss_ce": 0.02041563019156456, + "loss_iou": 1.0020227432250977, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 445440944, + "step": 2577 + }, + { + "epoch": 0.9873611643048641, + "grad_norm": 25.9871456221162, + "learning_rate": 5e-06, + "loss": 0.1379, + "num_input_tokens_seen": 445614016, + "step": 2578 + }, + { + "epoch": 0.9873611643048641, + "loss": 0.13205954432487488, + "loss_ce": 0.02195211499929428, + "loss_iou": 1.0053297281265259, + "loss_num": 0.1103515625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 445614016, + "step": 2578 + }, + { + "epoch": 0.9877441593259287, + "grad_norm": 29.412608075507368, + "learning_rate": 5e-06, + "loss": 0.2376, + "num_input_tokens_seen": 445786792, + "step": 2579 + }, + { + "epoch": 0.9877441593259287, + "loss": 0.1717311441898346, + "loss_ce": 0.021157432347536087, + "loss_iou": 1.031954050064087, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 445786792, + "step": 2579 + }, + { + "epoch": 0.9881271543469935, + "grad_norm": 24.35383108838625, + "learning_rate": 5e-06, + "loss": 0.1826, + "num_input_tokens_seen": 445959464, + "step": 2580 + }, + { + "epoch": 0.9881271543469935, + "loss": 0.1787584125995636, + "loss_ce": 0.020250104367733, + "loss_iou": 1.003459095954895, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 445959464, + "step": 2580 + }, + { + "epoch": 0.9885101493680583, + "grad_norm": 26.71496003431928, + "learning_rate": 5e-06, + "loss": 0.2517, + "num_input_tokens_seen": 446132280, + "step": 2581 + }, + { + "epoch": 0.9885101493680583, + "loss": 0.2053559273481369, + "loss_ce": 0.020541464909911156, + "loss_iou": 1.047257900238037, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 446132280, + "step": 2581 + }, + { + "epoch": 0.9888931443891229, + "grad_norm": 22.444135524638458, + "learning_rate": 5e-06, + "loss": 0.1933, + "num_input_tokens_seen": 446305624, + "step": 2582 + }, + { + "epoch": 0.9888931443891229, + "loss": 0.13972869515419006, + "loss_ce": 0.01991669088602066, + "loss_iou": 1.0086948871612549, + "loss_num": 0.11962890625, + "loss_xval": 0.11962890625, + "num_input_tokens_seen": 446305624, + "step": 2582 + }, + { + "epoch": 0.9892761394101877, + "grad_norm": 45.64098443506414, + "learning_rate": 5e-06, + "loss": 0.2166, + "num_input_tokens_seen": 446478768, + "step": 2583 + }, + { + "epoch": 0.9892761394101877, + "loss": 0.17628300189971924, + "loss_ce": 0.02168095111846924, + "loss_iou": 1.0157346725463867, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 446478768, + "step": 2583 + }, + { + "epoch": 0.9896591344312524, + "grad_norm": 39.25585080550029, + "learning_rate": 5e-06, + "loss": 0.2946, + "num_input_tokens_seen": 446652000, + "step": 2584 + }, + { + "epoch": 0.9896591344312524, + "loss": 0.27448806166648865, + "loss_ce": 0.022565443068742752, + "loss_iou": 1.0934661626815796, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 446652000, + "step": 2584 + }, + { + "epoch": 0.9900421294523171, + "grad_norm": 43.83002101259563, + "learning_rate": 5e-06, + "loss": 0.2272, + "num_input_tokens_seen": 446824616, + "step": 2585 + }, + { + "epoch": 0.9900421294523171, + "loss": 0.24637830257415771, + "loss_ce": 0.02048720419406891, + "loss_iou": 0.9968891739845276, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 446824616, + "step": 2585 + }, + { + "epoch": 0.9904251244733818, + "grad_norm": 20.64342158416282, + "learning_rate": 5e-06, + "loss": 0.2007, + "num_input_tokens_seen": 446997200, + "step": 2586 + }, + { + "epoch": 0.9904251244733818, + "loss": 0.2109462469816208, + "loss_ce": 0.020943813025951385, + "loss_iou": 1.0423662662506104, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 446997200, + "step": 2586 + }, + { + "epoch": 0.9908081194944466, + "grad_norm": 56.14326889968074, + "learning_rate": 5e-06, + "loss": 0.2198, + "num_input_tokens_seen": 447170112, + "step": 2587 + }, + { + "epoch": 0.9908081194944466, + "loss": 0.23894494771957397, + "loss_ce": 0.02111048437654972, + "loss_iou": 1.1187589168548584, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 447170112, + "step": 2587 + }, + { + "epoch": 0.9911911145155113, + "grad_norm": 35.00610524319135, + "learning_rate": 5e-06, + "loss": 0.2362, + "num_input_tokens_seen": 447343136, + "step": 2588 + }, + { + "epoch": 0.9911911145155113, + "loss": 0.21689264476299286, + "loss_ce": 0.022190498188138008, + "loss_iou": 1.0384045839309692, + "loss_num": 0.1943359375, + "loss_xval": 0.1943359375, + "num_input_tokens_seen": 447343136, + "step": 2588 + }, + { + "epoch": 0.991574109536576, + "grad_norm": 28.2697251072908, + "learning_rate": 5e-06, + "loss": 0.1733, + "num_input_tokens_seen": 447516080, + "step": 2589 + }, + { + "epoch": 0.991574109536576, + "loss": 0.15879778563976288, + "loss_ce": 0.02085833251476288, + "loss_iou": 1.036719799041748, + "loss_num": 0.1376953125, + "loss_xval": 0.1376953125, + "num_input_tokens_seen": 447516080, + "step": 2589 + }, + { + "epoch": 0.9919571045576407, + "grad_norm": 39.47081502445282, + "learning_rate": 5e-06, + "loss": 0.2003, + "num_input_tokens_seen": 447689096, + "step": 2590 + }, + { + "epoch": 0.9919571045576407, + "loss": 0.17974254488945007, + "loss_ce": 0.022943221032619476, + "loss_iou": 1.0426125526428223, + "loss_num": 0.1572265625, + "loss_xval": 0.1572265625, + "num_input_tokens_seen": 447689096, + "step": 2590 + }, + { + "epoch": 0.9923400995787055, + "grad_norm": 63.24975708634823, + "learning_rate": 5e-06, + "loss": 0.2663, + "num_input_tokens_seen": 447862232, + "step": 2591 + }, + { + "epoch": 0.9923400995787055, + "loss": 0.33286064863204956, + "loss_ce": 0.02280203439295292, + "loss_iou": 1.0508623123168945, + "loss_num": 0.310546875, + "loss_xval": 0.310546875, + "num_input_tokens_seen": 447862232, + "step": 2591 + }, + { + "epoch": 0.9927230945997702, + "grad_norm": 24.562342961105927, + "learning_rate": 5e-06, + "loss": 0.192, + "num_input_tokens_seen": 448035240, + "step": 2592 + }, + { + "epoch": 0.9927230945997702, + "loss": 0.2081054151058197, + "loss_ce": 0.021124217659235, + "loss_iou": 1.079519271850586, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 448035240, + "step": 2592 + }, + { + "epoch": 0.9931060896208349, + "grad_norm": 39.36316765832567, + "learning_rate": 5e-06, + "loss": 0.2356, + "num_input_tokens_seen": 448207928, + "step": 2593 + }, + { + "epoch": 0.9931060896208349, + "loss": 0.223251074552536, + "loss_ce": 0.019576773047447205, + "loss_iou": 1.0996551513671875, + "loss_num": 0.2041015625, + "loss_xval": 0.2041015625, + "num_input_tokens_seen": 448207928, + "step": 2593 + }, + { + "epoch": 0.9934890846418997, + "grad_norm": 34.030114413781845, + "learning_rate": 5e-06, + "loss": 0.2907, + "num_input_tokens_seen": 448380904, + "step": 2594 + }, + { + "epoch": 0.9934890846418997, + "loss": 0.31716522574424744, + "loss_ce": 0.021724559366703033, + "loss_iou": 1.1569843292236328, + "loss_num": 0.294921875, + "loss_xval": 0.294921875, + "num_input_tokens_seen": 448380904, + "step": 2594 + }, + { + "epoch": 0.9938720796629644, + "grad_norm": 45.04046472214304, + "learning_rate": 5e-06, + "loss": 0.2346, + "num_input_tokens_seen": 448553768, + "step": 2595 + }, + { + "epoch": 0.9938720796629644, + "loss": 0.21021856367588043, + "loss_ce": 0.01966679096221924, + "loss_iou": 1.0189051628112793, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 448553768, + "step": 2595 + }, + { + "epoch": 0.9942550746840291, + "grad_norm": 25.264268495162163, + "learning_rate": 5e-06, + "loss": 0.2099, + "num_input_tokens_seen": 448726416, + "step": 2596 + }, + { + "epoch": 0.9942550746840291, + "loss": 0.21929392218589783, + "loss_ce": 0.01989205926656723, + "loss_iou": 0.8835408687591553, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 448726416, + "step": 2596 + }, + { + "epoch": 0.9946380697050938, + "grad_norm": 20.300640982375388, + "learning_rate": 5e-06, + "loss": 0.1662, + "num_input_tokens_seen": 448899352, + "step": 2597 + }, + { + "epoch": 0.9946380697050938, + "loss": 0.16648760437965393, + "loss_ce": 0.01927081122994423, + "loss_iou": 1.029667854309082, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 448899352, + "step": 2597 + }, + { + "epoch": 0.9950210647261586, + "grad_norm": 43.651510203906085, + "learning_rate": 5e-06, + "loss": 0.2064, + "num_input_tokens_seen": 449072120, + "step": 2598 + }, + { + "epoch": 0.9950210647261586, + "loss": 0.23701754212379456, + "loss_ce": 0.020403757691383362, + "loss_iou": 1.1127095222473145, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 449072120, + "step": 2598 + }, + { + "epoch": 0.9954040597472232, + "grad_norm": 22.81379268659071, + "learning_rate": 5e-06, + "loss": 0.2165, + "num_input_tokens_seen": 449244872, + "step": 2599 + }, + { + "epoch": 0.9954040597472232, + "loss": 0.21868206560611725, + "loss_ce": 0.019890563562512398, + "loss_iou": 0.9505128860473633, + "loss_num": 0.19921875, + "loss_xval": 0.19921875, + "num_input_tokens_seen": 449244872, + "step": 2599 + }, + { + "epoch": 0.995787054768288, + "grad_norm": 29.275378725887922, + "learning_rate": 5e-06, + "loss": 0.194, + "num_input_tokens_seen": 449417752, + "step": 2600 + }, + { + "epoch": 0.995787054768288, + "loss": 0.15811039507389069, + "loss_ce": 0.021635789424180984, + "loss_iou": 1.0020790100097656, + "loss_num": 0.13671875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 449417752, + "step": 2600 + }, + { + "epoch": 0.9961700497893528, + "grad_norm": 23.962375684845348, + "learning_rate": 5e-06, + "loss": 0.1902, + "num_input_tokens_seen": 449590616, + "step": 2601 + }, + { + "epoch": 0.9961700497893528, + "loss": 0.14931534230709076, + "loss_ce": 0.02065323479473591, + "loss_iou": 1.0113236904144287, + "loss_num": 0.12890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 449590616, + "step": 2601 + }, + { + "epoch": 0.9965530448104175, + "grad_norm": 21.851142549349564, + "learning_rate": 5e-06, + "loss": 0.1965, + "num_input_tokens_seen": 449763296, + "step": 2602 + }, + { + "epoch": 0.9965530448104175, + "loss": 0.17328669130802155, + "loss_ce": 0.019142404198646545, + "loss_iou": 1.076406717300415, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 449763296, + "step": 2602 + }, + { + "epoch": 0.9969360398314822, + "grad_norm": 20.320872643821353, + "learning_rate": 5e-06, + "loss": 0.2369, + "num_input_tokens_seen": 449936216, + "step": 2603 + }, + { + "epoch": 0.9969360398314822, + "loss": 0.2127036452293396, + "loss_ce": 0.022579146549105644, + "loss_iou": 1.0166010856628418, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 449936216, + "step": 2603 + }, + { + "epoch": 0.9973190348525469, + "grad_norm": 21.00771430933762, + "learning_rate": 5e-06, + "loss": 0.1887, + "num_input_tokens_seen": 450109200, + "step": 2604 + }, + { + "epoch": 0.9973190348525469, + "loss": 0.1232326477766037, + "loss_ce": 0.023073958232998848, + "loss_iou": 1.0016324520111084, + "loss_num": 0.10009765625, + "loss_xval": 0.10009765625, + "num_input_tokens_seen": 450109200, + "step": 2604 + }, + { + "epoch": 0.9977020298736117, + "grad_norm": 26.40707134566965, + "learning_rate": 5e-06, + "loss": 0.1996, + "num_input_tokens_seen": 450282088, + "step": 2605 + }, + { + "epoch": 0.9977020298736117, + "loss": 0.16095593571662903, + "loss_ce": 0.01874401792883873, + "loss_iou": 1.0023956298828125, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 450282088, + "step": 2605 + }, + { + "epoch": 0.9980850248946763, + "grad_norm": 22.127976472193634, + "learning_rate": 5e-06, + "loss": 0.2074, + "num_input_tokens_seen": 450455496, + "step": 2606 + }, + { + "epoch": 0.9980850248946763, + "loss": 0.23797369003295898, + "loss_ce": 0.020688533782958984, + "loss_iou": 1.4147803783416748, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 450455496, + "step": 2606 + }, + { + "epoch": 0.9984680199157411, + "grad_norm": 24.30602607449792, + "learning_rate": 5e-06, + "loss": 0.1747, + "num_input_tokens_seen": 450628328, + "step": 2607 + }, + { + "epoch": 0.9984680199157411, + "loss": 0.20958831906318665, + "loss_ce": 0.020501410588622093, + "loss_iou": 1.335038185119629, + "loss_num": 0.189453125, + "loss_xval": 0.189453125, + "num_input_tokens_seen": 450628328, + "step": 2607 + }, + { + "epoch": 0.9988510149368058, + "grad_norm": 24.772227090561294, + "learning_rate": 5e-06, + "loss": 0.2028, + "num_input_tokens_seen": 450801640, + "step": 2608 + }, + { + "epoch": 0.9988510149368058, + "loss": 0.2230081856250763, + "loss_ce": 0.021714244037866592, + "loss_iou": 1.085056185722351, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 450801640, + "step": 2608 + }, + { + "epoch": 0.9992340099578706, + "grad_norm": 31.85535411790147, + "learning_rate": 5e-06, + "loss": 0.191, + "num_input_tokens_seen": 450974184, + "step": 2609 + }, + { + "epoch": 0.9992340099578706, + "loss": 0.2219005823135376, + "loss_ce": 0.019446972757577896, + "loss_iou": 1.210686445236206, + "loss_num": 0.2021484375, + "loss_xval": 0.2021484375, + "num_input_tokens_seen": 450974184, + "step": 2609 + }, + { + "epoch": 0.9996170049789352, + "grad_norm": 27.50167474855051, + "learning_rate": 5e-06, + "loss": 0.2368, + "num_input_tokens_seen": 451146848, + "step": 2610 + }, + { + "epoch": 0.9996170049789352, + "loss": 0.2298506498336792, + "loss_ce": 0.020255915820598602, + "loss_iou": 1.0357897281646729, + "loss_num": 0.2099609375, + "loss_xval": 0.2099609375, + "num_input_tokens_seen": 451146848, + "step": 2610 + }, + { + "epoch": 1.0, + "grad_norm": 23.82388520113061, + "learning_rate": 5e-06, + "loss": 0.1789, + "num_input_tokens_seen": 451319744, + "step": 2611 + }, + { + "epoch": 1.0, + "loss": 0.19941318035125732, + "loss_ce": 0.021495692431926727, + "loss_iou": 1.0034189224243164, + "loss_num": 0.177734375, + "loss_xval": 0.177734375, + "num_input_tokens_seen": 451319744, + "step": 2611 + }, + { + "epoch": 1.0003829950210648, + "grad_norm": 20.103405756766083, + "learning_rate": 5e-06, + "loss": 0.1692, + "num_input_tokens_seen": 451492768, + "step": 2612 + }, + { + "epoch": 1.0003829950210648, + "loss": 0.16814805567264557, + "loss_ce": 0.019527453929185867, + "loss_iou": 1.0039558410644531, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 451492768, + "step": 2612 + }, + { + "epoch": 1.0007659900421295, + "grad_norm": 30.33221650252089, + "learning_rate": 5e-06, + "loss": 0.1691, + "num_input_tokens_seen": 451665952, + "step": 2613 + }, + { + "epoch": 1.0007659900421295, + "loss": 0.16631048917770386, + "loss_ce": 0.022145451977849007, + "loss_iou": 1.0690546035766602, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 451665952, + "step": 2613 + }, + { + "epoch": 1.0011489850631943, + "grad_norm": 29.113994599823556, + "learning_rate": 5e-06, + "loss": 0.1324, + "num_input_tokens_seen": 451838648, + "step": 2614 + }, + { + "epoch": 1.0011489850631943, + "loss": 0.14101362228393555, + "loss_ce": 0.018210891634225845, + "loss_iou": 1.007625699043274, + "loss_num": 0.123046875, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 451838648, + "step": 2614 + }, + { + "epoch": 1.0015319800842588, + "grad_norm": 26.460448235571473, + "learning_rate": 5e-06, + "loss": 0.169, + "num_input_tokens_seen": 452011384, + "step": 2615 + }, + { + "epoch": 1.0015319800842588, + "loss": 0.17002080380916595, + "loss_ce": 0.019508112221956253, + "loss_iou": 1.0012292861938477, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 452011384, + "step": 2615 + }, + { + "epoch": 1.0019149751053236, + "grad_norm": 22.107495489047828, + "learning_rate": 5e-06, + "loss": 0.1728, + "num_input_tokens_seen": 452183680, + "step": 2616 + }, + { + "epoch": 1.0019149751053236, + "loss": 0.14123472571372986, + "loss_ce": 0.02011045441031456, + "loss_iou": 1.0368467569351196, + "loss_num": 0.12109375, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 452183680, + "step": 2616 + }, + { + "epoch": 1.0022979701263883, + "grad_norm": 24.84546531113666, + "learning_rate": 5e-06, + "loss": 0.1521, + "num_input_tokens_seen": 452356632, + "step": 2617 + }, + { + "epoch": 1.0022979701263883, + "loss": 0.13976626098155975, + "loss_ce": 0.01964908093214035, + "loss_iou": 1.001399040222168, + "loss_num": 0.1201171875, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 452356632, + "step": 2617 + }, + { + "epoch": 1.002680965147453, + "grad_norm": 21.598581182574627, + "learning_rate": 5e-06, + "loss": 0.1558, + "num_input_tokens_seen": 452529648, + "step": 2618 + }, + { + "epoch": 1.002680965147453, + "loss": 0.14163951575756073, + "loss_ce": 0.01844004914164543, + "loss_iou": 1.0433669090270996, + "loss_num": 0.123046875, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 452529648, + "step": 2618 + }, + { + "epoch": 1.0030639601685178, + "grad_norm": 25.008835471138518, + "learning_rate": 5e-06, + "loss": 0.2027, + "num_input_tokens_seen": 452702528, + "step": 2619 + }, + { + "epoch": 1.0030639601685178, + "loss": 0.14462541043758392, + "loss_ce": 0.018099535256624222, + "loss_iou": 1.0006023645401, + "loss_num": 0.126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 452702528, + "step": 2619 + }, + { + "epoch": 1.0034469551895826, + "grad_norm": 15.996517487548525, + "learning_rate": 5e-06, + "loss": 0.203, + "num_input_tokens_seen": 452875272, + "step": 2620 + }, + { + "epoch": 1.0034469551895826, + "loss": 0.22338154911994934, + "loss_ce": 0.02019551210105419, + "loss_iou": 1.002182960510254, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 452875272, + "step": 2620 + }, + { + "epoch": 1.0038299502106474, + "grad_norm": 26.077149324673446, + "learning_rate": 5e-06, + "loss": 0.1983, + "num_input_tokens_seen": 453048480, + "step": 2621 + }, + { + "epoch": 1.0038299502106474, + "loss": 0.266177237033844, + "loss_ce": 0.02118212729692459, + "loss_iou": 1.065863847732544, + "loss_num": 0.2451171875, + "loss_xval": 0.2451171875, + "num_input_tokens_seen": 453048480, + "step": 2621 + }, + { + "epoch": 1.004212945231712, + "grad_norm": 24.889998973498113, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 453221336, + "step": 2622 + }, + { + "epoch": 1.004212945231712, + "loss": 0.20550508797168732, + "loss_ce": 0.02093476988375187, + "loss_iou": 1.0505411624908447, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 453221336, + "step": 2622 + }, + { + "epoch": 1.0045959402527767, + "grad_norm": 18.0054008237673, + "learning_rate": 5e-06, + "loss": 0.1792, + "num_input_tokens_seen": 453394408, + "step": 2623 + }, + { + "epoch": 1.0045959402527767, + "loss": 0.13319995999336243, + "loss_ce": 0.020590096712112427, + "loss_iou": 1.0009472370147705, + "loss_num": 0.11279296875, + "loss_xval": 0.11279296875, + "num_input_tokens_seen": 453394408, + "step": 2623 + }, + { + "epoch": 1.0049789352738414, + "grad_norm": 21.48118129655174, + "learning_rate": 5e-06, + "loss": 0.168, + "num_input_tokens_seen": 453567472, + "step": 2624 + }, + { + "epoch": 1.0049789352738414, + "loss": 0.16904892027378082, + "loss_ce": 0.02048935554921627, + "loss_iou": 1.0616116523742676, + "loss_num": 0.1484375, + "loss_xval": 0.1484375, + "num_input_tokens_seen": 453567472, + "step": 2624 + }, + { + "epoch": 1.0053619302949062, + "grad_norm": 24.030889297447565, + "learning_rate": 5e-06, + "loss": 0.1929, + "num_input_tokens_seen": 453740280, + "step": 2625 + }, + { + "epoch": 1.0053619302949062, + "loss": 0.16246747970581055, + "loss_ce": 0.019828319549560547, + "loss_iou": 1.0066397190093994, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 453740280, + "step": 2625 + }, + { + "epoch": 1.005744925315971, + "grad_norm": 29.232199595915233, + "learning_rate": 5e-06, + "loss": 0.1712, + "num_input_tokens_seen": 453913384, + "step": 2626 + }, + { + "epoch": 1.005744925315971, + "loss": 0.20079506933689117, + "loss_ce": 0.020314104855060577, + "loss_iou": 1.0599197149276733, + "loss_num": 0.1806640625, + "loss_xval": 0.1806640625, + "num_input_tokens_seen": 453913384, + "step": 2626 + }, + { + "epoch": 1.0061279203370357, + "grad_norm": 31.868805292436047, + "learning_rate": 5e-06, + "loss": 0.2713, + "num_input_tokens_seen": 454086320, + "step": 2627 + }, + { + "epoch": 1.0061279203370357, + "loss": 0.254930704832077, + "loss_ce": 0.020067419856786728, + "loss_iou": 1.2221109867095947, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 454086320, + "step": 2627 + }, + { + "epoch": 1.0065109153581004, + "grad_norm": 25.275369950358368, + "learning_rate": 5e-06, + "loss": 0.1447, + "num_input_tokens_seen": 454259512, + "step": 2628 + }, + { + "epoch": 1.0065109153581004, + "loss": 0.16165083646774292, + "loss_ce": 0.021117383614182472, + "loss_iou": 1.0255322456359863, + "loss_num": 0.140625, + "loss_xval": 0.140625, + "num_input_tokens_seen": 454259512, + "step": 2628 + }, + { + "epoch": 1.006893910379165, + "grad_norm": 28.217706011517446, + "learning_rate": 5e-06, + "loss": 0.2765, + "num_input_tokens_seen": 454432672, + "step": 2629 + }, + { + "epoch": 1.006893910379165, + "loss": 0.22138825058937073, + "loss_ce": 0.02009430341422558, + "loss_iou": 1.1180342435836792, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 454432672, + "step": 2629 + }, + { + "epoch": 1.0072769054002297, + "grad_norm": 23.197617638130787, + "learning_rate": 5e-06, + "loss": 0.2146, + "num_input_tokens_seen": 454605624, + "step": 2630 + }, + { + "epoch": 1.0072769054002297, + "loss": 0.2131013423204422, + "loss_ce": 0.0229157917201519, + "loss_iou": 1.0519930124282837, + "loss_num": 0.1904296875, + "loss_xval": 0.1904296875, + "num_input_tokens_seen": 454605624, + "step": 2630 + }, + { + "epoch": 1.0076599004212945, + "grad_norm": 23.264612619001955, + "learning_rate": 5e-06, + "loss": 0.1406, + "num_input_tokens_seen": 454778656, + "step": 2631 + }, + { + "epoch": 1.0076599004212945, + "loss": 0.12333838641643524, + "loss_ce": 0.01970069482922554, + "loss_iou": 1.0003728866577148, + "loss_num": 0.103515625, + "loss_xval": 0.103515625, + "num_input_tokens_seen": 454778656, + "step": 2631 + }, + { + "epoch": 1.0080428954423593, + "grad_norm": 27.308085755809195, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 454952344, + "step": 2632 + }, + { + "epoch": 1.0080428954423593, + "loss": 0.18797634541988373, + "loss_ce": 0.02202175185084343, + "loss_iou": 1.0183154344558716, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 454952344, + "step": 2632 + }, + { + "epoch": 1.008425890463424, + "grad_norm": 51.13928430698843, + "learning_rate": 5e-06, + "loss": 0.1993, + "num_input_tokens_seen": 455125024, + "step": 2633 + }, + { + "epoch": 1.008425890463424, + "loss": 0.23166146874427795, + "loss_ce": 0.020723965018987656, + "loss_iou": 1.5644888877868652, + "loss_num": 0.2109375, + "loss_xval": 0.2109375, + "num_input_tokens_seen": 455125024, + "step": 2633 + }, + { + "epoch": 1.0088088854844888, + "grad_norm": 36.3187290682828, + "learning_rate": 5e-06, + "loss": 0.1898, + "num_input_tokens_seen": 455298184, + "step": 2634 + }, + { + "epoch": 1.0088088854844888, + "loss": 0.1815185546875, + "loss_ce": 0.020202644169330597, + "loss_iou": 1.0271393060684204, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 455298184, + "step": 2634 + }, + { + "epoch": 1.0091918805055535, + "grad_norm": 23.522381250220548, + "learning_rate": 5e-06, + "loss": 0.165, + "num_input_tokens_seen": 455471224, + "step": 2635 + }, + { + "epoch": 1.0091918805055535, + "loss": 0.20535528659820557, + "loss_ce": 0.02023564837872982, + "loss_iou": 1.0670104026794434, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 455471224, + "step": 2635 + }, + { + "epoch": 1.009574875526618, + "grad_norm": 30.0462320621947, + "learning_rate": 5e-06, + "loss": 0.1998, + "num_input_tokens_seen": 455643912, + "step": 2636 + }, + { + "epoch": 1.009574875526618, + "loss": 0.17439775168895721, + "loss_ce": 0.019734669476747513, + "loss_iou": 1.0009647607803345, + "loss_num": 0.154296875, + "loss_xval": 0.154296875, + "num_input_tokens_seen": 455643912, + "step": 2636 + }, + { + "epoch": 1.0099578705476828, + "grad_norm": 39.616333037041514, + "learning_rate": 5e-06, + "loss": 0.2247, + "num_input_tokens_seen": 455817056, + "step": 2637 + }, + { + "epoch": 1.0099578705476828, + "loss": 0.21939142048358917, + "loss_ce": 0.021210266277194023, + "loss_iou": 1.058443307876587, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 455817056, + "step": 2637 + }, + { + "epoch": 1.0103408655687476, + "grad_norm": 28.92030922603685, + "learning_rate": 5e-06, + "loss": 0.2297, + "num_input_tokens_seen": 455990240, + "step": 2638 + }, + { + "epoch": 1.0103408655687476, + "loss": 0.2293231040239334, + "loss_ce": 0.022902213037014008, + "loss_iou": 1.330636739730835, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 455990240, + "step": 2638 + }, + { + "epoch": 1.0107238605898123, + "grad_norm": 21.164605163067154, + "learning_rate": 5e-06, + "loss": 0.1839, + "num_input_tokens_seen": 456162992, + "step": 2639 + }, + { + "epoch": 1.0107238605898123, + "loss": 0.18436919152736664, + "loss_ce": 0.018292535096406937, + "loss_iou": 1.0153162479400635, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 456162992, + "step": 2639 + }, + { + "epoch": 1.011106855610877, + "grad_norm": 30.72034630005925, + "learning_rate": 5e-06, + "loss": 0.2067, + "num_input_tokens_seen": 456336144, + "step": 2640 + }, + { + "epoch": 1.011106855610877, + "loss": 0.20957262814044952, + "loss_ce": 0.021706413477659225, + "loss_iou": 1.0251965522766113, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 456336144, + "step": 2640 + }, + { + "epoch": 1.0114898506319419, + "grad_norm": 24.02837295298551, + "learning_rate": 5e-06, + "loss": 0.1673, + "num_input_tokens_seen": 456508832, + "step": 2641 + }, + { + "epoch": 1.0114898506319419, + "loss": 0.15824568271636963, + "loss_ce": 0.019970526918768883, + "loss_iou": 1.0397586822509766, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 456508832, + "step": 2641 + }, + { + "epoch": 1.0118728456530066, + "grad_norm": 46.078856687371086, + "learning_rate": 5e-06, + "loss": 0.2773, + "num_input_tokens_seen": 456681824, + "step": 2642 + }, + { + "epoch": 1.0118728456530066, + "loss": 0.2912445068359375, + "loss_ce": 0.020553607493638992, + "loss_iou": 1.063234806060791, + "loss_num": 0.271484375, + "loss_xval": 0.271484375, + "num_input_tokens_seen": 456681824, + "step": 2642 + }, + { + "epoch": 1.0122558406740712, + "grad_norm": 28.80071795580853, + "learning_rate": 5e-06, + "loss": 0.2123, + "num_input_tokens_seen": 456854728, + "step": 2643 + }, + { + "epoch": 1.0122558406740712, + "loss": 0.2342531681060791, + "loss_ce": 0.01867700181901455, + "loss_iou": 1.106345772743225, + "loss_num": 0.2158203125, + "loss_xval": 0.2158203125, + "num_input_tokens_seen": 456854728, + "step": 2643 + }, + { + "epoch": 1.012638835695136, + "grad_norm": 27.452590449993657, + "learning_rate": 5e-06, + "loss": 0.2459, + "num_input_tokens_seen": 457027496, + "step": 2644 + }, + { + "epoch": 1.012638835695136, + "loss": 0.2144833505153656, + "loss_ce": 0.021246036514639854, + "loss_iou": 1.0015583038330078, + "loss_num": 0.193359375, + "loss_xval": 0.193359375, + "num_input_tokens_seen": 457027496, + "step": 2644 + }, + { + "epoch": 1.0130218307162007, + "grad_norm": 27.130762085346163, + "learning_rate": 5e-06, + "loss": 0.2251, + "num_input_tokens_seen": 457200544, + "step": 2645 + }, + { + "epoch": 1.0130218307162007, + "loss": 0.20875662565231323, + "loss_ce": 0.020829375833272934, + "loss_iou": 1.0030250549316406, + "loss_num": 0.1875, + "loss_xval": 0.1875, + "num_input_tokens_seen": 457200544, + "step": 2645 + }, + { + "epoch": 1.0134048257372654, + "grad_norm": 47.90456318918338, + "learning_rate": 5e-06, + "loss": 0.2206, + "num_input_tokens_seen": 457373624, + "step": 2646 + }, + { + "epoch": 1.0134048257372654, + "loss": 0.26886725425720215, + "loss_ce": 0.022163134068250656, + "loss_iou": 1.0137746334075928, + "loss_num": 0.2470703125, + "loss_xval": 0.2470703125, + "num_input_tokens_seen": 457373624, + "step": 2646 + }, + { + "epoch": 1.0137878207583302, + "grad_norm": 45.40839199207175, + "learning_rate": 5e-06, + "loss": 0.2435, + "num_input_tokens_seen": 457546344, + "step": 2647 + }, + { + "epoch": 1.0137878207583302, + "loss": 0.2117205560207367, + "loss_ce": 0.020436381921172142, + "loss_iou": 1.1137763261795044, + "loss_num": 0.19140625, + "loss_xval": 0.19140625, + "num_input_tokens_seen": 457546344, + "step": 2647 + }, + { + "epoch": 1.014170815779395, + "grad_norm": 31.369161286808033, + "learning_rate": 5e-06, + "loss": 0.1555, + "num_input_tokens_seen": 457719240, + "step": 2648 + }, + { + "epoch": 1.014170815779395, + "loss": 0.13760226964950562, + "loss_ce": 0.02322237938642502, + "loss_iou": 1.0234071016311646, + "loss_num": 0.1142578125, + "loss_xval": 0.1142578125, + "num_input_tokens_seen": 457719240, + "step": 2648 + }, + { + "epoch": 1.0145538108004597, + "grad_norm": 19.891610461402454, + "learning_rate": 5e-06, + "loss": 0.1579, + "num_input_tokens_seen": 457892416, + "step": 2649 + }, + { + "epoch": 1.0145538108004597, + "loss": 0.18401141464710236, + "loss_ce": 0.022756528109312057, + "loss_iou": 1.0163376331329346, + "loss_num": 0.1611328125, + "loss_xval": 0.1611328125, + "num_input_tokens_seen": 457892416, + "step": 2649 + }, + { + "epoch": 1.0149368058215242, + "grad_norm": 23.259849293220743, + "learning_rate": 5e-06, + "loss": 0.1973, + "num_input_tokens_seen": 458065152, + "step": 2650 + }, + { + "epoch": 1.0149368058215242, + "loss": 0.1957165151834488, + "loss_ce": 0.021766329184174538, + "loss_iou": 1.0006461143493652, + "loss_num": 0.173828125, + "loss_xval": 0.173828125, + "num_input_tokens_seen": 458065152, + "step": 2650 + }, + { + "epoch": 1.015319800842589, + "grad_norm": 43.91331783389807, + "learning_rate": 5e-06, + "loss": 0.1989, + "num_input_tokens_seen": 458237888, + "step": 2651 + }, + { + "epoch": 1.015319800842589, + "loss": 0.2078561782836914, + "loss_ce": 0.021271690726280212, + "loss_iou": 1.003035068511963, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 458237888, + "step": 2651 + }, + { + "epoch": 1.0157027958636538, + "grad_norm": 38.57047268554671, + "learning_rate": 5e-06, + "loss": 0.1556, + "num_input_tokens_seen": 458411048, + "step": 2652 + }, + { + "epoch": 1.0157027958636538, + "loss": 0.1338464617729187, + "loss_ce": 0.020778842270374298, + "loss_iou": 1.0011705160140991, + "loss_num": 0.11328125, + "loss_xval": 0.11328125, + "num_input_tokens_seen": 458411048, + "step": 2652 + }, + { + "epoch": 1.0160857908847185, + "grad_norm": 21.82677951625571, + "learning_rate": 5e-06, + "loss": 0.2378, + "num_input_tokens_seen": 458584168, + "step": 2653 + }, + { + "epoch": 1.0160857908847185, + "loss": 0.2706211805343628, + "loss_ce": 0.020132914185523987, + "loss_iou": 1.1052721738815308, + "loss_num": 0.25, + "loss_xval": 0.25, + "num_input_tokens_seen": 458584168, + "step": 2653 + }, + { + "epoch": 1.0164687859057833, + "grad_norm": 20.04536041659893, + "learning_rate": 5e-06, + "loss": 0.2075, + "num_input_tokens_seen": 458757376, + "step": 2654 + }, + { + "epoch": 1.0164687859057833, + "loss": 0.20592357218265533, + "loss_ce": 0.020742908120155334, + "loss_iou": 1.0075843334197998, + "loss_num": 0.185546875, + "loss_xval": 0.185546875, + "num_input_tokens_seen": 458757376, + "step": 2654 + }, + { + "epoch": 1.016851780926848, + "grad_norm": 44.789371331460984, + "learning_rate": 5e-06, + "loss": 0.1781, + "num_input_tokens_seen": 458930216, + "step": 2655 + }, + { + "epoch": 1.016851780926848, + "loss": 0.16285867989063263, + "loss_ce": 0.02147073857486248, + "loss_iou": 1.0407545566558838, + "loss_num": 0.1416015625, + "loss_xval": 0.1416015625, + "num_input_tokens_seen": 458930216, + "step": 2655 + }, + { + "epoch": 1.0172347759479128, + "grad_norm": 33.14025348410339, + "learning_rate": 5e-06, + "loss": 0.1942, + "num_input_tokens_seen": 459103216, + "step": 2656 + }, + { + "epoch": 1.0172347759479128, + "loss": 0.19790545105934143, + "loss_ce": 0.02188006415963173, + "loss_iou": 1.0012357234954834, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 459103216, + "step": 2656 + }, + { + "epoch": 1.0176177709689773, + "grad_norm": 29.809596683884156, + "learning_rate": 5e-06, + "loss": 0.2239, + "num_input_tokens_seen": 459276448, + "step": 2657 + }, + { + "epoch": 1.0176177709689773, + "loss": 0.23329783976078033, + "loss_ce": 0.01979687437415123, + "loss_iou": 1.0008468627929688, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 459276448, + "step": 2657 + }, + { + "epoch": 1.018000765990042, + "grad_norm": 30.763285231618685, + "learning_rate": 5e-06, + "loss": 0.156, + "num_input_tokens_seen": 459449216, + "step": 2658 + }, + { + "epoch": 1.018000765990042, + "loss": 0.1709698736667633, + "loss_ce": 0.019419565796852112, + "loss_iou": 1.0088496208190918, + "loss_num": 0.1513671875, + "loss_xval": 0.1513671875, + "num_input_tokens_seen": 459449216, + "step": 2658 + }, + { + "epoch": 1.0183837610111068, + "grad_norm": 41.45551240380358, + "learning_rate": 5e-06, + "loss": 0.1866, + "num_input_tokens_seen": 459622264, + "step": 2659 + }, + { + "epoch": 1.0183837610111068, + "loss": 0.16339613497257233, + "loss_ce": 0.019719377160072327, + "loss_iou": 1.0058441162109375, + "loss_num": 0.1435546875, + "loss_xval": 0.1435546875, + "num_input_tokens_seen": 459622264, + "step": 2659 + }, + { + "epoch": 1.0187667560321716, + "grad_norm": 24.041455167168042, + "learning_rate": 5e-06, + "loss": 0.2229, + "num_input_tokens_seen": 459794920, + "step": 2660 + }, + { + "epoch": 1.0187667560321716, + "loss": 0.22818854451179504, + "loss_ce": 0.019875554367899895, + "loss_iou": 1.0752671957015991, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 459794920, + "step": 2660 + }, + { + "epoch": 1.0191497510532364, + "grad_norm": 40.4623954131525, + "learning_rate": 5e-06, + "loss": 0.1492, + "num_input_tokens_seen": 459968016, + "step": 2661 + }, + { + "epoch": 1.0191497510532364, + "loss": 0.12593373656272888, + "loss_ce": 0.02003774419426918, + "loss_iou": 1.0006287097930908, + "loss_num": 0.10595703125, + "loss_xval": 0.10595703125, + "num_input_tokens_seen": 459968016, + "step": 2661 + }, + { + "epoch": 1.019532746074301, + "grad_norm": 23.331262836860812, + "learning_rate": 5e-06, + "loss": 0.2209, + "num_input_tokens_seen": 460140872, + "step": 2662 + }, + { + "epoch": 1.019532746074301, + "loss": 0.22041067481040955, + "loss_ce": 0.02052055299282074, + "loss_iou": 1.0110864639282227, + "loss_num": 0.2001953125, + "loss_xval": 0.2001953125, + "num_input_tokens_seen": 460140872, + "step": 2662 + }, + { + "epoch": 1.0199157410953659, + "grad_norm": 53.680906002538734, + "learning_rate": 5e-06, + "loss": 0.227, + "num_input_tokens_seen": 460314040, + "step": 2663 + }, + { + "epoch": 1.0199157410953659, + "loss": 0.245751291513443, + "loss_ce": 0.019982237368822098, + "loss_iou": 1.0646202564239502, + "loss_num": 0.2255859375, + "loss_xval": 0.2255859375, + "num_input_tokens_seen": 460314040, + "step": 2663 + }, + { + "epoch": 1.0202987361164304, + "grad_norm": 31.59724384859943, + "learning_rate": 5e-06, + "loss": 0.2249, + "num_input_tokens_seen": 460486744, + "step": 2664 + }, + { + "epoch": 1.0202987361164304, + "loss": 0.22602880001068115, + "loss_ce": 0.02326999604701996, + "loss_iou": 1.0191833972930908, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 460486744, + "step": 2664 + }, + { + "epoch": 1.0206817311374952, + "grad_norm": 55.58539040679261, + "learning_rate": 5e-06, + "loss": 0.2816, + "num_input_tokens_seen": 460659728, + "step": 2665 + }, + { + "epoch": 1.0206817311374952, + "loss": 0.3014325797557831, + "loss_ce": 0.020182587206363678, + "loss_iou": 1.0474450588226318, + "loss_num": 0.28125, + "loss_xval": 0.28125, + "num_input_tokens_seen": 460659728, + "step": 2665 + }, + { + "epoch": 1.02106472615856, + "grad_norm": 23.924529770016672, + "learning_rate": 5e-06, + "loss": 0.2308, + "num_input_tokens_seen": 460832664, + "step": 2666 + }, + { + "epoch": 1.02106472615856, + "loss": 0.23173928260803223, + "loss_ce": 0.019703146070241928, + "loss_iou": 1.03975510597229, + "loss_num": 0.2119140625, + "loss_xval": 0.2119140625, + "num_input_tokens_seen": 460832664, + "step": 2666 + }, + { + "epoch": 1.0214477211796247, + "grad_norm": 61.7786700457965, + "learning_rate": 5e-06, + "loss": 0.1617, + "num_input_tokens_seen": 461005496, + "step": 2667 + }, + { + "epoch": 1.0214477211796247, + "loss": 0.1547263115644455, + "loss_ce": 0.019594479352235794, + "loss_iou": 1.0006184577941895, + "loss_num": 0.134765625, + "loss_xval": 0.134765625, + "num_input_tokens_seen": 461005496, + "step": 2667 + }, + { + "epoch": 1.0218307162006894, + "grad_norm": 23.446864568879292, + "learning_rate": 5e-06, + "loss": 0.2579, + "num_input_tokens_seen": 461178544, + "step": 2668 + }, + { + "epoch": 1.0218307162006894, + "loss": 0.27943965792655945, + "loss_ce": 0.020040256902575493, + "loss_iou": 1.0679092407226562, + "loss_num": 0.259765625, + "loss_xval": 0.259765625, + "num_input_tokens_seen": 461178544, + "step": 2668 + }, + { + "epoch": 1.0222137112217542, + "grad_norm": 37.334407917544844, + "learning_rate": 5e-06, + "loss": 0.2227, + "num_input_tokens_seen": 461351560, + "step": 2669 + }, + { + "epoch": 1.0222137112217542, + "loss": 0.23498889803886414, + "loss_ce": 0.020877568051218987, + "loss_iou": 1.036283016204834, + "loss_num": 0.2138671875, + "loss_xval": 0.2138671875, + "num_input_tokens_seen": 461351560, + "step": 2669 + }, + { + "epoch": 1.022596706242819, + "grad_norm": 24.23724171944618, + "learning_rate": 5e-06, + "loss": 0.2083, + "num_input_tokens_seen": 461524936, + "step": 2670 + }, + { + "epoch": 1.022596706242819, + "loss": 0.19315654039382935, + "loss_ce": 0.02280740812420845, + "loss_iou": 1.0224149227142334, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 461524936, + "step": 2670 + }, + { + "epoch": 1.0229797012638835, + "grad_norm": 43.99273166540144, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 461697568, + "step": 2671 + }, + { + "epoch": 1.0229797012638835, + "loss": 0.18223366141319275, + "loss_ce": 0.021985851228237152, + "loss_iou": 1.0184319019317627, + "loss_num": 0.16015625, + "loss_xval": 0.16015625, + "num_input_tokens_seen": 461697568, + "step": 2671 + }, + { + "epoch": 1.0233626962849482, + "grad_norm": 27.24794231197852, + "learning_rate": 5e-06, + "loss": 0.2238, + "num_input_tokens_seen": 461870352, + "step": 2672 + }, + { + "epoch": 1.0233626962849482, + "loss": 0.24211221933364868, + "loss_ce": 0.02305705100297928, + "loss_iou": 1.0264095067977905, + "loss_num": 0.21875, + "loss_xval": 0.21875, + "num_input_tokens_seen": 461870352, + "step": 2672 + }, + { + "epoch": 1.023745691306013, + "grad_norm": 33.861199537220216, + "learning_rate": 5e-06, + "loss": 0.196, + "num_input_tokens_seen": 462043528, + "step": 2673 + }, + { + "epoch": 1.023745691306013, + "loss": 0.17982439696788788, + "loss_ce": 0.020705748349428177, + "loss_iou": 1.0384835004806519, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 462043528, + "step": 2673 + }, + { + "epoch": 1.0241286863270778, + "grad_norm": 22.053638294927843, + "learning_rate": 5e-06, + "loss": 0.2145, + "num_input_tokens_seen": 462216592, + "step": 2674 + }, + { + "epoch": 1.0241286863270778, + "loss": 0.23037189245224, + "loss_ce": 0.02327960543334484, + "loss_iou": 1.0616304874420166, + "loss_num": 0.20703125, + "loss_xval": 0.20703125, + "num_input_tokens_seen": 462216592, + "step": 2674 + }, + { + "epoch": 1.0245116813481425, + "grad_norm": 34.33366681501337, + "learning_rate": 5e-06, + "loss": 0.2301, + "num_input_tokens_seen": 462389864, + "step": 2675 + }, + { + "epoch": 1.0245116813481425, + "loss": 0.2765474319458008, + "loss_ce": 0.021786676719784737, + "loss_iou": 1.0377389192581177, + "loss_num": 0.25390625, + "loss_xval": 0.25390625, + "num_input_tokens_seen": 462389864, + "step": 2675 + }, + { + "epoch": 1.0248946763692073, + "grad_norm": 27.150370061380826, + "learning_rate": 5e-06, + "loss": 0.1892, + "num_input_tokens_seen": 462562808, + "step": 2676 + }, + { + "epoch": 1.0248946763692073, + "loss": 0.18681272864341736, + "loss_ce": 0.020858149975538254, + "loss_iou": 1.019316554069519, + "loss_num": 0.166015625, + "loss_xval": 0.166015625, + "num_input_tokens_seen": 462562808, + "step": 2676 + }, + { + "epoch": 1.0252776713902718, + "grad_norm": 23.518013552057838, + "learning_rate": 5e-06, + "loss": 0.1951, + "num_input_tokens_seen": 462735944, + "step": 2677 + }, + { + "epoch": 1.0252776713902718, + "loss": 0.15821880102157593, + "loss_ce": 0.01973002590239048, + "loss_iou": 1.0040994882583618, + "loss_num": 0.138671875, + "loss_xval": 0.138671875, + "num_input_tokens_seen": 462735944, + "step": 2677 + }, + { + "epoch": 1.0256606664113366, + "grad_norm": 14.889740274456658, + "learning_rate": 5e-06, + "loss": 0.1483, + "num_input_tokens_seen": 462908728, + "step": 2678 + }, + { + "epoch": 1.0256606664113366, + "loss": 0.1388009786605835, + "loss_ce": 0.0204537995159626, + "loss_iou": 1.004044771194458, + "loss_num": 0.1181640625, + "loss_xval": 0.1181640625, + "num_input_tokens_seen": 462908728, + "step": 2678 + }, + { + "epoch": 1.0260436614324013, + "grad_norm": 23.297625717045992, + "learning_rate": 5e-06, + "loss": 0.1822, + "num_input_tokens_seen": 463081704, + "step": 2679 + }, + { + "epoch": 1.0260436614324013, + "loss": 0.19627460837364197, + "loss_ce": 0.020249227061867714, + "loss_iou": 1.0534923076629639, + "loss_num": 0.17578125, + "loss_xval": 0.17578125, + "num_input_tokens_seen": 463081704, + "step": 2679 + }, + { + "epoch": 1.026426656453466, + "grad_norm": 19.64664560796669, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 463254840, + "step": 2680 + }, + { + "epoch": 1.026426656453466, + "loss": 0.143958181142807, + "loss_ce": 0.021491151303052902, + "loss_iou": 1.0011191368103027, + "loss_num": 0.12255859375, + "loss_xval": 0.12255859375, + "num_input_tokens_seen": 463254840, + "step": 2680 + }, + { + "epoch": 1.0268096514745308, + "grad_norm": 20.310161499712436, + "learning_rate": 5e-06, + "loss": 0.2153, + "num_input_tokens_seen": 463427896, + "step": 2681 + }, + { + "epoch": 1.0268096514745308, + "loss": 0.2070397436618805, + "loss_ce": 0.020577343180775642, + "loss_iou": 1.0429177284240723, + "loss_num": 0.1865234375, + "loss_xval": 0.1865234375, + "num_input_tokens_seen": 463427896, + "step": 2681 + }, + { + "epoch": 1.0271926464955956, + "grad_norm": 17.325785775797268, + "learning_rate": 5e-06, + "loss": 0.2248, + "num_input_tokens_seen": 463600600, + "step": 2682 + }, + { + "epoch": 1.0271926464955956, + "loss": 0.205996572971344, + "loss_ce": 0.022708002477884293, + "loss_iou": 1.047377586364746, + "loss_num": 0.18359375, + "loss_xval": 0.18359375, + "num_input_tokens_seen": 463600600, + "step": 2682 + }, + { + "epoch": 1.0275756415166604, + "grad_norm": 18.49515030440836, + "learning_rate": 5e-06, + "loss": 0.1786, + "num_input_tokens_seen": 463773552, + "step": 2683 + }, + { + "epoch": 1.0275756415166604, + "loss": 0.23582297563552856, + "loss_ce": 0.019453343003988266, + "loss_iou": 1.2463963031768799, + "loss_num": 0.216796875, + "loss_xval": 0.216796875, + "num_input_tokens_seen": 463773552, + "step": 2683 + }, + { + "epoch": 1.027958636537725, + "grad_norm": 21.69668716747673, + "learning_rate": 5e-06, + "loss": 0.1462, + "num_input_tokens_seen": 463946792, + "step": 2684 + }, + { + "epoch": 1.027958636537725, + "loss": 0.12513580918312073, + "loss_ce": 0.022016923874616623, + "loss_iou": 1.0008490085601807, + "loss_num": 0.10302734375, + "loss_xval": 0.10302734375, + "num_input_tokens_seen": 463946792, + "step": 2684 + }, + { + "epoch": 1.0283416315587897, + "grad_norm": 27.265711410161398, + "learning_rate": 5e-06, + "loss": 0.1631, + "num_input_tokens_seen": 464119312, + "step": 2685 + }, + { + "epoch": 1.0283416315587897, + "loss": 0.19970130920410156, + "loss_ce": 0.01970863714814186, + "loss_iou": 1.0485548973083496, + "loss_num": 0.1796875, + "loss_xval": 0.1796875, + "num_input_tokens_seen": 464119312, + "step": 2685 + }, + { + "epoch": 1.0287246265798544, + "grad_norm": 24.029693810443284, + "learning_rate": 5e-06, + "loss": 0.1921, + "num_input_tokens_seen": 464292416, + "step": 2686 + }, + { + "epoch": 1.0287246265798544, + "loss": 0.22213320434093475, + "loss_ce": 0.020717188715934753, + "loss_iou": 1.0227363109588623, + "loss_num": 0.201171875, + "loss_xval": 0.201171875, + "num_input_tokens_seen": 464292416, + "step": 2686 + }, + { + "epoch": 1.0291076216009192, + "grad_norm": 21.937065930896914, + "learning_rate": 5e-06, + "loss": 0.1715, + "num_input_tokens_seen": 464465192, + "step": 2687 + }, + { + "epoch": 1.0291076216009192, + "loss": 0.1793980896472931, + "loss_ce": 0.020157355815172195, + "loss_iou": 1.0054259300231934, + "loss_num": 0.1591796875, + "loss_xval": 0.1591796875, + "num_input_tokens_seen": 464465192, + "step": 2687 + }, + { + "epoch": 1.029490616621984, + "grad_norm": 23.353506224694193, + "learning_rate": 5e-06, + "loss": 0.143, + "num_input_tokens_seen": 464638496, + "step": 2688 + }, + { + "epoch": 1.029490616621984, + "loss": 0.13977614045143127, + "loss_ce": 0.020849131047725677, + "loss_iou": 1.0167276859283447, + "loss_num": 0.119140625, + "loss_xval": 0.119140625, + "num_input_tokens_seen": 464638496, + "step": 2688 + }, + { + "epoch": 1.0298736116430487, + "grad_norm": 23.77239661642061, + "learning_rate": 5e-06, + "loss": 0.1798, + "num_input_tokens_seen": 464810936, + "step": 2689 + }, + { + "epoch": 1.0298736116430487, + "loss": 0.21237342059612274, + "loss_ce": 0.02011268399655819, + "loss_iou": 1.2245774269104004, + "loss_num": 0.1923828125, + "loss_xval": 0.1923828125, + "num_input_tokens_seen": 464810936, + "step": 2689 + }, + { + "epoch": 1.0302566066641135, + "grad_norm": 17.674660483168775, + "learning_rate": 5e-06, + "loss": 0.1708, + "num_input_tokens_seen": 464983800, + "step": 2690 + }, + { + "epoch": 1.0302566066641135, + "loss": 0.14379066228866577, + "loss_ce": 0.020926905795931816, + "loss_iou": 1.0017061233520508, + "loss_num": 0.123046875, + "loss_xval": 0.123046875, + "num_input_tokens_seen": 464983800, + "step": 2690 + }, + { + "epoch": 1.030639601685178, + "grad_norm": 19.617520136686487, + "learning_rate": 5e-06, + "loss": 0.1952, + "num_input_tokens_seen": 465157104, + "step": 2691 + }, + { + "epoch": 1.030639601685178, + "loss": 0.16387206315994263, + "loss_ce": 0.01964598335325718, + "loss_iou": 1.0038049221038818, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 465157104, + "step": 2691 + }, + { + "epoch": 1.0310225967062427, + "grad_norm": 17.7155011736127, + "learning_rate": 5e-06, + "loss": 0.1993, + "num_input_tokens_seen": 465329920, + "step": 2692 + }, + { + "epoch": 1.0310225967062427, + "loss": 0.16195493936538696, + "loss_ce": 0.019468368962407112, + "loss_iou": 1.0223468542099, + "loss_num": 0.142578125, + "loss_xval": 0.142578125, + "num_input_tokens_seen": 465329920, + "step": 2692 + }, + { + "epoch": 1.0314055917273075, + "grad_norm": 17.70671620632501, + "learning_rate": 5e-06, + "loss": 0.1711, + "num_input_tokens_seen": 465502472, + "step": 2693 + }, + { + "epoch": 1.0314055917273075, + "loss": 0.12763410806655884, + "loss_ce": 0.01972394622862339, + "loss_iou": 1.0092477798461914, + "loss_num": 0.10791015625, + "loss_xval": 0.10791015625, + "num_input_tokens_seen": 465502472, + "step": 2693 + }, + { + "epoch": 1.0317885867483723, + "grad_norm": 22.52502334501282, + "learning_rate": 5e-06, + "loss": 0.1693, + "num_input_tokens_seen": 465675480, + "step": 2694 + }, + { + "epoch": 1.0317885867483723, + "loss": 0.19143357872962952, + "loss_ce": 0.020901355892419815, + "loss_iou": 0.988635778427124, + "loss_num": 0.1708984375, + "loss_xval": 0.1708984375, + "num_input_tokens_seen": 465675480, + "step": 2694 + }, + { + "epoch": 1.032171581769437, + "grad_norm": 23.850111143473104, + "learning_rate": 5e-06, + "loss": 0.2177, + "num_input_tokens_seen": 465848320, + "step": 2695 + }, + { + "epoch": 1.032171581769437, + "loss": 0.27090975642204285, + "loss_ce": 0.018712498247623444, + "loss_iou": 1.1247084140777588, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 465848320, + "step": 2695 + }, + { + "epoch": 1.0325545767905018, + "grad_norm": 24.100004859986235, + "learning_rate": 5e-06, + "loss": 0.1467, + "num_input_tokens_seen": 466021600, + "step": 2696 + }, + { + "epoch": 1.0325545767905018, + "loss": 0.1535719335079193, + "loss_ce": 0.02072891592979431, + "loss_iou": 1.008996605873108, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 466021600, + "step": 2696 + }, + { + "epoch": 1.0329375718115665, + "grad_norm": 39.686082647434645, + "learning_rate": 5e-06, + "loss": 0.1759, + "num_input_tokens_seen": 466194168, + "step": 2697 + }, + { + "epoch": 1.0329375718115665, + "loss": 0.18978765606880188, + "loss_ce": 0.02105596661567688, + "loss_iou": 1.017608642578125, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 466194168, + "step": 2697 + }, + { + "epoch": 1.033320566832631, + "grad_norm": 32.17313421412968, + "learning_rate": 5e-06, + "loss": 0.139, + "num_input_tokens_seen": 466366984, + "step": 2698 + }, + { + "epoch": 1.033320566832631, + "loss": 0.13241815567016602, + "loss_ce": 0.020967965945601463, + "loss_iou": 1.0103569030761719, + "loss_num": 0.111328125, + "loss_xval": 0.111328125, + "num_input_tokens_seen": 466366984, + "step": 2698 + }, + { + "epoch": 1.0337035618536958, + "grad_norm": 28.81487627958747, + "learning_rate": 5e-06, + "loss": 0.2004, + "num_input_tokens_seen": 466540200, + "step": 2699 + }, + { + "epoch": 1.0337035618536958, + "loss": 0.2008867859840393, + "loss_ce": 0.01924615353345871, + "loss_iou": 0.9303417205810547, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 466540200, + "step": 2699 + }, + { + "epoch": 1.0340865568747606, + "grad_norm": 21.392598667153916, + "learning_rate": 5e-06, + "loss": 0.1486, + "num_input_tokens_seen": 466713632, + "step": 2700 + }, + { + "epoch": 1.0340865568747606, + "loss": 0.14038464426994324, + "loss_ce": 0.01922985538840294, + "loss_iou": 1.0356340408325195, + "loss_num": 0.12109375, + "loss_xval": 0.12109375, + "num_input_tokens_seen": 466713632, + "step": 2700 + }, + { + "epoch": 1.0344695518958253, + "grad_norm": 29.166508226110697, + "learning_rate": 5e-06, + "loss": 0.2176, + "num_input_tokens_seen": 466885960, + "step": 2701 + }, + { + "epoch": 1.0344695518958253, + "loss": 0.27286502718925476, + "loss_ce": 0.02011844702064991, + "loss_iou": 1.4925570487976074, + "loss_num": 0.251953125, + "loss_xval": 0.251953125, + "num_input_tokens_seen": 466885960, + "step": 2701 + }, + { + "epoch": 1.03485254691689, + "grad_norm": 31.334617619095408, + "learning_rate": 5e-06, + "loss": 0.1873, + "num_input_tokens_seen": 467058840, + "step": 2702 + }, + { + "epoch": 1.03485254691689, + "loss": 0.217850923538208, + "loss_ce": 0.02040218934416771, + "loss_iou": 1.0190616846084595, + "loss_num": 0.197265625, + "loss_xval": 0.197265625, + "num_input_tokens_seen": 467058840, + "step": 2702 + }, + { + "epoch": 1.0352355419379549, + "grad_norm": 27.30942700897604, + "learning_rate": 5e-06, + "loss": 0.1852, + "num_input_tokens_seen": 467232000, + "step": 2703 + }, + { + "epoch": 1.0352355419379549, + "loss": 0.18020913004875183, + "loss_ce": 0.022494282573461533, + "loss_iou": 1.0234897136688232, + "loss_num": 0.158203125, + "loss_xval": 0.158203125, + "num_input_tokens_seen": 467232000, + "step": 2703 + }, + { + "epoch": 1.0356185369590196, + "grad_norm": 23.122492456689073, + "learning_rate": 5e-06, + "loss": 0.1686, + "num_input_tokens_seen": 467404456, + "step": 2704 + }, + { + "epoch": 1.0356185369590196, + "loss": 0.13935062289237976, + "loss_ce": 0.01920292340219021, + "loss_iou": 0.9711761474609375, + "loss_num": 0.1201171875, + "loss_xval": 0.1201171875, + "num_input_tokens_seen": 467404456, + "step": 2704 + }, + { + "epoch": 1.0360015319800842, + "grad_norm": 32.89013374464874, + "learning_rate": 5e-06, + "loss": 0.2026, + "num_input_tokens_seen": 467577312, + "step": 2705 + }, + { + "epoch": 1.0360015319800842, + "loss": 0.24207282066345215, + "loss_ce": 0.019843831658363342, + "loss_iou": 1.0349886417388916, + "loss_num": 0.22265625, + "loss_xval": 0.22265625, + "num_input_tokens_seen": 467577312, + "step": 2705 + }, + { + "epoch": 1.036384527001149, + "grad_norm": 21.513507526762115, + "learning_rate": 5e-06, + "loss": 0.2125, + "num_input_tokens_seen": 467749976, + "step": 2706 + }, + { + "epoch": 1.036384527001149, + "loss": 0.18888524174690247, + "loss_ce": 0.019329577684402466, + "loss_iou": 1.011885643005371, + "loss_num": 0.169921875, + "loss_xval": 0.169921875, + "num_input_tokens_seen": 467749976, + "step": 2706 + }, + { + "epoch": 1.0367675220222137, + "grad_norm": 21.080481269575095, + "learning_rate": 5e-06, + "loss": 0.1922, + "num_input_tokens_seen": 467922808, + "step": 2707 + }, + { + "epoch": 1.0367675220222137, + "loss": 0.17099253833293915, + "loss_ce": 0.020846057683229446, + "loss_iou": 1.0104583501815796, + "loss_num": 0.150390625, + "loss_xval": 0.150390625, + "num_input_tokens_seen": 467922808, + "step": 2707 + }, + { + "epoch": 1.0371505170432784, + "grad_norm": 28.250440462278117, + "learning_rate": 5e-06, + "loss": 0.217, + "num_input_tokens_seen": 468095312, + "step": 2708 + }, + { + "epoch": 1.0371505170432784, + "loss": 0.2283402681350708, + "loss_ce": 0.0191117562353611, + "loss_iou": 1.0676853656768799, + "loss_num": 0.208984375, + "loss_xval": 0.208984375, + "num_input_tokens_seen": 468095312, + "step": 2708 + }, + { + "epoch": 1.0375335120643432, + "grad_norm": 20.106679232759276, + "learning_rate": 5e-06, + "loss": 0.1596, + "num_input_tokens_seen": 468268128, + "step": 2709 + }, + { + "epoch": 1.0375335120643432, + "loss": 0.1397133767604828, + "loss_ce": 0.018985841423273087, + "loss_iou": 1.023972749710083, + "loss_num": 0.12060546875, + "loss_xval": 0.12060546875, + "num_input_tokens_seen": 468268128, + "step": 2709 + }, + { + "epoch": 1.037916507085408, + "grad_norm": 18.545317480370425, + "learning_rate": 5e-06, + "loss": 0.1516, + "num_input_tokens_seen": 468441168, + "step": 2710 + }, + { + "epoch": 1.037916507085408, + "loss": 0.17443513870239258, + "loss_ce": 0.021481044590473175, + "loss_iou": 1.008813738822937, + "loss_num": 0.1533203125, + "loss_xval": 0.1533203125, + "num_input_tokens_seen": 468441168, + "step": 2710 + }, + { + "epoch": 1.0382995021064727, + "grad_norm": 31.698141043016687, + "learning_rate": 5e-06, + "loss": 0.1895, + "num_input_tokens_seen": 468614712, + "step": 2711 + }, + { + "epoch": 1.0382995021064727, + "loss": 0.15882930159568787, + "loss_ce": 0.021683303639292717, + "loss_iou": 1.0105422735214233, + "loss_num": 0.13671875, + "loss_xval": 0.13671875, + "num_input_tokens_seen": 468614712, + "step": 2711 + }, + { + "epoch": 1.0386824971275372, + "grad_norm": 25.409130913828093, + "learning_rate": 5e-06, + "loss": 0.2247, + "num_input_tokens_seen": 468787384, + "step": 2712 + }, + { + "epoch": 1.0386824971275372, + "loss": 0.2565978467464447, + "loss_ce": 0.01978144608438015, + "loss_iou": 1.1115121841430664, + "loss_num": 0.236328125, + "loss_xval": 0.236328125, + "num_input_tokens_seen": 468787384, + "step": 2712 + }, + { + "epoch": 1.039065492148602, + "grad_norm": 23.513043474147057, + "learning_rate": 5e-06, + "loss": 0.1912, + "num_input_tokens_seen": 468960824, + "step": 2713 + }, + { + "epoch": 1.039065492148602, + "loss": 0.16786177456378937, + "loss_ce": 0.020431354641914368, + "loss_iou": 1.161240577697754, + "loss_num": 0.1474609375, + "loss_xval": 0.1474609375, + "num_input_tokens_seen": 468960824, + "step": 2713 + }, + { + "epoch": 1.0394484871696668, + "grad_norm": 26.585693680176337, + "learning_rate": 5e-06, + "loss": 0.1743, + "num_input_tokens_seen": 469133664, + "step": 2714 + }, + { + "epoch": 1.0394484871696668, + "loss": 0.19016367197036743, + "loss_ce": 0.020791107788681984, + "loss_iou": 1.0482709407806396, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 469133664, + "step": 2714 + }, + { + "epoch": 1.0398314821907315, + "grad_norm": 23.614034613887178, + "learning_rate": 5e-06, + "loss": 0.172, + "num_input_tokens_seen": 469306784, + "step": 2715 + }, + { + "epoch": 1.0398314821907315, + "loss": 0.20761451125144958, + "loss_ce": 0.018893800675868988, + "loss_iou": 0.9347192049026489, + "loss_num": 0.1884765625, + "loss_xval": 0.1884765625, + "num_input_tokens_seen": 469306784, + "step": 2715 + }, + { + "epoch": 1.0402144772117963, + "grad_norm": 16.3842713035619, + "learning_rate": 5e-06, + "loss": 0.2064, + "num_input_tokens_seen": 469479608, + "step": 2716 + }, + { + "epoch": 1.0402144772117963, + "loss": 0.20429082214832306, + "loss_ce": 0.01956792175769806, + "loss_iou": 1.0194833278656006, + "loss_num": 0.1845703125, + "loss_xval": 0.1845703125, + "num_input_tokens_seen": 469479608, + "step": 2716 + }, + { + "epoch": 1.040597472232861, + "grad_norm": 22.725221382510657, + "learning_rate": 5e-06, + "loss": 0.1585, + "num_input_tokens_seen": 469652632, + "step": 2717 + }, + { + "epoch": 1.040597472232861, + "loss": 0.14823034405708313, + "loss_ce": 0.021460331976413727, + "loss_iou": 1.013222336769104, + "loss_num": 0.126953125, + "loss_xval": 0.126953125, + "num_input_tokens_seen": 469652632, + "step": 2717 + }, + { + "epoch": 1.0409804672539258, + "grad_norm": 59.15850805484919, + "learning_rate": 5e-06, + "loss": 0.2061, + "num_input_tokens_seen": 469825560, + "step": 2718 + }, + { + "epoch": 1.0409804672539258, + "loss": 0.15422753989696503, + "loss_ce": 0.021292969584465027, + "loss_iou": 1.0036109685897827, + "loss_num": 0.1328125, + "loss_xval": 0.1328125, + "num_input_tokens_seen": 469825560, + "step": 2718 + }, + { + "epoch": 1.0413634622749903, + "grad_norm": 32.60760835613013, + "learning_rate": 5e-06, + "loss": 0.1614, + "num_input_tokens_seen": 469998592, + "step": 2719 + }, + { + "epoch": 1.0413634622749903, + "loss": 0.13662421703338623, + "loss_ce": 0.02145087718963623, + "loss_iou": 1.0014822483062744, + "loss_num": 0.115234375, + "loss_xval": 0.115234375, + "num_input_tokens_seen": 469998592, + "step": 2719 + }, + { + "epoch": 1.041746457296055, + "grad_norm": 22.858226704792663, + "learning_rate": 5e-06, + "loss": 0.155, + "num_input_tokens_seen": 470171240, + "step": 2720 + }, + { + "epoch": 1.041746457296055, + "loss": 0.13193772733211517, + "loss_ce": 0.021708227694034576, + "loss_iou": 1.0113179683685303, + "loss_num": 0.1103515625, + "loss_xval": 0.1103515625, + "num_input_tokens_seen": 470171240, + "step": 2720 + }, + { + "epoch": 1.0421294523171198, + "grad_norm": 32.50632891454952, + "learning_rate": 5e-06, + "loss": 0.1947, + "num_input_tokens_seen": 470344384, + "step": 2721 + }, + { + "epoch": 1.0421294523171198, + "loss": 0.16579297184944153, + "loss_ce": 0.01973584294319153, + "loss_iou": 0.9779366254806519, + "loss_num": 0.146484375, + "loss_xval": 0.146484375, + "num_input_tokens_seen": 470344384, + "step": 2721 + }, + { + "epoch": 1.0425124473381846, + "grad_norm": 22.57412908985093, + "learning_rate": 5e-06, + "loss": 0.1527, + "num_input_tokens_seen": 470517656, + "step": 2722 + }, + { + "epoch": 1.0425124473381846, + "loss": 0.1500321626663208, + "loss_ce": 0.020942814648151398, + "loss_iou": 1.0070710182189941, + "loss_num": 0.12890625, + "loss_xval": 0.12890625, + "num_input_tokens_seen": 470517656, + "step": 2722 + }, + { + "epoch": 1.0428954423592494, + "grad_norm": 48.54972079558057, + "learning_rate": 5e-06, + "loss": 0.2444, + "num_input_tokens_seen": 470690352, + "step": 2723 + }, + { + "epoch": 1.0428954423592494, + "loss": 0.25511783361434937, + "loss_ce": 0.02068181149661541, + "loss_iou": 1.0030978918075562, + "loss_num": 0.234375, + "loss_xval": 0.234375, + "num_input_tokens_seen": 470690352, + "step": 2723 + }, + { + "epoch": 1.0432784373803141, + "grad_norm": 35.89554204645707, + "learning_rate": 5e-06, + "loss": 0.175, + "num_input_tokens_seen": 470862976, + "step": 2724 + }, + { + "epoch": 1.0432784373803141, + "loss": 0.16105236113071442, + "loss_ce": 0.02122081257402897, + "loss_iou": 1.000620722770691, + "loss_num": 0.1396484375, + "loss_xval": 0.1396484375, + "num_input_tokens_seen": 470862976, + "step": 2724 + }, + { + "epoch": 1.0436614324013789, + "grad_norm": 39.07446599870957, + "learning_rate": 5e-06, + "loss": 0.2192, + "num_input_tokens_seen": 471035872, + "step": 2725 + }, + { + "epoch": 1.0436614324013789, + "loss": 0.22075331211090088, + "loss_ce": 0.022144924849271774, + "loss_iou": 1.0329551696777344, + "loss_num": 0.1982421875, + "loss_xval": 0.1982421875, + "num_input_tokens_seen": 471035872, + "step": 2725 + }, + { + "epoch": 1.0440444274224434, + "grad_norm": 28.588671946034065, + "learning_rate": 5e-06, + "loss": 0.2162, + "num_input_tokens_seen": 471208616, + "step": 2726 + }, + { + "epoch": 1.0440444274224434, + "loss": 0.20154617726802826, + "loss_ce": 0.019661404192447662, + "loss_iou": 1.0136446952819824, + "loss_num": 0.181640625, + "loss_xval": 0.181640625, + "num_input_tokens_seen": 471208616, + "step": 2726 + }, + { + "epoch": 1.0444274224435082, + "grad_norm": 40.00772693807137, + "learning_rate": 5e-06, + "loss": 0.1639, + "num_input_tokens_seen": 471381824, + "step": 2727 + }, + { + "epoch": 1.0444274224435082, + "loss": 0.16869238018989563, + "loss_ce": 0.02336767688393593, + "loss_iou": 1.0468491315841675, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 471381824, + "step": 2727 + }, + { + "epoch": 1.044810417464573, + "grad_norm": 28.13426340486647, + "learning_rate": 5e-06, + "loss": 0.1684, + "num_input_tokens_seen": 471554920, + "step": 2728 + }, + { + "epoch": 1.044810417464573, + "loss": 0.12310892343521118, + "loss_ce": 0.021546421572566032, + "loss_iou": 1.0003392696380615, + "loss_num": 0.1015625, + "loss_xval": 0.1015625, + "num_input_tokens_seen": 471554920, + "step": 2728 + }, + { + "epoch": 1.0451934124856377, + "grad_norm": 29.710748314738474, + "learning_rate": 5e-06, + "loss": 0.1646, + "num_input_tokens_seen": 471728248, + "step": 2729 + }, + { + "epoch": 1.0451934124856377, + "loss": 0.14808785915374756, + "loss_ce": 0.023087866604328156, + "loss_iou": 1.005138874053955, + "loss_num": 0.125, + "loss_xval": 0.125, + "num_input_tokens_seen": 471728248, + "step": 2729 + }, + { + "epoch": 1.0455764075067024, + "grad_norm": 39.41039975761352, + "learning_rate": 5e-06, + "loss": 0.177, + "num_input_tokens_seen": 471901096, + "step": 2730 + }, + { + "epoch": 1.0455764075067024, + "loss": 0.1642495095729828, + "loss_ce": 0.019199464470148087, + "loss_iou": 1.1425344944000244, + "loss_num": 0.1455078125, + "loss_xval": 0.1455078125, + "num_input_tokens_seen": 471901096, + "step": 2730 + }, + { + "epoch": 1.0459594025277672, + "grad_norm": 79.7902468080741, + "learning_rate": 5e-06, + "loss": 0.2167, + "num_input_tokens_seen": 472074096, + "step": 2731 + }, + { + "epoch": 1.0459594025277672, + "loss": 0.2294425517320633, + "loss_ce": 0.023570969700813293, + "loss_iou": 1.0148272514343262, + "loss_num": 0.2060546875, + "loss_xval": 0.2060546875, + "num_input_tokens_seen": 472074096, + "step": 2731 + }, + { + "epoch": 1.046342397548832, + "grad_norm": 27.929160338654125, + "learning_rate": 5e-06, + "loss": 0.2061, + "num_input_tokens_seen": 472246752, + "step": 2732 + }, + { + "epoch": 1.046342397548832, + "loss": 0.22269487380981445, + "loss_ce": 0.01993608847260475, + "loss_iou": 1.0086251497268677, + "loss_num": 0.203125, + "loss_xval": 0.203125, + "num_input_tokens_seen": 472246752, + "step": 2732 + }, + { + "epoch": 1.0467253925698965, + "grad_norm": 66.44112395580295, + "learning_rate": 5e-06, + "loss": 0.2601, + "num_input_tokens_seen": 472419664, + "step": 2733 + }, + { + "epoch": 1.0467253925698965, + "loss": 0.3042086362838745, + "loss_ce": 0.02039516344666481, + "loss_iou": 1.005509376525879, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 472419664, + "step": 2733 + }, + { + "epoch": 1.0471083875909613, + "grad_norm": 39.12380861449442, + "learning_rate": 5e-06, + "loss": 0.2179, + "num_input_tokens_seen": 472592408, + "step": 2734 + }, + { + "epoch": 1.0471083875909613, + "loss": 0.16589589416980743, + "loss_ce": 0.021212061867117882, + "loss_iou": 0.8990572094917297, + "loss_num": 0.14453125, + "loss_xval": 0.14453125, + "num_input_tokens_seen": 472592408, + "step": 2734 + }, + { + "epoch": 1.047491382612026, + "grad_norm": 87.35692921222793, + "learning_rate": 5e-06, + "loss": 0.3555, + "num_input_tokens_seen": 472765632, + "step": 2735 + }, + { + "epoch": 1.047491382612026, + "loss": 0.33498415350914, + "loss_ce": 0.021385516971349716, + "loss_iou": 1.032576560974121, + "loss_num": 0.314453125, + "loss_xval": 0.314453125, + "num_input_tokens_seen": 472765632, + "step": 2735 + }, + { + "epoch": 1.0478743776330908, + "grad_norm": 35.16914924326974, + "learning_rate": 5e-06, + "loss": 0.2094, + "num_input_tokens_seen": 472938440, + "step": 2736 + }, + { + "epoch": 1.0478743776330908, + "loss": 0.19124934077262878, + "loss_ce": 0.019740547984838486, + "loss_iou": 1.0017101764678955, + "loss_num": 0.171875, + "loss_xval": 0.171875, + "num_input_tokens_seen": 472938440, + "step": 2736 + }, + { + "epoch": 1.0482573726541555, + "grad_norm": 26.90994084526525, + "learning_rate": 5e-06, + "loss": 0.2423, + "num_input_tokens_seen": 473111736, + "step": 2737 + }, + { + "epoch": 1.0482573726541555, + "loss": 0.24850554764270782, + "loss_ce": 0.020356133580207825, + "loss_iou": 1.0285611152648926, + "loss_num": 0.228515625, + "loss_xval": 0.228515625, + "num_input_tokens_seen": 473111736, + "step": 2737 + }, + { + "epoch": 1.0486403676752203, + "grad_norm": 26.95539026300767, + "learning_rate": 5e-06, + "loss": 0.212, + "num_input_tokens_seen": 473284680, + "step": 2738 + }, + { + "epoch": 1.0486403676752203, + "loss": 0.1881379336118698, + "loss_ce": 0.019558828324079514, + "loss_iou": 1.0077383518218994, + "loss_num": 0.1689453125, + "loss_xval": 0.1689453125, + "num_input_tokens_seen": 473284680, + "step": 2738 + }, + { + "epoch": 1.049023362696285, + "grad_norm": 57.78363821465061, + "learning_rate": 5e-06, + "loss": 0.272, + "num_input_tokens_seen": 473457608, + "step": 2739 + }, + { + "epoch": 1.049023362696285, + "loss": 0.23863255977630615, + "loss_ce": 0.021103277802467346, + "loss_iou": 1.0214968919754028, + "loss_num": 0.2177734375, + "loss_xval": 0.2177734375, + "num_input_tokens_seen": 473457608, + "step": 2739 + }, + { + "epoch": 1.0494063577173496, + "grad_norm": 48.252095532275206, + "learning_rate": 5e-06, + "loss": 0.2814, + "num_input_tokens_seen": 473630624, + "step": 2740 + }, + { + "epoch": 1.0494063577173496, + "loss": 0.27010446786880493, + "loss_ce": 0.02132517099380493, + "loss_iou": 1.046252965927124, + "loss_num": 0.2490234375, + "loss_xval": 0.2490234375, + "num_input_tokens_seen": 473630624, + "step": 2740 + }, + { + "epoch": 1.0497893527384143, + "grad_norm": 48.47258890101169, + "learning_rate": 5e-06, + "loss": 0.2966, + "num_input_tokens_seen": 473803488, + "step": 2741 + }, + { + "epoch": 1.0497893527384143, + "loss": 0.28845489025115967, + "loss_ce": 0.021242991089820862, + "loss_iou": 1.019432783126831, + "loss_num": 0.267578125, + "loss_xval": 0.267578125, + "num_input_tokens_seen": 473803488, + "step": 2741 + }, + { + "epoch": 1.050172347759479, + "grad_norm": 34.68729107241668, + "learning_rate": 5e-06, + "loss": 0.2335, + "num_input_tokens_seen": 473976120, + "step": 2742 + }, + { + "epoch": 1.050172347759479, + "loss": 0.26116693019866943, + "loss_ce": 0.021237719804048538, + "loss_iou": 1.1353564262390137, + "loss_num": 0.240234375, + "loss_xval": 0.240234375, + "num_input_tokens_seen": 473976120, + "step": 2742 + }, + { + "epoch": 1.0505553427805439, + "grad_norm": 91.08869170976169, + "learning_rate": 5e-06, + "loss": 0.3836, + "num_input_tokens_seen": 474149240, + "step": 2743 + }, + { + "epoch": 1.0505553427805439, + "loss": 0.3859593868255615, + "loss_ce": 0.021701589226722717, + "loss_iou": 1.0187512636184692, + "loss_num": 0.36328125, + "loss_xval": 0.36328125, + "num_input_tokens_seen": 474149240, + "step": 2743 + }, + { + "epoch": 1.0509383378016086, + "grad_norm": 37.43984099159935, + "learning_rate": 5e-06, + "loss": 0.2488, + "num_input_tokens_seen": 474322184, + "step": 2744 + }, + { + "epoch": 1.0509383378016086, + "loss": 0.2325676679611206, + "loss_ce": 0.019677046686410904, + "loss_iou": 1.0216319561004639, + "loss_num": 0.212890625, + "loss_xval": 0.212890625, + "num_input_tokens_seen": 474322184, + "step": 2744 + }, + { + "epoch": 1.0513213328226734, + "grad_norm": 96.43731864902831, + "learning_rate": 5e-06, + "loss": 0.5828, + "num_input_tokens_seen": 474495384, + "step": 2745 + }, + { + "epoch": 1.0513213328226734, + "loss": 0.6128767728805542, + "loss_ce": 0.021079927682876587, + "loss_iou": 1.0349185466766357, + "loss_num": 0.59375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 474495384, + "step": 2745 + }, + { + "epoch": 1.0517043278437381, + "grad_norm": 33.287081245733056, + "learning_rate": 5e-06, + "loss": 0.1964, + "num_input_tokens_seen": 474668184, + "step": 2746 + }, + { + "epoch": 1.0517043278437381, + "loss": 0.22697071731090546, + "loss_ce": 0.019268084317445755, + "loss_iou": 1.056471347808838, + "loss_num": 0.2080078125, + "loss_xval": 0.2080078125, + "num_input_tokens_seen": 474668184, + "step": 2746 + }, + { + "epoch": 1.0520873228648027, + "grad_norm": 98.93006472064867, + "learning_rate": 5e-06, + "loss": 0.6221, + "num_input_tokens_seen": 474841064, + "step": 2747 + }, + { + "epoch": 1.0520873228648027, + "loss": 0.6143052577972412, + "loss_ce": 0.01860210858285427, + "loss_iou": 1.0286753177642822, + "loss_num": 0.59375, + "loss_xval": 0.59375, + "num_input_tokens_seen": 474841064, + "step": 2747 + }, + { + "epoch": 1.0524703178858674, + "grad_norm": 32.246487920245904, + "learning_rate": 5e-06, + "loss": 0.2712, + "num_input_tokens_seen": 475014160, + "step": 2748 + }, + { + "epoch": 1.0524703178858674, + "loss": 0.30843839049339294, + "loss_ce": 0.023648329079151154, + "loss_iou": 1.0537768602371216, + "loss_num": 0.28515625, + "loss_xval": 0.28515625, + "num_input_tokens_seen": 475014160, + "step": 2748 + }, + { + "epoch": 1.0528533129069322, + "grad_norm": 36.094590295797396, + "learning_rate": 5e-06, + "loss": 0.2888, + "num_input_tokens_seen": 475186936, + "step": 2749 + }, + { + "epoch": 1.0528533129069322, + "loss": 0.3027374744415283, + "loss_ce": 0.019778510555624962, + "loss_iou": 1.0816279649734497, + "loss_num": 0.283203125, + "loss_xval": 0.283203125, + "num_input_tokens_seen": 475186936, + "step": 2749 + }, + { + "epoch": 1.053236307927997, + "grad_norm": 31.006086293118372, + "learning_rate": 5e-06, + "loss": 0.2068, + "num_input_tokens_seen": 475359608, + "step": 2750 + }, + { + "epoch": 1.053236307927997, + "eval_websight_new_CIoU": 0.9340087175369263, + "eval_websight_new_GIoU": 0.9337825775146484, + "eval_websight_new_IoU": 0.9342544078826904, + "eval_websight_new_MAE_all": 0.006378704681992531, + "eval_websight_new_MAE_h": 0.005829754751175642, + "eval_websight_new_MAE_w": 0.007576425559818745, + "eval_websight_new_MAE_x": 0.00746025238186121, + "eval_websight_new_MAE_y": 0.004648384638130665, + "eval_websight_new_NUM_probability": 6.985414438531734e-05, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.10419899225234985, + "eval_websight_new_loss_ce": 0.04258443042635918, + "eval_websight_new_loss_iou": 1.0004280805587769, + "eval_websight_new_loss_num": 0.06005859375, + "eval_websight_new_loss_xval": 0.06005859375, + "eval_websight_new_runtime": 55.01, + "eval_websight_new_samples_per_second": 0.909, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 475359608, + "step": 2750 + }, + { + "epoch": 1.053236307927997, + "eval_seeclick_CIoU": 0.7091011106967926, + "eval_seeclick_GIoU": 0.715798556804657, + "eval_seeclick_IoU": 0.736909419298172, + "eval_seeclick_MAE_all": 0.04782724380493164, + "eval_seeclick_MAE_h": 0.025104526430368423, + "eval_seeclick_MAE_w": 0.07058498449623585, + "eval_seeclick_MAE_x": 0.06600936688482761, + "eval_seeclick_MAE_y": 0.029610102996230125, + "eval_seeclick_NUM_probability": 9.428920384380035e-05, + "eval_seeclick_inside_bbox": 0.8975694477558136, + "eval_seeclick_loss": 0.3992612361907959, + "eval_seeclick_loss_ce": 0.03321938402950764, + "eval_seeclick_loss_iou": 1.1248716711997986, + "eval_seeclick_loss_num": 0.3755340576171875, + "eval_seeclick_loss_xval": 0.3755340576171875, + "eval_seeclick_runtime": 89.4809, + "eval_seeclick_samples_per_second": 0.559, + "eval_seeclick_steps_per_second": 0.022, + "num_input_tokens_seen": 475359608, + "step": 2750 + }, + { + "epoch": 1.053236307927997, + "eval_icons_CIoU": 0.8451197743415833, + "eval_icons_GIoU": 0.8467774093151093, + "eval_icons_IoU": 0.8570410311222076, + "eval_icons_MAE_all": 0.0221947580575943, + "eval_icons_MAE_h": 0.013928203843533993, + "eval_icons_MAE_w": 0.03198568522930145, + "eval_icons_MAE_x": 0.03160202130675316, + "eval_icons_MAE_y": 0.011263125110417604, + "eval_icons_NUM_probability": 6.373344513121992e-05, + "eval_icons_inside_bbox": 0.9288194477558136, + "eval_icons_loss": 0.16468946635723114, + "eval_icons_loss_ce": 0.028911873698234558, + "eval_icons_loss_iou": 1.045575201511383, + "eval_icons_loss_num": 0.122589111328125, + "eval_icons_loss_xval": 0.122589111328125, + "eval_icons_runtime": 84.3458, + "eval_icons_samples_per_second": 0.593, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 475359608, + "step": 2750 + }, + { + "epoch": 1.0536193029490617, + "grad_norm": 20.972395160202332, + "learning_rate": 5e-06, + "loss": 4.9244, + "num_input_tokens_seen": 475532696, + "step": 2751 + }, + { + "epoch": 1.0536193029490617, + "loss": 4.94212532043457, + "loss_ce": 0.7298550605773926, + "loss_iou": 1.8575999736785889, + "loss_num": 0.099609375, + "loss_xval": 4.212270259857178, + "num_input_tokens_seen": 475532696, + "step": 2751 + }, + { + "epoch": 1.0540022979701265, + "grad_norm": 22.579961359595984, + "learning_rate": 5e-06, + "loss": 4.8401, + "num_input_tokens_seen": 475706144, + "step": 2752 + }, + { + "epoch": 1.0540022979701265, + "loss": 4.863560199737549, + "loss_ce": 0.7389425039291382, + "loss_iou": 1.8568644523620605, + "loss_num": 0.08203125, + "loss_xval": 4.124617576599121, + "num_input_tokens_seen": 475706144, + "step": 2752 + }, + { + "epoch": 1.0543852929911912, + "grad_norm": 38.90659473774426, + "learning_rate": 5e-06, + "loss": 4.5824, + "num_input_tokens_seen": 475878984, + "step": 2753 + }, + { + "epoch": 1.0543852929911912, + "loss": 4.571383476257324, + "loss_ce": 0.713236391544342, + "loss_iou": 1.7164270877838135, + "loss_num": 0.0849609375, + "loss_xval": 3.858147144317627, + "num_input_tokens_seen": 475878984, + "step": 2753 + }, + { + "epoch": 1.0547682880122558, + "grad_norm": 51.251617120540054, + "learning_rate": 5e-06, + "loss": 4.74, + "num_input_tokens_seen": 476051824, + "step": 2754 + }, + { + "epoch": 1.0547682880122558, + "loss": 4.697440147399902, + "loss_ce": 0.6825171709060669, + "loss_iou": 1.7874908447265625, + "loss_num": 0.087890625, + "loss_xval": 4.014923095703125, + "num_input_tokens_seen": 476051824, + "step": 2754 + }, + { + "epoch": 1.0551512830333205, + "grad_norm": 86.9999704302525, + "learning_rate": 5e-06, + "loss": 4.2924, + "num_input_tokens_seen": 476224392, + "step": 2755 + }, + { + "epoch": 1.0551512830333205, + "loss": 4.226644992828369, + "loss_ce": 0.586954653263092, + "loss_iou": 1.6256312131881714, + "loss_num": 0.07763671875, + "loss_xval": 3.6396901607513428, + "num_input_tokens_seen": 476224392, + "step": 2755 + }, + { + "epoch": 1.0555342780543853, + "grad_norm": 89.00515259187776, + "learning_rate": 5e-06, + "loss": 4.7428, + "num_input_tokens_seen": 476397344, + "step": 2756 + }, + { + "epoch": 1.0555342780543853, + "loss": 4.77473258972168, + "loss_ce": 0.7308616042137146, + "loss_iou": 1.7889032363891602, + "loss_num": 0.09326171875, + "loss_xval": 4.04387092590332, + "num_input_tokens_seen": 476397344, + "step": 2756 + }, + { + "epoch": 1.05591727307545, + "grad_norm": 185.9105092239929, + "learning_rate": 5e-06, + "loss": 4.0115, + "num_input_tokens_seen": 476566464, + "step": 2757 + }, + { + "epoch": 1.05591727307545, + "loss": 3.987050771713257, + "loss_ce": 0.747867226600647, + "loss_iou": 1.4663325548171997, + "loss_num": 0.061279296875, + "loss_xval": 3.2391836643218994, + "num_input_tokens_seen": 476566464, + "step": 2757 + }, + { + "epoch": 1.0563002680965148, + "grad_norm": 94.49240018056493, + "learning_rate": 5e-06, + "loss": 4.7167, + "num_input_tokens_seen": 476739216, + "step": 2758 + }, + { + "epoch": 1.0563002680965148, + "loss": 4.594268798828125, + "loss_ce": 0.6055147647857666, + "loss_iou": 1.3898848295211792, + "loss_num": 0.2421875, + "loss_xval": 3.9887540340423584, + "num_input_tokens_seen": 476739216, + "step": 2758 + }, + { + "epoch": 1.0566832631175795, + "grad_norm": 237.09172010163056, + "learning_rate": 5e-06, + "loss": 3.9164, + "num_input_tokens_seen": 476912080, + "step": 2759 + }, + { + "epoch": 1.0566832631175795, + "loss": 3.877072334289551, + "loss_ce": 0.6420649886131287, + "loss_iou": 1.0923571586608887, + "loss_num": 0.2099609375, + "loss_xval": 3.2350072860717773, + "num_input_tokens_seen": 476912080, + "step": 2759 + }, + { + "epoch": 1.0570662581386443, + "grad_norm": 150.1091672828241, + "learning_rate": 5e-06, + "loss": 3.3728, + "num_input_tokens_seen": 477085408, + "step": 2760 + }, + { + "epoch": 1.0570662581386443, + "loss": 3.46321177482605, + "loss_ce": 0.6880149841308594, + "loss_iou": 1.1341803073883057, + "loss_num": 0.1015625, + "loss_xval": 2.7751965522766113, + "num_input_tokens_seen": 477085408, + "step": 2760 + }, + { + "epoch": 1.0574492531597088, + "grad_norm": 285.7596434576294, + "learning_rate": 5e-06, + "loss": 3.9921, + "num_input_tokens_seen": 477257920, + "step": 2761 + }, + { + "epoch": 1.0574492531597088, + "loss": 3.7242329120635986, + "loss_ce": 0.5976026654243469, + "loss_iou": 1.0127780437469482, + "loss_num": 0.220703125, + "loss_xval": 3.1266303062438965, + "num_input_tokens_seen": 477257920, + "step": 2761 + }, + { + "epoch": 1.0578322481807736, + "grad_norm": 133.89055976999657, + "learning_rate": 5e-06, + "loss": 3.3863, + "num_input_tokens_seen": 477431136, + "step": 2762 + }, + { + "epoch": 1.0578322481807736, + "loss": 3.3770768642425537, + "loss_ce": 0.7506231069564819, + "loss_iou": 1.2131903171539307, + "loss_num": 0.0400390625, + "loss_xval": 2.6264538764953613, + "num_input_tokens_seen": 477431136, + "step": 2762 + }, + { + "epoch": 1.0582152432018384, + "grad_norm": 79.24134849478921, + "learning_rate": 5e-06, + "loss": 3.0953, + "num_input_tokens_seen": 477604256, + "step": 2763 + }, + { + "epoch": 1.0582152432018384, + "loss": 2.9831225872039795, + "loss_ce": 0.6824136972427368, + "loss_iou": 0.9375858902931213, + "loss_num": 0.0849609375, + "loss_xval": 2.3007090091705322, + "num_input_tokens_seen": 477604256, + "step": 2763 + }, + { + "epoch": 1.058598238222903, + "grad_norm": 92.58299551211535, + "learning_rate": 5e-06, + "loss": 2.6359, + "num_input_tokens_seen": 477776768, + "step": 2764 + }, + { + "epoch": 1.058598238222903, + "loss": 2.6042308807373047, + "loss_ce": 0.6946655511856079, + "loss_iou": 0.8491918444633484, + "loss_num": 0.042236328125, + "loss_xval": 1.9095653295516968, + "num_input_tokens_seen": 477776768, + "step": 2764 + }, + { + "epoch": 1.0589812332439679, + "grad_norm": 216.32596930989925, + "learning_rate": 5e-06, + "loss": 3.9107, + "num_input_tokens_seen": 477949632, + "step": 2765 + }, + { + "epoch": 1.0589812332439679, + "loss": 3.8467416763305664, + "loss_ce": 0.7250851392745972, + "loss_iou": 1.4247809648513794, + "loss_num": 0.054443359375, + "loss_xval": 3.121656656265259, + "num_input_tokens_seen": 477949632, + "step": 2765 + }, + { + "epoch": 1.0593642282650326, + "grad_norm": 39.851080910056396, + "learning_rate": 5e-06, + "loss": 2.8466, + "num_input_tokens_seen": 478121952, + "step": 2766 + }, + { + "epoch": 1.0593642282650326, + "loss": 3.068800687789917, + "loss_ce": 0.5916388034820557, + "loss_iou": 1.0230047702789307, + "loss_num": 0.08642578125, + "loss_xval": 2.4771618843078613, + "num_input_tokens_seen": 478121952, + "step": 2766 + }, + { + "epoch": 1.0597472232860974, + "grad_norm": 65.348712385135, + "learning_rate": 5e-06, + "loss": 2.6881, + "num_input_tokens_seen": 478294696, + "step": 2767 + }, + { + "epoch": 1.0597472232860974, + "loss": 2.7627148628234863, + "loss_ce": 0.7295560836791992, + "loss_iou": 0.8837059736251831, + "loss_num": 0.05322265625, + "loss_xval": 2.033158779144287, + "num_input_tokens_seen": 478294696, + "step": 2767 + }, + { + "epoch": 1.060130218307162, + "grad_norm": 167.5137190983422, + "learning_rate": 5e-06, + "loss": 2.9012, + "num_input_tokens_seen": 478467216, + "step": 2768 + }, + { + "epoch": 1.060130218307162, + "loss": 2.764270544052124, + "loss_ce": 0.6801129579544067, + "loss_iou": 0.9556529521942139, + "loss_num": 0.03466796875, + "loss_xval": 2.0841574668884277, + "num_input_tokens_seen": 478467216, + "step": 2768 + }, + { + "epoch": 1.0605132133282267, + "grad_norm": 97.4310789988781, + "learning_rate": 5e-06, + "loss": 3.3915, + "num_input_tokens_seen": 478640048, + "step": 2769 + }, + { + "epoch": 1.0605132133282267, + "loss": 3.468452215194702, + "loss_ce": 0.6962400674819946, + "loss_iou": 1.134763240814209, + "loss_num": 0.1005859375, + "loss_xval": 2.772212028503418, + "num_input_tokens_seen": 478640048, + "step": 2769 + }, + { + "epoch": 1.0608962083492914, + "grad_norm": 82.34273348062686, + "learning_rate": 5e-06, + "loss": 3.1041, + "num_input_tokens_seen": 478813136, + "step": 2770 + }, + { + "epoch": 1.0608962083492914, + "loss": 3.1314048767089844, + "loss_ce": 0.7152508497238159, + "loss_iou": 0.9957966804504395, + "loss_num": 0.0849609375, + "loss_xval": 2.416153907775879, + "num_input_tokens_seen": 478813136, + "step": 2770 + }, + { + "epoch": 1.0612792033703562, + "grad_norm": 60.91499911640653, + "learning_rate": 5e-06, + "loss": 2.7954, + "num_input_tokens_seen": 478986136, + "step": 2771 + }, + { + "epoch": 1.0612792033703562, + "loss": 2.714437961578369, + "loss_ce": 0.7429600954055786, + "loss_iou": 0.8766692280769348, + "loss_num": 0.043701171875, + "loss_xval": 1.9714781045913696, + "num_input_tokens_seen": 478986136, + "step": 2771 + }, + { + "epoch": 1.061662198391421, + "grad_norm": 220.12619331832371, + "learning_rate": 5e-06, + "loss": 2.967, + "num_input_tokens_seen": 479158960, + "step": 2772 + }, + { + "epoch": 1.061662198391421, + "loss": 3.0788824558258057, + "loss_ce": 0.6809020638465881, + "loss_iou": 1.1042025089263916, + "loss_num": 0.037841796875, + "loss_xval": 2.397980213165283, + "num_input_tokens_seen": 479158960, + "step": 2772 + }, + { + "epoch": 1.0620451934124857, + "grad_norm": 147.62426189121615, + "learning_rate": 5e-06, + "loss": 3.4391, + "num_input_tokens_seen": 479331592, + "step": 2773 + }, + { + "epoch": 1.0620451934124857, + "loss": 3.3189139366149902, + "loss_ce": 0.695880115032196, + "loss_iou": 1.0483334064483643, + "loss_num": 0.10498046875, + "loss_xval": 2.6230340003967285, + "num_input_tokens_seen": 479331592, + "step": 2773 + }, + { + "epoch": 1.0624281884335505, + "grad_norm": 81.79723496561954, + "learning_rate": 5e-06, + "loss": 3.1469, + "num_input_tokens_seen": 479504992, + "step": 2774 + }, + { + "epoch": 1.0624281884335505, + "loss": 3.269740104675293, + "loss_ce": 0.7365933656692505, + "loss_iou": 0.9709189534187317, + "loss_num": 0.1181640625, + "loss_xval": 2.533146381378174, + "num_input_tokens_seen": 479504992, + "step": 2774 + }, + { + "epoch": 1.062811183454615, + "grad_norm": 77.8703028378014, + "learning_rate": 5e-06, + "loss": 2.7768, + "num_input_tokens_seen": 479677592, + "step": 2775 + }, + { + "epoch": 1.062811183454615, + "loss": 2.8037338256835938, + "loss_ce": 0.69895339012146, + "loss_iou": 0.9126807451248169, + "loss_num": 0.055908203125, + "loss_xval": 2.104780435562134, + "num_input_tokens_seen": 479677592, + "step": 2775 + }, + { + "epoch": 1.0631941784756798, + "grad_norm": 125.95945099555713, + "learning_rate": 5e-06, + "loss": 2.9744, + "num_input_tokens_seen": 479850744, + "step": 2776 + }, + { + "epoch": 1.0631941784756798, + "loss": 2.8986823558807373, + "loss_ce": 0.6944018602371216, + "loss_iou": 0.9481484889984131, + "loss_num": 0.0615234375, + "loss_xval": 2.204280376434326, + "num_input_tokens_seen": 479850744, + "step": 2776 + }, + { + "epoch": 1.0635771734967445, + "grad_norm": 70.51303175752362, + "learning_rate": 5e-06, + "loss": 2.6296, + "num_input_tokens_seen": 480023552, + "step": 2777 + }, + { + "epoch": 1.0635771734967445, + "loss": 2.730379581451416, + "loss_ce": 0.7116711139678955, + "loss_iou": 0.8874670267105103, + "loss_num": 0.048828125, + "loss_xval": 2.0187084674835205, + "num_input_tokens_seen": 480023552, + "step": 2777 + }, + { + "epoch": 1.0639601685178093, + "grad_norm": 42.99076277028715, + "learning_rate": 5e-06, + "loss": 2.5426, + "num_input_tokens_seen": 480196704, + "step": 2778 + }, + { + "epoch": 1.0639601685178093, + "loss": 2.469575881958008, + "loss_ce": 0.718497097492218, + "loss_iou": 0.8037315607070923, + "loss_num": 0.0286865234375, + "loss_xval": 1.7510788440704346, + "num_input_tokens_seen": 480196704, + "step": 2778 + }, + { + "epoch": 1.064343163538874, + "grad_norm": 39.271392985638734, + "learning_rate": 5e-06, + "loss": 2.2897, + "num_input_tokens_seen": 480369528, + "step": 2779 + }, + { + "epoch": 1.064343163538874, + "loss": 2.2908480167388916, + "loss_ce": 0.7359123229980469, + "loss_iou": 0.7067586183547974, + "loss_num": 0.0283203125, + "loss_xval": 1.5549356937408447, + "num_input_tokens_seen": 480369528, + "step": 2779 + }, + { + "epoch": 1.0647261585599388, + "grad_norm": 131.43859578584966, + "learning_rate": 5e-06, + "loss": 2.5243, + "num_input_tokens_seen": 480542824, + "step": 2780 + }, + { + "epoch": 1.0647261585599388, + "loss": 2.5856826305389404, + "loss_ce": 0.6940747499465942, + "loss_iou": 0.8606293201446533, + "loss_num": 0.033935546875, + "loss_xval": 1.8916077613830566, + "num_input_tokens_seen": 480542824, + "step": 2780 + }, + { + "epoch": 1.0651091535810036, + "grad_norm": 87.11997550244749, + "learning_rate": 5e-06, + "loss": 3.6927, + "num_input_tokens_seen": 480715432, + "step": 2781 + }, + { + "epoch": 1.0651091535810036, + "loss": 3.9174413681030273, + "loss_ce": 0.6469007730484009, + "loss_iou": 1.4599162340164185, + "loss_num": 0.0703125, + "loss_xval": 3.270540475845337, + "num_input_tokens_seen": 480715432, + "step": 2781 + }, + { + "epoch": 1.065492148602068, + "grad_norm": 108.72209188483511, + "learning_rate": 5e-06, + "loss": 2.7006, + "num_input_tokens_seen": 480888360, + "step": 2782 + }, + { + "epoch": 1.065492148602068, + "loss": 2.7656283378601074, + "loss_ce": 0.7435428500175476, + "loss_iou": 0.8747512698173523, + "loss_num": 0.054443359375, + "loss_xval": 2.022085666656494, + "num_input_tokens_seen": 480888360, + "step": 2782 + }, + { + "epoch": 1.0658751436231328, + "grad_norm": 246.58809910604464, + "learning_rate": 5e-06, + "loss": 2.5774, + "num_input_tokens_seen": 481061448, + "step": 2783 + }, + { + "epoch": 1.0658751436231328, + "loss": 2.5640876293182373, + "loss_ce": 0.6850498914718628, + "loss_iou": 0.8536118865013123, + "loss_num": 0.034423828125, + "loss_xval": 1.8790377378463745, + "num_input_tokens_seen": 481061448, + "step": 2783 + }, + { + "epoch": 1.0662581386441976, + "grad_norm": 100.41665743419827, + "learning_rate": 5e-06, + "loss": 2.94, + "num_input_tokens_seen": 481234256, + "step": 2784 + }, + { + "epoch": 1.0662581386441976, + "loss": 2.8582053184509277, + "loss_ce": 0.6741833686828613, + "loss_iou": 0.9432071447372437, + "loss_num": 0.0595703125, + "loss_xval": 2.1840219497680664, + "num_input_tokens_seen": 481234256, + "step": 2784 + }, + { + "epoch": 1.0666411336652624, + "grad_norm": 130.65958261450226, + "learning_rate": 5e-06, + "loss": 2.6775, + "num_input_tokens_seen": 481407112, + "step": 2785 + }, + { + "epoch": 1.0666411336652624, + "loss": 2.661080837249756, + "loss_ce": 0.7737404108047485, + "loss_iou": 0.8334407210350037, + "loss_num": 0.0439453125, + "loss_xval": 1.8873404264450073, + "num_input_tokens_seen": 481407112, + "step": 2785 + }, + { + "epoch": 1.0670241286863271, + "grad_norm": 51.86095039881347, + "learning_rate": 5e-06, + "loss": 2.6063, + "num_input_tokens_seen": 481580584, + "step": 2786 + }, + { + "epoch": 1.0670241286863271, + "loss": 2.6839590072631836, + "loss_ce": 0.6764041185379028, + "loss_iou": 0.9079523086547852, + "loss_num": 0.038330078125, + "loss_xval": 2.0075550079345703, + "num_input_tokens_seen": 481580584, + "step": 2786 + }, + { + "epoch": 1.0674071237073919, + "grad_norm": 173.088161838326, + "learning_rate": 5e-06, + "loss": 2.3757, + "num_input_tokens_seen": 481753856, + "step": 2787 + }, + { + "epoch": 1.0674071237073919, + "loss": 2.3286516666412354, + "loss_ce": 0.6821632981300354, + "loss_iou": 0.7492085695266724, + "loss_num": 0.0296630859375, + "loss_xval": 1.6464884281158447, + "num_input_tokens_seen": 481753856, + "step": 2787 + }, + { + "epoch": 1.0677901187284564, + "grad_norm": 82.01832109897481, + "learning_rate": 5e-06, + "loss": 2.5041, + "num_input_tokens_seen": 481926768, + "step": 2788 + }, + { + "epoch": 1.0677901187284564, + "loss": 2.5003244876861572, + "loss_ce": 0.6895257234573364, + "loss_iou": 0.8330117464065552, + "loss_num": 0.0289306640625, + "loss_xval": 1.8107988834381104, + "num_input_tokens_seen": 481926768, + "step": 2788 + }, + { + "epoch": 1.0681731137495212, + "grad_norm": 289.3937243822588, + "learning_rate": 5e-06, + "loss": 2.7773, + "num_input_tokens_seen": 482099640, + "step": 2789 + }, + { + "epoch": 1.0681731137495212, + "loss": 2.9110474586486816, + "loss_ce": 0.6996558308601379, + "loss_iou": 1.0353833436965942, + "loss_num": 0.028076171875, + "loss_xval": 2.2113916873931885, + "num_input_tokens_seen": 482099640, + "step": 2789 + }, + { + "epoch": 1.068556108770586, + "grad_norm": 87.94871422248494, + "learning_rate": 5e-06, + "loss": 3.0462, + "num_input_tokens_seen": 482272576, + "step": 2790 + }, + { + "epoch": 1.068556108770586, + "loss": 3.0079867839813232, + "loss_ce": 0.7180944681167603, + "loss_iou": 0.9775877594947815, + "loss_num": 0.06689453125, + "loss_xval": 2.2898924350738525, + "num_input_tokens_seen": 482272576, + "step": 2790 + }, + { + "epoch": 1.0689391037916507, + "grad_norm": 67.37188580188071, + "learning_rate": 5e-06, + "loss": 2.6366, + "num_input_tokens_seen": 482445272, + "step": 2791 + }, + { + "epoch": 1.0689391037916507, + "loss": 2.634394645690918, + "loss_ce": 0.6736334562301636, + "loss_iou": 0.865878701210022, + "loss_num": 0.0458984375, + "loss_xval": 1.960761308670044, + "num_input_tokens_seen": 482445272, + "step": 2791 + }, + { + "epoch": 1.0693220988127154, + "grad_norm": 67.52657384742601, + "learning_rate": 5e-06, + "loss": 2.4575, + "num_input_tokens_seen": 482618344, + "step": 2792 + }, + { + "epoch": 1.0693220988127154, + "loss": 2.4036059379577637, + "loss_ce": 0.7781287431716919, + "loss_iou": 0.7347356081008911, + "loss_num": 0.03125, + "loss_xval": 1.6254770755767822, + "num_input_tokens_seen": 482618344, + "step": 2792 + }, + { + "epoch": 1.0697050938337802, + "grad_norm": 87.27920424026138, + "learning_rate": 5e-06, + "loss": 2.3177, + "num_input_tokens_seen": 482791072, + "step": 2793 + }, + { + "epoch": 1.0697050938337802, + "loss": 2.2667553424835205, + "loss_ce": 0.7141283750534058, + "loss_iou": 0.7067334651947021, + "loss_num": 0.02783203125, + "loss_xval": 1.5526270866394043, + "num_input_tokens_seen": 482791072, + "step": 2793 + }, + { + "epoch": 1.070088088854845, + "grad_norm": 192.24234268939296, + "learning_rate": 5e-06, + "loss": 3.2865, + "num_input_tokens_seen": 482964096, + "step": 2794 + }, + { + "epoch": 1.070088088854845, + "loss": 3.2384159564971924, + "loss_ce": 0.6809695959091187, + "loss_iou": 1.1539673805236816, + "loss_num": 0.0498046875, + "loss_xval": 2.5574464797973633, + "num_input_tokens_seen": 482964096, + "step": 2794 + }, + { + "epoch": 1.0704710838759097, + "grad_norm": 154.55315774871912, + "learning_rate": 5e-06, + "loss": 2.5979, + "num_input_tokens_seen": 483137496, + "step": 2795 + }, + { + "epoch": 1.0704710838759097, + "loss": 2.694046974182129, + "loss_ce": 0.7154624462127686, + "loss_iou": 0.898166835308075, + "loss_num": 0.036376953125, + "loss_xval": 1.97858464717865, + "num_input_tokens_seen": 483137496, + "step": 2795 + }, + { + "epoch": 1.0708540788969743, + "grad_norm": 241.487390491254, + "learning_rate": 5e-06, + "loss": 2.8126, + "num_input_tokens_seen": 483310328, + "step": 2796 + }, + { + "epoch": 1.0708540788969743, + "loss": 2.9573071002960205, + "loss_ce": 0.7261373996734619, + "loss_iou": 1.0228114128112793, + "loss_num": 0.037109375, + "loss_xval": 2.2311697006225586, + "num_input_tokens_seen": 483310328, + "step": 2796 + }, + { + "epoch": 1.071237073918039, + "grad_norm": 181.30192210869185, + "learning_rate": 5e-06, + "loss": 2.6225, + "num_input_tokens_seen": 483483296, + "step": 2797 + }, + { + "epoch": 1.071237073918039, + "loss": 2.587357521057129, + "loss_ce": 0.7516204118728638, + "loss_iou": 0.8230810761451721, + "loss_num": 0.037841796875, + "loss_xval": 1.8357373476028442, + "num_input_tokens_seen": 483483296, + "step": 2797 + }, + { + "epoch": 1.0716200689391038, + "grad_norm": 51.07158078303042, + "learning_rate": 5e-06, + "loss": 2.3856, + "num_input_tokens_seen": 483656160, + "step": 2798 + }, + { + "epoch": 1.0716200689391038, + "loss": 2.1889824867248535, + "loss_ce": 0.636174201965332, + "loss_iou": 0.7104403376579285, + "loss_num": 0.0263671875, + "loss_xval": 1.552808165550232, + "num_input_tokens_seen": 483656160, + "step": 2798 + }, + { + "epoch": 1.0720030639601685, + "grad_norm": 47.23474939291006, + "learning_rate": 5e-06, + "loss": 2.1921, + "num_input_tokens_seen": 483828888, + "step": 2799 + }, + { + "epoch": 1.0720030639601685, + "loss": 2.226820230484009, + "loss_ce": 0.6776589751243591, + "loss_iou": 0.715834379196167, + "loss_num": 0.0235595703125, + "loss_xval": 1.549161434173584, + "num_input_tokens_seen": 483828888, + "step": 2799 + }, + { + "epoch": 1.0723860589812333, + "grad_norm": 53.960842263662435, + "learning_rate": 5e-06, + "loss": 2.1868, + "num_input_tokens_seen": 484002144, + "step": 2800 + }, + { + "epoch": 1.0723860589812333, + "loss": 2.277585506439209, + "loss_ce": 0.7320078611373901, + "loss_iou": 0.7191082835197449, + "loss_num": 0.021484375, + "loss_xval": 1.5455774068832397, + "num_input_tokens_seen": 484002144, + "step": 2800 + }, + { + "epoch": 1.072769054002298, + "grad_norm": 263.27582944108934, + "learning_rate": 5e-06, + "loss": 2.1796, + "num_input_tokens_seen": 484174920, + "step": 2801 + }, + { + "epoch": 1.072769054002298, + "loss": 2.3287782669067383, + "loss_ce": 0.6678345203399658, + "loss_iou": 0.7781646847724915, + "loss_num": 0.0208740234375, + "loss_xval": 1.660943627357483, + "num_input_tokens_seen": 484174920, + "step": 2801 + }, + { + "epoch": 1.0731520490233626, + "grad_norm": 117.56310256583214, + "learning_rate": 5e-06, + "loss": 3.2097, + "num_input_tokens_seen": 484348112, + "step": 2802 + }, + { + "epoch": 1.0731520490233626, + "loss": 3.3388285636901855, + "loss_ce": 0.6726955771446228, + "loss_iou": 1.181455135345459, + "loss_num": 0.060546875, + "loss_xval": 2.666132926940918, + "num_input_tokens_seen": 484348112, + "step": 2802 + }, + { + "epoch": 1.0735350440444273, + "grad_norm": 104.58298648616196, + "learning_rate": 5e-06, + "loss": 2.5987, + "num_input_tokens_seen": 484521336, + "step": 2803 + }, + { + "epoch": 1.0735350440444273, + "loss": 2.521456003189087, + "loss_ce": 0.7201911211013794, + "loss_iou": 0.7994971871376038, + "loss_num": 0.04052734375, + "loss_xval": 1.8012648820877075, + "num_input_tokens_seen": 484521336, + "step": 2803 + }, + { + "epoch": 1.073918039065492, + "grad_norm": 96.78154982441428, + "learning_rate": 5e-06, + "loss": 2.4816, + "num_input_tokens_seen": 484694392, + "step": 2804 + }, + { + "epoch": 1.073918039065492, + "loss": 2.4801526069641113, + "loss_ce": 0.7741658091545105, + "loss_iou": 0.778072714805603, + "loss_num": 0.0299072265625, + "loss_xval": 1.705986738204956, + "num_input_tokens_seen": 484694392, + "step": 2804 + }, + { + "epoch": 1.0743010340865569, + "grad_norm": 279.11723943979456, + "learning_rate": 5e-06, + "loss": 2.6919, + "num_input_tokens_seen": 484867720, + "step": 2805 + }, + { + "epoch": 1.0743010340865569, + "loss": 2.7241034507751465, + "loss_ce": 0.7051548361778259, + "loss_iou": 0.9290604591369629, + "loss_num": 0.0322265625, + "loss_xval": 2.018948554992676, + "num_input_tokens_seen": 484867720, + "step": 2805 + }, + { + "epoch": 1.0746840291076216, + "grad_norm": 91.67594166503147, + "learning_rate": 5e-06, + "loss": 3.1007, + "num_input_tokens_seen": 485040416, + "step": 2806 + }, + { + "epoch": 1.0746840291076216, + "loss": 3.09213924407959, + "loss_ce": 0.6244996190071106, + "loss_iou": 1.0971009731292725, + "loss_num": 0.0546875, + "loss_xval": 2.467639446258545, + "num_input_tokens_seen": 485040416, + "step": 2806 + }, + { + "epoch": 1.0750670241286864, + "grad_norm": 85.68762205479932, + "learning_rate": 5e-06, + "loss": 2.4442, + "num_input_tokens_seen": 485212800, + "step": 2807 + }, + { + "epoch": 1.0750670241286864, + "loss": 2.4304544925689697, + "loss_ce": 0.6337364912033081, + "loss_iou": 0.7999093532562256, + "loss_num": 0.039306640625, + "loss_xval": 1.7967181205749512, + "num_input_tokens_seen": 485212800, + "step": 2807 + }, + { + "epoch": 1.0754500191497511, + "grad_norm": 77.01171669214705, + "learning_rate": 5e-06, + "loss": 2.4726, + "num_input_tokens_seen": 485385712, + "step": 2808 + }, + { + "epoch": 1.0754500191497511, + "loss": 2.4367589950561523, + "loss_ce": 0.7392523288726807, + "loss_iou": 0.7764266133308411, + "loss_num": 0.0289306640625, + "loss_xval": 1.6975065469741821, + "num_input_tokens_seen": 485385712, + "step": 2808 + }, + { + "epoch": 1.075833014170816, + "grad_norm": 94.84736549098368, + "learning_rate": 5e-06, + "loss": 2.5519, + "num_input_tokens_seen": 485558496, + "step": 2809 + }, + { + "epoch": 1.075833014170816, + "loss": 2.4850964546203613, + "loss_ce": 0.6541642546653748, + "loss_iou": 0.8250730633735657, + "loss_num": 0.0361328125, + "loss_xval": 1.8309322595596313, + "num_input_tokens_seen": 485558496, + "step": 2809 + }, + { + "epoch": 1.0762160091918804, + "grad_norm": 150.29738868003025, + "learning_rate": 5e-06, + "loss": 2.2246, + "num_input_tokens_seen": 485726712, + "step": 2810 + }, + { + "epoch": 1.0762160091918804, + "loss": 2.1678030490875244, + "loss_ce": 0.7016191482543945, + "loss_iou": 0.6786486506462097, + "loss_num": 0.021728515625, + "loss_xval": 1.4661840200424194, + "num_input_tokens_seen": 485726712, + "step": 2810 + }, + { + "epoch": 1.0765990042129452, + "grad_norm": 177.0528897847532, + "learning_rate": 5e-06, + "loss": 2.7007, + "num_input_tokens_seen": 485899624, + "step": 2811 + }, + { + "epoch": 1.0765990042129452, + "loss": 2.6600353717803955, + "loss_ce": 0.639045774936676, + "loss_iou": 0.9138761162757874, + "loss_num": 0.03857421875, + "loss_xval": 2.020989418029785, + "num_input_tokens_seen": 485899624, + "step": 2811 + }, + { + "epoch": 1.07698199923401, + "grad_norm": 102.07468339681954, + "learning_rate": 5e-06, + "loss": 2.2049, + "num_input_tokens_seen": 486072344, + "step": 2812 + }, + { + "epoch": 1.07698199923401, + "loss": 2.532315731048584, + "loss_ce": 0.6821858286857605, + "loss_iou": 0.8397378325462341, + "loss_num": 0.0341796875, + "loss_xval": 1.8501299619674683, + "num_input_tokens_seen": 486072344, + "step": 2812 + }, + { + "epoch": 1.0773649942550747, + "grad_norm": 131.57059991411325, + "learning_rate": 5e-06, + "loss": 2.3435, + "num_input_tokens_seen": 486245128, + "step": 2813 + }, + { + "epoch": 1.0773649942550747, + "loss": 2.2519588470458984, + "loss_ce": 0.7076823711395264, + "loss_iou": 0.714551568031311, + "loss_num": 0.0230712890625, + "loss_xval": 1.544276475906372, + "num_input_tokens_seen": 486245128, + "step": 2813 + }, + { + "epoch": 1.0777479892761395, + "grad_norm": 278.5990370028417, + "learning_rate": 5e-06, + "loss": 2.7089, + "num_input_tokens_seen": 486417960, + "step": 2814 + }, + { + "epoch": 1.0777479892761395, + "loss": 2.645965576171875, + "loss_ce": 0.6513679027557373, + "loss_iou": 0.9244838953018188, + "loss_num": 0.029052734375, + "loss_xval": 1.9945976734161377, + "num_input_tokens_seen": 486417960, + "step": 2814 + }, + { + "epoch": 1.0781309842972042, + "grad_norm": 54.95344309622083, + "learning_rate": 5e-06, + "loss": 2.3731, + "num_input_tokens_seen": 486590672, + "step": 2815 + }, + { + "epoch": 1.0781309842972042, + "loss": 2.100264549255371, + "loss_ce": 0.6261555552482605, + "loss_iou": 0.6392761468887329, + "loss_num": 0.0390625, + "loss_xval": 1.4356567859649658, + "num_input_tokens_seen": 486590672, + "step": 2815 + }, + { + "epoch": 1.0785139793182688, + "grad_norm": 107.60883534856733, + "learning_rate": 5e-06, + "loss": 2.4516, + "num_input_tokens_seen": 486763376, + "step": 2816 + }, + { + "epoch": 1.0785139793182688, + "loss": 2.4624667167663574, + "loss_ce": 0.7012736797332764, + "loss_iou": 0.7974972128868103, + "loss_num": 0.033203125, + "loss_xval": 1.7611931562423706, + "num_input_tokens_seen": 486763376, + "step": 2816 + }, + { + "epoch": 1.0788969743393335, + "grad_norm": 120.23417852325592, + "learning_rate": 5e-06, + "loss": 2.1123, + "num_input_tokens_seen": 486932832, + "step": 2817 + }, + { + "epoch": 1.0788969743393335, + "loss": 2.0849769115448, + "loss_ce": 0.6629939675331116, + "loss_iou": 6.256191116505706e+25, + "loss_num": 0.0223388671875, + "loss_xval": -9.735556609752802e+33, + "num_input_tokens_seen": 486932832, + "step": 2817 + }, + { + "epoch": 1.0792799693603983, + "grad_norm": 116.6227009118233, + "learning_rate": 5e-06, + "loss": 2.4347, + "num_input_tokens_seen": 487105624, + "step": 2818 + }, + { + "epoch": 1.0792799693603983, + "loss": 2.4521543979644775, + "loss_ce": 0.6253559589385986, + "loss_iou": 0.8615193367004395, + "loss_num": 0.020751953125, + "loss_xval": 1.826798439025879, + "num_input_tokens_seen": 487105624, + "step": 2818 + }, + { + "epoch": 1.079662964381463, + "grad_norm": 149.29655584940656, + "learning_rate": 5e-06, + "loss": 2.1704, + "num_input_tokens_seen": 487278464, + "step": 2819 + }, + { + "epoch": 1.079662964381463, + "loss": 2.266498565673828, + "loss_ce": 0.6545871496200562, + "loss_iou": 0.7549304366111755, + "loss_num": 0.0203857421875, + "loss_xval": 1.611911654472351, + "num_input_tokens_seen": 487278464, + "step": 2819 + }, + { + "epoch": 1.0800459594025278, + "grad_norm": 148.14156207961628, + "learning_rate": 5e-06, + "loss": 2.5219, + "num_input_tokens_seen": 487451344, + "step": 2820 + }, + { + "epoch": 1.0800459594025278, + "loss": 2.676058769226074, + "loss_ce": 0.6396024227142334, + "loss_iou": 0.925424337387085, + "loss_num": 0.037109375, + "loss_xval": 2.03645658493042, + "num_input_tokens_seen": 487451344, + "step": 2820 + }, + { + "epoch": 1.0804289544235925, + "grad_norm": 103.59302016767316, + "learning_rate": 5e-06, + "loss": 2.2078, + "num_input_tokens_seen": 487624448, + "step": 2821 + }, + { + "epoch": 1.0804289544235925, + "loss": 2.300532341003418, + "loss_ce": 0.649788498878479, + "loss_iou": 0.7663967609405518, + "loss_num": 0.0235595703125, + "loss_xval": 1.6507439613342285, + "num_input_tokens_seen": 487624448, + "step": 2821 + }, + { + "epoch": 1.0808119494446573, + "grad_norm": 121.75501520149543, + "learning_rate": 5e-06, + "loss": 2.4232, + "num_input_tokens_seen": 487797296, + "step": 2822 + }, + { + "epoch": 1.0808119494446573, + "loss": 2.333770275115967, + "loss_ce": 0.6665495038032532, + "loss_iou": 0.7725751996040344, + "loss_num": 0.0244140625, + "loss_xval": 1.6672207117080688, + "num_input_tokens_seen": 487797296, + "step": 2822 + }, + { + "epoch": 1.081194944465722, + "grad_norm": 167.1612778930809, + "learning_rate": 5e-06, + "loss": 2.1984, + "num_input_tokens_seen": 487970016, + "step": 2823 + }, + { + "epoch": 1.081194944465722, + "loss": 2.09686279296875, + "loss_ce": 0.6161590814590454, + "loss_iou": 0.6905776262283325, + "loss_num": 0.0198974609375, + "loss_xval": 1.480703592300415, + "num_input_tokens_seen": 487970016, + "step": 2823 + }, + { + "epoch": 1.0815779394867866, + "grad_norm": 92.4429662904512, + "learning_rate": 5e-06, + "loss": 2.616, + "num_input_tokens_seen": 488143296, + "step": 2824 + }, + { + "epoch": 1.0815779394867866, + "loss": 2.497222900390625, + "loss_ce": 0.669642984867096, + "loss_iou": 0.8369162082672119, + "loss_num": 0.03076171875, + "loss_xval": 1.8275799751281738, + "num_input_tokens_seen": 488143296, + "step": 2824 + }, + { + "epoch": 1.0819609345078514, + "grad_norm": 193.15992522059403, + "learning_rate": 5e-06, + "loss": 2.1213, + "num_input_tokens_seen": 488316384, + "step": 2825 + }, + { + "epoch": 1.0819609345078514, + "loss": 2.1571407318115234, + "loss_ce": 0.6244411468505859, + "loss_iou": 0.7029953002929688, + "loss_num": 0.025390625, + "loss_xval": 1.5326995849609375, + "num_input_tokens_seen": 488316384, + "step": 2825 + }, + { + "epoch": 1.0823439295289161, + "grad_norm": 313.7898566610402, + "learning_rate": 5e-06, + "loss": 2.3996, + "num_input_tokens_seen": 488489136, + "step": 2826 + }, + { + "epoch": 1.0823439295289161, + "loss": 2.307638645172119, + "loss_ce": 0.6069540977478027, + "loss_iou": 0.7825322151184082, + "loss_num": 0.027099609375, + "loss_xval": 1.7006845474243164, + "num_input_tokens_seen": 488489136, + "step": 2826 + }, + { + "epoch": 1.0827269245499809, + "grad_norm": 99.87707022706083, + "learning_rate": 5e-06, + "loss": 2.5446, + "num_input_tokens_seen": 488662056, + "step": 2827 + }, + { + "epoch": 1.0827269245499809, + "loss": 2.5995631217956543, + "loss_ce": 0.6866831183433533, + "loss_iou": 0.8688545227050781, + "loss_num": 0.034912109375, + "loss_xval": 1.9128799438476562, + "num_input_tokens_seen": 488662056, + "step": 2827 + }, + { + "epoch": 1.0831099195710456, + "grad_norm": 372.36293237635937, + "learning_rate": 5e-06, + "loss": 2.5754, + "num_input_tokens_seen": 488835248, + "step": 2828 + }, + { + "epoch": 1.0831099195710456, + "loss": 2.4599075317382812, + "loss_ce": 0.6324270963668823, + "loss_iou": 0.8389416337013245, + "loss_num": 0.0299072265625, + "loss_xval": 1.827480435371399, + "num_input_tokens_seen": 488835248, + "step": 2828 + }, + { + "epoch": 1.0834929145921104, + "grad_norm": 123.33707719794329, + "learning_rate": 5e-06, + "loss": 2.2394, + "num_input_tokens_seen": 489008408, + "step": 2829 + }, + { + "epoch": 1.0834929145921104, + "loss": 2.0730512142181396, + "loss_ce": 0.6280667781829834, + "loss_iou": 0.6656379699707031, + "loss_num": 0.022705078125, + "loss_xval": 1.4449844360351562, + "num_input_tokens_seen": 489008408, + "step": 2829 + }, + { + "epoch": 1.083875909613175, + "grad_norm": 313.0034887809654, + "learning_rate": 5e-06, + "loss": 2.7079, + "num_input_tokens_seen": 489181368, + "step": 2830 + }, + { + "epoch": 1.083875909613175, + "loss": 2.8506133556365967, + "loss_ce": 0.6318695545196533, + "loss_iou": 1.0071990489959717, + "loss_num": 0.040771484375, + "loss_xval": 2.2187438011169434, + "num_input_tokens_seen": 489181368, + "step": 2830 + }, + { + "epoch": 1.0842589046342397, + "grad_norm": 168.8747494529447, + "learning_rate": 5e-06, + "loss": 2.7962, + "num_input_tokens_seen": 489354272, + "step": 2831 + }, + { + "epoch": 1.0842589046342397, + "loss": 2.8026838302612305, + "loss_ce": 0.6917127370834351, + "loss_iou": 0.9522751569747925, + "loss_num": 0.041259765625, + "loss_xval": 2.110971450805664, + "num_input_tokens_seen": 489354272, + "step": 2831 + }, + { + "epoch": 1.0846418996553044, + "grad_norm": 57.168527638211046, + "learning_rate": 5e-06, + "loss": 2.6492, + "num_input_tokens_seen": 489527768, + "step": 2832 + }, + { + "epoch": 1.0846418996553044, + "loss": 2.810802698135376, + "loss_ce": 0.68154376745224, + "loss_iou": 0.9660577774047852, + "loss_num": 0.039306640625, + "loss_xval": 2.1292591094970703, + "num_input_tokens_seen": 489527768, + "step": 2832 + }, + { + "epoch": 1.0850248946763692, + "grad_norm": 53.892156776320036, + "learning_rate": 5e-06, + "loss": 2.32, + "num_input_tokens_seen": 489700904, + "step": 2833 + }, + { + "epoch": 1.0850248946763692, + "loss": 2.2683029174804688, + "loss_ce": 0.6636078953742981, + "loss_iou": 0.7453100681304932, + "loss_num": 0.0228271484375, + "loss_xval": 1.6046948432922363, + "num_input_tokens_seen": 489700904, + "step": 2833 + }, + { + "epoch": 1.085407889697434, + "grad_norm": 85.90525825970508, + "learning_rate": 5e-06, + "loss": 2.0927, + "num_input_tokens_seen": 489873864, + "step": 2834 + }, + { + "epoch": 1.085407889697434, + "loss": 2.0061228275299072, + "loss_ce": 0.603214681148529, + "loss_iou": -2.0070057552195992e+22, + "loss_num": 0.027099609375, + "loss_xval": 6.770755103529415e+36, + "num_input_tokens_seen": 489873864, + "step": 2834 + }, + { + "epoch": 1.0857908847184987, + "grad_norm": 171.33366141709405, + "learning_rate": 5e-06, + "loss": 2.0763, + "num_input_tokens_seen": 490046992, + "step": 2835 + }, + { + "epoch": 1.0857908847184987, + "loss": 2.1125564575195312, + "loss_ce": 0.6527152061462402, + "loss_iou": 0.680894136428833, + "loss_num": 0.0196533203125, + "loss_xval": 1.459841251373291, + "num_input_tokens_seen": 490046992, + "step": 2835 + }, + { + "epoch": 1.0861738797395635, + "grad_norm": 160.93263017016588, + "learning_rate": 5e-06, + "loss": 2.6295, + "num_input_tokens_seen": 490220232, + "step": 2836 + }, + { + "epoch": 1.0861738797395635, + "loss": 2.500584602355957, + "loss_ce": 0.6741725206375122, + "loss_iou": 0.8312051296234131, + "loss_num": 0.03271484375, + "loss_xval": 1.8264117240905762, + "num_input_tokens_seen": 490220232, + "step": 2836 + }, + { + "epoch": 1.086556874760628, + "grad_norm": 98.89169004201597, + "learning_rate": 5e-06, + "loss": 2.0596, + "num_input_tokens_seen": 490393368, + "step": 2837 + }, + { + "epoch": 1.086556874760628, + "loss": 2.169426918029785, + "loss_ce": 0.6463626027107239, + "loss_iou": 0.7101861834526062, + "loss_num": 0.0205078125, + "loss_xval": 1.5230640172958374, + "num_input_tokens_seen": 490393368, + "step": 2837 + }, + { + "epoch": 1.0869398697816928, + "grad_norm": 171.29138796187056, + "learning_rate": 5e-06, + "loss": 2.0366, + "num_input_tokens_seen": 490565872, + "step": 2838 + }, + { + "epoch": 1.0869398697816928, + "loss": 1.9540889263153076, + "loss_ce": 0.5884835720062256, + "loss_iou": 0.6313195824623108, + "loss_num": 0.0206298828125, + "loss_xval": 1.3656054735183716, + "num_input_tokens_seen": 490565872, + "step": 2838 + }, + { + "epoch": 1.0873228648027575, + "grad_norm": 294.0112567125096, + "learning_rate": 5e-06, + "loss": 2.7592, + "num_input_tokens_seen": 490739336, + "step": 2839 + }, + { + "epoch": 1.0873228648027575, + "loss": 2.7852325439453125, + "loss_ce": 0.6034911274909973, + "loss_iou": 1.039387583732605, + "loss_num": 0.0206298828125, + "loss_xval": 2.18174147605896, + "num_input_tokens_seen": 490739336, + "step": 2839 + }, + { + "epoch": 1.0877058598238223, + "grad_norm": 138.16441247610052, + "learning_rate": 5e-06, + "loss": 2.692, + "num_input_tokens_seen": 490912320, + "step": 2840 + }, + { + "epoch": 1.0877058598238223, + "loss": 2.710242748260498, + "loss_ce": 0.6224827766418457, + "loss_iou": 0.9509843587875366, + "loss_num": 0.037109375, + "loss_xval": 2.0877597332000732, + "num_input_tokens_seen": 490912320, + "step": 2840 + }, + { + "epoch": 1.088088854844887, + "grad_norm": 82.79934485866873, + "learning_rate": 5e-06, + "loss": 2.2325, + "num_input_tokens_seen": 491085120, + "step": 2841 + }, + { + "epoch": 1.088088854844887, + "loss": 2.093496799468994, + "loss_ce": 0.6058523654937744, + "loss_iou": 0.6878225207328796, + "loss_num": 0.0223388671875, + "loss_xval": 1.4876445531845093, + "num_input_tokens_seen": 491085120, + "step": 2841 + }, + { + "epoch": 1.0884718498659518, + "grad_norm": 112.61014009674933, + "learning_rate": 5e-06, + "loss": 2.3055, + "num_input_tokens_seen": 491258304, + "step": 2842 + }, + { + "epoch": 1.0884718498659518, + "loss": 2.3159525394439697, + "loss_ce": 0.6486879587173462, + "loss_iou": 0.7727192640304565, + "loss_num": 0.0244140625, + "loss_xval": 1.667264699935913, + "num_input_tokens_seen": 491258304, + "step": 2842 + }, + { + "epoch": 1.0888548448870166, + "grad_norm": 157.64913384604296, + "learning_rate": 5e-06, + "loss": 2.2396, + "num_input_tokens_seen": 491431200, + "step": 2843 + }, + { + "epoch": 1.0888548448870166, + "loss": 2.414534568786621, + "loss_ce": 0.630474328994751, + "loss_iou": 0.8160413503646851, + "loss_num": 0.0303955078125, + "loss_xval": 1.7840602397918701, + "num_input_tokens_seen": 491431200, + "step": 2843 + }, + { + "epoch": 1.089237839908081, + "grad_norm": 128.54688564774014, + "learning_rate": 5e-06, + "loss": 2.1839, + "num_input_tokens_seen": 491604360, + "step": 2844 + }, + { + "epoch": 1.089237839908081, + "loss": 2.1136837005615234, + "loss_ce": 0.6559200882911682, + "loss_iou": 0.6847227811813354, + "loss_num": 0.0177001953125, + "loss_xval": 1.457763433456421, + "num_input_tokens_seen": 491604360, + "step": 2844 + }, + { + "epoch": 1.0896208349291459, + "grad_norm": 133.58048026425544, + "learning_rate": 5e-06, + "loss": 2.3242, + "num_input_tokens_seen": 491777240, + "step": 2845 + }, + { + "epoch": 1.0896208349291459, + "loss": 2.1787991523742676, + "loss_ce": 0.5843753814697266, + "loss_iou": 0.7383129596710205, + "loss_num": 0.0235595703125, + "loss_xval": 1.594423770904541, + "num_input_tokens_seen": 491777240, + "step": 2845 + }, + { + "epoch": 1.0900038299502106, + "grad_norm": 122.44268752260001, + "learning_rate": 5e-06, + "loss": 2.3311, + "num_input_tokens_seen": 491950152, + "step": 2846 + }, + { + "epoch": 1.0900038299502106, + "loss": 2.378659725189209, + "loss_ce": 0.6128594279289246, + "loss_iou": 0.823634922504425, + "loss_num": 0.023681640625, + "loss_xval": 1.76580011844635, + "num_input_tokens_seen": 491950152, + "step": 2846 + }, + { + "epoch": 1.0903868249712754, + "grad_norm": 207.86810599802814, + "learning_rate": 5e-06, + "loss": 2.4016, + "num_input_tokens_seen": 492123440, + "step": 2847 + }, + { + "epoch": 1.0903868249712754, + "loss": 2.386385917663574, + "loss_ce": 0.5922180414199829, + "loss_iou": 0.8199966549873352, + "loss_num": 0.03076171875, + "loss_xval": 1.7941681146621704, + "num_input_tokens_seen": 492123440, + "step": 2847 + }, + { + "epoch": 1.0907698199923401, + "grad_norm": 289.7377745310439, + "learning_rate": 5e-06, + "loss": 2.5178, + "num_input_tokens_seen": 492296328, + "step": 2848 + }, + { + "epoch": 1.0907698199923401, + "loss": 2.3781158924102783, + "loss_ce": 0.6338722109794617, + "loss_iou": 0.8073635101318359, + "loss_num": 0.02587890625, + "loss_xval": 1.7442436218261719, + "num_input_tokens_seen": 492296328, + "step": 2848 + }, + { + "epoch": 1.0911528150134049, + "grad_norm": 58.10374609007535, + "learning_rate": 5e-06, + "loss": 2.5232, + "num_input_tokens_seen": 492469600, + "step": 2849 + }, + { + "epoch": 1.0911528150134049, + "loss": 2.472416400909424, + "loss_ce": 0.6209355592727661, + "loss_iou": 0.8428851366043091, + "loss_num": 0.033203125, + "loss_xval": 1.8514807224273682, + "num_input_tokens_seen": 492469600, + "step": 2849 + }, + { + "epoch": 1.0915358100344696, + "grad_norm": 174.9568940032917, + "learning_rate": 5e-06, + "loss": 2.2801, + "num_input_tokens_seen": 492642400, + "step": 2850 + }, + { + "epoch": 1.0915358100344696, + "loss": 2.270138740539551, + "loss_ce": 0.6334530711174011, + "loss_iou": 0.752272367477417, + "loss_num": 0.0264892578125, + "loss_xval": 1.636685848236084, + "num_input_tokens_seen": 492642400, + "step": 2850 + }, + { + "epoch": 1.0919188050555342, + "grad_norm": 126.78242624920647, + "learning_rate": 5e-06, + "loss": 2.1337, + "num_input_tokens_seen": 492815128, + "step": 2851 + }, + { + "epoch": 1.0919188050555342, + "loss": 2.2764225006103516, + "loss_ce": 0.6189845204353333, + "loss_iou": 0.7655476331710815, + "loss_num": 0.0252685546875, + "loss_xval": 1.657438039779663, + "num_input_tokens_seen": 492815128, + "step": 2851 + }, + { + "epoch": 1.092301800076599, + "grad_norm": 261.650315207769, + "learning_rate": 5e-06, + "loss": 2.4944, + "num_input_tokens_seen": 492987896, + "step": 2852 + }, + { + "epoch": 1.092301800076599, + "loss": 2.5657906532287598, + "loss_ce": 0.6617599725723267, + "loss_iou": 0.8752331733703613, + "loss_num": 0.03076171875, + "loss_xval": 1.9040307998657227, + "num_input_tokens_seen": 492987896, + "step": 2852 + }, + { + "epoch": 1.0926847950976637, + "grad_norm": 118.42784116999931, + "learning_rate": 5e-06, + "loss": 2.1608, + "num_input_tokens_seen": 493160384, + "step": 2853 + }, + { + "epoch": 1.0926847950976637, + "loss": 1.999885082244873, + "loss_ce": 0.5869404077529907, + "loss_iou": 0.6425074338912964, + "loss_num": 0.025634765625, + "loss_xval": 1.4001271724700928, + "num_input_tokens_seen": 493160384, + "step": 2853 + }, + { + "epoch": 1.0930677901187285, + "grad_norm": 150.2787262374642, + "learning_rate": 5e-06, + "loss": 2.3293, + "num_input_tokens_seen": 493333056, + "step": 2854 + }, + { + "epoch": 1.0930677901187285, + "loss": 2.2261369228363037, + "loss_ce": 0.5819826126098633, + "loss_iou": 0.7748359441757202, + "loss_num": 0.0189208984375, + "loss_xval": 1.6441543102264404, + "num_input_tokens_seen": 493333056, + "step": 2854 + }, + { + "epoch": 1.0934507851397932, + "grad_norm": 336.4674460277969, + "learning_rate": 5e-06, + "loss": 2.317, + "num_input_tokens_seen": 493506240, + "step": 2855 + }, + { + "epoch": 1.0934507851397932, + "loss": 2.2473292350769043, + "loss_ce": 0.6243661642074585, + "loss_iou": 0.7441293001174927, + "loss_num": 0.0269775390625, + "loss_xval": 1.6229631900787354, + "num_input_tokens_seen": 493506240, + "step": 2855 + }, + { + "epoch": 1.093833780160858, + "grad_norm": 135.67138766139968, + "learning_rate": 5e-06, + "loss": 2.1961, + "num_input_tokens_seen": 493679272, + "step": 2856 + }, + { + "epoch": 1.093833780160858, + "loss": 2.183864116668701, + "loss_ce": 0.6299283504486084, + "loss_iou": 0.7201136350631714, + "loss_num": 0.022705078125, + "loss_xval": 1.5539357662200928, + "num_input_tokens_seen": 493679272, + "step": 2856 + }, + { + "epoch": 1.0942167751819227, + "grad_norm": 148.68457907104002, + "learning_rate": 5e-06, + "loss": 2.204, + "num_input_tokens_seen": 493851736, + "step": 2857 + }, + { + "epoch": 1.0942167751819227, + "loss": 2.527132511138916, + "loss_ce": 0.6028381586074829, + "loss_iou": 0.8985790610313416, + "loss_num": 0.025390625, + "loss_xval": 1.924294352531433, + "num_input_tokens_seen": 493851736, + "step": 2857 + }, + { + "epoch": 1.0945997702029873, + "grad_norm": 110.51339810601723, + "learning_rate": 5e-06, + "loss": 2.6957, + "num_input_tokens_seen": 494024760, + "step": 2858 + }, + { + "epoch": 1.0945997702029873, + "loss": 2.516145706176758, + "loss_ce": 0.649133563041687, + "loss_iou": 0.8449745178222656, + "loss_num": 0.035400390625, + "loss_xval": 1.8670120239257812, + "num_input_tokens_seen": 494024760, + "step": 2858 + }, + { + "epoch": 1.094982765224052, + "grad_norm": 99.21795225591899, + "learning_rate": 5e-06, + "loss": 2.016, + "num_input_tokens_seen": 494197848, + "step": 2859 + }, + { + "epoch": 1.094982765224052, + "loss": 2.1738383769989014, + "loss_ce": 0.6236727833747864, + "loss_iou": 0.7145969867706299, + "loss_num": 0.024169921875, + "loss_xval": 1.5501655340194702, + "num_input_tokens_seen": 494197848, + "step": 2859 + }, + { + "epoch": 1.0953657602451168, + "grad_norm": 197.07936172925068, + "learning_rate": 5e-06, + "loss": 1.9726, + "num_input_tokens_seen": 494371280, + "step": 2860 + }, + { + "epoch": 1.0953657602451168, + "loss": 2.044356346130371, + "loss_ce": 0.6590442061424255, + "loss_iou": 0.6417223215103149, + "loss_num": 0.0203857421875, + "loss_xval": 1.3853123188018799, + "num_input_tokens_seen": 494371280, + "step": 2860 + }, + { + "epoch": 1.0957487552661815, + "grad_norm": 108.94740994693298, + "learning_rate": 5e-06, + "loss": 2.6536, + "num_input_tokens_seen": 494544024, + "step": 2861 + }, + { + "epoch": 1.0957487552661815, + "loss": 2.578054189682007, + "loss_ce": 0.5826956629753113, + "loss_iou": 0.9052110314369202, + "loss_num": 0.036865234375, + "loss_xval": 1.9953584671020508, + "num_input_tokens_seen": 494544024, + "step": 2861 + }, + { + "epoch": 1.0961317502872463, + "grad_norm": 167.67119584141844, + "learning_rate": 5e-06, + "loss": 2.0504, + "num_input_tokens_seen": 494717120, + "step": 2862 + }, + { + "epoch": 1.0961317502872463, + "loss": 1.9566445350646973, + "loss_ce": 0.5846694707870483, + "loss_iou": 0.6443310379981995, + "loss_num": 0.0166015625, + "loss_xval": 1.371975064277649, + "num_input_tokens_seen": 494717120, + "step": 2862 + }, + { + "epoch": 1.096514745308311, + "grad_norm": 332.4719782769773, + "learning_rate": 5e-06, + "loss": 2.8235, + "num_input_tokens_seen": 494890240, + "step": 2863 + }, + { + "epoch": 1.096514745308311, + "loss": 2.830749988555908, + "loss_ce": 0.5865357518196106, + "loss_iou": 1.0550904273986816, + "loss_num": 0.02685546875, + "loss_xval": 2.2442140579223633, + "num_input_tokens_seen": 494890240, + "step": 2863 + }, + { + "epoch": 1.0968977403293758, + "grad_norm": 136.13828964738934, + "learning_rate": 5e-06, + "loss": 2.4489, + "num_input_tokens_seen": 495063232, + "step": 2864 + }, + { + "epoch": 1.0968977403293758, + "loss": 2.4033021926879883, + "loss_ce": 0.6511270999908447, + "loss_iou": 0.8048596382141113, + "loss_num": 0.0284423828125, + "loss_xval": 1.752175211906433, + "num_input_tokens_seen": 495063232, + "step": 2864 + }, + { + "epoch": 1.0972807353504404, + "grad_norm": 384.33973063577145, + "learning_rate": 5e-06, + "loss": 2.7995, + "num_input_tokens_seen": 495236240, + "step": 2865 + }, + { + "epoch": 1.0972807353504404, + "loss": 2.769808292388916, + "loss_ce": 0.6287542581558228, + "loss_iou": 0.9799509048461914, + "loss_num": 0.0361328125, + "loss_xval": 2.141054153442383, + "num_input_tokens_seen": 495236240, + "step": 2865 + }, + { + "epoch": 1.097663730371505, + "grad_norm": 188.15476483245635, + "learning_rate": 5e-06, + "loss": 2.6408, + "num_input_tokens_seen": 495409144, + "step": 2866 + }, + { + "epoch": 1.097663730371505, + "loss": 2.7341835498809814, + "loss_ce": 0.5780057311058044, + "loss_iou": 0.9697515368461609, + "loss_num": 0.04345703125, + "loss_xval": 2.1561777591705322, + "num_input_tokens_seen": 495409144, + "step": 2866 + }, + { + "epoch": 1.0980467253925699, + "grad_norm": 298.01300219408984, + "learning_rate": 5e-06, + "loss": 2.7389, + "num_input_tokens_seen": 495582432, + "step": 2867 + }, + { + "epoch": 1.0980467253925699, + "loss": 2.7545623779296875, + "loss_ce": 0.6666834950447083, + "loss_iou": 0.9440858364105225, + "loss_num": 0.0400390625, + "loss_xval": 2.087878704071045, + "num_input_tokens_seen": 495582432, + "step": 2867 + }, + { + "epoch": 1.0984297204136346, + "grad_norm": 62.530187986737516, + "learning_rate": 5e-06, + "loss": 2.5512, + "num_input_tokens_seen": 495755416, + "step": 2868 + }, + { + "epoch": 1.0984297204136346, + "loss": 2.5481717586517334, + "loss_ce": 0.6293148398399353, + "loss_iou": 0.8566452264785767, + "loss_num": 0.041015625, + "loss_xval": 1.9188568592071533, + "num_input_tokens_seen": 495755416, + "step": 2868 + }, + { + "epoch": 1.0988127154346994, + "grad_norm": 76.01019855681703, + "learning_rate": 5e-06, + "loss": 2.2085, + "num_input_tokens_seen": 495928232, + "step": 2869 + }, + { + "epoch": 1.0988127154346994, + "loss": 2.2282767295837402, + "loss_ce": 0.6378158330917358, + "loss_iou": 0.7301059365272522, + "loss_num": 0.026123046875, + "loss_xval": 1.5904608964920044, + "num_input_tokens_seen": 495928232, + "step": 2869 + }, + { + "epoch": 1.0991957104557641, + "grad_norm": 136.03440512249384, + "learning_rate": 5e-06, + "loss": 2.3114, + "num_input_tokens_seen": 496100928, + "step": 2870 + }, + { + "epoch": 1.0991957104557641, + "loss": 2.460556983947754, + "loss_ce": 0.6350305676460266, + "loss_iou": 0.8189521431922913, + "loss_num": 0.03759765625, + "loss_xval": 1.8255263566970825, + "num_input_tokens_seen": 496100928, + "step": 2870 + }, + { + "epoch": 1.099578705476829, + "grad_norm": 160.81883728934292, + "learning_rate": 5e-06, + "loss": 2.185, + "num_input_tokens_seen": 496274040, + "step": 2871 + }, + { + "epoch": 1.099578705476829, + "loss": 2.116295099258423, + "loss_ce": 0.5956770181655884, + "loss_iou": 0.7045534253120422, + "loss_num": 0.0223388671875, + "loss_xval": 1.5206180810928345, + "num_input_tokens_seen": 496274040, + "step": 2871 + }, + { + "epoch": 1.0999617004978934, + "grad_norm": 127.7551622154229, + "learning_rate": 5e-06, + "loss": 2.1363, + "num_input_tokens_seen": 496447016, + "step": 2872 + }, + { + "epoch": 1.0999617004978934, + "loss": 2.1486854553222656, + "loss_ce": 0.5900260210037231, + "loss_iou": 0.7154869437217712, + "loss_num": 0.0255126953125, + "loss_xval": 1.5586594343185425, + "num_input_tokens_seen": 496447016, + "step": 2872 + }, + { + "epoch": 1.1003446955189582, + "grad_norm": 99.36040266112705, + "learning_rate": 5e-06, + "loss": 2.1568, + "num_input_tokens_seen": 496620016, + "step": 2873 + }, + { + "epoch": 1.1003446955189582, + "loss": 2.2553670406341553, + "loss_ce": 0.6284530162811279, + "loss_iou": 0.7608752250671387, + "loss_num": 0.02099609375, + "loss_xval": 1.6269140243530273, + "num_input_tokens_seen": 496620016, + "step": 2873 + }, + { + "epoch": 1.100727690540023, + "grad_norm": 113.41540259100084, + "learning_rate": 5e-06, + "loss": 1.851, + "num_input_tokens_seen": 496793232, + "step": 2874 + }, + { + "epoch": 1.100727690540023, + "loss": 2.0813405513763428, + "loss_ce": 0.5790365934371948, + "loss_iou": 0.7055282592773438, + "loss_num": 0.018310546875, + "loss_xval": 1.5023040771484375, + "num_input_tokens_seen": 496793232, + "step": 2874 + }, + { + "epoch": 1.1011106855610877, + "grad_norm": 204.24479564880605, + "learning_rate": 5e-06, + "loss": 2.4847, + "num_input_tokens_seen": 496966232, + "step": 2875 + }, + { + "epoch": 1.1011106855610877, + "loss": 2.5521883964538574, + "loss_ce": 0.6590052247047424, + "loss_iou": 0.8944981098175049, + "loss_num": 0.0208740234375, + "loss_xval": 1.8931832313537598, + "num_input_tokens_seen": 496966232, + "step": 2875 + }, + { + "epoch": 1.1014936805821525, + "grad_norm": 89.06032181286207, + "learning_rate": 5e-06, + "loss": 2.5199, + "num_input_tokens_seen": 497139064, + "step": 2876 + }, + { + "epoch": 1.1014936805821525, + "loss": 2.587961435317993, + "loss_ce": 0.5859717130661011, + "loss_iou": 0.9097473621368408, + "loss_num": 0.03662109375, + "loss_xval": 2.0019898414611816, + "num_input_tokens_seen": 497139064, + "step": 2876 + }, + { + "epoch": 1.1018766756032172, + "grad_norm": 53.350404368714756, + "learning_rate": 5e-06, + "loss": 2.1928, + "num_input_tokens_seen": 497312136, + "step": 2877 + }, + { + "epoch": 1.1018766756032172, + "loss": 2.1687560081481934, + "loss_ce": 0.6387738585472107, + "loss_iou": 0.7056039571762085, + "loss_num": 0.023681640625, + "loss_xval": 1.529982328414917, + "num_input_tokens_seen": 497312136, + "step": 2877 + }, + { + "epoch": 1.102259670624282, + "grad_norm": 87.39247123786168, + "learning_rate": 5e-06, + "loss": 1.912, + "num_input_tokens_seen": 497481128, + "step": 2878 + }, + { + "epoch": 1.102259670624282, + "loss": 1.871387243270874, + "loss_ce": 0.6387965083122253, + "loss_iou": 0.573814868927002, + "loss_num": 0.0169677734375, + "loss_xval": 1.232590675354004, + "num_input_tokens_seen": 497481128, + "step": 2878 + }, + { + "epoch": 1.1026426656453465, + "grad_norm": 192.25773845240278, + "learning_rate": 5e-06, + "loss": 2.3301, + "num_input_tokens_seen": 497654184, + "step": 2879 + }, + { + "epoch": 1.1026426656453465, + "loss": 2.184957981109619, + "loss_ce": 0.6186071634292603, + "loss_iou": 0.7164029479026794, + "loss_num": 0.0267333984375, + "loss_xval": 1.5663508176803589, + "num_input_tokens_seen": 497654184, + "step": 2879 + }, + { + "epoch": 1.1030256606664113, + "grad_norm": 313.25544215760783, + "learning_rate": 5e-06, + "loss": 2.887, + "num_input_tokens_seen": 497827048, + "step": 2880 + }, + { + "epoch": 1.1030256606664113, + "loss": 2.810183048248291, + "loss_ce": 0.6071299314498901, + "loss_iou": 1.0216926336288452, + "loss_num": 0.031982421875, + "loss_xval": 2.2030529975891113, + "num_input_tokens_seen": 497827048, + "step": 2880 + }, + { + "epoch": 1.103408655687476, + "grad_norm": 72.68253705884098, + "learning_rate": 5e-06, + "loss": 2.4853, + "num_input_tokens_seen": 497999984, + "step": 2881 + }, + { + "epoch": 1.103408655687476, + "loss": 2.429750919342041, + "loss_ce": 0.6344648599624634, + "loss_iou": 0.7948598861694336, + "loss_num": 0.041015625, + "loss_xval": 1.7819806337356567, + "num_input_tokens_seen": 497999984, + "step": 2881 + }, + { + "epoch": 1.1037916507085408, + "grad_norm": 99.60009967625733, + "learning_rate": 5e-06, + "loss": 2.1784, + "num_input_tokens_seen": 498172688, + "step": 2882 + }, + { + "epoch": 1.1037916507085408, + "loss": 2.157282829284668, + "loss_ce": 0.6364111304283142, + "loss_iou": 0.7046801447868347, + "loss_num": 0.0223388671875, + "loss_xval": 1.5208715200424194, + "num_input_tokens_seen": 498172688, + "step": 2882 + }, + { + "epoch": 1.1041746457296056, + "grad_norm": 104.85088022303032, + "learning_rate": 5e-06, + "loss": 1.9184, + "num_input_tokens_seen": 498345624, + "step": 2883 + }, + { + "epoch": 1.1041746457296056, + "loss": 2.0137667655944824, + "loss_ce": 0.657676100730896, + "loss_iou": 0.623815655708313, + "loss_num": 0.021728515625, + "loss_xval": 1.356090784072876, + "num_input_tokens_seen": 498345624, + "step": 2883 + }, + { + "epoch": 1.1045576407506703, + "grad_norm": 260.0011582682272, + "learning_rate": 5e-06, + "loss": 2.3415, + "num_input_tokens_seen": 498518072, + "step": 2884 + }, + { + "epoch": 1.1045576407506703, + "loss": 2.4038548469543457, + "loss_ce": 0.6130247116088867, + "loss_iou": 0.8475940227508545, + "loss_num": 0.0191650390625, + "loss_xval": 1.790830135345459, + "num_input_tokens_seen": 498518072, + "step": 2884 + }, + { + "epoch": 1.104940635771735, + "grad_norm": 84.77041030695203, + "learning_rate": 5e-06, + "loss": 2.7126, + "num_input_tokens_seen": 498690768, + "step": 2885 + }, + { + "epoch": 1.104940635771735, + "loss": 2.818016529083252, + "loss_ce": 0.6027964949607849, + "loss_iou": 1.0119068622589111, + "loss_num": 0.038330078125, + "loss_xval": 2.2152199745178223, + "num_input_tokens_seen": 498690768, + "step": 2885 + }, + { + "epoch": 1.1053236307927996, + "grad_norm": 118.91458595205364, + "learning_rate": 5e-06, + "loss": 2.4023, + "num_input_tokens_seen": 498863592, + "step": 2886 + }, + { + "epoch": 1.1053236307927996, + "loss": 2.473665714263916, + "loss_ce": 0.6676845550537109, + "loss_iou": 0.8347227573394775, + "loss_num": 0.02734375, + "loss_xval": 1.805981159210205, + "num_input_tokens_seen": 498863592, + "step": 2886 + }, + { + "epoch": 1.1057066258138644, + "grad_norm": 65.16102474275446, + "learning_rate": 5e-06, + "loss": 2.2125, + "num_input_tokens_seen": 499036496, + "step": 2887 + }, + { + "epoch": 1.1057066258138644, + "loss": 2.2524232864379883, + "loss_ce": 0.6581348776817322, + "loss_iou": 0.7283729314804077, + "loss_num": 0.0274658203125, + "loss_xval": 1.5942885875701904, + "num_input_tokens_seen": 499036496, + "step": 2887 + }, + { + "epoch": 1.1060896208349291, + "grad_norm": 139.32643251555038, + "learning_rate": 5e-06, + "loss": 2.1848, + "num_input_tokens_seen": 499209312, + "step": 2888 + }, + { + "epoch": 1.1060896208349291, + "loss": 2.2043979167938232, + "loss_ce": 0.6207559704780579, + "loss_iou": 0.7400783896446228, + "loss_num": 0.0206298828125, + "loss_xval": 1.5836418867111206, + "num_input_tokens_seen": 499209312, + "step": 2888 + }, + { + "epoch": 1.1064726158559939, + "grad_norm": 134.19208541204503, + "learning_rate": 5e-06, + "loss": 2.5185, + "num_input_tokens_seen": 499382184, + "step": 2889 + }, + { + "epoch": 1.1064726158559939, + "loss": 2.557345390319824, + "loss_ce": 0.610958456993103, + "loss_iou": 0.8938783407211304, + "loss_num": 0.03173828125, + "loss_xval": 1.9463870525360107, + "num_input_tokens_seen": 499382184, + "step": 2889 + }, + { + "epoch": 1.1068556108770586, + "grad_norm": 76.11039531379505, + "learning_rate": 5e-06, + "loss": 1.9668, + "num_input_tokens_seen": 499555192, + "step": 2890 + }, + { + "epoch": 1.1068556108770586, + "loss": 1.968073844909668, + "loss_ce": 0.6216538548469543, + "loss_iou": 0.6257856488227844, + "loss_num": 0.0189208984375, + "loss_xval": 1.3464199304580688, + "num_input_tokens_seen": 499555192, + "step": 2890 + }, + { + "epoch": 1.1072386058981234, + "grad_norm": 154.2424569238608, + "learning_rate": 5e-06, + "loss": 2.2009, + "num_input_tokens_seen": 499728504, + "step": 2891 + }, + { + "epoch": 1.1072386058981234, + "loss": 2.0825328826904297, + "loss_ce": 0.6169934272766113, + "loss_iou": 0.6926695704460144, + "loss_num": 0.0159912109375, + "loss_xval": 1.4655393362045288, + "num_input_tokens_seen": 499728504, + "step": 2891 + }, + { + "epoch": 1.1076216009191882, + "grad_norm": 226.64459263529557, + "learning_rate": 5e-06, + "loss": 2.4927, + "num_input_tokens_seen": 499901288, + "step": 2892 + }, + { + "epoch": 1.1076216009191882, + "loss": 2.600008487701416, + "loss_ce": 0.6066851615905762, + "loss_iou": 0.9301334023475647, + "loss_num": 0.026611328125, + "loss_xval": 1.9933234453201294, + "num_input_tokens_seen": 499901288, + "step": 2892 + }, + { + "epoch": 1.1080045959402527, + "grad_norm": 137.34916796942628, + "learning_rate": 5e-06, + "loss": 2.3934, + "num_input_tokens_seen": 500074440, + "step": 2893 + }, + { + "epoch": 1.1080045959402527, + "loss": 2.4239723682403564, + "loss_ce": 0.6311205625534058, + "loss_iou": 0.8262354135513306, + "loss_num": 0.028076171875, + "loss_xval": 1.7928516864776611, + "num_input_tokens_seen": 500074440, + "step": 2893 + }, + { + "epoch": 1.1083875909613174, + "grad_norm": 87.32613970301273, + "learning_rate": 5e-06, + "loss": 2.1623, + "num_input_tokens_seen": 500247240, + "step": 2894 + }, + { + "epoch": 1.1083875909613174, + "loss": 2.1889734268188477, + "loss_ce": 0.5862493515014648, + "loss_iou": 0.7315835952758789, + "loss_num": 0.0279541015625, + "loss_xval": 1.6027240753173828, + "num_input_tokens_seen": 500247240, + "step": 2894 + }, + { + "epoch": 1.1087705859823822, + "grad_norm": 131.23641455431007, + "learning_rate": 5e-06, + "loss": 2.1098, + "num_input_tokens_seen": 500420064, + "step": 2895 + }, + { + "epoch": 1.1087705859823822, + "loss": 2.164780855178833, + "loss_ce": 0.6054773330688477, + "loss_iou": 0.7021981477737427, + "loss_num": 0.031005859375, + "loss_xval": 1.5593035221099854, + "num_input_tokens_seen": 500420064, + "step": 2895 + }, + { + "epoch": 1.109153581003447, + "grad_norm": 149.03837118405545, + "learning_rate": 5e-06, + "loss": 1.924, + "num_input_tokens_seen": 500593088, + "step": 2896 + }, + { + "epoch": 1.109153581003447, + "loss": 1.8993990421295166, + "loss_ce": 0.5676180124282837, + "loss_iou": 0.619732677936554, + "loss_num": 0.0184326171875, + "loss_xval": 1.331781029701233, + "num_input_tokens_seen": 500593088, + "step": 2896 + }, + { + "epoch": 1.1095365760245117, + "grad_norm": 122.9939154883094, + "learning_rate": 5e-06, + "loss": 2.3066, + "num_input_tokens_seen": 500765976, + "step": 2897 + }, + { + "epoch": 1.1095365760245117, + "loss": 2.4041805267333984, + "loss_ce": 0.645162045955658, + "loss_iou": 0.8066943287849426, + "loss_num": 0.0291748046875, + "loss_xval": 1.7590185403823853, + "num_input_tokens_seen": 500765976, + "step": 2897 + }, + { + "epoch": 1.1099195710455765, + "grad_norm": 88.42111700464596, + "learning_rate": 5e-06, + "loss": 2.1336, + "num_input_tokens_seen": 500938864, + "step": 2898 + }, + { + "epoch": 1.1099195710455765, + "loss": 2.1812314987182617, + "loss_ce": 0.5808380842208862, + "loss_iou": 0.7426711320877075, + "loss_num": 0.0230712890625, + "loss_xval": 1.6003934144973755, + "num_input_tokens_seen": 500938864, + "step": 2898 + }, + { + "epoch": 1.110302566066641, + "grad_norm": 108.04490992874197, + "learning_rate": 5e-06, + "loss": 1.9805, + "num_input_tokens_seen": 501111808, + "step": 2899 + }, + { + "epoch": 1.110302566066641, + "loss": 1.9913827180862427, + "loss_ce": 0.6420392394065857, + "loss_iou": 0.6279035210609436, + "loss_num": 0.018798828125, + "loss_xval": 1.3493434190750122, + "num_input_tokens_seen": 501111808, + "step": 2899 + }, + { + "epoch": 1.1106855610877058, + "grad_norm": 179.89219576201393, + "learning_rate": 5e-06, + "loss": 2.3296, + "num_input_tokens_seen": 501285208, + "step": 2900 + }, + { + "epoch": 1.1106855610877058, + "loss": 2.362100124359131, + "loss_ce": 0.6323884725570679, + "loss_iou": 0.8084898591041565, + "loss_num": 0.0224609375, + "loss_xval": 1.729711651802063, + "num_input_tokens_seen": 501285208, + "step": 2900 + }, + { + "epoch": 1.1110685561087705, + "grad_norm": 125.27576242226647, + "learning_rate": 5e-06, + "loss": 2.3567, + "num_input_tokens_seen": 501457992, + "step": 2901 + }, + { + "epoch": 1.1110685561087705, + "loss": 2.4861788749694824, + "loss_ce": 0.6386929750442505, + "loss_iou": 0.8328006267547607, + "loss_num": 0.036376953125, + "loss_xval": 1.8474860191345215, + "num_input_tokens_seen": 501457992, + "step": 2901 + }, + { + "epoch": 1.1114515511298353, + "grad_norm": 79.61821369387268, + "learning_rate": 5e-06, + "loss": 1.9526, + "num_input_tokens_seen": 501631104, + "step": 2902 + }, + { + "epoch": 1.1114515511298353, + "loss": 2.082009792327881, + "loss_ce": 0.6238939762115479, + "loss_iou": 0.6812369227409363, + "loss_num": 0.0191650390625, + "loss_xval": 1.4581159353256226, + "num_input_tokens_seen": 501631104, + "step": 2902 + }, + { + "epoch": 1.1118345461509, + "grad_norm": 99.69354139503271, + "learning_rate": 5e-06, + "loss": 1.9434, + "num_input_tokens_seen": 501804296, + "step": 2903 + }, + { + "epoch": 1.1118345461509, + "loss": 1.9701728820800781, + "loss_ce": 0.6007962226867676, + "loss_iou": 0.6232259273529053, + "loss_num": 0.024658203125, + "loss_xval": 1.3693766593933105, + "num_input_tokens_seen": 501804296, + "step": 2903 + }, + { + "epoch": 1.1122175411719648, + "grad_norm": 183.53801831093696, + "learning_rate": 5e-06, + "loss": 2.2145, + "num_input_tokens_seen": 501977424, + "step": 2904 + }, + { + "epoch": 1.1122175411719648, + "loss": 2.293793201446533, + "loss_ce": 0.5702643990516663, + "loss_iou": 0.7880339622497559, + "loss_num": 0.029541015625, + "loss_xval": 1.7235288619995117, + "num_input_tokens_seen": 501977424, + "step": 2904 + }, + { + "epoch": 1.1126005361930296, + "grad_norm": 201.893057671295, + "learning_rate": 5e-06, + "loss": 2.1023, + "num_input_tokens_seen": 502150080, + "step": 2905 + }, + { + "epoch": 1.1126005361930296, + "loss": 2.3387532234191895, + "loss_ce": 0.6087428331375122, + "loss_iou": 0.8072353601455688, + "loss_num": 0.0230712890625, + "loss_xval": 1.7300102710723877, + "num_input_tokens_seen": 502150080, + "step": 2905 + }, + { + "epoch": 1.1129835312140943, + "grad_norm": 190.64929744087027, + "learning_rate": 5e-06, + "loss": 2.4365, + "num_input_tokens_seen": 502323136, + "step": 2906 + }, + { + "epoch": 1.1129835312140943, + "loss": 2.474616050720215, + "loss_ce": 0.6042324304580688, + "loss_iou": 0.8702505230903625, + "loss_num": 0.0260009765625, + "loss_xval": 1.8703839778900146, + "num_input_tokens_seen": 502323136, + "step": 2906 + }, + { + "epoch": 1.1133665262351589, + "grad_norm": 202.96116314184982, + "learning_rate": 5e-06, + "loss": 2.3781, + "num_input_tokens_seen": 502496312, + "step": 2907 + }, + { + "epoch": 1.1133665262351589, + "loss": 2.477080821990967, + "loss_ce": 0.5928637385368347, + "loss_iou": 0.8861697912216187, + "loss_num": 0.0223388671875, + "loss_xval": 1.8842170238494873, + "num_input_tokens_seen": 502496312, + "step": 2907 + }, + { + "epoch": 1.1137495212562236, + "grad_norm": 247.36583450430214, + "learning_rate": 5e-06, + "loss": 2.714, + "num_input_tokens_seen": 502669216, + "step": 2908 + }, + { + "epoch": 1.1137495212562236, + "loss": 2.628657341003418, + "loss_ce": 0.5726035833358765, + "loss_iou": 0.9327510595321655, + "loss_num": 0.0380859375, + "loss_xval": 2.035790205001831, + "num_input_tokens_seen": 502669216, + "step": 2908 + }, + { + "epoch": 1.1141325162772884, + "grad_norm": 297.24115073489264, + "learning_rate": 5e-06, + "loss": 2.3879, + "num_input_tokens_seen": 502842104, + "step": 2909 + }, + { + "epoch": 1.1141325162772884, + "loss": 2.3422248363494873, + "loss_ce": 0.610176682472229, + "loss_iou": 0.8055077195167542, + "loss_num": 0.024169921875, + "loss_xval": 1.7320481538772583, + "num_input_tokens_seen": 502842104, + "step": 2909 + }, + { + "epoch": 1.1145155112983531, + "grad_norm": 75.55251896683606, + "learning_rate": 5e-06, + "loss": 2.6398, + "num_input_tokens_seen": 503015640, + "step": 2910 + }, + { + "epoch": 1.1145155112983531, + "loss": 2.694908857345581, + "loss_ce": 0.6324771642684937, + "loss_iou": 0.9035302400588989, + "loss_num": 0.051025390625, + "loss_xval": 2.062431812286377, + "num_input_tokens_seen": 503015640, + "step": 2910 + }, + { + "epoch": 1.114898506319418, + "grad_norm": 103.0143953814475, + "learning_rate": 5e-06, + "loss": 2.1158, + "num_input_tokens_seen": 503188752, + "step": 2911 + }, + { + "epoch": 1.114898506319418, + "loss": 2.159280776977539, + "loss_ce": 0.5753985643386841, + "loss_iou": 0.7022194862365723, + "loss_num": 0.035888671875, + "loss_xval": 1.5838823318481445, + "num_input_tokens_seen": 503188752, + "step": 2911 + }, + { + "epoch": 1.1152815013404827, + "grad_norm": 172.0503308676565, + "learning_rate": 5e-06, + "loss": 2.1211, + "num_input_tokens_seen": 503361272, + "step": 2912 + }, + { + "epoch": 1.1152815013404827, + "loss": 1.9591606855392456, + "loss_ce": 0.5694947242736816, + "loss_iou": 0.6471339464187622, + "loss_num": 0.0191650390625, + "loss_xval": 1.3896658420562744, + "num_input_tokens_seen": 503361272, + "step": 2912 + }, + { + "epoch": 1.1156644963615472, + "grad_norm": 146.1986484175921, + "learning_rate": 5e-06, + "loss": 2.661, + "num_input_tokens_seen": 503533984, + "step": 2913 + }, + { + "epoch": 1.1156644963615472, + "loss": 2.7327136993408203, + "loss_ce": 0.6500381231307983, + "loss_iou": 0.9599170088768005, + "loss_num": 0.03271484375, + "loss_xval": 2.0826759338378906, + "num_input_tokens_seen": 503533984, + "step": 2913 + }, + { + "epoch": 1.116047491382612, + "grad_norm": 102.3933499563087, + "learning_rate": 5e-06, + "loss": 2.4212, + "num_input_tokens_seen": 503706936, + "step": 2914 + }, + { + "epoch": 1.116047491382612, + "loss": 2.3756508827209473, + "loss_ce": 0.6869063377380371, + "loss_iou": 0.7599606513977051, + "loss_num": 0.03369140625, + "loss_xval": 1.6887445449829102, + "num_input_tokens_seen": 503706936, + "step": 2914 + }, + { + "epoch": 1.1164304864036767, + "grad_norm": 135.0876229271413, + "learning_rate": 5e-06, + "loss": 2.1516, + "num_input_tokens_seen": 503879768, + "step": 2915 + }, + { + "epoch": 1.1164304864036767, + "loss": 2.2691879272460938, + "loss_ce": 0.6411617994308472, + "loss_iou": 0.7393975257873535, + "loss_num": 0.02978515625, + "loss_xval": 1.628026008605957, + "num_input_tokens_seen": 503879768, + "step": 2915 + }, + { + "epoch": 1.1168134814247415, + "grad_norm": 302.1638750177775, + "learning_rate": 5e-06, + "loss": 2.2163, + "num_input_tokens_seen": 504052560, + "step": 2916 + }, + { + "epoch": 1.1168134814247415, + "loss": 2.273881435394287, + "loss_ce": 0.6011604070663452, + "loss_iou": 0.7756763100624084, + "loss_num": 0.0242919921875, + "loss_xval": 1.672721028327942, + "num_input_tokens_seen": 504052560, + "step": 2916 + }, + { + "epoch": 1.1171964764458062, + "grad_norm": 67.07883381518754, + "learning_rate": 5e-06, + "loss": 2.2384, + "num_input_tokens_seen": 504225232, + "step": 2917 + }, + { + "epoch": 1.1171964764458062, + "loss": 2.1237258911132812, + "loss_ce": 0.6211868524551392, + "loss_iou": 0.680865466594696, + "loss_num": 0.0281982421875, + "loss_xval": 1.502539038658142, + "num_input_tokens_seen": 504225232, + "step": 2917 + }, + { + "epoch": 1.117579471466871, + "grad_norm": 159.24011522402344, + "learning_rate": 5e-06, + "loss": 2.0324, + "num_input_tokens_seen": 504398160, + "step": 2918 + }, + { + "epoch": 1.117579471466871, + "loss": 2.0517094135284424, + "loss_ce": 0.603214681148529, + "loss_iou": 0.667332112789154, + "loss_num": 0.022705078125, + "loss_xval": 1.448494791984558, + "num_input_tokens_seen": 504398160, + "step": 2918 + }, + { + "epoch": 1.1179624664879357, + "grad_norm": 93.94077699502306, + "learning_rate": 5e-06, + "loss": 2.1048, + "num_input_tokens_seen": 504571184, + "step": 2919 + }, + { + "epoch": 1.1179624664879357, + "loss": 2.038072109222412, + "loss_ce": 0.5945290327072144, + "loss_iou": 0.6829226016998291, + "loss_num": 0.01556396484375, + "loss_xval": 1.4435429573059082, + "num_input_tokens_seen": 504571184, + "step": 2919 + }, + { + "epoch": 1.1183454615090005, + "grad_norm": 120.5352238902658, + "learning_rate": 5e-06, + "loss": 2.2054, + "num_input_tokens_seen": 504744248, + "step": 2920 + }, + { + "epoch": 1.1183454615090005, + "loss": 2.1758975982666016, + "loss_ce": 0.5827404856681824, + "loss_iou": 0.7432339191436768, + "loss_num": 0.0213623046875, + "loss_xval": 1.5931572914123535, + "num_input_tokens_seen": 504744248, + "step": 2920 + }, + { + "epoch": 1.118728456530065, + "grad_norm": 147.69704849799925, + "learning_rate": 5e-06, + "loss": 2.3654, + "num_input_tokens_seen": 504916992, + "step": 2921 + }, + { + "epoch": 1.118728456530065, + "loss": 2.4102468490600586, + "loss_ce": 0.605229377746582, + "loss_iou": 0.8305176496505737, + "loss_num": 0.02880859375, + "loss_xval": 1.8050172328948975, + "num_input_tokens_seen": 504916992, + "step": 2921 + }, + { + "epoch": 1.1191114515511298, + "grad_norm": 150.70132009449856, + "learning_rate": 5e-06, + "loss": 2.4769, + "num_input_tokens_seen": 505090440, + "step": 2922 + }, + { + "epoch": 1.1191114515511298, + "loss": 2.4750609397888184, + "loss_ce": 0.6045688390731812, + "loss_iou": 0.8626751899719238, + "loss_num": 0.029052734375, + "loss_xval": 1.8704919815063477, + "num_input_tokens_seen": 505090440, + "step": 2922 + }, + { + "epoch": 1.1194944465721945, + "grad_norm": 50.74361178457912, + "learning_rate": 5e-06, + "loss": 2.0561, + "num_input_tokens_seen": 505263352, + "step": 2923 + }, + { + "epoch": 1.1194944465721945, + "loss": 2.148237705230713, + "loss_ce": 0.6184177398681641, + "loss_iou": 0.7168753147125244, + "loss_num": 0.019287109375, + "loss_xval": 1.5298199653625488, + "num_input_tokens_seen": 505263352, + "step": 2923 + }, + { + "epoch": 1.1198774415932593, + "grad_norm": 59.80759277741302, + "learning_rate": 5e-06, + "loss": 1.8143, + "num_input_tokens_seen": 505436224, + "step": 2924 + }, + { + "epoch": 1.1198774415932593, + "loss": 1.801448106765747, + "loss_ce": 0.5888853669166565, + "loss_iou": 0.5577735900878906, + "loss_num": 0.0194091796875, + "loss_xval": 1.2125625610351562, + "num_input_tokens_seen": 505436224, + "step": 2924 + }, + { + "epoch": 1.120260436614324, + "grad_norm": 130.05618151174428, + "learning_rate": 5e-06, + "loss": 2.0072, + "num_input_tokens_seen": 505609024, + "step": 2925 + }, + { + "epoch": 1.120260436614324, + "loss": 2.052910566329956, + "loss_ce": 0.5666494369506836, + "loss_iou": 0.7000855207443237, + "loss_num": 0.0172119140625, + "loss_xval": 1.4862611293792725, + "num_input_tokens_seen": 505609024, + "step": 2925 + }, + { + "epoch": 1.1206434316353888, + "grad_norm": 133.19126693332592, + "learning_rate": 5e-06, + "loss": 2.3704, + "num_input_tokens_seen": 505781896, + "step": 2926 + }, + { + "epoch": 1.1206434316353888, + "loss": 2.4285738468170166, + "loss_ce": 0.5899032354354858, + "loss_iou": 0.8476800918579102, + "loss_num": 0.0286865234375, + "loss_xval": 1.8386707305908203, + "num_input_tokens_seen": 505781896, + "step": 2926 + }, + { + "epoch": 1.1210264266564534, + "grad_norm": 115.55355336966625, + "learning_rate": 5e-06, + "loss": 1.7624, + "num_input_tokens_seen": 505955000, + "step": 2927 + }, + { + "epoch": 1.1210264266564534, + "loss": 1.7960158586502075, + "loss_ce": 0.5944313406944275, + "loss_iou": 0.5624850392341614, + "loss_num": 0.01531982421875, + "loss_xval": 1.2015844583511353, + "num_input_tokens_seen": 505955000, + "step": 2927 + }, + { + "epoch": 1.1214094216775181, + "grad_norm": 167.47206572003535, + "learning_rate": 5e-06, + "loss": 2.2812, + "num_input_tokens_seen": 506127864, + "step": 2928 + }, + { + "epoch": 1.1214094216775181, + "loss": 2.132957935333252, + "loss_ce": 0.5568255186080933, + "loss_iou": 0.7221788167953491, + "loss_num": 0.0263671875, + "loss_xval": 1.5761325359344482, + "num_input_tokens_seen": 506127864, + "step": 2928 + }, + { + "epoch": 1.1217924166985829, + "grad_norm": 283.35226630248883, + "learning_rate": 5e-06, + "loss": 2.0465, + "num_input_tokens_seen": 506300784, + "step": 2929 + }, + { + "epoch": 1.1217924166985829, + "loss": 2.1811602115631104, + "loss_ce": 0.5846886038780212, + "loss_iou": 0.7596005201339722, + "loss_num": 0.01544189453125, + "loss_xval": 1.5964715480804443, + "num_input_tokens_seen": 506300784, + "step": 2929 + }, + { + "epoch": 1.1221754117196476, + "grad_norm": 87.36076360074662, + "learning_rate": 5e-06, + "loss": 2.4209, + "num_input_tokens_seen": 506473792, + "step": 2930 + }, + { + "epoch": 1.1221754117196476, + "loss": 2.3417320251464844, + "loss_ce": 0.5851880311965942, + "loss_iou": 0.8060064315795898, + "loss_num": 0.0289306640625, + "loss_xval": 1.7565441131591797, + "num_input_tokens_seen": 506473792, + "step": 2930 + }, + { + "epoch": 1.1225584067407124, + "grad_norm": 134.59615822917155, + "learning_rate": 5e-06, + "loss": 2.0875, + "num_input_tokens_seen": 506646888, + "step": 2931 + }, + { + "epoch": 1.1225584067407124, + "loss": 2.0470709800720215, + "loss_ce": 0.6375364065170288, + "loss_iou": 0.6499576568603516, + "loss_num": 0.02197265625, + "loss_xval": 1.4095344543457031, + "num_input_tokens_seen": 506646888, + "step": 2931 + }, + { + "epoch": 1.1229414017617771, + "grad_norm": 183.85111989359334, + "learning_rate": 5e-06, + "loss": 2.3452, + "num_input_tokens_seen": 506820112, + "step": 2932 + }, + { + "epoch": 1.1229414017617771, + "loss": 2.2805278301239014, + "loss_ce": 0.6472984552383423, + "loss_iou": 0.7543282508850098, + "loss_num": 0.02490234375, + "loss_xval": 1.6332292556762695, + "num_input_tokens_seen": 506820112, + "step": 2932 + }, + { + "epoch": 1.123324396782842, + "grad_norm": 100.58133962340794, + "learning_rate": 5e-06, + "loss": 2.2745, + "num_input_tokens_seen": 506993024, + "step": 2933 + }, + { + "epoch": 1.123324396782842, + "loss": 2.497943162918091, + "loss_ce": 0.5707536935806274, + "loss_iou": 0.8855613470077515, + "loss_num": 0.03125, + "loss_xval": 1.927189588546753, + "num_input_tokens_seen": 506993024, + "step": 2933 + }, + { + "epoch": 1.1237073918039067, + "grad_norm": 65.21433535769, + "learning_rate": 5e-06, + "loss": 2.0247, + "num_input_tokens_seen": 507165872, + "step": 2934 + }, + { + "epoch": 1.1237073918039067, + "loss": 1.8829970359802246, + "loss_ce": 0.569800853729248, + "loss_iou": 0.6100282073020935, + "loss_num": 0.0186767578125, + "loss_xval": 1.313196063041687, + "num_input_tokens_seen": 507165872, + "step": 2934 + }, + { + "epoch": 1.1240903868249712, + "grad_norm": 102.68592955931913, + "learning_rate": 5e-06, + "loss": 1.9043, + "num_input_tokens_seen": 507338480, + "step": 2935 + }, + { + "epoch": 1.1240903868249712, + "loss": 1.7282692193984985, + "loss_ce": 0.5764355063438416, + "loss_iou": 0.5376782417297363, + "loss_num": 0.01531982421875, + "loss_xval": 1.1518335342407227, + "num_input_tokens_seen": 507338480, + "step": 2935 + }, + { + "epoch": 1.124473381846036, + "grad_norm": 141.3760785132849, + "learning_rate": 5e-06, + "loss": 2.6952, + "num_input_tokens_seen": 507511264, + "step": 2936 + }, + { + "epoch": 1.124473381846036, + "loss": 2.7415976524353027, + "loss_ce": 0.5869454145431519, + "loss_iou": 0.9786018133163452, + "loss_num": 0.03955078125, + "loss_xval": 2.1546523571014404, + "num_input_tokens_seen": 507511264, + "step": 2936 + }, + { + "epoch": 1.1248563768671007, + "grad_norm": 111.42610780173028, + "learning_rate": 5e-06, + "loss": 2.0939, + "num_input_tokens_seen": 507684232, + "step": 2937 + }, + { + "epoch": 1.1248563768671007, + "loss": 2.3174006938934326, + "loss_ce": 0.5758513808250427, + "loss_iou": 0.7888349294662476, + "loss_num": 0.03271484375, + "loss_xval": 1.7415492534637451, + "num_input_tokens_seen": 507684232, + "step": 2937 + }, + { + "epoch": 1.1252393718881655, + "grad_norm": 61.77266775174526, + "learning_rate": 5e-06, + "loss": 1.9409, + "num_input_tokens_seen": 507856928, + "step": 2938 + }, + { + "epoch": 1.1252393718881655, + "loss": 1.7175027132034302, + "loss_ce": 0.5949825644493103, + "loss_iou": 0.524501621723175, + "loss_num": 0.01470947265625, + "loss_xval": 1.122520089149475, + "num_input_tokens_seen": 507856928, + "step": 2938 + }, + { + "epoch": 1.1256223669092302, + "grad_norm": 103.4123729731931, + "learning_rate": 5e-06, + "loss": 1.9969, + "num_input_tokens_seen": 508029744, + "step": 2939 + }, + { + "epoch": 1.1256223669092302, + "loss": 2.0798449516296387, + "loss_ce": 0.5971314311027527, + "loss_iou": 0.6966027617454529, + "loss_num": 0.0179443359375, + "loss_xval": 1.4827135801315308, + "num_input_tokens_seen": 508029744, + "step": 2939 + }, + { + "epoch": 1.126005361930295, + "grad_norm": 168.68270993108666, + "learning_rate": 5e-06, + "loss": 2.1583, + "num_input_tokens_seen": 508202720, + "step": 2940 + }, + { + "epoch": 1.126005361930295, + "loss": 2.2846107482910156, + "loss_ce": 0.6081647872924805, + "loss_iou": 0.7893948554992676, + "loss_num": 0.01953125, + "loss_xval": 1.6764459609985352, + "num_input_tokens_seen": 508202720, + "step": 2940 + }, + { + "epoch": 1.1263883569513595, + "grad_norm": 162.22939937828494, + "learning_rate": 5e-06, + "loss": 2.2816, + "num_input_tokens_seen": 508375792, + "step": 2941 + }, + { + "epoch": 1.1263883569513595, + "loss": 2.4022090435028076, + "loss_ce": 0.5834155082702637, + "loss_iou": 0.842105507850647, + "loss_num": 0.0269775390625, + "loss_xval": 1.818793535232544, + "num_input_tokens_seen": 508375792, + "step": 2941 + }, + { + "epoch": 1.1267713519724243, + "grad_norm": 133.3995652593231, + "learning_rate": 5e-06, + "loss": 2.007, + "num_input_tokens_seen": 508548848, + "step": 2942 + }, + { + "epoch": 1.1267713519724243, + "loss": 2.081279754638672, + "loss_ce": 0.5720000267028809, + "loss_iou": 0.7090771794319153, + "loss_num": 0.0181884765625, + "loss_xval": 1.5092798471450806, + "num_input_tokens_seen": 508548848, + "step": 2942 + }, + { + "epoch": 1.127154346993489, + "grad_norm": 138.05559501511522, + "learning_rate": 5e-06, + "loss": 2.197, + "num_input_tokens_seen": 508721712, + "step": 2943 + }, + { + "epoch": 1.127154346993489, + "loss": 2.1268904209136963, + "loss_ce": 0.5533802509307861, + "loss_iou": 0.7246366143226624, + "loss_num": 0.0247802734375, + "loss_xval": 1.5735102891921997, + "num_input_tokens_seen": 508721712, + "step": 2943 + }, + { + "epoch": 1.1275373420145538, + "grad_norm": 162.67668917262557, + "learning_rate": 5e-06, + "loss": 1.9532, + "num_input_tokens_seen": 508895048, + "step": 2944 + }, + { + "epoch": 1.1275373420145538, + "loss": 2.0485429763793945, + "loss_ce": 0.5689271092414856, + "loss_iou": 0.6940621137619019, + "loss_num": 0.018310546875, + "loss_xval": 1.4796159267425537, + "num_input_tokens_seen": 508895048, + "step": 2944 + }, + { + "epoch": 1.1279203370356186, + "grad_norm": 183.67846924849545, + "learning_rate": 5e-06, + "loss": 2.0898, + "num_input_tokens_seen": 509067848, + "step": 2945 + }, + { + "epoch": 1.1279203370356186, + "loss": 2.27622127532959, + "loss_ce": 0.5981290340423584, + "loss_iou": 0.7724873423576355, + "loss_num": 0.026611328125, + "loss_xval": 1.678092360496521, + "num_input_tokens_seen": 509067848, + "step": 2945 + }, + { + "epoch": 1.1283033320566833, + "grad_norm": 90.26400795267458, + "learning_rate": 5e-06, + "loss": 2.2719, + "num_input_tokens_seen": 509241072, + "step": 2946 + }, + { + "epoch": 1.1283033320566833, + "loss": 2.257774591445923, + "loss_ce": 0.5660457611083984, + "loss_iou": 0.7746363878250122, + "loss_num": 0.028564453125, + "loss_xval": 1.6917288303375244, + "num_input_tokens_seen": 509241072, + "step": 2946 + }, + { + "epoch": 1.128686327077748, + "grad_norm": 94.63258031697269, + "learning_rate": 5e-06, + "loss": 1.8902, + "num_input_tokens_seen": 509414184, + "step": 2947 + }, + { + "epoch": 1.128686327077748, + "loss": 1.777647852897644, + "loss_ce": 0.5984277725219727, + "loss_iou": 0.5606946349143982, + "loss_num": 0.0115966796875, + "loss_xval": 1.1792200803756714, + "num_input_tokens_seen": 509414184, + "step": 2947 + }, + { + "epoch": 1.1290693220988128, + "grad_norm": 134.63031029028897, + "learning_rate": 5e-06, + "loss": 2.0517, + "num_input_tokens_seen": 509587032, + "step": 2948 + }, + { + "epoch": 1.1290693220988128, + "loss": 2.034257650375366, + "loss_ce": 0.5827445387840271, + "loss_iou": 0.6827267408370972, + "loss_num": 0.0172119140625, + "loss_xval": 1.4515130519866943, + "num_input_tokens_seen": 509587032, + "step": 2948 + }, + { + "epoch": 1.1294523171198774, + "grad_norm": 171.3631026885139, + "learning_rate": 5e-06, + "loss": 2.1489, + "num_input_tokens_seen": 509760200, + "step": 2949 + }, + { + "epoch": 1.1294523171198774, + "loss": 2.174747943878174, + "loss_ce": 0.5836296081542969, + "loss_iou": 0.7315332293510437, + "loss_num": 0.025634765625, + "loss_xval": 1.5911182165145874, + "num_input_tokens_seen": 509760200, + "step": 2949 + }, + { + "epoch": 1.1298353121409421, + "grad_norm": 130.8085905636241, + "learning_rate": 5e-06, + "loss": 2.3057, + "num_input_tokens_seen": 509933456, + "step": 2950 + }, + { + "epoch": 1.1298353121409421, + "loss": 2.243117332458496, + "loss_ce": 0.53421950340271, + "loss_iou": 0.7882257699966431, + "loss_num": 0.0264892578125, + "loss_xval": 1.7088978290557861, + "num_input_tokens_seen": 509933456, + "step": 2950 + }, + { + "epoch": 1.1302183071620069, + "grad_norm": 83.9293575351945, + "learning_rate": 5e-06, + "loss": 2.0456, + "num_input_tokens_seen": 510106688, + "step": 2951 + }, + { + "epoch": 1.1302183071620069, + "loss": 2.049988031387329, + "loss_ce": 0.551052451133728, + "loss_iou": 0.6941241025924683, + "loss_num": 0.0220947265625, + "loss_xval": 1.4989354610443115, + "num_input_tokens_seen": 510106688, + "step": 2951 + }, + { + "epoch": 1.1306013021830716, + "grad_norm": 216.60638569578, + "learning_rate": 5e-06, + "loss": 1.9574, + "num_input_tokens_seen": 510279328, + "step": 2952 + }, + { + "epoch": 1.1306013021830716, + "loss": 2.0311362743377686, + "loss_ce": 0.5471563339233398, + "loss_iou": 0.6727455854415894, + "loss_num": 0.0277099609375, + "loss_xval": 1.4839799404144287, + "num_input_tokens_seen": 510279328, + "step": 2952 + }, + { + "epoch": 1.1309842972041364, + "grad_norm": 132.50570065785007, + "learning_rate": 5e-06, + "loss": 2.0608, + "num_input_tokens_seen": 510452544, + "step": 2953 + }, + { + "epoch": 1.1309842972041364, + "loss": 2.091780662536621, + "loss_ce": 0.5471699237823486, + "loss_iou": 0.7210510969161987, + "loss_num": 0.0205078125, + "loss_xval": 1.5446107387542725, + "num_input_tokens_seen": 510452544, + "step": 2953 + }, + { + "epoch": 1.1313672922252012, + "grad_norm": 124.17635472163106, + "learning_rate": 5e-06, + "loss": 2.0695, + "num_input_tokens_seen": 510625568, + "step": 2954 + }, + { + "epoch": 1.1313672922252012, + "loss": 2.0901994705200195, + "loss_ce": 0.5760436654090881, + "loss_iou": 0.6860941052436829, + "loss_num": 0.0284423828125, + "loss_xval": 1.5141559839248657, + "num_input_tokens_seen": 510625568, + "step": 2954 + }, + { + "epoch": 1.1317502872462657, + "grad_norm": 120.58443867857027, + "learning_rate": 5e-06, + "loss": 2.1215, + "num_input_tokens_seen": 510798616, + "step": 2955 + }, + { + "epoch": 1.1317502872462657, + "loss": 2.036712884902954, + "loss_ce": 0.6060461401939392, + "loss_iou": 0.6764082908630371, + "loss_num": 0.01556396484375, + "loss_xval": 1.4306669235229492, + "num_input_tokens_seen": 510798616, + "step": 2955 + }, + { + "epoch": 1.1321332822673305, + "grad_norm": 207.7984739476321, + "learning_rate": 5e-06, + "loss": 2.0438, + "num_input_tokens_seen": 510971456, + "step": 2956 + }, + { + "epoch": 1.1321332822673305, + "loss": 1.9081439971923828, + "loss_ce": 0.5633724927902222, + "loss_iou": 0.6264721155166626, + "loss_num": 0.018310546875, + "loss_xval": 1.3447716236114502, + "num_input_tokens_seen": 510971456, + "step": 2956 + }, + { + "epoch": 1.1325162772883952, + "grad_norm": 135.05079194814368, + "learning_rate": 5e-06, + "loss": 2.0915, + "num_input_tokens_seen": 511144496, + "step": 2957 + }, + { + "epoch": 1.1325162772883952, + "loss": 2.1809394359588623, + "loss_ce": 0.5449835658073425, + "loss_iou": 0.762374997138977, + "loss_num": 0.022216796875, + "loss_xval": 1.635956048965454, + "num_input_tokens_seen": 511144496, + "step": 2957 + }, + { + "epoch": 1.13289927230946, + "grad_norm": 146.6064746236638, + "learning_rate": 5e-06, + "loss": 1.8096, + "num_input_tokens_seen": 511317528, + "step": 2958 + }, + { + "epoch": 1.13289927230946, + "loss": 1.8042373657226562, + "loss_ce": 0.5380661487579346, + "loss_iou": 0.5805953741073608, + "loss_num": 0.02099609375, + "loss_xval": 1.2661712169647217, + "num_input_tokens_seen": 511317528, + "step": 2958 + }, + { + "epoch": 1.1332822673305247, + "grad_norm": 182.20626784676352, + "learning_rate": 5e-06, + "loss": 2.1662, + "num_input_tokens_seen": 511486416, + "step": 2959 + }, + { + "epoch": 1.1332822673305247, + "loss": 2.0279202461242676, + "loss_ce": 0.5270369052886963, + "loss_iou": 0.6989279389381409, + "loss_num": 0.0206298828125, + "loss_xval": 1.5008832216262817, + "num_input_tokens_seen": 511486416, + "step": 2959 + }, + { + "epoch": 1.1336652623515895, + "grad_norm": 130.73128800985407, + "learning_rate": 5e-06, + "loss": 2.0859, + "num_input_tokens_seen": 511659280, + "step": 2960 + }, + { + "epoch": 1.1336652623515895, + "loss": 2.081346035003662, + "loss_ce": 0.6038487553596497, + "loss_iou": 0.6952458620071411, + "loss_num": 0.017333984375, + "loss_xval": 1.4774973392486572, + "num_input_tokens_seen": 511659280, + "step": 2960 + }, + { + "epoch": 1.1340482573726542, + "grad_norm": 104.33194520531575, + "learning_rate": 5e-06, + "loss": 2.1437, + "num_input_tokens_seen": 511832640, + "step": 2961 + }, + { + "epoch": 1.1340482573726542, + "loss": 2.028927803039551, + "loss_ce": 0.5392760038375854, + "loss_iou": 0.6836686730384827, + "loss_num": 0.0244140625, + "loss_xval": 1.4896517992019653, + "num_input_tokens_seen": 511832640, + "step": 2961 + }, + { + "epoch": 1.134431252393719, + "grad_norm": 185.51423538122006, + "learning_rate": 5e-06, + "loss": 2.0979, + "num_input_tokens_seen": 512006024, + "step": 2962 + }, + { + "epoch": 1.134431252393719, + "loss": 2.060588836669922, + "loss_ce": 0.5479211807250977, + "loss_iou": 0.7005019187927246, + "loss_num": 0.0223388671875, + "loss_xval": 1.5126676559448242, + "num_input_tokens_seen": 512006024, + "step": 2962 + }, + { + "epoch": 1.1348142474147835, + "grad_norm": 122.34527085825678, + "learning_rate": 5e-06, + "loss": 2.5203, + "num_input_tokens_seen": 512179048, + "step": 2963 + }, + { + "epoch": 1.1348142474147835, + "loss": 2.5278701782226562, + "loss_ce": 0.5822385549545288, + "loss_iou": 0.8813852071762085, + "loss_num": 0.03662109375, + "loss_xval": 1.945631742477417, + "num_input_tokens_seen": 512179048, + "step": 2963 + }, + { + "epoch": 1.1351972424358483, + "grad_norm": 78.56436353360586, + "learning_rate": 5e-06, + "loss": 2.0548, + "num_input_tokens_seen": 512352192, + "step": 2964 + }, + { + "epoch": 1.1351972424358483, + "loss": 1.862177848815918, + "loss_ce": 0.5344089269638062, + "loss_iou": 0.6073201894760132, + "loss_num": 0.022705078125, + "loss_xval": 1.3277690410614014, + "num_input_tokens_seen": 512352192, + "step": 2964 + }, + { + "epoch": 1.135580237456913, + "grad_norm": 279.6255196265593, + "learning_rate": 5e-06, + "loss": 2.1784, + "num_input_tokens_seen": 512524936, + "step": 2965 + }, + { + "epoch": 1.135580237456913, + "loss": 2.185105800628662, + "loss_ce": 0.5289831161499023, + "loss_iou": 0.7837650179862976, + "loss_num": 0.0177001953125, + "loss_xval": 1.6561225652694702, + "num_input_tokens_seen": 512524936, + "step": 2965 + }, + { + "epoch": 1.1359632324779778, + "grad_norm": 98.68948307232363, + "learning_rate": 5e-06, + "loss": 2.532, + "num_input_tokens_seen": 512697464, + "step": 2966 + }, + { + "epoch": 1.1359632324779778, + "loss": 2.500180959701538, + "loss_ce": 0.560333251953125, + "loss_iou": 0.8793476819992065, + "loss_num": 0.0361328125, + "loss_xval": 1.9398475885391235, + "num_input_tokens_seen": 512697464, + "step": 2966 + }, + { + "epoch": 1.1363462274990426, + "grad_norm": 201.91759851011244, + "learning_rate": 5e-06, + "loss": 2.4546, + "num_input_tokens_seen": 512870456, + "step": 2967 + }, + { + "epoch": 1.1363462274990426, + "loss": 2.580899953842163, + "loss_ce": 0.5556600093841553, + "loss_iou": 0.9261636734008789, + "loss_num": 0.03466796875, + "loss_xval": 2.025239944458008, + "num_input_tokens_seen": 512870456, + "step": 2967 + }, + { + "epoch": 1.1367292225201073, + "grad_norm": 90.42583979033348, + "learning_rate": 5e-06, + "loss": 2.2886, + "num_input_tokens_seen": 513043744, + "step": 2968 + }, + { + "epoch": 1.1367292225201073, + "loss": 2.3291778564453125, + "loss_ce": 0.5995110273361206, + "loss_iou": 0.8052021265029907, + "loss_num": 0.0238037109375, + "loss_xval": 1.7296669483184814, + "num_input_tokens_seen": 513043744, + "step": 2968 + }, + { + "epoch": 1.1371122175411719, + "grad_norm": 86.52715858494919, + "learning_rate": 5e-06, + "loss": 1.8593, + "num_input_tokens_seen": 513216520, + "step": 2969 + }, + { + "epoch": 1.1371122175411719, + "loss": 1.7987768650054932, + "loss_ce": 0.5653985142707825, + "loss_iou": 0.5767874717712402, + "loss_num": 0.0159912109375, + "loss_xval": 1.2333784103393555, + "num_input_tokens_seen": 513216520, + "step": 2969 + }, + { + "epoch": 1.1374952125622366, + "grad_norm": 122.87749878125817, + "learning_rate": 5e-06, + "loss": 2.0181, + "num_input_tokens_seen": 513389552, + "step": 2970 + }, + { + "epoch": 1.1374952125622366, + "loss": 1.8785089254379272, + "loss_ce": 0.5847567319869995, + "loss_iou": 0.6094768047332764, + "loss_num": 0.01495361328125, + "loss_xval": 1.2937521934509277, + "num_input_tokens_seen": 513389552, + "step": 2970 + }, + { + "epoch": 1.1378782075833014, + "grad_norm": 226.85791358507996, + "learning_rate": 5e-06, + "loss": 2.3399, + "num_input_tokens_seen": 513561832, + "step": 2971 + }, + { + "epoch": 1.1378782075833014, + "loss": 2.312203884124756, + "loss_ce": 0.5954175591468811, + "loss_iou": 0.8090767860412598, + "loss_num": 0.019775390625, + "loss_xval": 1.7167863845825195, + "num_input_tokens_seen": 513561832, + "step": 2971 + }, + { + "epoch": 1.1382612026043661, + "grad_norm": 90.29982701520944, + "learning_rate": 5e-06, + "loss": 2.2469, + "num_input_tokens_seen": 513734936, + "step": 2972 + }, + { + "epoch": 1.1382612026043661, + "loss": 2.257134199142456, + "loss_ce": 0.5403452515602112, + "loss_iou": 0.7982443571090698, + "loss_num": 0.0240478515625, + "loss_xval": 1.7167890071868896, + "num_input_tokens_seen": 513734936, + "step": 2972 + }, + { + "epoch": 1.138644197625431, + "grad_norm": 155.1021040353951, + "learning_rate": 5e-06, + "loss": 1.9722, + "num_input_tokens_seen": 513907872, + "step": 2973 + }, + { + "epoch": 1.138644197625431, + "loss": 1.933345079421997, + "loss_ce": 0.5490626096725464, + "loss_iou": 0.6550623178482056, + "loss_num": 0.01483154296875, + "loss_xval": 1.3842823505401611, + "num_input_tokens_seen": 513907872, + "step": 2973 + }, + { + "epoch": 1.1390271926464957, + "grad_norm": 112.20735893892689, + "learning_rate": 5e-06, + "loss": 2.0814, + "num_input_tokens_seen": 514080728, + "step": 2974 + }, + { + "epoch": 1.1390271926464957, + "loss": 2.0183908939361572, + "loss_ce": 0.5023434162139893, + "loss_iou": 0.693021297454834, + "loss_num": 0.0260009765625, + "loss_xval": 1.510798454284668, + "num_input_tokens_seen": 514080728, + "step": 2974 + }, + { + "epoch": 1.1394101876675604, + "grad_norm": 271.87382046629205, + "learning_rate": 5e-06, + "loss": 2.0258, + "num_input_tokens_seen": 514253856, + "step": 2975 + }, + { + "epoch": 1.1394101876675604, + "loss": 2.0958802700042725, + "loss_ce": 0.5488342046737671, + "loss_iou": 0.7046447992324829, + "loss_num": 0.027587890625, + "loss_xval": 1.5470459461212158, + "num_input_tokens_seen": 514253856, + "step": 2975 + }, + { + "epoch": 1.1397931826886252, + "grad_norm": 127.40662963833958, + "learning_rate": 5e-06, + "loss": 2.3439, + "num_input_tokens_seen": 514427240, + "step": 2976 + }, + { + "epoch": 1.1397931826886252, + "loss": 2.33837890625, + "loss_ce": 0.5436375141143799, + "loss_iou": 0.8391736149787903, + "loss_num": 0.0233154296875, + "loss_xval": 1.7947412729263306, + "num_input_tokens_seen": 514427240, + "step": 2976 + }, + { + "epoch": 1.1401761777096897, + "grad_norm": 265.98599893325144, + "learning_rate": 5e-06, + "loss": 2.076, + "num_input_tokens_seen": 514599880, + "step": 2977 + }, + { + "epoch": 1.1401761777096897, + "loss": 2.0318119525909424, + "loss_ce": 0.5489307641983032, + "loss_iou": 0.6901710033416748, + "loss_num": 0.0205078125, + "loss_xval": 1.4828810691833496, + "num_input_tokens_seen": 514599880, + "step": 2977 + }, + { + "epoch": 1.1405591727307545, + "grad_norm": 171.01161957565827, + "learning_rate": 5e-06, + "loss": 2.2905, + "num_input_tokens_seen": 514772440, + "step": 2978 + }, + { + "epoch": 1.1405591727307545, + "loss": 2.3822574615478516, + "loss_ce": 0.5285988450050354, + "loss_iou": 0.8416242003440857, + "loss_num": 0.0341796875, + "loss_xval": 1.8536585569381714, + "num_input_tokens_seen": 514772440, + "step": 2978 + }, + { + "epoch": 1.1409421677518192, + "grad_norm": 278.93089062665047, + "learning_rate": 5e-06, + "loss": 2.2799, + "num_input_tokens_seen": 514945344, + "step": 2979 + }, + { + "epoch": 1.1409421677518192, + "loss": 2.5735392570495605, + "loss_ce": 0.5378756523132324, + "loss_iou": 0.9553776383399963, + "loss_num": 0.02490234375, + "loss_xval": 2.035663604736328, + "num_input_tokens_seen": 514945344, + "step": 2979 + }, + { + "epoch": 1.141325162772884, + "grad_norm": 127.14942804815144, + "learning_rate": 5e-06, + "loss": 2.3917, + "num_input_tokens_seen": 515118176, + "step": 2980 + }, + { + "epoch": 1.141325162772884, + "loss": 2.3706796169281006, + "loss_ce": 0.5371127724647522, + "loss_iou": 0.836888313293457, + "loss_num": 0.031982421875, + "loss_xval": 1.833566665649414, + "num_input_tokens_seen": 515118176, + "step": 2980 + }, + { + "epoch": 1.1417081577939487, + "grad_norm": 98.17838159288843, + "learning_rate": 5e-06, + "loss": 1.6324, + "num_input_tokens_seen": 515290928, + "step": 2981 + }, + { + "epoch": 1.1417081577939487, + "loss": 1.5124866962432861, + "loss_ce": 0.5301163196563721, + "loss_iou": 0.45512863993644714, + "loss_num": 0.014404296875, + "loss_xval": 0.9823703169822693, + "num_input_tokens_seen": 515290928, + "step": 2981 + }, + { + "epoch": 1.1420911528150135, + "grad_norm": 231.40948469137263, + "learning_rate": 5e-06, + "loss": 2.637, + "num_input_tokens_seen": 515463840, + "step": 2982 + }, + { + "epoch": 1.1420911528150135, + "loss": 2.742429256439209, + "loss_ce": 0.5224322080612183, + "loss_iou": 1.048703908920288, + "loss_num": 0.0245361328125, + "loss_xval": 2.219996929168701, + "num_input_tokens_seen": 515463840, + "step": 2982 + }, + { + "epoch": 1.142474147836078, + "grad_norm": 94.91045147812316, + "learning_rate": 5e-06, + "loss": 2.4669, + "num_input_tokens_seen": 515636528, + "step": 2983 + }, + { + "epoch": 1.142474147836078, + "loss": 2.3929977416992188, + "loss_ce": 0.5427125692367554, + "loss_iou": 0.8329795598983765, + "loss_num": 0.036865234375, + "loss_xval": 1.8502854108810425, + "num_input_tokens_seen": 515636528, + "step": 2983 + }, + { + "epoch": 1.1428571428571428, + "grad_norm": 77.5044490935408, + "learning_rate": 5e-06, + "loss": 2.1173, + "num_input_tokens_seen": 515809480, + "step": 2984 + }, + { + "epoch": 1.1428571428571428, + "loss": 2.1471107006073, + "loss_ce": 0.5289281606674194, + "loss_iou": 0.7355438470840454, + "loss_num": 0.0294189453125, + "loss_xval": 1.6181824207305908, + "num_input_tokens_seen": 515809480, + "step": 2984 + }, + { + "epoch": 1.1432401378782076, + "grad_norm": 170.05717011259233, + "learning_rate": 5e-06, + "loss": 2.1984, + "num_input_tokens_seen": 515982408, + "step": 2985 + }, + { + "epoch": 1.1432401378782076, + "loss": 2.1551971435546875, + "loss_ce": 0.5516361594200134, + "loss_iou": 0.7555311918258667, + "loss_num": 0.0185546875, + "loss_xval": 1.6035611629486084, + "num_input_tokens_seen": 515982408, + "step": 2985 + }, + { + "epoch": 1.1436231328992723, + "grad_norm": 160.51003341679348, + "learning_rate": 5e-06, + "loss": 3.0738, + "num_input_tokens_seen": 516155320, + "step": 2986 + }, + { + "epoch": 1.1436231328992723, + "loss": 3.399182081222534, + "loss_ce": 0.5560451745986938, + "loss_iou": 1.3256211280822754, + "loss_num": 0.038330078125, + "loss_xval": 2.843136787414551, + "num_input_tokens_seen": 516155320, + "step": 2986 + }, + { + "epoch": 1.144006127920337, + "grad_norm": 128.32679133374626, + "learning_rate": 5e-06, + "loss": 2.2714, + "num_input_tokens_seen": 516328432, + "step": 2987 + }, + { + "epoch": 1.144006127920337, + "loss": 2.3910999298095703, + "loss_ce": 0.5498851537704468, + "loss_iou": 0.8372029066085815, + "loss_num": 0.033447265625, + "loss_xval": 1.841214895248413, + "num_input_tokens_seen": 516328432, + "step": 2987 + }, + { + "epoch": 1.1443891229414018, + "grad_norm": 64.3963790748517, + "learning_rate": 5e-06, + "loss": 2.0932, + "num_input_tokens_seen": 516498432, + "step": 2988 + }, + { + "epoch": 1.1443891229414018, + "loss": 2.0400850772857666, + "loss_ce": 0.5410438776016235, + "loss_iou": 0.6955808401107788, + "loss_num": 0.0216064453125, + "loss_xval": 1.4990413188934326, + "num_input_tokens_seen": 516498432, + "step": 2988 + }, + { + "epoch": 1.1447721179624666, + "grad_norm": 124.91494391282485, + "learning_rate": 5e-06, + "loss": 1.9027, + "num_input_tokens_seen": 516671136, + "step": 2989 + }, + { + "epoch": 1.1447721179624666, + "loss": 2.0308449268341064, + "loss_ce": 0.5373611450195312, + "loss_iou": 0.6931225061416626, + "loss_num": 0.021484375, + "loss_xval": 1.4934837818145752, + "num_input_tokens_seen": 516671136, + "step": 2989 + }, + { + "epoch": 1.1451551129835311, + "grad_norm": 183.98754523757103, + "learning_rate": 5e-06, + "loss": 2.2069, + "num_input_tokens_seen": 516843880, + "step": 2990 + }, + { + "epoch": 1.1451551129835311, + "loss": 2.306507110595703, + "loss_ce": 0.5649071931838989, + "loss_iou": 0.8115959167480469, + "loss_num": 0.023681640625, + "loss_xval": 1.7415999174118042, + "num_input_tokens_seen": 516843880, + "step": 2990 + }, + { + "epoch": 1.1455381080045959, + "grad_norm": 88.9670323543909, + "learning_rate": 5e-06, + "loss": 2.4037, + "num_input_tokens_seen": 517017168, + "step": 2991 + }, + { + "epoch": 1.1455381080045959, + "loss": 2.4599194526672363, + "loss_ce": 0.5973893404006958, + "loss_iou": 0.842336893081665, + "loss_num": 0.03564453125, + "loss_xval": 1.86253023147583, + "num_input_tokens_seen": 517017168, + "step": 2991 + }, + { + "epoch": 1.1459211030256606, + "grad_norm": 86.98039124727654, + "learning_rate": 5e-06, + "loss": 2.0201, + "num_input_tokens_seen": 517190192, + "step": 2992 + }, + { + "epoch": 1.1459211030256606, + "loss": 2.0168302059173584, + "loss_ce": 0.5655362606048584, + "loss_iou": 0.6668243408203125, + "loss_num": 0.0235595703125, + "loss_xval": 1.4512939453125, + "num_input_tokens_seen": 517190192, + "step": 2992 + }, + { + "epoch": 1.1463040980467254, + "grad_norm": 134.43718663507704, + "learning_rate": 5e-06, + "loss": 1.9381, + "num_input_tokens_seen": 517363216, + "step": 2993 + }, + { + "epoch": 1.1463040980467254, + "loss": 1.8722496032714844, + "loss_ce": 0.5516296625137329, + "loss_iou": 0.6261608600616455, + "loss_num": 0.013671875, + "loss_xval": 1.320620059967041, + "num_input_tokens_seen": 517363216, + "step": 2993 + }, + { + "epoch": 1.1466870930677902, + "grad_norm": 110.95566969817125, + "learning_rate": 5e-06, + "loss": 2.1158, + "num_input_tokens_seen": 517536136, + "step": 2994 + }, + { + "epoch": 1.1466870930677902, + "loss": 2.022862195968628, + "loss_ce": 0.5184419751167297, + "loss_iou": 0.6844000816345215, + "loss_num": 0.027099609375, + "loss_xval": 1.504420280456543, + "num_input_tokens_seen": 517536136, + "step": 2994 + }, + { + "epoch": 1.147070088088855, + "grad_norm": 88.68752519561478, + "learning_rate": 5e-06, + "loss": 1.9335, + "num_input_tokens_seen": 517708928, + "step": 2995 + }, + { + "epoch": 1.147070088088855, + "loss": 2.0943524837493896, + "loss_ce": 0.5301757454872131, + "loss_iou": 0.7113485932350159, + "loss_num": 0.0283203125, + "loss_xval": 1.5641766786575317, + "num_input_tokens_seen": 517708928, + "step": 2995 + }, + { + "epoch": 1.1474530831099194, + "grad_norm": 96.74101352871402, + "learning_rate": 5e-06, + "loss": 1.8792, + "num_input_tokens_seen": 517882120, + "step": 2996 + }, + { + "epoch": 1.1474530831099194, + "loss": 1.9112402200698853, + "loss_ce": 0.5335820317268372, + "loss_iou": 0.6322038173675537, + "loss_num": 0.0225830078125, + "loss_xval": 1.3776583671569824, + "num_input_tokens_seen": 517882120, + "step": 2996 + }, + { + "epoch": 1.1478360781309842, + "grad_norm": 157.84643782714286, + "learning_rate": 5e-06, + "loss": 1.8198, + "num_input_tokens_seen": 518054944, + "step": 2997 + }, + { + "epoch": 1.1478360781309842, + "loss": 2.0277700424194336, + "loss_ce": 0.5270143747329712, + "loss_iou": 0.7133447527885437, + "loss_num": 0.01483154296875, + "loss_xval": 1.5007556676864624, + "num_input_tokens_seen": 518054944, + "step": 2997 + }, + { + "epoch": 1.148219073152049, + "grad_norm": 128.03047628782417, + "learning_rate": 5e-06, + "loss": 2.1358, + "num_input_tokens_seen": 518227800, + "step": 2998 + }, + { + "epoch": 1.148219073152049, + "loss": 2.072675943374634, + "loss_ce": 0.5211225748062134, + "loss_iou": 0.7174270749092102, + "loss_num": 0.0233154296875, + "loss_xval": 1.5515533685684204, + "num_input_tokens_seen": 518227800, + "step": 2998 + }, + { + "epoch": 1.1486020681731137, + "grad_norm": 58.07235021252467, + "learning_rate": 5e-06, + "loss": 1.7931, + "num_input_tokens_seen": 518400528, + "step": 2999 + }, + { + "epoch": 1.1486020681731137, + "loss": 1.7334604263305664, + "loss_ce": 0.5397881269454956, + "loss_iou": 0.5571175217628479, + "loss_num": 0.015869140625, + "loss_xval": 1.1936722993850708, + "num_input_tokens_seen": 518400528, + "step": 2999 + }, + { + "epoch": 1.1489850631941785, + "grad_norm": 135.98954753552283, + "learning_rate": 5e-06, + "loss": 1.7787, + "num_input_tokens_seen": 518573152, + "step": 3000 + }, + { + "epoch": 1.1489850631941785, + "eval_websight_new_CIoU": 0.9289008677005768, + "eval_websight_new_GIoU": 0.9288975298404694, + "eval_websight_new_IoU": 0.9291035234928131, + "eval_websight_new_MAE_all": 0.009797717444598675, + "eval_websight_new_MAE_h": 0.01205116230994463, + "eval_websight_new_MAE_w": 0.014374295715242624, + "eval_websight_new_MAE_x": 0.00619434705004096, + "eval_websight_new_MAE_y": 0.006571063073351979, + "eval_websight_new_NUM_probability": 0.0017544926377013326, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 1.5373013019561768, + "eval_websight_new_loss_ce": 0.7034120559692383, + "eval_websight_new_loss_iou": 0.37897779047489166, + "eval_websight_new_loss_num": 0.009069442749023438, + "eval_websight_new_loss_xval": 0.8032665550708771, + "eval_websight_new_runtime": 59.7409, + "eval_websight_new_samples_per_second": 0.837, + "eval_websight_new_steps_per_second": 0.033, + "num_input_tokens_seen": 518573152, + "step": 3000 + }, + { + "epoch": 1.1489850631941785, + "eval_seeclick_CIoU": 0.683236688375473, + "eval_seeclick_GIoU": 0.6938706934452057, + "eval_seeclick_IoU": 0.713070422410965, + "eval_seeclick_MAE_all": 0.05435188487172127, + "eval_seeclick_MAE_h": 0.040214281529188156, + "eval_seeclick_MAE_w": 0.07368129119277, + "eval_seeclick_MAE_x": 0.06478147581219673, + "eval_seeclick_MAE_y": 0.03873049933463335, + "eval_seeclick_NUM_probability": 0.002822827664203942, + "eval_seeclick_inside_bbox": 0.9131944477558136, + "eval_seeclick_loss": 2.2081284523010254, + "eval_seeclick_loss_ce": 0.7061522006988525, + "eval_seeclick_loss_iou": 0.6415136754512787, + "eval_seeclick_loss_num": 0.0447235107421875, + "eval_seeclick_loss_xval": 1.5065380930900574, + "eval_seeclick_runtime": 81.9172, + "eval_seeclick_samples_per_second": 0.61, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 518573152, + "step": 3000 + }, + { + "epoch": 1.1489850631941785, + "eval_icons_CIoU": 0.8049757182598114, + "eval_icons_GIoU": 0.8091687560081482, + "eval_icons_IoU": 0.820679098367691, + "eval_icons_MAE_all": 0.03267125133424997, + "eval_icons_MAE_h": 0.025516432709991932, + "eval_icons_MAE_w": 0.05081920512020588, + "eval_icons_MAE_x": 0.04043934307992458, + "eval_icons_MAE_y": 0.013910012319684029, + "eval_icons_NUM_probability": 0.005454682279378176, + "eval_icons_inside_bbox": 0.9288194477558136, + "eval_icons_loss": 2.3851919174194336, + "eval_icons_loss_ce": 0.6270142197608948, + "eval_icons_loss_iou": 0.812638521194458, + "eval_icons_loss_num": 0.02117156982421875, + "eval_icons_loss_xval": 1.731142520904541, + "eval_icons_runtime": 83.6012, + "eval_icons_samples_per_second": 0.598, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 518573152, + "step": 3000 + }, + { + "epoch": 1.1489850631941785, + "loss": 2.4711475372314453, + "loss_ce": 0.6271020174026489, + "loss_iou": 0.863520622253418, + "loss_num": 0.0234375, + "loss_xval": 1.844045639038086, + "num_input_tokens_seen": 518573152, + "step": 3000 + }, + { + "epoch": 1.1493680582152432, + "grad_norm": 139.09242166658748, + "learning_rate": 5e-06, + "loss": 2.0149, + "num_input_tokens_seen": 518746240, + "step": 3001 + }, + { + "epoch": 1.1493680582152432, + "loss": 2.01564621925354, + "loss_ce": 0.5103839635848999, + "loss_iou": 0.6980351805686951, + "loss_num": 0.0218505859375, + "loss_xval": 1.5052622556686401, + "num_input_tokens_seen": 518746240, + "step": 3001 + }, + { + "epoch": 1.149751053236308, + "grad_norm": 131.59173575821038, + "learning_rate": 5e-06, + "loss": 1.8836, + "num_input_tokens_seen": 518919272, + "step": 3002 + }, + { + "epoch": 1.149751053236308, + "loss": 1.8277509212493896, + "loss_ce": 0.5466691255569458, + "loss_iou": 0.5962751507759094, + "loss_num": 0.0177001953125, + "loss_xval": 1.2810817956924438, + "num_input_tokens_seen": 518919272, + "step": 3002 + }, + { + "epoch": 1.1501340482573728, + "grad_norm": 108.20560324442643, + "learning_rate": 5e-06, + "loss": 2.3203, + "num_input_tokens_seen": 519092152, + "step": 3003 + }, + { + "epoch": 1.1501340482573728, + "loss": 2.4972357749938965, + "loss_ce": 0.5610809326171875, + "loss_iou": 0.8992298245429993, + "loss_num": 0.0274658203125, + "loss_xval": 1.9361549615859985, + "num_input_tokens_seen": 519092152, + "step": 3003 + }, + { + "epoch": 1.1505170432784373, + "grad_norm": 121.10687922785246, + "learning_rate": 5e-06, + "loss": 1.8939, + "num_input_tokens_seen": 519265104, + "step": 3004 + }, + { + "epoch": 1.1505170432784373, + "loss": 1.9453966617584229, + "loss_ce": 0.5247212648391724, + "loss_iou": 0.654505729675293, + "loss_num": 0.0223388671875, + "loss_xval": 1.420675277709961, + "num_input_tokens_seen": 519265104, + "step": 3004 + }, + { + "epoch": 1.150900038299502, + "grad_norm": 114.66267510067331, + "learning_rate": 5e-06, + "loss": 2.1012, + "num_input_tokens_seen": 519437728, + "step": 3005 + }, + { + "epoch": 1.150900038299502, + "loss": 2.1453614234924316, + "loss_ce": 0.5125018358230591, + "loss_iou": 0.7602317333221436, + "loss_num": 0.0224609375, + "loss_xval": 1.632859706878662, + "num_input_tokens_seen": 519437728, + "step": 3005 + }, + { + "epoch": 1.1512830333205668, + "grad_norm": 102.5620072458507, + "learning_rate": 5e-06, + "loss": 1.9573, + "num_input_tokens_seen": 519610488, + "step": 3006 + }, + { + "epoch": 1.1512830333205668, + "loss": 1.7833776473999023, + "loss_ce": 0.5496975183486938, + "loss_iou": 0.563983678817749, + "loss_num": 0.0211181640625, + "loss_xval": 1.233680248260498, + "num_input_tokens_seen": 519610488, + "step": 3006 + }, + { + "epoch": 1.1516660283416316, + "grad_norm": 278.0204483214526, + "learning_rate": 5e-06, + "loss": 1.9901, + "num_input_tokens_seen": 519783440, + "step": 3007 + }, + { + "epoch": 1.1516660283416316, + "loss": 1.9606174230575562, + "loss_ce": 0.5405219197273254, + "loss_iou": 0.6772871017456055, + "loss_num": 0.01312255859375, + "loss_xval": 1.420095443725586, + "num_input_tokens_seen": 519783440, + "step": 3007 + }, + { + "epoch": 1.1520490233626963, + "grad_norm": 100.00293202770162, + "learning_rate": 5e-06, + "loss": 2.4169, + "num_input_tokens_seen": 519956480, + "step": 3008 + }, + { + "epoch": 1.1520490233626963, + "loss": 2.4275078773498535, + "loss_ce": 0.5300484895706177, + "loss_iou": 0.8840935230255127, + "loss_num": 0.02587890625, + "loss_xval": 1.8974595069885254, + "num_input_tokens_seen": 519956480, + "step": 3008 + }, + { + "epoch": 1.152432018383761, + "grad_norm": 175.15205276773116, + "learning_rate": 5e-06, + "loss": 2.0254, + "num_input_tokens_seen": 520129472, + "step": 3009 + }, + { + "epoch": 1.152432018383761, + "loss": 1.9535837173461914, + "loss_ce": 0.5508226156234741, + "loss_iou": 0.6521862149238586, + "loss_num": 0.0196533203125, + "loss_xval": 1.4027611017227173, + "num_input_tokens_seen": 520129472, + "step": 3009 + }, + { + "epoch": 1.1528150134048256, + "grad_norm": 93.02316642342778, + "learning_rate": 5e-06, + "loss": 2.0402, + "num_input_tokens_seen": 520302256, + "step": 3010 + }, + { + "epoch": 1.1528150134048256, + "loss": 1.934401273727417, + "loss_ce": 0.5041775703430176, + "loss_iou": 0.6717768311500549, + "loss_num": 0.017333984375, + "loss_xval": 1.4177113771438599, + "num_input_tokens_seen": 520302256, + "step": 3010 + }, + { + "epoch": 1.1531980084258904, + "grad_norm": 142.03973974345791, + "learning_rate": 5e-06, + "loss": 1.8762, + "num_input_tokens_seen": 520475544, + "step": 3011 + }, + { + "epoch": 1.1531980084258904, + "loss": 1.8028934001922607, + "loss_ce": 0.5216626524925232, + "loss_iou": 0.6030939817428589, + "loss_num": 0.0150146484375, + "loss_xval": 1.2812306880950928, + "num_input_tokens_seen": 520475544, + "step": 3011 + }, + { + "epoch": 1.1535810034469551, + "grad_norm": 105.14038043841408, + "learning_rate": 5e-06, + "loss": 2.0907, + "num_input_tokens_seen": 520648496, + "step": 3012 + }, + { + "epoch": 1.1535810034469551, + "loss": 1.9208060503005981, + "loss_ce": 0.5305529832839966, + "loss_iou": 0.6518068313598633, + "loss_num": 0.017333984375, + "loss_xval": 1.3902530670166016, + "num_input_tokens_seen": 520648496, + "step": 3012 + }, + { + "epoch": 1.15396399846802, + "grad_norm": 112.28781251389441, + "learning_rate": 5e-06, + "loss": 1.9978, + "num_input_tokens_seen": 520821256, + "step": 3013 + }, + { + "epoch": 1.15396399846802, + "loss": 2.028007984161377, + "loss_ce": 0.5007326006889343, + "loss_iou": 0.7139551043510437, + "loss_num": 0.0198974609375, + "loss_xval": 1.5272754430770874, + "num_input_tokens_seen": 520821256, + "step": 3013 + }, + { + "epoch": 1.1543469934890846, + "grad_norm": 110.42064520217507, + "learning_rate": 5e-06, + "loss": 1.9384, + "num_input_tokens_seen": 520994744, + "step": 3014 + }, + { + "epoch": 1.1543469934890846, + "loss": 1.97543203830719, + "loss_ce": 0.5129232406616211, + "loss_iou": 0.6709364652633667, + "loss_num": 0.024169921875, + "loss_xval": 1.4625089168548584, + "num_input_tokens_seen": 520994744, + "step": 3014 + }, + { + "epoch": 1.1547299885101494, + "grad_norm": 146.10573365097127, + "learning_rate": 5e-06, + "loss": 2.1011, + "num_input_tokens_seen": 521167760, + "step": 3015 + }, + { + "epoch": 1.1547299885101494, + "loss": 2.122317314147949, + "loss_ce": 0.4962806701660156, + "loss_iou": 0.7608027458190918, + "loss_num": 0.0208740234375, + "loss_xval": 1.6260366439819336, + "num_input_tokens_seen": 521167760, + "step": 3015 + }, + { + "epoch": 1.1551129835312142, + "grad_norm": 224.75354314705422, + "learning_rate": 5e-06, + "loss": 2.1782, + "num_input_tokens_seen": 521340496, + "step": 3016 + }, + { + "epoch": 1.1551129835312142, + "loss": 2.319140911102295, + "loss_ce": 0.4974133372306824, + "loss_iou": 0.8658657073974609, + "loss_num": 0.0179443359375, + "loss_xval": 1.8217277526855469, + "num_input_tokens_seen": 521340496, + "step": 3016 + }, + { + "epoch": 1.155495978552279, + "grad_norm": 72.06937341953629, + "learning_rate": 5e-06, + "loss": 2.2732, + "num_input_tokens_seen": 521513288, + "step": 3017 + }, + { + "epoch": 1.155495978552279, + "loss": 2.2797303199768066, + "loss_ce": 0.5103136301040649, + "loss_iou": 0.8009376525878906, + "loss_num": 0.033447265625, + "loss_xval": 1.7694168090820312, + "num_input_tokens_seen": 521513288, + "step": 3017 + }, + { + "epoch": 1.1558789735733435, + "grad_norm": 97.42804989793225, + "learning_rate": 5e-06, + "loss": 1.844, + "num_input_tokens_seen": 521686168, + "step": 3018 + }, + { + "epoch": 1.1558789735733435, + "loss": 1.8331570625305176, + "loss_ce": 0.5346876382827759, + "loss_iou": 0.6024208068847656, + "loss_num": 0.0186767578125, + "loss_xval": 1.2984695434570312, + "num_input_tokens_seen": 521686168, + "step": 3018 + }, + { + "epoch": 1.1562619685944082, + "grad_norm": 224.27826224442722, + "learning_rate": 5e-06, + "loss": 2.6632, + "num_input_tokens_seen": 521859192, + "step": 3019 + }, + { + "epoch": 1.1562619685944082, + "loss": 2.9461395740509033, + "loss_ce": 0.4964212477207184, + "loss_iou": 1.1607111692428589, + "loss_num": 0.025634765625, + "loss_xval": 2.4497182369232178, + "num_input_tokens_seen": 521859192, + "step": 3019 + }, + { + "epoch": 1.156644963615473, + "grad_norm": 65.17701512819956, + "learning_rate": 5e-06, + "loss": 2.5659, + "num_input_tokens_seen": 522031760, + "step": 3020 + }, + { + "epoch": 1.156644963615473, + "loss": 2.4863595962524414, + "loss_ce": 0.5229304432868958, + "loss_iou": 0.8717292547225952, + "loss_num": 0.0439453125, + "loss_xval": 1.9634292125701904, + "num_input_tokens_seen": 522031760, + "step": 3020 + }, + { + "epoch": 1.1570279586365377, + "grad_norm": 121.37290926126369, + "learning_rate": 5e-06, + "loss": 2.1516, + "num_input_tokens_seen": 522204552, + "step": 3021 + }, + { + "epoch": 1.1570279586365377, + "loss": 2.1316518783569336, + "loss_ce": 0.5568913817405701, + "loss_iou": 0.7265586853027344, + "loss_num": 0.0242919921875, + "loss_xval": 1.5747604370117188, + "num_input_tokens_seen": 522204552, + "step": 3021 + }, + { + "epoch": 1.1574109536576025, + "grad_norm": 229.35583791473852, + "learning_rate": 5e-06, + "loss": 2.5877, + "num_input_tokens_seen": 522377560, + "step": 3022 + }, + { + "epoch": 1.1574109536576025, + "loss": 2.5526371002197266, + "loss_ce": 0.5315263271331787, + "loss_iou": 0.9546470642089844, + "loss_num": 0.0223388671875, + "loss_xval": 2.0211105346679688, + "num_input_tokens_seen": 522377560, + "step": 3022 + }, + { + "epoch": 1.1577939486786673, + "grad_norm": 167.43325952523725, + "learning_rate": 5e-06, + "loss": 2.6765, + "num_input_tokens_seen": 522550464, + "step": 3023 + }, + { + "epoch": 1.1577939486786673, + "loss": 2.6495301723480225, + "loss_ce": 0.5795937180519104, + "loss_iou": 0.9373120069503784, + "loss_num": 0.0390625, + "loss_xval": 2.069936513900757, + "num_input_tokens_seen": 522550464, + "step": 3023 + }, + { + "epoch": 1.1581769436997318, + "grad_norm": 157.84680622680904, + "learning_rate": 5e-06, + "loss": 2.5017, + "num_input_tokens_seen": 522723520, + "step": 3024 + }, + { + "epoch": 1.1581769436997318, + "loss": 2.427170753479004, + "loss_ce": 0.5697979927062988, + "loss_iou": 0.8340818881988525, + "loss_num": 0.037841796875, + "loss_xval": 1.857372760772705, + "num_input_tokens_seen": 522723520, + "step": 3024 + }, + { + "epoch": 1.1585599387207965, + "grad_norm": 82.11481296858165, + "learning_rate": 5e-06, + "loss": 2.2963, + "num_input_tokens_seen": 522896568, + "step": 3025 + }, + { + "epoch": 1.1585599387207965, + "loss": 2.3341455459594727, + "loss_ce": 0.583962082862854, + "loss_iou": 0.7866516709327698, + "loss_num": 0.035400390625, + "loss_xval": 1.7501832246780396, + "num_input_tokens_seen": 522896568, + "step": 3025 + }, + { + "epoch": 1.1589429337418613, + "grad_norm": 103.61180247622377, + "learning_rate": 5e-06, + "loss": 1.9232, + "num_input_tokens_seen": 523069224, + "step": 3026 + }, + { + "epoch": 1.1589429337418613, + "loss": 1.9566779136657715, + "loss_ce": 0.5542987585067749, + "loss_iou": 0.6405511498451233, + "loss_num": 0.024169921875, + "loss_xval": 1.4023791551589966, + "num_input_tokens_seen": 523069224, + "step": 3026 + }, + { + "epoch": 1.159325928762926, + "grad_norm": 287.0764509963293, + "learning_rate": 5e-06, + "loss": 2.9193, + "num_input_tokens_seen": 523242384, + "step": 3027 + }, + { + "epoch": 1.159325928762926, + "loss": 2.9638757705688477, + "loss_ce": 0.5367143154144287, + "loss_iou": 1.1446415185928345, + "loss_num": 0.027587890625, + "loss_xval": 2.427161455154419, + "num_input_tokens_seen": 523242384, + "step": 3027 + }, + { + "epoch": 1.1597089237839908, + "grad_norm": 99.91399363308518, + "learning_rate": 5e-06, + "loss": 2.3371, + "num_input_tokens_seen": 523415472, + "step": 3028 + }, + { + "epoch": 1.1597089237839908, + "loss": 2.3318090438842773, + "loss_ce": 0.6090725660324097, + "loss_iou": 0.7692052125930786, + "loss_num": 0.036865234375, + "loss_xval": 1.7227365970611572, + "num_input_tokens_seen": 523415472, + "step": 3028 + }, + { + "epoch": 1.1600919188050556, + "grad_norm": 95.4680485424965, + "learning_rate": 5e-06, + "loss": 2.0019, + "num_input_tokens_seen": 523588216, + "step": 3029 + }, + { + "epoch": 1.1600919188050556, + "loss": 1.8098641633987427, + "loss_ce": 0.5736472606658936, + "loss_iou": 0.5691583156585693, + "loss_num": 0.01953125, + "loss_xval": 1.2362170219421387, + "num_input_tokens_seen": 523588216, + "step": 3029 + }, + { + "epoch": 1.1604749138261203, + "grad_norm": 117.48196063963505, + "learning_rate": 5e-06, + "loss": 2.0943, + "num_input_tokens_seen": 523761080, + "step": 3030 + }, + { + "epoch": 1.1604749138261203, + "loss": 2.036898612976074, + "loss_ce": 0.582169234752655, + "loss_iou": 0.6922236680984497, + "loss_num": 0.0140380859375, + "loss_xval": 1.4547293186187744, + "num_input_tokens_seen": 523761080, + "step": 3030 + }, + { + "epoch": 1.160857908847185, + "grad_norm": 201.2175189580315, + "learning_rate": 5e-06, + "loss": 2.9544, + "num_input_tokens_seen": 523934072, + "step": 3031 + }, + { + "epoch": 1.160857908847185, + "loss": 2.9619741439819336, + "loss_ce": 0.6040058135986328, + "loss_iou": 1.1072983741760254, + "loss_num": 0.0286865234375, + "loss_xval": 2.357968330383301, + "num_input_tokens_seen": 523934072, + "step": 3031 + }, + { + "epoch": 1.1612409038682496, + "grad_norm": 146.70510454521533, + "learning_rate": 5e-06, + "loss": 2.2877, + "num_input_tokens_seen": 524107192, + "step": 3032 + }, + { + "epoch": 1.1612409038682496, + "loss": 2.3786630630493164, + "loss_ce": 0.5580625534057617, + "loss_iou": 0.8286963105201721, + "loss_num": 0.03271484375, + "loss_xval": 1.8206006288528442, + "num_input_tokens_seen": 524107192, + "step": 3032 + }, + { + "epoch": 1.1616238988893144, + "grad_norm": 232.97661371340706, + "learning_rate": 5e-06, + "loss": 1.93, + "num_input_tokens_seen": 524280192, + "step": 3033 + }, + { + "epoch": 1.1616238988893144, + "loss": 1.9253848791122437, + "loss_ce": 0.5511703491210938, + "loss_iou": 0.6458474397659302, + "loss_num": 0.0164794921875, + "loss_xval": 1.3742144107818604, + "num_input_tokens_seen": 524280192, + "step": 3033 + }, + { + "epoch": 1.1620068939103791, + "grad_norm": 103.81499572093095, + "learning_rate": 5e-06, + "loss": 2.2417, + "num_input_tokens_seen": 524453144, + "step": 3034 + }, + { + "epoch": 1.1620068939103791, + "loss": 2.3110437393188477, + "loss_ce": 0.5635131597518921, + "loss_iou": 0.8246625065803528, + "loss_num": 0.0196533203125, + "loss_xval": 1.7475305795669556, + "num_input_tokens_seen": 524453144, + "step": 3034 + }, + { + "epoch": 1.162389888931444, + "grad_norm": 193.80084892592967, + "learning_rate": 5e-06, + "loss": 2.2382, + "num_input_tokens_seen": 524625928, + "step": 3035 + }, + { + "epoch": 1.162389888931444, + "loss": 2.1590077877044678, + "loss_ce": 0.5422309637069702, + "loss_iou": 0.7368246912956238, + "loss_num": 0.0286865234375, + "loss_xval": 1.6167768239974976, + "num_input_tokens_seen": 524625928, + "step": 3035 + }, + { + "epoch": 1.1627728839525087, + "grad_norm": 223.50433193857575, + "learning_rate": 5e-06, + "loss": 1.9806, + "num_input_tokens_seen": 524798696, + "step": 3036 + }, + { + "epoch": 1.1627728839525087, + "loss": 2.0209784507751465, + "loss_ce": 0.5899786949157715, + "loss_iou": 0.6683807373046875, + "loss_num": 0.018798828125, + "loss_xval": 1.430999755859375, + "num_input_tokens_seen": 524798696, + "step": 3036 + }, + { + "epoch": 1.1631558789735734, + "grad_norm": 127.91361924924034, + "learning_rate": 5e-06, + "loss": 2.0166, + "num_input_tokens_seen": 524971760, + "step": 3037 + }, + { + "epoch": 1.1631558789735734, + "loss": 1.9538748264312744, + "loss_ce": 0.5548742413520813, + "loss_iou": 0.6514350771903992, + "loss_num": 0.0191650390625, + "loss_xval": 1.3990005254745483, + "num_input_tokens_seen": 524971760, + "step": 3037 + }, + { + "epoch": 1.163538873994638, + "grad_norm": 108.51388885330405, + "learning_rate": 5e-06, + "loss": 2.089, + "num_input_tokens_seen": 525144952, + "step": 3038 + }, + { + "epoch": 1.163538873994638, + "loss": 2.0692315101623535, + "loss_ce": 0.5200036764144897, + "loss_iou": 0.7181258201599121, + "loss_num": 0.0225830078125, + "loss_xval": 1.5492277145385742, + "num_input_tokens_seen": 525144952, + "step": 3038 + }, + { + "epoch": 1.1639218690157027, + "grad_norm": 112.63384375864676, + "learning_rate": 5e-06, + "loss": 1.8902, + "num_input_tokens_seen": 525317936, + "step": 3039 + }, + { + "epoch": 1.1639218690157027, + "loss": 1.8628909587860107, + "loss_ce": 0.5142827033996582, + "loss_iou": 0.6378051042556763, + "loss_num": 0.01458740234375, + "loss_xval": 1.3486082553863525, + "num_input_tokens_seen": 525317936, + "step": 3039 + }, + { + "epoch": 1.1643048640367675, + "grad_norm": 134.6131501116591, + "learning_rate": 5e-06, + "loss": 1.7304, + "num_input_tokens_seen": 525490816, + "step": 3040 + }, + { + "epoch": 1.1643048640367675, + "loss": 1.8034213781356812, + "loss_ce": 0.5231952667236328, + "loss_iou": 0.6067954897880554, + "loss_num": 0.0133056640625, + "loss_xval": 1.2802261114120483, + "num_input_tokens_seen": 525490816, + "step": 3040 + }, + { + "epoch": 1.1646878590578322, + "grad_norm": 114.7384873154603, + "learning_rate": 5e-06, + "loss": 2.1501, + "num_input_tokens_seen": 525663824, + "step": 3041 + }, + { + "epoch": 1.1646878590578322, + "loss": 2.2172675132751465, + "loss_ce": 0.5395190715789795, + "loss_iou": 0.7881540060043335, + "loss_num": 0.020263671875, + "loss_xval": 1.677748441696167, + "num_input_tokens_seen": 525663824, + "step": 3041 + }, + { + "epoch": 1.165070854078897, + "grad_norm": 86.38432166069843, + "learning_rate": 5e-06, + "loss": 1.8492, + "num_input_tokens_seen": 525836488, + "step": 3042 + }, + { + "epoch": 1.165070854078897, + "loss": 1.7687615156173706, + "loss_ce": 0.553092360496521, + "loss_iou": 0.56175297498703, + "loss_num": 0.0184326171875, + "loss_xval": 1.19821298122406, + "num_input_tokens_seen": 525836488, + "step": 3042 + }, + { + "epoch": 1.1654538490999617, + "grad_norm": 85.05003253130162, + "learning_rate": 5e-06, + "loss": 1.8564, + "num_input_tokens_seen": 526009024, + "step": 3043 + }, + { + "epoch": 1.1654538490999617, + "loss": 2.0090861320495605, + "loss_ce": 0.5071377158164978, + "loss_iou": 0.7066322565078735, + "loss_num": 0.0177001953125, + "loss_xval": 1.501948595046997, + "num_input_tokens_seen": 526009024, + "step": 3043 + }, + { + "epoch": 1.1658368441210265, + "grad_norm": 197.92156942534768, + "learning_rate": 5e-06, + "loss": 2.0245, + "num_input_tokens_seen": 526181768, + "step": 3044 + }, + { + "epoch": 1.1658368441210265, + "loss": 1.8668968677520752, + "loss_ce": 0.5239740610122681, + "loss_iou": 0.6356947422027588, + "loss_num": 0.0142822265625, + "loss_xval": 1.3429226875305176, + "num_input_tokens_seen": 526181768, + "step": 3044 + }, + { + "epoch": 1.1662198391420913, + "grad_norm": 99.4182298535617, + "learning_rate": 5e-06, + "loss": 2.4788, + "num_input_tokens_seen": 526354976, + "step": 3045 + }, + { + "epoch": 1.1662198391420913, + "loss": 2.455658435821533, + "loss_ce": 0.5290402173995972, + "loss_iou": 0.8945225477218628, + "loss_num": 0.027587890625, + "loss_xval": 1.9266183376312256, + "num_input_tokens_seen": 526354976, + "step": 3045 + }, + { + "epoch": 1.1666028341631558, + "grad_norm": 86.57337881425134, + "learning_rate": 5e-06, + "loss": 2.1731, + "num_input_tokens_seen": 526527656, + "step": 3046 + }, + { + "epoch": 1.1666028341631558, + "loss": 2.1833906173706055, + "loss_ce": 0.4955127239227295, + "loss_iou": 0.774236798286438, + "loss_num": 0.0279541015625, + "loss_xval": 1.687877893447876, + "num_input_tokens_seen": 526527656, + "step": 3046 + }, + { + "epoch": 1.1669858291842206, + "grad_norm": 139.61278862580969, + "learning_rate": 5e-06, + "loss": 1.9832, + "num_input_tokens_seen": 526700408, + "step": 3047 + }, + { + "epoch": 1.1669858291842206, + "loss": 1.8087213039398193, + "loss_ce": 0.5244953632354736, + "loss_iou": 0.5974199771881104, + "loss_num": 0.017822265625, + "loss_xval": 1.2842259407043457, + "num_input_tokens_seen": 526700408, + "step": 3047 + }, + { + "epoch": 1.1673688242052853, + "grad_norm": 137.95949675032745, + "learning_rate": 5e-06, + "loss": 2.1025, + "num_input_tokens_seen": 526873352, + "step": 3048 + }, + { + "epoch": 1.1673688242052853, + "loss": 2.092449188232422, + "loss_ce": 0.5285845994949341, + "loss_iou": 0.7295947074890137, + "loss_num": 0.02099609375, + "loss_xval": 1.5638647079467773, + "num_input_tokens_seen": 526873352, + "step": 3048 + }, + { + "epoch": 1.16775181922635, + "grad_norm": 91.15195807997374, + "learning_rate": 5e-06, + "loss": 1.9721, + "num_input_tokens_seen": 527046472, + "step": 3049 + }, + { + "epoch": 1.16775181922635, + "loss": 2.130580186843872, + "loss_ce": 0.5217145085334778, + "loss_iou": 0.7446946501731873, + "loss_num": 0.02392578125, + "loss_xval": 1.6088656187057495, + "num_input_tokens_seen": 527046472, + "step": 3049 + }, + { + "epoch": 1.1681348142474148, + "grad_norm": 91.44638543843973, + "learning_rate": 5e-06, + "loss": 1.9778, + "num_input_tokens_seen": 527219440, + "step": 3050 + }, + { + "epoch": 1.1681348142474148, + "loss": 1.8737220764160156, + "loss_ce": 0.5123358964920044, + "loss_iou": 0.6441329717636108, + "loss_num": 0.0146484375, + "loss_xval": 1.3613860607147217, + "num_input_tokens_seen": 527219440, + "step": 3050 + }, + { + "epoch": 1.1685178092684796, + "grad_norm": 176.32139285541254, + "learning_rate": 5e-06, + "loss": 2.0724, + "num_input_tokens_seen": 527392064, + "step": 3051 + }, + { + "epoch": 1.1685178092684796, + "loss": 2.222956657409668, + "loss_ce": 0.5154544711112976, + "loss_iou": 0.7708501815795898, + "loss_num": 0.033203125, + "loss_xval": 1.7075023651123047, + "num_input_tokens_seen": 527392064, + "step": 3051 + }, + { + "epoch": 1.1689008042895441, + "grad_norm": 99.75705022812105, + "learning_rate": 5e-06, + "loss": 2.3729, + "num_input_tokens_seen": 527565152, + "step": 3052 + }, + { + "epoch": 1.1689008042895441, + "loss": 2.274721384048462, + "loss_ce": 0.5184080600738525, + "loss_iou": 0.8150157928466797, + "loss_num": 0.0252685546875, + "loss_xval": 1.7563133239746094, + "num_input_tokens_seen": 527565152, + "step": 3052 + }, + { + "epoch": 1.1692837993106089, + "grad_norm": 115.9429975894583, + "learning_rate": 5e-06, + "loss": 2.1547, + "num_input_tokens_seen": 527738584, + "step": 3053 + }, + { + "epoch": 1.1692837993106089, + "loss": 2.240044355392456, + "loss_ce": 0.5178654789924622, + "loss_iou": 0.7991082668304443, + "loss_num": 0.0247802734375, + "loss_xval": 1.7221789360046387, + "num_input_tokens_seen": 527738584, + "step": 3053 + }, + { + "epoch": 1.1696667943316736, + "grad_norm": 146.86466547060593, + "learning_rate": 5e-06, + "loss": 1.9783, + "num_input_tokens_seen": 527911520, + "step": 3054 + }, + { + "epoch": 1.1696667943316736, + "loss": 1.9764864444732666, + "loss_ce": 0.4814373850822449, + "loss_iou": 0.6908839344978333, + "loss_num": 0.022705078125, + "loss_xval": 1.4950491189956665, + "num_input_tokens_seen": 527911520, + "step": 3054 + }, + { + "epoch": 1.1700497893527384, + "grad_norm": 161.57457181911457, + "learning_rate": 5e-06, + "loss": 2.138, + "num_input_tokens_seen": 528084400, + "step": 3055 + }, + { + "epoch": 1.1700497893527384, + "loss": 2.122333288192749, + "loss_ce": 0.4733904004096985, + "loss_iou": 0.7757652997970581, + "loss_num": 0.01953125, + "loss_xval": 1.6489427089691162, + "num_input_tokens_seen": 528084400, + "step": 3055 + }, + { + "epoch": 1.1704327843738032, + "grad_norm": 166.2849525229474, + "learning_rate": 5e-06, + "loss": 2.0086, + "num_input_tokens_seen": 528257360, + "step": 3056 + }, + { + "epoch": 1.1704327843738032, + "loss": 1.9678535461425781, + "loss_ce": 0.5155329704284668, + "loss_iou": 0.6903784275054932, + "loss_num": 0.0142822265625, + "loss_xval": 1.4523205757141113, + "num_input_tokens_seen": 528257360, + "step": 3056 + }, + { + "epoch": 1.170815779394868, + "grad_norm": 134.07302002271393, + "learning_rate": 5e-06, + "loss": 2.0798, + "num_input_tokens_seen": 528430256, + "step": 3057 + }, + { + "epoch": 1.170815779394868, + "loss": 2.149479866027832, + "loss_ce": 0.4850102365016937, + "loss_iou": 0.7758992910385132, + "loss_num": 0.0225830078125, + "loss_xval": 1.6644694805145264, + "num_input_tokens_seen": 528430256, + "step": 3057 + }, + { + "epoch": 1.1711987744159327, + "grad_norm": 121.44923981604792, + "learning_rate": 5e-06, + "loss": 2.1475, + "num_input_tokens_seen": 528603048, + "step": 3058 + }, + { + "epoch": 1.1711987744159327, + "loss": 2.1266353130340576, + "loss_ce": 0.489056795835495, + "loss_iou": 0.7569606900215149, + "loss_num": 0.0247802734375, + "loss_xval": 1.6375786066055298, + "num_input_tokens_seen": 528603048, + "step": 3058 + }, + { + "epoch": 1.1715817694369974, + "grad_norm": 107.73166820449426, + "learning_rate": 5e-06, + "loss": 1.7975, + "num_input_tokens_seen": 528776240, + "step": 3059 + }, + { + "epoch": 1.1715817694369974, + "loss": 1.757315993309021, + "loss_ce": 0.5063617825508118, + "loss_iou": 0.591266930103302, + "loss_num": 0.013671875, + "loss_xval": 1.250954270362854, + "num_input_tokens_seen": 528776240, + "step": 3059 + }, + { + "epoch": 1.171964764458062, + "grad_norm": 142.87362295853583, + "learning_rate": 5e-06, + "loss": 1.6761, + "num_input_tokens_seen": 528949016, + "step": 3060 + }, + { + "epoch": 1.171964764458062, + "loss": 1.6243350505828857, + "loss_ce": 0.4753933548927307, + "loss_iou": 0.5394213795661926, + "loss_num": 0.0140380859375, + "loss_xval": 1.1489416360855103, + "num_input_tokens_seen": 528949016, + "step": 3060 + }, + { + "epoch": 1.1723477594791267, + "grad_norm": 173.64181803167602, + "learning_rate": 5e-06, + "loss": 2.124, + "num_input_tokens_seen": 529122504, + "step": 3061 + }, + { + "epoch": 1.1723477594791267, + "loss": 2.029665946960449, + "loss_ce": 0.4836769104003906, + "loss_iou": 0.7265772819519043, + "loss_num": 0.0185546875, + "loss_xval": 1.5459890365600586, + "num_input_tokens_seen": 529122504, + "step": 3061 + }, + { + "epoch": 1.1727307545001915, + "grad_norm": 125.77741119958662, + "learning_rate": 5e-06, + "loss": 2.4215, + "num_input_tokens_seen": 529295208, + "step": 3062 + }, + { + "epoch": 1.1727307545001915, + "loss": 2.460716724395752, + "loss_ce": 0.47949209809303284, + "loss_iou": 0.8830683827400208, + "loss_num": 0.04296875, + "loss_xval": 1.9812246561050415, + "num_input_tokens_seen": 529295208, + "step": 3062 + }, + { + "epoch": 1.1731137495212562, + "grad_norm": 109.1398471362209, + "learning_rate": 5e-06, + "loss": 2.137, + "num_input_tokens_seen": 529467976, + "step": 3063 + }, + { + "epoch": 1.1731137495212562, + "loss": 2.1569390296936035, + "loss_ce": 0.4654855728149414, + "loss_iou": 0.7971580028533936, + "loss_num": 0.0194091796875, + "loss_xval": 1.691453456878662, + "num_input_tokens_seen": 529467976, + "step": 3063 + }, + { + "epoch": 1.173496744542321, + "grad_norm": 196.19940314823938, + "learning_rate": 5e-06, + "loss": 2.4079, + "num_input_tokens_seen": 529640848, + "step": 3064 + }, + { + "epoch": 1.173496744542321, + "loss": 2.412062644958496, + "loss_ce": 0.49375009536743164, + "loss_iou": 0.9106332659721375, + "loss_num": 0.0194091796875, + "loss_xval": 1.918312430381775, + "num_input_tokens_seen": 529640848, + "step": 3064 + }, + { + "epoch": 1.1738797395633858, + "grad_norm": 120.60717314557823, + "learning_rate": 5e-06, + "loss": 2.4104, + "num_input_tokens_seen": 529813776, + "step": 3065 + }, + { + "epoch": 1.1738797395633858, + "loss": 2.3200154304504395, + "loss_ce": 0.5093563795089722, + "loss_iou": 0.8057507276535034, + "loss_num": 0.039794921875, + "loss_xval": 1.8106591701507568, + "num_input_tokens_seen": 529813776, + "step": 3065 + }, + { + "epoch": 1.1742627345844503, + "grad_norm": 88.43849490081806, + "learning_rate": 5e-06, + "loss": 2.2045, + "num_input_tokens_seen": 529986888, + "step": 3066 + }, + { + "epoch": 1.1742627345844503, + "loss": 2.1570796966552734, + "loss_ce": 0.5062353014945984, + "loss_iou": 0.7442454695701599, + "loss_num": 0.032470703125, + "loss_xval": 1.6508444547653198, + "num_input_tokens_seen": 529986888, + "step": 3066 + }, + { + "epoch": 1.174645729605515, + "grad_norm": 259.62984874746115, + "learning_rate": 5e-06, + "loss": 2.0067, + "num_input_tokens_seen": 530159496, + "step": 3067 + }, + { + "epoch": 1.174645729605515, + "loss": 1.8444569110870361, + "loss_ce": 0.49705448746681213, + "loss_iou": 0.6307781934738159, + "loss_num": 0.0172119140625, + "loss_xval": 1.3474023342132568, + "num_input_tokens_seen": 530159496, + "step": 3067 + }, + { + "epoch": 1.1750287246265798, + "grad_norm": 72.49031287732348, + "learning_rate": 5e-06, + "loss": 2.1604, + "num_input_tokens_seen": 530328504, + "step": 3068 + }, + { + "epoch": 1.1750287246265798, + "loss": 2.269585132598877, + "loss_ce": 0.4805722236633301, + "loss_iou": 0.8140620589256287, + "loss_num": 0.0322265625, + "loss_xval": 1.7890127897262573, + "num_input_tokens_seen": 530328504, + "step": 3068 + }, + { + "epoch": 1.1754117196476446, + "grad_norm": 110.81024774100332, + "learning_rate": 5e-06, + "loss": 2.1665, + "num_input_tokens_seen": 530501384, + "step": 3069 + }, + { + "epoch": 1.1754117196476446, + "loss": 2.2385213375091553, + "loss_ce": 0.4997120797634125, + "loss_iou": 0.81157386302948, + "loss_num": 0.023193359375, + "loss_xval": 1.73880934715271, + "num_input_tokens_seen": 530501384, + "step": 3069 + }, + { + "epoch": 1.1757947146687093, + "grad_norm": 292.41156610271406, + "learning_rate": 5e-06, + "loss": 2.4211, + "num_input_tokens_seen": 530674736, + "step": 3070 + }, + { + "epoch": 1.1757947146687093, + "loss": 2.433666944503784, + "loss_ce": 0.5467498302459717, + "loss_iou": 0.8623733520507812, + "loss_num": 0.032470703125, + "loss_xval": 1.8869171142578125, + "num_input_tokens_seen": 530674736, + "step": 3070 + }, + { + "epoch": 1.176177709689774, + "grad_norm": 181.99659514108347, + "learning_rate": 5e-06, + "loss": 2.5487, + "num_input_tokens_seen": 530847776, + "step": 3071 + }, + { + "epoch": 1.176177709689774, + "loss": 2.5342164039611816, + "loss_ce": 0.5647098422050476, + "loss_iou": 0.901562511920929, + "loss_num": 0.033203125, + "loss_xval": 1.969506859779358, + "num_input_tokens_seen": 530847776, + "step": 3071 + }, + { + "epoch": 1.1765607047108388, + "grad_norm": 330.34257726631733, + "learning_rate": 5e-06, + "loss": 2.5627, + "num_input_tokens_seen": 531020992, + "step": 3072 + }, + { + "epoch": 1.1765607047108388, + "loss": 2.55936336517334, + "loss_ce": 0.5791891813278198, + "loss_iou": 0.9034783244132996, + "loss_num": 0.03466796875, + "loss_xval": 1.9801744222640991, + "num_input_tokens_seen": 531020992, + "step": 3072 + }, + { + "epoch": 1.1769436997319036, + "grad_norm": 59.011670424016785, + "learning_rate": 5e-06, + "loss": 2.4984, + "num_input_tokens_seen": 531194504, + "step": 3073 + }, + { + "epoch": 1.1769436997319036, + "loss": 2.4518914222717285, + "loss_ce": 0.5527679920196533, + "loss_iou": 0.8246227502822876, + "loss_num": 0.050048828125, + "loss_xval": 1.8991234302520752, + "num_input_tokens_seen": 531194504, + "step": 3073 + }, + { + "epoch": 1.1773266947529681, + "grad_norm": 101.3018967292303, + "learning_rate": 5e-06, + "loss": 2.2879, + "num_input_tokens_seen": 531367744, + "step": 3074 + }, + { + "epoch": 1.1773266947529681, + "loss": 2.198519468307495, + "loss_ce": 0.582373857498169, + "loss_iou": 0.7422463893890381, + "loss_num": 0.0263671875, + "loss_xval": 1.6161456108093262, + "num_input_tokens_seen": 531367744, + "step": 3074 + }, + { + "epoch": 1.177709689774033, + "grad_norm": 85.82880948447881, + "learning_rate": 5e-06, + "loss": 1.9941, + "num_input_tokens_seen": 531540552, + "step": 3075 + }, + { + "epoch": 1.177709689774033, + "loss": 1.8359382152557373, + "loss_ce": 0.5748423337936401, + "loss_iou": 0.5849394798278809, + "loss_num": 0.0181884765625, + "loss_xval": 1.2610960006713867, + "num_input_tokens_seen": 531540552, + "step": 3075 + }, + { + "epoch": 1.1780926847950977, + "grad_norm": 127.92420307754955, + "learning_rate": 5e-06, + "loss": 2.4393, + "num_input_tokens_seen": 531713392, + "step": 3076 + }, + { + "epoch": 1.1780926847950977, + "loss": 2.404845952987671, + "loss_ce": 0.545185387134552, + "loss_iou": 0.8741356134414673, + "loss_num": 0.0223388671875, + "loss_xval": 1.8596603870391846, + "num_input_tokens_seen": 531713392, + "step": 3076 + }, + { + "epoch": 1.1784756798161624, + "grad_norm": 55.50579941902585, + "learning_rate": 5e-06, + "loss": 2.0792, + "num_input_tokens_seen": 531885920, + "step": 3077 + }, + { + "epoch": 1.1784756798161624, + "loss": 1.95820951461792, + "loss_ce": 0.546619713306427, + "loss_iou": 0.6498562097549438, + "loss_num": 0.0223388671875, + "loss_xval": 1.4115898609161377, + "num_input_tokens_seen": 531885920, + "step": 3077 + }, + { + "epoch": 1.1788586748372272, + "grad_norm": 118.59406570043875, + "learning_rate": 5e-06, + "loss": 1.84, + "num_input_tokens_seen": 532058696, + "step": 3078 + }, + { + "epoch": 1.1788586748372272, + "loss": 1.9588350057601929, + "loss_ce": 0.5443446040153503, + "loss_iou": 0.6634677052497864, + "loss_num": 0.017578125, + "loss_xval": 1.4144903421401978, + "num_input_tokens_seen": 532058696, + "step": 3078 + }, + { + "epoch": 1.179241669858292, + "grad_norm": 127.6107443593757, + "learning_rate": 5e-06, + "loss": 2.0947, + "num_input_tokens_seen": 532231656, + "step": 3079 + }, + { + "epoch": 1.179241669858292, + "loss": 1.9777263402938843, + "loss_ce": 0.5301843285560608, + "loss_iou": 0.6778877973556519, + "loss_num": 0.018310546875, + "loss_xval": 1.4475419521331787, + "num_input_tokens_seen": 532231656, + "step": 3079 + }, + { + "epoch": 1.1796246648793565, + "grad_norm": 208.10514886009696, + "learning_rate": 5e-06, + "loss": 1.9466, + "num_input_tokens_seen": 532404432, + "step": 3080 + }, + { + "epoch": 1.1796246648793565, + "loss": 1.9196019172668457, + "loss_ce": 0.541554868221283, + "loss_iou": 0.644559383392334, + "loss_num": 0.017822265625, + "loss_xval": 1.378046989440918, + "num_input_tokens_seen": 532404432, + "step": 3080 + }, + { + "epoch": 1.1800076599004212, + "grad_norm": 138.20639601401902, + "learning_rate": 5e-06, + "loss": 2.2284, + "num_input_tokens_seen": 532577672, + "step": 3081 + }, + { + "epoch": 1.1800076599004212, + "loss": 2.2285828590393066, + "loss_ce": 0.5263062119483948, + "loss_iou": 0.7832977771759033, + "loss_num": 0.027099609375, + "loss_xval": 1.7022767066955566, + "num_input_tokens_seen": 532577672, + "step": 3081 + }, + { + "epoch": 1.180390654921486, + "grad_norm": 106.42366775498245, + "learning_rate": 5e-06, + "loss": 1.7875, + "num_input_tokens_seen": 532750640, + "step": 3082 + }, + { + "epoch": 1.180390654921486, + "loss": 1.6181203126907349, + "loss_ce": 0.5389182567596436, + "loss_iou": 0.5006301403045654, + "loss_num": 0.01556396484375, + "loss_xval": 1.0792021751403809, + "num_input_tokens_seen": 532750640, + "step": 3082 + }, + { + "epoch": 1.1807736499425507, + "grad_norm": 174.05157276395775, + "learning_rate": 5e-06, + "loss": 1.9299, + "num_input_tokens_seen": 532923888, + "step": 3083 + }, + { + "epoch": 1.1807736499425507, + "loss": 1.84665846824646, + "loss_ce": 0.5498446226119995, + "loss_iou": 0.6193541884422302, + "loss_num": 0.0115966796875, + "loss_xval": 1.2968138456344604, + "num_input_tokens_seen": 532923888, + "step": 3083 + }, + { + "epoch": 1.1811566449636155, + "grad_norm": 78.5711205346916, + "learning_rate": 5e-06, + "loss": 2.1162, + "num_input_tokens_seen": 533096768, + "step": 3084 + }, + { + "epoch": 1.1811566449636155, + "loss": 2.3008179664611816, + "loss_ce": 0.5162010788917542, + "loss_iou": 0.8209277987480164, + "loss_num": 0.028564453125, + "loss_xval": 1.7846168279647827, + "num_input_tokens_seen": 533096768, + "step": 3084 + }, + { + "epoch": 1.1815396399846803, + "grad_norm": 69.41066415665922, + "learning_rate": 5e-06, + "loss": 1.9532, + "num_input_tokens_seen": 533269672, + "step": 3085 + }, + { + "epoch": 1.1815396399846803, + "loss": 1.8059289455413818, + "loss_ce": 0.5179780721664429, + "loss_iou": 0.600259006023407, + "loss_num": 0.0174560546875, + "loss_xval": 1.2879507541656494, + "num_input_tokens_seen": 533269672, + "step": 3085 + }, + { + "epoch": 1.181922635005745, + "grad_norm": 213.4351583881707, + "learning_rate": 5e-06, + "loss": 2.1173, + "num_input_tokens_seen": 533442752, + "step": 3086 + }, + { + "epoch": 1.181922635005745, + "loss": 2.108553886413574, + "loss_ce": 0.5023859143257141, + "loss_iou": 0.7601457238197327, + "loss_num": 0.0172119140625, + "loss_xval": 1.6061679124832153, + "num_input_tokens_seen": 533442752, + "step": 3086 + }, + { + "epoch": 1.1823056300268098, + "grad_norm": 170.57106458149943, + "learning_rate": 5e-06, + "loss": 2.1106, + "num_input_tokens_seen": 533615552, + "step": 3087 + }, + { + "epoch": 1.1823056300268098, + "loss": 2.032696008682251, + "loss_ce": 0.5327855348587036, + "loss_iou": 0.7113352417945862, + "loss_num": 0.01544189453125, + "loss_xval": 1.4999104738235474, + "num_input_tokens_seen": 533615552, + "step": 3087 + }, + { + "epoch": 1.1826886250478743, + "grad_norm": 422.2572321374377, + "learning_rate": 5e-06, + "loss": 2.6274, + "num_input_tokens_seen": 533788624, + "step": 3088 + }, + { + "epoch": 1.1826886250478743, + "loss": 2.3464598655700684, + "loss_ce": 0.49449825286865234, + "loss_iou": 0.8754742741584778, + "loss_num": 0.020263671875, + "loss_xval": 1.8519617319107056, + "num_input_tokens_seen": 533788624, + "step": 3088 + }, + { + "epoch": 1.183071620068939, + "grad_norm": 95.19896558821257, + "learning_rate": 5e-06, + "loss": 2.3712, + "num_input_tokens_seen": 533961888, + "step": 3089 + }, + { + "epoch": 1.183071620068939, + "loss": 2.4336447715759277, + "loss_ce": 0.5236128568649292, + "loss_iou": 0.871397852897644, + "loss_num": 0.033447265625, + "loss_xval": 1.910032033920288, + "num_input_tokens_seen": 533961888, + "step": 3089 + }, + { + "epoch": 1.1834546150900038, + "grad_norm": 141.36348073633923, + "learning_rate": 5e-06, + "loss": 2.1838, + "num_input_tokens_seen": 534134648, + "step": 3090 + }, + { + "epoch": 1.1834546150900038, + "loss": 2.1448426246643066, + "loss_ce": 0.503603458404541, + "loss_iou": 0.7510090470314026, + "loss_num": 0.02783203125, + "loss_xval": 1.6412392854690552, + "num_input_tokens_seen": 534134648, + "step": 3090 + }, + { + "epoch": 1.1838376101110686, + "grad_norm": 185.078878828955, + "learning_rate": 5e-06, + "loss": 2.0241, + "num_input_tokens_seen": 534307288, + "step": 3091 + }, + { + "epoch": 1.1838376101110686, + "loss": 1.9381513595581055, + "loss_ce": 0.49507689476013184, + "loss_iou": 0.6708323359489441, + "loss_num": 0.020263671875, + "loss_xval": 1.4430745840072632, + "num_input_tokens_seen": 534307288, + "step": 3091 + }, + { + "epoch": 1.1842206051321333, + "grad_norm": 185.54291656001425, + "learning_rate": 5e-06, + "loss": 2.2354, + "num_input_tokens_seen": 534476920, + "step": 3092 + }, + { + "epoch": 1.1842206051321333, + "loss": 2.2484748363494873, + "loss_ce": 0.4822235107421875, + "loss_iou": 0.8301472067832947, + "loss_num": 0.021240234375, + "loss_xval": 1.7514809370040894, + "num_input_tokens_seen": 534476920, + "step": 3092 + }, + { + "epoch": 1.184603600153198, + "grad_norm": 100.56233131470334, + "learning_rate": 5e-06, + "loss": 2.2592, + "num_input_tokens_seen": 534650168, + "step": 3093 + }, + { + "epoch": 1.184603600153198, + "loss": 2.173462390899658, + "loss_ce": 0.49785223603248596, + "loss_iou": 0.7757627964019775, + "loss_num": 0.02490234375, + "loss_xval": 1.675610065460205, + "num_input_tokens_seen": 534650168, + "step": 3093 + }, + { + "epoch": 1.1849865951742626, + "grad_norm": 156.1794590780056, + "learning_rate": 5e-06, + "loss": 1.7972, + "num_input_tokens_seen": 534822960, + "step": 3094 + }, + { + "epoch": 1.1849865951742626, + "loss": 1.6737004518508911, + "loss_ce": 0.47707849740982056, + "loss_iou": 0.5625748634338379, + "loss_num": 0.0142822265625, + "loss_xval": 1.1966218948364258, + "num_input_tokens_seen": 534822960, + "step": 3094 + }, + { + "epoch": 1.1853695901953274, + "grad_norm": 331.3179014216725, + "learning_rate": 5e-06, + "loss": 2.1746, + "num_input_tokens_seen": 534995776, + "step": 3095 + }, + { + "epoch": 1.1853695901953274, + "loss": 2.3747072219848633, + "loss_ce": 0.4959421753883362, + "loss_iou": 0.8826808929443359, + "loss_num": 0.022705078125, + "loss_xval": 1.8787651062011719, + "num_input_tokens_seen": 534995776, + "step": 3095 + }, + { + "epoch": 1.1857525852163922, + "grad_norm": 95.659466390315, + "learning_rate": 5e-06, + "loss": 2.1805, + "num_input_tokens_seen": 535168896, + "step": 3096 + }, + { + "epoch": 1.1857525852163922, + "loss": 2.071247100830078, + "loss_ce": 0.5040996670722961, + "loss_iou": 0.7256512641906738, + "loss_num": 0.023193359375, + "loss_xval": 1.5671472549438477, + "num_input_tokens_seen": 535168896, + "step": 3096 + }, + { + "epoch": 1.186135580237457, + "grad_norm": 98.1924804357043, + "learning_rate": 5e-06, + "loss": 1.7807, + "num_input_tokens_seen": 535338752, + "step": 3097 + }, + { + "epoch": 1.186135580237457, + "loss": 1.6573166847229004, + "loss_ce": 0.5046724677085876, + "loss_iou": 0.540708065032959, + "loss_num": 0.01422119140625, + "loss_xval": 1.152644157409668, + "num_input_tokens_seen": 535338752, + "step": 3097 + }, + { + "epoch": 1.1865185752585217, + "grad_norm": 135.8681467987988, + "learning_rate": 5e-06, + "loss": 2.236, + "num_input_tokens_seen": 535511952, + "step": 3098 + }, + { + "epoch": 1.1865185752585217, + "loss": 2.317953109741211, + "loss_ce": 0.5204952955245972, + "loss_iou": 0.8491530418395996, + "loss_num": 0.019775390625, + "loss_xval": 1.7974576950073242, + "num_input_tokens_seen": 535511952, + "step": 3098 + }, + { + "epoch": 1.1869015702795864, + "grad_norm": 106.61760525812039, + "learning_rate": 5e-06, + "loss": 2.204, + "num_input_tokens_seen": 535685040, + "step": 3099 + }, + { + "epoch": 1.1869015702795864, + "loss": 2.198577880859375, + "loss_ce": 0.49872907996177673, + "loss_iou": 0.7996619939804077, + "loss_num": 0.0201416015625, + "loss_xval": 1.6998488903045654, + "num_input_tokens_seen": 535685040, + "step": 3099 + }, + { + "epoch": 1.1872845653006512, + "grad_norm": 77.8865254037316, + "learning_rate": 5e-06, + "loss": 1.9618, + "num_input_tokens_seen": 535858192, + "step": 3100 + }, + { + "epoch": 1.1872845653006512, + "loss": 1.7767446041107178, + "loss_ce": 0.5023010969161987, + "loss_iou": 0.600615918636322, + "loss_num": 0.0146484375, + "loss_xval": 1.274443507194519, + "num_input_tokens_seen": 535858192, + "step": 3100 + }, + { + "epoch": 1.1876675603217157, + "grad_norm": 155.840972582791, + "learning_rate": 5e-06, + "loss": 1.9774, + "num_input_tokens_seen": 536031320, + "step": 3101 + }, + { + "epoch": 1.1876675603217157, + "loss": 1.8352344036102295, + "loss_ce": 0.5101897716522217, + "loss_iou": 0.6163797378540039, + "loss_num": 0.0184326171875, + "loss_xval": 1.3250446319580078, + "num_input_tokens_seen": 536031320, + "step": 3101 + }, + { + "epoch": 1.1880505553427805, + "grad_norm": 110.38495657579385, + "learning_rate": 5e-06, + "loss": 2.0977, + "num_input_tokens_seen": 536204088, + "step": 3102 + }, + { + "epoch": 1.1880505553427805, + "loss": 2.2395238876342773, + "loss_ce": 0.49135062098503113, + "loss_iou": 0.8049337863922119, + "loss_num": 0.0277099609375, + "loss_xval": 1.7481732368469238, + "num_input_tokens_seen": 536204088, + "step": 3102 + }, + { + "epoch": 1.1884335503638452, + "grad_norm": 59.5160470004755, + "learning_rate": 5e-06, + "loss": 1.8086, + "num_input_tokens_seen": 536377472, + "step": 3103 + }, + { + "epoch": 1.1884335503638452, + "loss": 1.804865837097168, + "loss_ce": 0.51838219165802, + "loss_iou": 0.6018751859664917, + "loss_num": 0.0166015625, + "loss_xval": 1.2864835262298584, + "num_input_tokens_seen": 536377472, + "step": 3103 + }, + { + "epoch": 1.18881654538491, + "grad_norm": 89.1968533612831, + "learning_rate": 5e-06, + "loss": 1.6553, + "num_input_tokens_seen": 536550112, + "step": 3104 + }, + { + "epoch": 1.18881654538491, + "loss": 1.7249507904052734, + "loss_ce": 0.4785350561141968, + "loss_iou": 0.589348554611206, + "loss_num": 0.0135498046875, + "loss_xval": 1.246415615081787, + "num_input_tokens_seen": 536550112, + "step": 3104 + }, + { + "epoch": 1.1891995404059748, + "grad_norm": 137.08758924315913, + "learning_rate": 5e-06, + "loss": 1.7983, + "num_input_tokens_seen": 536723160, + "step": 3105 + }, + { + "epoch": 1.1891995404059748, + "loss": 1.8237223625183105, + "loss_ce": 0.493984192609787, + "loss_iou": 0.6302469372749329, + "loss_num": 0.01385498046875, + "loss_xval": 1.3297382593154907, + "num_input_tokens_seen": 536723160, + "step": 3105 + }, + { + "epoch": 1.1895825354270395, + "grad_norm": 145.7121057801843, + "learning_rate": 5e-06, + "loss": 2.0669, + "num_input_tokens_seen": 536896160, + "step": 3106 + }, + { + "epoch": 1.1895825354270395, + "loss": 2.0431172847747803, + "loss_ce": 0.47122979164123535, + "loss_iou": 0.7367188930511475, + "loss_num": 0.0196533203125, + "loss_xval": 1.571887493133545, + "num_input_tokens_seen": 536896160, + "step": 3106 + }, + { + "epoch": 1.1899655304481043, + "grad_norm": 75.90376758680951, + "learning_rate": 5e-06, + "loss": 2.3048, + "num_input_tokens_seen": 537069344, + "step": 3107 + }, + { + "epoch": 1.1899655304481043, + "loss": 2.289397716522217, + "loss_ce": 0.44671905040740967, + "loss_iou": 0.8297255039215088, + "loss_num": 0.03662109375, + "loss_xval": 1.8426785469055176, + "num_input_tokens_seen": 537069344, + "step": 3107 + }, + { + "epoch": 1.1903485254691688, + "grad_norm": 99.21312086510689, + "learning_rate": 5e-06, + "loss": 1.719, + "num_input_tokens_seen": 537242456, + "step": 3108 + }, + { + "epoch": 1.1903485254691688, + "loss": 1.5367971658706665, + "loss_ce": 0.4708802402019501, + "loss_iou": 0.5069498419761658, + "loss_num": 0.0103759765625, + "loss_xval": 1.0659170150756836, + "num_input_tokens_seen": 537242456, + "step": 3108 + }, + { + "epoch": 1.1907315204902336, + "grad_norm": 113.36016933580596, + "learning_rate": 5e-06, + "loss": 1.8786, + "num_input_tokens_seen": 537415464, + "step": 3109 + }, + { + "epoch": 1.1907315204902336, + "loss": 1.7586313486099243, + "loss_ce": 0.4701662063598633, + "loss_iou": 0.5941989421844482, + "loss_num": 0.02001953125, + "loss_xval": 1.2819952964782715, + "num_input_tokens_seen": 537415464, + "step": 3109 + }, + { + "epoch": 1.1911145155112983, + "grad_norm": 119.55250576519718, + "learning_rate": 5e-06, + "loss": 1.9036, + "num_input_tokens_seen": 537588368, + "step": 3110 + }, + { + "epoch": 1.1911145155112983, + "loss": 1.8303320407867432, + "loss_ce": 0.4702279269695282, + "loss_iou": 0.6384260654449463, + "loss_num": 0.0166015625, + "loss_xval": 1.3601040840148926, + "num_input_tokens_seen": 537588368, + "step": 3110 + }, + { + "epoch": 1.191497510532363, + "grad_norm": 156.538618103619, + "learning_rate": 5e-06, + "loss": 1.9782, + "num_input_tokens_seen": 537761344, + "step": 3111 + }, + { + "epoch": 1.191497510532363, + "loss": 1.9333223104476929, + "loss_ce": 0.4669739603996277, + "loss_iou": 0.6894271969795227, + "loss_num": 0.0174560546875, + "loss_xval": 1.4663482904434204, + "num_input_tokens_seen": 537761344, + "step": 3111 + }, + { + "epoch": 1.1918805055534278, + "grad_norm": 162.18242779358536, + "learning_rate": 5e-06, + "loss": 2.2692, + "num_input_tokens_seen": 537934552, + "step": 3112 + }, + { + "epoch": 1.1918805055534278, + "loss": 2.299771308898926, + "loss_ce": 0.4665790796279907, + "loss_iou": 0.8423773050308228, + "loss_num": 0.0296630859375, + "loss_xval": 1.8331921100616455, + "num_input_tokens_seen": 537934552, + "step": 3112 + }, + { + "epoch": 1.1922635005744926, + "grad_norm": 47.94872101437066, + "learning_rate": 5e-06, + "loss": 1.7896, + "num_input_tokens_seen": 538107488, + "step": 3113 + }, + { + "epoch": 1.1922635005744926, + "loss": 1.7490005493164062, + "loss_ce": 0.4844675064086914, + "loss_iou": 0.5784487724304199, + "loss_num": 0.021484375, + "loss_xval": 1.2645330429077148, + "num_input_tokens_seen": 538107488, + "step": 3113 + }, + { + "epoch": 1.1926464955955574, + "grad_norm": 105.30828538266552, + "learning_rate": 5e-06, + "loss": 1.6832, + "num_input_tokens_seen": 538280568, + "step": 3114 + }, + { + "epoch": 1.1926464955955574, + "loss": 1.5757901668548584, + "loss_ce": 0.43801149725914, + "loss_iou": 0.5380741357803345, + "loss_num": 0.0123291015625, + "loss_xval": 1.1377785205841064, + "num_input_tokens_seen": 538280568, + "step": 3114 + }, + { + "epoch": 1.193029490616622, + "grad_norm": 158.47482049089467, + "learning_rate": 5e-06, + "loss": 2.0204, + "num_input_tokens_seen": 538453360, + "step": 3115 + }, + { + "epoch": 1.193029490616622, + "loss": 2.0267176628112793, + "loss_ce": 0.4336962103843689, + "loss_iou": 0.7289599776268005, + "loss_num": 0.027099609375, + "loss_xval": 1.593021273612976, + "num_input_tokens_seen": 538453360, + "step": 3115 + }, + { + "epoch": 1.1934124856376866, + "grad_norm": 168.57864556594072, + "learning_rate": 5e-06, + "loss": 2.0213, + "num_input_tokens_seen": 538626680, + "step": 3116 + }, + { + "epoch": 1.1934124856376866, + "loss": 2.125065326690674, + "loss_ce": 0.4352427124977112, + "loss_iou": 0.7829911708831787, + "loss_num": 0.0247802734375, + "loss_xval": 1.689822793006897, + "num_input_tokens_seen": 538626680, + "step": 3116 + }, + { + "epoch": 1.1937954806587514, + "grad_norm": 165.0405788645187, + "learning_rate": 5e-06, + "loss": 1.7747, + "num_input_tokens_seen": 538799744, + "step": 3117 + }, + { + "epoch": 1.1937954806587514, + "loss": 1.824463129043579, + "loss_ce": 0.4427624046802521, + "loss_iou": 0.6334620714187622, + "loss_num": 0.02294921875, + "loss_xval": 1.3817007541656494, + "num_input_tokens_seen": 538799744, + "step": 3117 + }, + { + "epoch": 1.1941784756798162, + "grad_norm": 131.74273003933374, + "learning_rate": 5e-06, + "loss": 2.3743, + "num_input_tokens_seen": 538972856, + "step": 3118 + }, + { + "epoch": 1.1941784756798162, + "loss": 2.4387192726135254, + "loss_ce": 0.4320226311683655, + "loss_iou": 0.9001989364624023, + "loss_num": 0.041259765625, + "loss_xval": 2.0066967010498047, + "num_input_tokens_seen": 538972856, + "step": 3118 + }, + { + "epoch": 1.194561470700881, + "grad_norm": 87.15329040106326, + "learning_rate": 5e-06, + "loss": 1.8024, + "num_input_tokens_seen": 539145752, + "step": 3119 + }, + { + "epoch": 1.194561470700881, + "loss": 1.725273847579956, + "loss_ce": 0.44722792506217957, + "loss_iou": 0.5879517793655396, + "loss_num": 0.0203857421875, + "loss_xval": 1.278045892715454, + "num_input_tokens_seen": 539145752, + "step": 3119 + }, + { + "epoch": 1.1949444657219457, + "grad_norm": 112.77290771039542, + "learning_rate": 5e-06, + "loss": 1.8274, + "num_input_tokens_seen": 539318872, + "step": 3120 + }, + { + "epoch": 1.1949444657219457, + "loss": 1.7642662525177002, + "loss_ce": 0.46796268224716187, + "loss_iou": 0.607609212398529, + "loss_num": 0.0162353515625, + "loss_xval": 1.296303629875183, + "num_input_tokens_seen": 539318872, + "step": 3120 + }, + { + "epoch": 1.1953274607430102, + "grad_norm": 98.35715045614509, + "learning_rate": 5e-06, + "loss": 1.9458, + "num_input_tokens_seen": 539491696, + "step": 3121 + }, + { + "epoch": 1.1953274607430102, + "loss": 1.9283684492111206, + "loss_ce": 0.46976467967033386, + "loss_iou": 0.685448169708252, + "loss_num": 0.017578125, + "loss_xval": 1.458603858947754, + "num_input_tokens_seen": 539491696, + "step": 3121 + }, + { + "epoch": 1.195710455764075, + "grad_norm": 85.9145857232359, + "learning_rate": 5e-06, + "loss": 1.8642, + "num_input_tokens_seen": 539664320, + "step": 3122 + }, + { + "epoch": 1.195710455764075, + "loss": 1.9947984218597412, + "loss_ce": 0.4589182138442993, + "loss_iou": 0.6967273950576782, + "loss_num": 0.0284423828125, + "loss_xval": 1.5358803272247314, + "num_input_tokens_seen": 539664320, + "step": 3122 + }, + { + "epoch": 1.1960934507851397, + "grad_norm": 178.0041822743624, + "learning_rate": 5e-06, + "loss": 2.1027, + "num_input_tokens_seen": 539836968, + "step": 3123 + }, + { + "epoch": 1.1960934507851397, + "loss": 2.0192744731903076, + "loss_ce": 0.4354323744773865, + "loss_iou": 0.7428792119026184, + "loss_num": 0.0196533203125, + "loss_xval": 1.5838419198989868, + "num_input_tokens_seen": 539836968, + "step": 3123 + }, + { + "epoch": 1.1964764458062045, + "grad_norm": 102.29299889009614, + "learning_rate": 5e-06, + "loss": 2.2874, + "num_input_tokens_seen": 540010136, + "step": 3124 + }, + { + "epoch": 1.1964764458062045, + "loss": 2.20661997795105, + "loss_ce": 0.4187849164009094, + "loss_iou": 0.8059658408164978, + "loss_num": 0.03515625, + "loss_xval": 1.7878351211547852, + "num_input_tokens_seen": 540010136, + "step": 3124 + }, + { + "epoch": 1.1968594408272692, + "grad_norm": 169.16879881047464, + "learning_rate": 5e-06, + "loss": 1.7049, + "num_input_tokens_seen": 540182912, + "step": 3125 + }, + { + "epoch": 1.1968594408272692, + "loss": 1.7866005897521973, + "loss_ce": 0.38834109902381897, + "loss_iou": 0.6257350444793701, + "loss_num": 0.029296875, + "loss_xval": 1.3587088584899902, + "num_input_tokens_seen": 540182912, + "step": 3125 + }, + { + "epoch": 1.197242435848334, + "grad_norm": 214.9198131394962, + "learning_rate": 5e-06, + "loss": 3.465, + "num_input_tokens_seen": 540355872, + "step": 3126 + }, + { + "epoch": 1.197242435848334, + "loss": 3.44808292388916, + "loss_ce": 0.42785316705703735, + "loss_iou": 1.4119094610214233, + "loss_num": 0.039306640625, + "loss_xval": 3.0202300548553467, + "num_input_tokens_seen": 540355872, + "step": 3126 + }, + { + "epoch": 1.1976254308693988, + "grad_norm": 244.08982046201126, + "learning_rate": 5e-06, + "loss": 2.077, + "num_input_tokens_seen": 540529208, + "step": 3127 + }, + { + "epoch": 1.1976254308693988, + "loss": 2.0087804794311523, + "loss_ce": 0.48206713795661926, + "loss_iou": 0.7112632989883423, + "loss_num": 0.0208740234375, + "loss_xval": 1.5267136096954346, + "num_input_tokens_seen": 540529208, + "step": 3127 + }, + { + "epoch": 1.1980084258904635, + "grad_norm": 250.80855549566525, + "learning_rate": 5e-06, + "loss": 2.5378, + "num_input_tokens_seen": 540698496, + "step": 3128 + }, + { + "epoch": 1.1980084258904635, + "loss": 2.458965301513672, + "loss_ce": 0.4827730655670166, + "loss_iou": 0.9210793972015381, + "loss_num": 0.02685546875, + "loss_xval": 1.9761919975280762, + "num_input_tokens_seen": 540698496, + "step": 3128 + }, + { + "epoch": 1.198391420911528, + "grad_norm": 205.82804991445155, + "learning_rate": 5e-06, + "loss": 2.2837, + "num_input_tokens_seen": 540872048, + "step": 3129 + }, + { + "epoch": 1.198391420911528, + "loss": 2.347994327545166, + "loss_ce": 0.4887653887271881, + "loss_iou": 0.8249697685241699, + "loss_num": 0.041748046875, + "loss_xval": 1.8592290878295898, + "num_input_tokens_seen": 540872048, + "step": 3129 + }, + { + "epoch": 1.1987744159325928, + "grad_norm": 193.933614492199, + "learning_rate": 5e-06, + "loss": 1.9608, + "num_input_tokens_seen": 541044768, + "step": 3130 + }, + { + "epoch": 1.1987744159325928, + "loss": 2.005110263824463, + "loss_ce": 0.46384477615356445, + "loss_iou": 0.7266265153884888, + "loss_num": 0.017578125, + "loss_xval": 1.5412657260894775, + "num_input_tokens_seen": 541044768, + "step": 3130 + }, + { + "epoch": 1.1991574109536576, + "grad_norm": 172.24400401577392, + "learning_rate": 5e-06, + "loss": 1.9607, + "num_input_tokens_seen": 541217544, + "step": 3131 + }, + { + "epoch": 1.1991574109536576, + "loss": 1.8374714851379395, + "loss_ce": 0.46363934874534607, + "loss_iou": 0.6391102075576782, + "loss_num": 0.0191650390625, + "loss_xval": 1.3738319873809814, + "num_input_tokens_seen": 541217544, + "step": 3131 + }, + { + "epoch": 1.1995404059747223, + "grad_norm": 81.50056155127288, + "learning_rate": 5e-06, + "loss": 1.6268, + "num_input_tokens_seen": 541390576, + "step": 3132 + }, + { + "epoch": 1.1995404059747223, + "loss": 1.5544099807739258, + "loss_ce": 0.44078072905540466, + "loss_iou": 0.5041108131408691, + "loss_num": 0.0211181640625, + "loss_xval": 1.1033754348754883, + "num_input_tokens_seen": 541390576, + "step": 3132 + }, + { + "epoch": 1.199923400995787, + "grad_norm": 211.04545142130024, + "learning_rate": 5e-06, + "loss": 1.6326, + "num_input_tokens_seen": 541563320, + "step": 3133 + }, + { + "epoch": 1.199923400995787, + "loss": 1.7176035642623901, + "loss_ce": 0.42498356103897095, + "loss_iou": 0.5992518663406372, + "loss_num": 0.018798828125, + "loss_xval": 1.2807180881500244, + "num_input_tokens_seen": 541563320, + "step": 3133 + }, + { + "epoch": 1.2003063960168519, + "grad_norm": 122.70851376184869, + "learning_rate": 5e-06, + "loss": 1.9539, + "num_input_tokens_seen": 541736184, + "step": 3134 + }, + { + "epoch": 1.2003063960168519, + "loss": 2.0332226753234863, + "loss_ce": 0.4139161705970764, + "loss_iou": 0.7215796113014221, + "loss_num": 0.03515625, + "loss_xval": 1.6193065643310547, + "num_input_tokens_seen": 541736184, + "step": 3134 + }, + { + "epoch": 1.2006893910379164, + "grad_norm": 101.12918973362021, + "learning_rate": 5e-06, + "loss": 1.7302, + "num_input_tokens_seen": 541909384, + "step": 3135 + }, + { + "epoch": 1.2006893910379164, + "loss": 1.8653372526168823, + "loss_ce": 0.47176891565322876, + "loss_iou": 0.6533271670341492, + "loss_num": 0.017333984375, + "loss_xval": 1.3935683965682983, + "num_input_tokens_seen": 541909384, + "step": 3135 + }, + { + "epoch": 1.2010723860589811, + "grad_norm": 85.17518217819423, + "learning_rate": 5e-06, + "loss": 1.932, + "num_input_tokens_seen": 542082280, + "step": 3136 + }, + { + "epoch": 1.2010723860589811, + "loss": 2.052091121673584, + "loss_ce": 0.44024819135665894, + "loss_iou": 0.7444589734077454, + "loss_num": 0.024658203125, + "loss_xval": 1.6118427515029907, + "num_input_tokens_seen": 542082280, + "step": 3136 + }, + { + "epoch": 1.201455381080046, + "grad_norm": 109.44411326728972, + "learning_rate": 5e-06, + "loss": 1.6436, + "num_input_tokens_seen": 542254880, + "step": 3137 + }, + { + "epoch": 1.201455381080046, + "loss": 1.718399167060852, + "loss_ce": 0.42179369926452637, + "loss_iou": 0.6057153940200806, + "loss_num": 0.01708984375, + "loss_xval": 1.2966053485870361, + "num_input_tokens_seen": 542254880, + "step": 3137 + }, + { + "epoch": 1.2018383761011107, + "grad_norm": 84.40625767941842, + "learning_rate": 5e-06, + "loss": 1.9448, + "num_input_tokens_seen": 542428208, + "step": 3138 + }, + { + "epoch": 1.2018383761011107, + "loss": 1.921809196472168, + "loss_ce": 0.3864181637763977, + "loss_iou": 0.7132673263549805, + "loss_num": 0.021728515625, + "loss_xval": 1.535390853881836, + "num_input_tokens_seen": 542428208, + "step": 3138 + }, + { + "epoch": 1.2022213711221754, + "grad_norm": 74.28199122633579, + "learning_rate": 5e-06, + "loss": 1.509, + "num_input_tokens_seen": 542601032, + "step": 3139 + }, + { + "epoch": 1.2022213711221754, + "loss": 1.3731598854064941, + "loss_ce": 0.4295925498008728, + "loss_iou": 0.4371461868286133, + "loss_num": 0.01385498046875, + "loss_xval": 0.7442958354949951, + "num_input_tokens_seen": 542601032, + "step": 3139 + }, + { + "epoch": 1.2026043661432402, + "grad_norm": 166.1930064220397, + "learning_rate": 5e-06, + "loss": 1.6838, + "num_input_tokens_seen": 542774376, + "step": 3140 + }, + { + "epoch": 1.2026043661432402, + "loss": 1.6118892431259155, + "loss_ce": 0.38677966594696045, + "loss_iou": 0.566519021987915, + "loss_num": 0.0184326171875, + "loss_xval": 1.211620807647705, + "num_input_tokens_seen": 542774376, + "step": 3140 + }, + { + "epoch": 1.202987361164305, + "grad_norm": 190.0469889866551, + "learning_rate": 5e-06, + "loss": 1.9974, + "num_input_tokens_seen": 542947216, + "step": 3141 + }, + { + "epoch": 1.202987361164305, + "loss": 1.7716901302337646, + "loss_ce": 0.40608906745910645, + "loss_iou": 0.6318361759185791, + "loss_num": 0.0203857421875, + "loss_xval": 1.3499760627746582, + "num_input_tokens_seen": 542947216, + "step": 3141 + }, + { + "epoch": 1.2033703561853697, + "grad_norm": 276.2473347389736, + "learning_rate": 5e-06, + "loss": 2.0013, + "num_input_tokens_seen": 543120248, + "step": 3142 + }, + { + "epoch": 1.2033703561853697, + "loss": 1.96783447265625, + "loss_ce": 0.40639060735702515, + "loss_iou": 0.7039092183113098, + "loss_num": 0.03076171875, + "loss_xval": 1.56144380569458, + "num_input_tokens_seen": 543120248, + "step": 3142 + }, + { + "epoch": 1.2037533512064342, + "grad_norm": 110.19184001956351, + "learning_rate": 5e-06, + "loss": 2.093, + "num_input_tokens_seen": 543293088, + "step": 3143 + }, + { + "epoch": 1.2037533512064342, + "loss": 2.0981013774871826, + "loss_ce": 0.42810600996017456, + "loss_iou": 0.7602143883705139, + "loss_num": 0.0299072265625, + "loss_xval": 1.6699954271316528, + "num_input_tokens_seen": 543293088, + "step": 3143 + }, + { + "epoch": 1.204136346227499, + "grad_norm": 161.9044402146107, + "learning_rate": 5e-06, + "loss": 1.6954, + "num_input_tokens_seen": 543465880, + "step": 3144 + }, + { + "epoch": 1.204136346227499, + "loss": 1.7552735805511475, + "loss_ce": 0.41109591722488403, + "loss_iou": 0.6269075870513916, + "loss_num": 0.01806640625, + "loss_xval": 1.3441777229309082, + "num_input_tokens_seen": 543465880, + "step": 3144 + }, + { + "epoch": 1.2045193412485637, + "grad_norm": 137.31052434918928, + "learning_rate": 5e-06, + "loss": 2.0543, + "num_input_tokens_seen": 543638880, + "step": 3145 + }, + { + "epoch": 1.2045193412485637, + "loss": 2.0171594619750977, + "loss_ce": 0.42851918935775757, + "loss_iou": 0.7467432022094727, + "loss_num": 0.01904296875, + "loss_xval": 1.5886402130126953, + "num_input_tokens_seen": 543638880, + "step": 3145 + }, + { + "epoch": 1.2049023362696285, + "grad_norm": 180.0421227264355, + "learning_rate": 5e-06, + "loss": 2.6378, + "num_input_tokens_seen": 543811792, + "step": 3146 + }, + { + "epoch": 1.2049023362696285, + "loss": 2.5615615844726562, + "loss_ce": 0.41080349683761597, + "loss_iou": 0.8848272562026978, + "loss_num": 0.076171875, + "loss_xval": 2.1507580280303955, + "num_input_tokens_seen": 543811792, + "step": 3146 + }, + { + "epoch": 1.2052853312906933, + "grad_norm": 186.06827639905708, + "learning_rate": 5e-06, + "loss": 1.9071, + "num_input_tokens_seen": 543984848, + "step": 3147 + }, + { + "epoch": 1.2052853312906933, + "loss": 1.9688708782196045, + "loss_ce": 0.37465977668762207, + "loss_iou": 0.7266404628753662, + "loss_num": 0.0281982421875, + "loss_xval": 1.5942111015319824, + "num_input_tokens_seen": 543984848, + "step": 3147 + }, + { + "epoch": 1.205668326311758, + "grad_norm": 177.21960815543142, + "learning_rate": 5e-06, + "loss": 2.8569, + "num_input_tokens_seen": 544157968, + "step": 3148 + }, + { + "epoch": 1.205668326311758, + "loss": 2.867978096008301, + "loss_ce": 0.3810361623764038, + "loss_iou": 1.1110856533050537, + "loss_num": 0.052978515625, + "loss_xval": 2.4869418144226074, + "num_input_tokens_seen": 544157968, + "step": 3148 + }, + { + "epoch": 1.2060513213328226, + "grad_norm": 104.67555522556067, + "learning_rate": 5e-06, + "loss": 2.2487, + "num_input_tokens_seen": 544331024, + "step": 3149 + }, + { + "epoch": 1.2060513213328226, + "loss": 2.21226167678833, + "loss_ce": 0.4219363331794739, + "loss_iou": 0.7929898500442505, + "loss_num": 0.040771484375, + "loss_xval": 1.790325403213501, + "num_input_tokens_seen": 544331024, + "step": 3149 + }, + { + "epoch": 1.2064343163538873, + "grad_norm": 111.45546340906715, + "learning_rate": 5e-06, + "loss": 2.2152, + "num_input_tokens_seen": 544504400, + "step": 3150 + }, + { + "epoch": 1.2064343163538873, + "loss": 2.238651752471924, + "loss_ce": 0.4570075273513794, + "loss_iou": 0.8178850412368774, + "loss_num": 0.0291748046875, + "loss_xval": 1.7816441059112549, + "num_input_tokens_seen": 544504400, + "step": 3150 + }, + { + "epoch": 1.206817311374952, + "grad_norm": 126.96258382781926, + "learning_rate": 5e-06, + "loss": 1.6517, + "num_input_tokens_seen": 544677280, + "step": 3151 + }, + { + "epoch": 1.206817311374952, + "loss": 1.7380001544952393, + "loss_ce": 0.4228131175041199, + "loss_iou": 0.6230170726776123, + "loss_num": 0.01385498046875, + "loss_xval": 1.3151869773864746, + "num_input_tokens_seen": 544677280, + "step": 3151 + }, + { + "epoch": 1.2072003063960168, + "grad_norm": 173.14029111978968, + "learning_rate": 5e-06, + "loss": 2.1909, + "num_input_tokens_seen": 544850264, + "step": 3152 + }, + { + "epoch": 1.2072003063960168, + "loss": 2.182771921157837, + "loss_ce": 0.4373222887516022, + "loss_iou": 0.8156265020370483, + "loss_num": 0.0228271484375, + "loss_xval": 1.7454497814178467, + "num_input_tokens_seen": 544850264, + "step": 3152 + }, + { + "epoch": 1.2075833014170816, + "grad_norm": 109.54421461126226, + "learning_rate": 5e-06, + "loss": 1.9302, + "num_input_tokens_seen": 545023304, + "step": 3153 + }, + { + "epoch": 1.2075833014170816, + "loss": 1.8964380025863647, + "loss_ce": 0.4667872488498688, + "loss_iou": 0.6590697765350342, + "loss_num": 0.0223388671875, + "loss_xval": 1.4296507835388184, + "num_input_tokens_seen": 545023304, + "step": 3153 + }, + { + "epoch": 1.2079662964381463, + "grad_norm": 155.12626056609227, + "learning_rate": 5e-06, + "loss": 1.7024, + "num_input_tokens_seen": 545196296, + "step": 3154 + }, + { + "epoch": 1.2079662964381463, + "loss": 1.7114551067352295, + "loss_ce": 0.3582027554512024, + "loss_iou": 0.6047877669334412, + "loss_num": 0.0286865234375, + "loss_xval": 1.3532522916793823, + "num_input_tokens_seen": 545196296, + "step": 3154 + }, + { + "epoch": 1.208349291459211, + "grad_norm": 85.11109123507192, + "learning_rate": 5e-06, + "loss": 2.179, + "num_input_tokens_seen": 545369256, + "step": 3155 + }, + { + "epoch": 1.208349291459211, + "loss": 2.168123722076416, + "loss_ce": 0.4210514724254608, + "loss_iou": 0.7872323393821716, + "loss_num": 0.03466796875, + "loss_xval": 1.7470721006393433, + "num_input_tokens_seen": 545369256, + "step": 3155 + }, + { + "epoch": 1.2087322864802759, + "grad_norm": 93.31662199160257, + "learning_rate": 5e-06, + "loss": 1.8172, + "num_input_tokens_seen": 545542424, + "step": 3156 + }, + { + "epoch": 1.2087322864802759, + "loss": 1.997159481048584, + "loss_ce": 0.4138537049293518, + "loss_iou": 0.7168543338775635, + "loss_num": 0.0299072265625, + "loss_xval": 1.583305835723877, + "num_input_tokens_seen": 545542424, + "step": 3156 + }, + { + "epoch": 1.2091152815013404, + "grad_norm": 194.46825793740766, + "learning_rate": 5e-06, + "loss": 2.0425, + "num_input_tokens_seen": 545715432, + "step": 3157 + }, + { + "epoch": 1.2091152815013404, + "loss": 2.0387141704559326, + "loss_ce": 0.40486350655555725, + "loss_iou": 0.7460329532623291, + "loss_num": 0.0283203125, + "loss_xval": 1.6338505744934082, + "num_input_tokens_seen": 545715432, + "step": 3157 + }, + { + "epoch": 1.2094982765224052, + "grad_norm": 102.17582157733656, + "learning_rate": 5e-06, + "loss": 2.4329, + "num_input_tokens_seen": 545888312, + "step": 3158 + }, + { + "epoch": 1.2094982765224052, + "loss": 2.5089845657348633, + "loss_ce": 0.3819023072719574, + "loss_iou": 0.9404637813568115, + "loss_num": 0.04931640625, + "loss_xval": 2.127082347869873, + "num_input_tokens_seen": 545888312, + "step": 3158 + }, + { + "epoch": 1.20988127154347, + "grad_norm": 128.1332777760175, + "learning_rate": 5e-06, + "loss": 1.9519, + "num_input_tokens_seen": 546061184, + "step": 3159 + }, + { + "epoch": 1.20988127154347, + "loss": 2.0174331665039062, + "loss_ce": 0.43313467502593994, + "loss_iou": 0.7280622720718384, + "loss_num": 0.025634765625, + "loss_xval": 1.5842983722686768, + "num_input_tokens_seen": 546061184, + "step": 3159 + }, + { + "epoch": 1.2102642665645347, + "grad_norm": 166.00311725221368, + "learning_rate": 5e-06, + "loss": 2.016, + "num_input_tokens_seen": 546234064, + "step": 3160 + }, + { + "epoch": 1.2102642665645347, + "loss": 2.0598645210266113, + "loss_ce": 0.3841882646083832, + "loss_iou": 0.7913904190063477, + "loss_num": 0.0185546875, + "loss_xval": 1.6756762266159058, + "num_input_tokens_seen": 546234064, + "step": 3160 + }, + { + "epoch": 1.2106472615855994, + "grad_norm": 75.9356801665817, + "learning_rate": 5e-06, + "loss": 2.3616, + "num_input_tokens_seen": 546407040, + "step": 3161 + }, + { + "epoch": 1.2106472615855994, + "loss": 2.368368148803711, + "loss_ce": 0.40133553743362427, + "loss_iou": 0.8806720972061157, + "loss_num": 0.041259765625, + "loss_xval": 1.967032551765442, + "num_input_tokens_seen": 546407040, + "step": 3161 + }, + { + "epoch": 1.2110302566066642, + "grad_norm": 80.20061715058009, + "learning_rate": 5e-06, + "loss": 1.8253, + "num_input_tokens_seen": 546580256, + "step": 3162 + }, + { + "epoch": 1.2110302566066642, + "loss": 1.9942257404327393, + "loss_ce": 0.3905842900276184, + "loss_iou": 0.705446183681488, + "loss_num": 0.03857421875, + "loss_xval": 1.603641390800476, + "num_input_tokens_seen": 546580256, + "step": 3162 + }, + { + "epoch": 1.2114132516277287, + "grad_norm": 139.36934028075223, + "learning_rate": 5e-06, + "loss": 1.7623, + "num_input_tokens_seen": 546752784, + "step": 3163 + }, + { + "epoch": 1.2114132516277287, + "loss": 1.612166404724121, + "loss_ce": 0.4187697172164917, + "loss_iou": 0.5422397255897522, + "loss_num": 0.021728515625, + "loss_xval": 1.1659308671951294, + "num_input_tokens_seen": 546752784, + "step": 3163 + }, + { + "epoch": 1.2117962466487935, + "grad_norm": 89.05301644728904, + "learning_rate": 5e-06, + "loss": 2.116, + "num_input_tokens_seen": 546925888, + "step": 3164 + }, + { + "epoch": 1.2117962466487935, + "loss": 2.1580970287323, + "loss_ce": 0.3667049705982208, + "loss_iou": 0.7959645986557007, + "loss_num": 0.039794921875, + "loss_xval": 1.7913920879364014, + "num_input_tokens_seen": 546925888, + "step": 3164 + }, + { + "epoch": 1.2121792416698582, + "grad_norm": 105.80627408837645, + "learning_rate": 5e-06, + "loss": 1.9025, + "num_input_tokens_seen": 547098880, + "step": 3165 + }, + { + "epoch": 1.2121792416698582, + "loss": 2.0705251693725586, + "loss_ce": 0.39548683166503906, + "loss_iou": 0.7514901161193848, + "loss_num": 0.034423828125, + "loss_xval": 1.6750383377075195, + "num_input_tokens_seen": 547098880, + "step": 3165 + }, + { + "epoch": 1.212562236690923, + "grad_norm": 176.6958791685355, + "learning_rate": 5e-06, + "loss": 1.9749, + "num_input_tokens_seen": 547271656, + "step": 3166 + }, + { + "epoch": 1.212562236690923, + "loss": 1.965118408203125, + "loss_ce": 0.41928714513778687, + "loss_iou": 0.7106139063835144, + "loss_num": 0.02490234375, + "loss_xval": 1.5458310842514038, + "num_input_tokens_seen": 547271656, + "step": 3166 + }, + { + "epoch": 1.2129452317119878, + "grad_norm": 86.74503746663704, + "learning_rate": 5e-06, + "loss": 2.4458, + "num_input_tokens_seen": 547444392, + "step": 3167 + }, + { + "epoch": 1.2129452317119878, + "loss": 2.434870481491089, + "loss_ce": 0.3902822732925415, + "loss_iou": 0.9276590347290039, + "loss_num": 0.037841796875, + "loss_xval": 2.044588088989258, + "num_input_tokens_seen": 547444392, + "step": 3167 + }, + { + "epoch": 1.2133282267330525, + "grad_norm": 109.50378315927813, + "learning_rate": 5e-06, + "loss": 2.0427, + "num_input_tokens_seen": 547617224, + "step": 3168 + }, + { + "epoch": 1.2133282267330525, + "loss": 2.069617509841919, + "loss_ce": 0.4061136543750763, + "loss_iou": 0.7618972063064575, + "loss_num": 0.0279541015625, + "loss_xval": 1.663503885269165, + "num_input_tokens_seen": 547617224, + "step": 3168 + }, + { + "epoch": 1.2137112217541173, + "grad_norm": 102.55478575623683, + "learning_rate": 5e-06, + "loss": 1.7574, + "num_input_tokens_seen": 547790144, + "step": 3169 + }, + { + "epoch": 1.2137112217541173, + "loss": 1.8150783777236938, + "loss_ce": 0.41318345069885254, + "loss_iou": 0.6508070230484009, + "loss_num": 0.02001953125, + "loss_xval": 1.4018948078155518, + "num_input_tokens_seen": 547790144, + "step": 3169 + }, + { + "epoch": 1.214094216775182, + "grad_norm": 103.97034145766871, + "learning_rate": 5e-06, + "loss": 2.5278, + "num_input_tokens_seen": 547962632, + "step": 3170 + }, + { + "epoch": 1.214094216775182, + "loss": 2.4732987880706787, + "loss_ce": 0.365742564201355, + "loss_iou": 0.9795289039611816, + "loss_num": 0.0296630859375, + "loss_xval": 2.1075563430786133, + "num_input_tokens_seen": 547962632, + "step": 3170 + }, + { + "epoch": 1.2144772117962466, + "grad_norm": 91.75053945426667, + "learning_rate": 5e-06, + "loss": 2.3501, + "num_input_tokens_seen": 548135768, + "step": 3171 + }, + { + "epoch": 1.2144772117962466, + "loss": 2.3920657634735107, + "loss_ce": 0.3515428304672241, + "loss_iou": 0.8424050807952881, + "loss_num": 0.0712890625, + "loss_xval": 2.040523052215576, + "num_input_tokens_seen": 548135768, + "step": 3171 + }, + { + "epoch": 1.2148602068173113, + "grad_norm": 61.849301496515274, + "learning_rate": 5e-06, + "loss": 1.7051, + "num_input_tokens_seen": 548308728, + "step": 3172 + }, + { + "epoch": 1.2148602068173113, + "loss": 1.6450124979019165, + "loss_ce": 0.3722168803215027, + "loss_iou": 0.5765833854675293, + "loss_num": 0.02392578125, + "loss_xval": 1.2727956771850586, + "num_input_tokens_seen": 548308728, + "step": 3172 + }, + { + "epoch": 1.215243201838376, + "grad_norm": 95.09659762996829, + "learning_rate": 5e-06, + "loss": 1.6952, + "num_input_tokens_seen": 548481448, + "step": 3173 + }, + { + "epoch": 1.215243201838376, + "loss": 1.82415771484375, + "loss_ce": 0.38953959941864014, + "loss_iou": 0.6570368409156799, + "loss_num": 0.024169921875, + "loss_xval": 1.4346181154251099, + "num_input_tokens_seen": 548481448, + "step": 3173 + }, + { + "epoch": 1.2156261968594408, + "grad_norm": 101.60642646717736, + "learning_rate": 5e-06, + "loss": 2.0832, + "num_input_tokens_seen": 548654184, + "step": 3174 + }, + { + "epoch": 1.2156261968594408, + "loss": 2.0614495277404785, + "loss_ce": 0.3467678427696228, + "loss_iou": 0.8017683029174805, + "loss_num": 0.022216796875, + "loss_xval": 1.714681625366211, + "num_input_tokens_seen": 548654184, + "step": 3174 + }, + { + "epoch": 1.2160091918805056, + "grad_norm": 90.59437001960373, + "learning_rate": 5e-06, + "loss": 1.9203, + "num_input_tokens_seen": 548827624, + "step": 3175 + }, + { + "epoch": 1.2160091918805056, + "loss": 2.055656909942627, + "loss_ce": 0.3785133361816406, + "loss_iou": 0.7676794528961182, + "loss_num": 0.0283203125, + "loss_xval": 1.6771435737609863, + "num_input_tokens_seen": 548827624, + "step": 3175 + }, + { + "epoch": 1.2163921869015704, + "grad_norm": 165.76325074868544, + "learning_rate": 5e-06, + "loss": 1.5723, + "num_input_tokens_seen": 549000272, + "step": 3176 + }, + { + "epoch": 1.2163921869015704, + "loss": 1.5530579090118408, + "loss_ce": 0.39820683002471924, + "loss_iou": 0.5333581566810608, + "loss_num": 0.017578125, + "loss_xval": 1.1548510789871216, + "num_input_tokens_seen": 549000272, + "step": 3176 + }, + { + "epoch": 1.216775181922635, + "grad_norm": 128.32266130053333, + "learning_rate": 5e-06, + "loss": 2.0198, + "num_input_tokens_seen": 549173096, + "step": 3177 + }, + { + "epoch": 1.216775181922635, + "loss": 2.107774019241333, + "loss_ce": 0.3247598111629486, + "loss_iou": 0.8118257522583008, + "loss_num": 0.03173828125, + "loss_xval": 1.7830142974853516, + "num_input_tokens_seen": 549173096, + "step": 3177 + }, + { + "epoch": 1.2171581769436997, + "grad_norm": 90.04220984141216, + "learning_rate": 5e-06, + "loss": 1.5218, + "num_input_tokens_seen": 549345928, + "step": 3178 + }, + { + "epoch": 1.2171581769436997, + "loss": 1.4979586601257324, + "loss_ce": 0.29987651109695435, + "loss_iou": 0.5426750779151917, + "loss_num": 0.0225830078125, + "loss_xval": 1.1980820894241333, + "num_input_tokens_seen": 549345928, + "step": 3178 + }, + { + "epoch": 1.2175411719647644, + "grad_norm": 133.8270785888188, + "learning_rate": 5e-06, + "loss": 1.7561, + "num_input_tokens_seen": 549518784, + "step": 3179 + }, + { + "epoch": 1.2175411719647644, + "loss": 1.7928619384765625, + "loss_ce": 0.3510966897010803, + "loss_iou": 0.661266565322876, + "loss_num": 0.0238037109375, + "loss_xval": 1.441765308380127, + "num_input_tokens_seen": 549518784, + "step": 3179 + }, + { + "epoch": 1.2179241669858292, + "grad_norm": 71.61817012756259, + "learning_rate": 5e-06, + "loss": 1.7947, + "num_input_tokens_seen": 549691616, + "step": 3180 + }, + { + "epoch": 1.2179241669858292, + "loss": 1.7454323768615723, + "loss_ce": 0.28611862659454346, + "loss_iou": 0.6721922755241394, + "loss_num": 0.02294921875, + "loss_xval": 1.4593137502670288, + "num_input_tokens_seen": 549691616, + "step": 3180 + }, + { + "epoch": 1.218307162006894, + "grad_norm": 174.49661188752026, + "learning_rate": 5e-06, + "loss": 1.523, + "num_input_tokens_seen": 549864648, + "step": 3181 + }, + { + "epoch": 1.218307162006894, + "loss": 1.7097549438476562, + "loss_ce": 0.31086260080337524, + "loss_iou": 0.6387467384338379, + "loss_num": 0.0242919921875, + "loss_xval": 1.3988924026489258, + "num_input_tokens_seen": 549864648, + "step": 3181 + }, + { + "epoch": 1.2186901570279587, + "grad_norm": 249.66159079891125, + "learning_rate": 5e-06, + "loss": 2.0089, + "num_input_tokens_seen": 550037816, + "step": 3182 + }, + { + "epoch": 1.2186901570279587, + "loss": 2.0109968185424805, + "loss_ce": 0.27985674142837524, + "loss_iou": 0.7917481660842896, + "loss_num": 0.029541015625, + "loss_xval": 1.731140375137329, + "num_input_tokens_seen": 550037816, + "step": 3182 + }, + { + "epoch": 1.2190731520490234, + "grad_norm": 101.54537421742987, + "learning_rate": 5e-06, + "loss": 1.7719, + "num_input_tokens_seen": 550210952, + "step": 3183 + }, + { + "epoch": 1.2190731520490234, + "loss": 1.8788511753082275, + "loss_ce": 0.31219401955604553, + "loss_iou": 0.6646761894226074, + "loss_num": 0.04736328125, + "loss_xval": 1.5666570663452148, + "num_input_tokens_seen": 550210952, + "step": 3183 + }, + { + "epoch": 1.2194561470700882, + "grad_norm": 143.63311814092074, + "learning_rate": 5e-06, + "loss": 1.6131, + "num_input_tokens_seen": 550384064, + "step": 3184 + }, + { + "epoch": 1.2194561470700882, + "loss": 1.5912206172943115, + "loss_ce": 0.30355510115623474, + "loss_iou": 0.5518527626991272, + "loss_num": 0.036865234375, + "loss_xval": 1.2376166582107544, + "num_input_tokens_seen": 550384064, + "step": 3184 + }, + { + "epoch": 1.2198391420911527, + "grad_norm": 74.71351576334541, + "learning_rate": 5e-06, + "loss": 2.4814, + "num_input_tokens_seen": 550557072, + "step": 3185 + }, + { + "epoch": 1.2198391420911527, + "loss": 2.515709400177002, + "loss_ce": 0.3064751923084259, + "loss_iou": 0.8775662183761597, + "loss_num": 0.0908203125, + "loss_xval": 2.2092342376708984, + "num_input_tokens_seen": 550557072, + "step": 3185 + }, + { + "epoch": 1.2202221371122175, + "grad_norm": 106.75768485415921, + "learning_rate": 5e-06, + "loss": 1.9261, + "num_input_tokens_seen": 550730416, + "step": 3186 + }, + { + "epoch": 1.2202221371122175, + "loss": 1.9721856117248535, + "loss_ce": 0.35608041286468506, + "loss_iou": 0.7219014763832092, + "loss_num": 0.034423828125, + "loss_xval": 1.616105318069458, + "num_input_tokens_seen": 550730416, + "step": 3186 + }, + { + "epoch": 1.2206051321332823, + "grad_norm": 125.6871573178647, + "learning_rate": 5e-06, + "loss": 2.1257, + "num_input_tokens_seen": 550903288, + "step": 3187 + }, + { + "epoch": 1.2206051321332823, + "loss": 2.1850404739379883, + "loss_ce": 0.3669430613517761, + "loss_iou": 0.8350129723548889, + "loss_num": 0.0296630859375, + "loss_xval": 1.8180972337722778, + "num_input_tokens_seen": 550903288, + "step": 3187 + }, + { + "epoch": 1.220988127154347, + "grad_norm": 106.55127289380127, + "learning_rate": 5e-06, + "loss": 1.6297, + "num_input_tokens_seen": 551076160, + "step": 3188 + }, + { + "epoch": 1.220988127154347, + "loss": 1.5373339653015137, + "loss_ce": 0.3313254714012146, + "loss_iou": 0.5609052181243896, + "loss_num": 0.016845703125, + "loss_xval": 1.2060084342956543, + "num_input_tokens_seen": 551076160, + "step": 3188 + }, + { + "epoch": 1.2213711221754118, + "grad_norm": 332.88685463480067, + "learning_rate": 5e-06, + "loss": 2.1437, + "num_input_tokens_seen": 551248976, + "step": 3189 + }, + { + "epoch": 1.2213711221754118, + "loss": 2.1342363357543945, + "loss_ce": 0.35409724712371826, + "loss_iou": 0.8193603754043579, + "loss_num": 0.0283203125, + "loss_xval": 1.7801392078399658, + "num_input_tokens_seen": 551248976, + "step": 3189 + }, + { + "epoch": 1.2217541171964765, + "grad_norm": 111.80509309635501, + "learning_rate": 5e-06, + "loss": 2.0058, + "num_input_tokens_seen": 551422032, + "step": 3190 + }, + { + "epoch": 1.2217541171964765, + "loss": 2.0466792583465576, + "loss_ce": 0.3329498767852783, + "loss_iou": 0.7603681087493896, + "loss_num": 0.03857421875, + "loss_xval": 1.7137293815612793, + "num_input_tokens_seen": 551422032, + "step": 3190 + }, + { + "epoch": 1.222137112217541, + "grad_norm": 243.96988241511502, + "learning_rate": 5e-06, + "loss": 1.7906, + "num_input_tokens_seen": 551594824, + "step": 3191 + }, + { + "epoch": 1.222137112217541, + "loss": 1.803696870803833, + "loss_ce": 0.3025438189506531, + "loss_iou": 0.6702237725257874, + "loss_num": 0.0322265625, + "loss_xval": 1.5011531114578247, + "num_input_tokens_seen": 551594824, + "step": 3191 + }, + { + "epoch": 1.2225201072386058, + "grad_norm": 143.00161490878253, + "learning_rate": 5e-06, + "loss": 2.1548, + "num_input_tokens_seen": 551767880, + "step": 3192 + }, + { + "epoch": 1.2225201072386058, + "loss": 2.170114517211914, + "loss_ce": 0.30388131737709045, + "loss_iou": 0.859386146068573, + "loss_num": 0.029541015625, + "loss_xval": 1.866233229637146, + "num_input_tokens_seen": 551767880, + "step": 3192 + }, + { + "epoch": 1.2229031022596706, + "grad_norm": 87.0770299486737, + "learning_rate": 5e-06, + "loss": 1.6753, + "num_input_tokens_seen": 551940552, + "step": 3193 + }, + { + "epoch": 1.2229031022596706, + "loss": 1.6283884048461914, + "loss_ce": 0.3293622136116028, + "loss_iou": 0.5896986722946167, + "loss_num": 0.02392578125, + "loss_xval": 1.2990262508392334, + "num_input_tokens_seen": 551940552, + "step": 3193 + }, + { + "epoch": 1.2232860972807353, + "grad_norm": 164.9114922094196, + "learning_rate": 5e-06, + "loss": 1.5698, + "num_input_tokens_seen": 552113672, + "step": 3194 + }, + { + "epoch": 1.2232860972807353, + "loss": 1.3979899883270264, + "loss_ce": 0.3084055185317993, + "loss_iou": 0.4936447739601135, + "loss_num": 0.0205078125, + "loss_xval": 1.089584469795227, + "num_input_tokens_seen": 552113672, + "step": 3194 + }, + { + "epoch": 1.2236690923018, + "grad_norm": 163.39307516080493, + "learning_rate": 5e-06, + "loss": 1.7532, + "num_input_tokens_seen": 552286464, + "step": 3195 + }, + { + "epoch": 1.2236690923018, + "loss": 1.7099965810775757, + "loss_ce": 0.2678207755088806, + "loss_iou": 0.6490053534507751, + "loss_num": 0.02880859375, + "loss_xval": 1.4421757459640503, + "num_input_tokens_seen": 552286464, + "step": 3195 + }, + { + "epoch": 1.2240520873228649, + "grad_norm": 145.08628084374266, + "learning_rate": 5e-06, + "loss": 2.174, + "num_input_tokens_seen": 552459520, + "step": 3196 + }, + { + "epoch": 1.2240520873228649, + "loss": 2.1587939262390137, + "loss_ce": 0.31603682041168213, + "loss_iou": 0.8135904669761658, + "loss_num": 0.043212890625, + "loss_xval": 1.8427571058273315, + "num_input_tokens_seen": 552459520, + "step": 3196 + }, + { + "epoch": 1.2244350823439296, + "grad_norm": 62.70519028469807, + "learning_rate": 5e-06, + "loss": 1.656, + "num_input_tokens_seen": 552632120, + "step": 3197 + }, + { + "epoch": 1.2244350823439296, + "loss": 1.7491471767425537, + "loss_ce": 0.30180686712265015, + "loss_iou": 0.6547919511795044, + "loss_num": 0.027587890625, + "loss_xval": 1.4473402500152588, + "num_input_tokens_seen": 552632120, + "step": 3197 + }, + { + "epoch": 1.2248180773649944, + "grad_norm": 123.27105193638035, + "learning_rate": 5e-06, + "loss": 1.5216, + "num_input_tokens_seen": 552805312, + "step": 3198 + }, + { + "epoch": 1.2248180773649944, + "loss": 1.5703985691070557, + "loss_ce": 0.25883591175079346, + "loss_iou": 0.5804944634437561, + "loss_num": 0.0301513671875, + "loss_xval": 1.3115626573562622, + "num_input_tokens_seen": 552805312, + "step": 3198 + }, + { + "epoch": 1.225201072386059, + "grad_norm": 256.7896731375906, + "learning_rate": 5e-06, + "loss": 2.2714, + "num_input_tokens_seen": 552978072, + "step": 3199 + }, + { + "epoch": 1.225201072386059, + "loss": 2.1277894973754883, + "loss_ce": 0.25805285573005676, + "loss_iou": 0.811699390411377, + "loss_num": 0.04931640625, + "loss_xval": 1.825791358947754, + "num_input_tokens_seen": 552978072, + "step": 3199 + }, + { + "epoch": 1.2255840674071237, + "grad_norm": 85.84432593178089, + "learning_rate": 5e-06, + "loss": 1.858, + "num_input_tokens_seen": 553150808, + "step": 3200 + }, + { + "epoch": 1.2255840674071237, + "loss": 1.7213298082351685, + "loss_ce": 0.3204355835914612, + "loss_iou": 0.617317259311676, + "loss_num": 0.033203125, + "loss_xval": 1.400894284248352, + "num_input_tokens_seen": 553150808, + "step": 3200 + }, + { + "epoch": 1.2259670624281884, + "grad_norm": 238.3452424198299, + "learning_rate": 5e-06, + "loss": 1.943, + "num_input_tokens_seen": 553323536, + "step": 3201 + }, + { + "epoch": 1.2259670624281884, + "loss": 1.7847257852554321, + "loss_ce": 0.3421986997127533, + "loss_iou": 0.646281898021698, + "loss_num": 0.030029296875, + "loss_xval": 1.442527174949646, + "num_input_tokens_seen": 553323536, + "step": 3201 + }, + { + "epoch": 1.2263500574492532, + "grad_norm": 107.85156694617933, + "learning_rate": 5e-06, + "loss": 2.0012, + "num_input_tokens_seen": 553496432, + "step": 3202 + }, + { + "epoch": 1.2263500574492532, + "loss": 1.8426074981689453, + "loss_ce": 0.30851882696151733, + "loss_iou": 0.6797640323638916, + "loss_num": 0.034912109375, + "loss_xval": 1.5340886116027832, + "num_input_tokens_seen": 553496432, + "step": 3202 + }, + { + "epoch": 1.226733052470318, + "grad_norm": 132.43472861822866, + "learning_rate": 5e-06, + "loss": 1.821, + "num_input_tokens_seen": 553669560, + "step": 3203 + }, + { + "epoch": 1.226733052470318, + "loss": 1.8719840049743652, + "loss_ce": 0.2932372987270355, + "loss_iou": 0.6697444915771484, + "loss_num": 0.0478515625, + "loss_xval": 1.5787467956542969, + "num_input_tokens_seen": 553669560, + "step": 3203 + }, + { + "epoch": 1.2271160474913827, + "grad_norm": 122.07163065642287, + "learning_rate": 5e-06, + "loss": 1.9149, + "num_input_tokens_seen": 553841960, + "step": 3204 + }, + { + "epoch": 1.2271160474913827, + "loss": 2.065173864364624, + "loss_ce": 0.31117385625839233, + "loss_iou": 0.7777567505836487, + "loss_num": 0.039794921875, + "loss_xval": 1.7539998292922974, + "num_input_tokens_seen": 553841960, + "step": 3204 + }, + { + "epoch": 1.2274990425124472, + "grad_norm": 117.89645509200975, + "learning_rate": 5e-06, + "loss": 1.599, + "num_input_tokens_seen": 554015008, + "step": 3205 + }, + { + "epoch": 1.2274990425124472, + "loss": 1.7536414861679077, + "loss_ce": 0.3128109574317932, + "loss_iou": 0.6543446779251099, + "loss_num": 0.0263671875, + "loss_xval": 1.4408304691314697, + "num_input_tokens_seen": 554015008, + "step": 3205 + }, + { + "epoch": 1.227882037533512, + "grad_norm": 103.8880128453902, + "learning_rate": 5e-06, + "loss": 1.9502, + "num_input_tokens_seen": 554188192, + "step": 3206 + }, + { + "epoch": 1.227882037533512, + "loss": 1.9497625827789307, + "loss_ce": 0.20692452788352966, + "loss_iou": 0.8029986023902893, + "loss_num": 0.02734375, + "loss_xval": 1.7428380250930786, + "num_input_tokens_seen": 554188192, + "step": 3206 + }, + { + "epoch": 1.2282650325545768, + "grad_norm": 96.83289943117904, + "learning_rate": 5e-06, + "loss": 1.5813, + "num_input_tokens_seen": 554361096, + "step": 3207 + }, + { + "epoch": 1.2282650325545768, + "loss": 1.4702672958374023, + "loss_ce": 0.2342482954263687, + "loss_iou": 0.5651378631591797, + "loss_num": 0.0211181640625, + "loss_xval": 1.2360191345214844, + "num_input_tokens_seen": 554361096, + "step": 3207 + }, + { + "epoch": 1.2286480275756415, + "grad_norm": 119.81877608285815, + "learning_rate": 5e-06, + "loss": 1.59, + "num_input_tokens_seen": 554534456, + "step": 3208 + }, + { + "epoch": 1.2286480275756415, + "loss": 1.7240769863128662, + "loss_ce": 0.244428813457489, + "loss_iou": 0.6755694150924683, + "loss_num": 0.0257568359375, + "loss_xval": 1.479648232460022, + "num_input_tokens_seen": 554534456, + "step": 3208 + }, + { + "epoch": 1.2290310225967063, + "grad_norm": 499.2687277835983, + "learning_rate": 5e-06, + "loss": 2.3241, + "num_input_tokens_seen": 554707416, + "step": 3209 + }, + { + "epoch": 1.2290310225967063, + "loss": 2.3630847930908203, + "loss_ce": 0.31900089979171753, + "loss_iou": 0.938149094581604, + "loss_num": 0.033447265625, + "loss_xval": 2.044083833694458, + "num_input_tokens_seen": 554707416, + "step": 3209 + }, + { + "epoch": 1.229414017617771, + "grad_norm": 112.24615547428523, + "learning_rate": 5e-06, + "loss": 2.1574, + "num_input_tokens_seen": 554880504, + "step": 3210 + }, + { + "epoch": 1.229414017617771, + "loss": 2.1365976333618164, + "loss_ce": 0.2381032407283783, + "loss_iou": 0.8168008327484131, + "loss_num": 0.052978515625, + "loss_xval": 1.8984942436218262, + "num_input_tokens_seen": 554880504, + "step": 3210 + }, + { + "epoch": 1.2297970126388358, + "grad_norm": 152.87935583291184, + "learning_rate": 5e-06, + "loss": 1.9168, + "num_input_tokens_seen": 555053472, + "step": 3211 + }, + { + "epoch": 1.2297970126388358, + "loss": 1.924161434173584, + "loss_ce": 0.2370298206806183, + "loss_iou": 0.706541895866394, + "loss_num": 0.054931640625, + "loss_xval": 1.687131643295288, + "num_input_tokens_seen": 555053472, + "step": 3211 + }, + { + "epoch": 1.2301800076599005, + "grad_norm": 130.56666898176, + "learning_rate": 5e-06, + "loss": 1.758, + "num_input_tokens_seen": 555226824, + "step": 3212 + }, + { + "epoch": 1.2301800076599005, + "loss": 1.8666596412658691, + "loss_ce": 0.3369540572166443, + "loss_iou": 0.70448899269104, + "loss_num": 0.024169921875, + "loss_xval": 1.52970552444458, + "num_input_tokens_seen": 555226824, + "step": 3212 + }, + { + "epoch": 1.230563002680965, + "grad_norm": 165.93343641993468, + "learning_rate": 5e-06, + "loss": 2.7272, + "num_input_tokens_seen": 555395992, + "step": 3213 + }, + { + "epoch": 1.230563002680965, + "loss": 2.9067001342773438, + "loss_ce": 0.251071035861969, + "loss_iou": 1.2319283485412598, + "loss_num": 0.038330078125, + "loss_xval": 2.6556291580200195, + "num_input_tokens_seen": 555395992, + "step": 3213 + }, + { + "epoch": 1.2309459977020298, + "grad_norm": 69.42459640699623, + "learning_rate": 5e-06, + "loss": 2.2747, + "num_input_tokens_seen": 555568824, + "step": 3214 + }, + { + "epoch": 1.2309459977020298, + "loss": 2.240429162979126, + "loss_ce": 0.26815319061279297, + "loss_iou": 0.8390432596206665, + "loss_num": 0.058837890625, + "loss_xval": 1.972275972366333, + "num_input_tokens_seen": 555568824, + "step": 3214 + }, + { + "epoch": 1.2313289927230946, + "grad_norm": 130.9276975457229, + "learning_rate": 5e-06, + "loss": 1.65, + "num_input_tokens_seen": 555741224, + "step": 3215 + }, + { + "epoch": 1.2313289927230946, + "loss": 1.7605862617492676, + "loss_ce": 0.2360796332359314, + "loss_iou": 0.6469578742980957, + "loss_num": 0.046142578125, + "loss_xval": 1.5245065689086914, + "num_input_tokens_seen": 555741224, + "step": 3215 + }, + { + "epoch": 1.2317119877441594, + "grad_norm": 178.53664350802376, + "learning_rate": 5e-06, + "loss": 1.8495, + "num_input_tokens_seen": 555914120, + "step": 3216 + }, + { + "epoch": 1.2317119877441594, + "loss": 1.8208178281784058, + "loss_ce": 0.33206358551979065, + "loss_iou": 0.6912460327148438, + "loss_num": 0.021240234375, + "loss_xval": 1.4887542724609375, + "num_input_tokens_seen": 555914120, + "step": 3216 + }, + { + "epoch": 1.2320949827652241, + "grad_norm": 69.78103362951437, + "learning_rate": 5e-06, + "loss": 2.0902, + "num_input_tokens_seen": 556087096, + "step": 3217 + }, + { + "epoch": 1.2320949827652241, + "loss": 2.0778441429138184, + "loss_ce": 0.3265916705131531, + "loss_iou": 0.7904821634292603, + "loss_num": 0.0341796875, + "loss_xval": 1.7512524127960205, + "num_input_tokens_seen": 556087096, + "step": 3217 + }, + { + "epoch": 1.2324779777862889, + "grad_norm": 78.92699984343082, + "learning_rate": 5e-06, + "loss": 1.6877, + "num_input_tokens_seen": 556259840, + "step": 3218 + }, + { + "epoch": 1.2324779777862889, + "loss": 1.6588375568389893, + "loss_ce": 0.21913450956344604, + "loss_iou": 0.612734854221344, + "loss_num": 0.042724609375, + "loss_xval": 1.439703106880188, + "num_input_tokens_seen": 556259840, + "step": 3218 + }, + { + "epoch": 1.2328609728073534, + "grad_norm": 72.62503991498532, + "learning_rate": 5e-06, + "loss": 1.7251, + "num_input_tokens_seen": 556433024, + "step": 3219 + }, + { + "epoch": 1.2328609728073534, + "loss": 1.6549322605133057, + "loss_ce": 0.27027806639671326, + "loss_iou": 0.6480461359024048, + "loss_num": 0.0177001953125, + "loss_xval": 1.3846542835235596, + "num_input_tokens_seen": 556433024, + "step": 3219 + }, + { + "epoch": 1.2332439678284182, + "grad_norm": 139.84716285832107, + "learning_rate": 5e-06, + "loss": 1.6814, + "num_input_tokens_seen": 556605968, + "step": 3220 + }, + { + "epoch": 1.2332439678284182, + "loss": 1.5921721458435059, + "loss_ce": 0.23918727040290833, + "loss_iou": 0.6415956020355225, + "loss_num": 0.01397705078125, + "loss_xval": 1.35298490524292, + "num_input_tokens_seen": 556605968, + "step": 3220 + }, + { + "epoch": 1.233626962849483, + "grad_norm": 80.87651371428406, + "learning_rate": 5e-06, + "loss": 1.6152, + "num_input_tokens_seen": 556779232, + "step": 3221 + }, + { + "epoch": 1.233626962849483, + "loss": 1.5735828876495361, + "loss_ce": 0.21316048502922058, + "loss_iou": 0.6230670213699341, + "loss_num": 0.0228271484375, + "loss_xval": 1.3604223728179932, + "num_input_tokens_seen": 556779232, + "step": 3221 + }, + { + "epoch": 1.2340099578705477, + "grad_norm": 120.65197264472344, + "learning_rate": 5e-06, + "loss": 1.458, + "num_input_tokens_seen": 556951912, + "step": 3222 + }, + { + "epoch": 1.2340099578705477, + "loss": 1.3882265090942383, + "loss_ce": 0.24410924315452576, + "loss_iou": 0.5242375731468201, + "loss_num": 0.0191650390625, + "loss_xval": 1.1441172361373901, + "num_input_tokens_seen": 556951912, + "step": 3222 + }, + { + "epoch": 1.2343929528916124, + "grad_norm": 224.3041869268561, + "learning_rate": 5e-06, + "loss": 1.9567, + "num_input_tokens_seen": 557124416, + "step": 3223 + }, + { + "epoch": 1.2343929528916124, + "loss": 1.889853596687317, + "loss_ce": 0.21171405911445618, + "loss_iou": 0.7566112875938416, + "loss_num": 0.032958984375, + "loss_xval": 1.678139567375183, + "num_input_tokens_seen": 557124416, + "step": 3223 + }, + { + "epoch": 1.2347759479126772, + "grad_norm": 79.16541663282247, + "learning_rate": 5e-06, + "loss": 1.6871, + "num_input_tokens_seen": 557297456, + "step": 3224 + }, + { + "epoch": 1.2347759479126772, + "loss": 1.6730278730392456, + "loss_ce": 0.27667927742004395, + "loss_iou": 0.6475761532783508, + "loss_num": 0.020263671875, + "loss_xval": 1.3963485956192017, + "num_input_tokens_seen": 557297456, + "step": 3224 + }, + { + "epoch": 1.235158942933742, + "grad_norm": 135.92474746173508, + "learning_rate": 5e-06, + "loss": 1.6281, + "num_input_tokens_seen": 557470304, + "step": 3225 + }, + { + "epoch": 1.235158942933742, + "loss": 1.6265156269073486, + "loss_ce": 0.3339425027370453, + "loss_iou": 0.6044469475746155, + "loss_num": 0.0167236328125, + "loss_xval": 1.292573094367981, + "num_input_tokens_seen": 557470304, + "step": 3225 + }, + { + "epoch": 1.2355419379548065, + "grad_norm": 177.22506327512397, + "learning_rate": 5e-06, + "loss": 1.8915, + "num_input_tokens_seen": 557643144, + "step": 3226 + }, + { + "epoch": 1.2355419379548065, + "loss": 1.8765814304351807, + "loss_ce": 0.2532196640968323, + "loss_iou": 0.7239123582839966, + "loss_num": 0.03515625, + "loss_xval": 1.6233618259429932, + "num_input_tokens_seen": 557643144, + "step": 3226 + }, + { + "epoch": 1.2359249329758712, + "grad_norm": 172.39927254746493, + "learning_rate": 5e-06, + "loss": 1.5542, + "num_input_tokens_seen": 557816280, + "step": 3227 + }, + { + "epoch": 1.2359249329758712, + "loss": 1.4366008043289185, + "loss_ce": 0.23155264556407928, + "loss_iou": 0.5630648136138916, + "loss_num": 0.0157470703125, + "loss_xval": 1.2050480842590332, + "num_input_tokens_seen": 557816280, + "step": 3227 + }, + { + "epoch": 1.236307927996936, + "grad_norm": 119.64625104571982, + "learning_rate": 5e-06, + "loss": 1.8928, + "num_input_tokens_seen": 557989160, + "step": 3228 + }, + { + "epoch": 1.236307927996936, + "loss": 1.8278614282608032, + "loss_ce": 0.22566193342208862, + "loss_iou": 0.6986521482467651, + "loss_num": 0.041015625, + "loss_xval": 1.6021993160247803, + "num_input_tokens_seen": 557989160, + "step": 3228 + }, + { + "epoch": 1.2366909230180008, + "grad_norm": 163.34388612453532, + "learning_rate": 5e-06, + "loss": 1.6782, + "num_input_tokens_seen": 558161800, + "step": 3229 + }, + { + "epoch": 1.2366909230180008, + "loss": 1.6708768606185913, + "loss_ce": 0.2302100658416748, + "loss_iou": 0.6086390614509583, + "loss_num": 0.044677734375, + "loss_xval": 1.4406667947769165, + "num_input_tokens_seen": 558161800, + "step": 3229 + }, + { + "epoch": 1.2370739180390655, + "grad_norm": 127.47224003097482, + "learning_rate": 5e-06, + "loss": 1.5003, + "num_input_tokens_seen": 558331208, + "step": 3230 + }, + { + "epoch": 1.2370739180390655, + "loss": 1.5581037998199463, + "loss_ce": 0.27291542291641235, + "loss_iou": 0.5680092573165894, + "loss_num": 0.0299072265625, + "loss_xval": 1.2605302333831787, + "num_input_tokens_seen": 558331208, + "step": 3230 + }, + { + "epoch": 1.2374569130601303, + "grad_norm": 305.39788329021883, + "learning_rate": 5e-06, + "loss": 1.9204, + "num_input_tokens_seen": 558504008, + "step": 3231 + }, + { + "epoch": 1.2374569130601303, + "loss": 1.757807970046997, + "loss_ce": 0.24562327563762665, + "loss_iou": 0.669269859790802, + "loss_num": 0.03466796875, + "loss_xval": 1.512184739112854, + "num_input_tokens_seen": 558504008, + "step": 3231 + }, + { + "epoch": 1.237839908081195, + "grad_norm": 115.22644563825207, + "learning_rate": 5e-06, + "loss": 2.0037, + "num_input_tokens_seen": 558677088, + "step": 3232 + }, + { + "epoch": 1.237839908081195, + "loss": 2.114217758178711, + "loss_ce": 0.27020809054374695, + "loss_iou": 0.8052446246147156, + "loss_num": 0.046630859375, + "loss_xval": 1.8440097570419312, + "num_input_tokens_seen": 558677088, + "step": 3232 + }, + { + "epoch": 1.2382229031022596, + "grad_norm": 129.5269224026582, + "learning_rate": 5e-06, + "loss": 1.6434, + "num_input_tokens_seen": 558850040, + "step": 3233 + }, + { + "epoch": 1.2382229031022596, + "loss": 1.7667224407196045, + "loss_ce": 0.31137874722480774, + "loss_iou": 0.6632645130157471, + "loss_num": 0.0257568359375, + "loss_xval": 1.4553437232971191, + "num_input_tokens_seen": 558850040, + "step": 3233 + }, + { + "epoch": 1.2386058981233243, + "grad_norm": 109.59507394125446, + "learning_rate": 5e-06, + "loss": 2.0253, + "num_input_tokens_seen": 559022976, + "step": 3234 + }, + { + "epoch": 1.2386058981233243, + "loss": 1.931747555732727, + "loss_ce": 0.2941380739212036, + "loss_iou": 0.7258481979370117, + "loss_num": 0.037109375, + "loss_xval": 1.6376094818115234, + "num_input_tokens_seen": 559022976, + "step": 3234 + }, + { + "epoch": 1.238988893144389, + "grad_norm": 120.38702571982651, + "learning_rate": 5e-06, + "loss": 1.7104, + "num_input_tokens_seen": 559195960, + "step": 3235 + }, + { + "epoch": 1.238988893144389, + "loss": 1.7832870483398438, + "loss_ce": 0.23368623852729797, + "loss_iou": 0.6858416795730591, + "loss_num": 0.03564453125, + "loss_xval": 1.5215246677398682, + "num_input_tokens_seen": 559195960, + "step": 3235 + }, + { + "epoch": 1.2393718881654538, + "grad_norm": 122.12165189773208, + "learning_rate": 5e-06, + "loss": 1.8396, + "num_input_tokens_seen": 559369008, + "step": 3236 + }, + { + "epoch": 1.2393718881654538, + "loss": 1.6692432165145874, + "loss_ce": 0.2788049578666687, + "loss_iou": 0.636472761631012, + "loss_num": 0.0234375, + "loss_xval": 1.390438199043274, + "num_input_tokens_seen": 559369008, + "step": 3236 + }, + { + "epoch": 1.2397548831865186, + "grad_norm": 87.73278494127817, + "learning_rate": 5e-06, + "loss": 1.9152, + "num_input_tokens_seen": 559541816, + "step": 3237 + }, + { + "epoch": 1.2397548831865186, + "loss": 1.9559450149536133, + "loss_ce": 0.25496596097946167, + "loss_iou": 0.7817944884300232, + "loss_num": 0.0274658203125, + "loss_xval": 1.7009791135787964, + "num_input_tokens_seen": 559541816, + "step": 3237 + }, + { + "epoch": 1.2401378782075834, + "grad_norm": 121.25869855775306, + "learning_rate": 5e-06, + "loss": 1.4474, + "num_input_tokens_seen": 559714584, + "step": 3238 + }, + { + "epoch": 1.2401378782075834, + "loss": 1.5788640975952148, + "loss_ce": 0.24121490120887756, + "loss_iou": 0.6191266775131226, + "loss_num": 0.0198974609375, + "loss_xval": 1.3376491069793701, + "num_input_tokens_seen": 559714584, + "step": 3238 + }, + { + "epoch": 1.2405208732286481, + "grad_norm": 156.6318429034414, + "learning_rate": 5e-06, + "loss": 1.6067, + "num_input_tokens_seen": 559887576, + "step": 3239 + }, + { + "epoch": 1.2405208732286481, + "loss": 1.8070673942565918, + "loss_ce": 0.21646954119205475, + "loss_iou": 0.7184556722640991, + "loss_num": 0.03076171875, + "loss_xval": 1.5905978679656982, + "num_input_tokens_seen": 559887576, + "step": 3239 + }, + { + "epoch": 1.2409038682497127, + "grad_norm": 124.03688040514857, + "learning_rate": 5e-06, + "loss": 1.4161, + "num_input_tokens_seen": 560060584, + "step": 3240 + }, + { + "epoch": 1.2409038682497127, + "loss": 1.395925760269165, + "loss_ce": 0.2303113490343094, + "loss_iou": 0.5364052653312683, + "loss_num": 0.0185546875, + "loss_xval": 1.1656144857406616, + "num_input_tokens_seen": 560060584, + "step": 3240 + }, + { + "epoch": 1.2412868632707774, + "grad_norm": 106.66204792652965, + "learning_rate": 5e-06, + "loss": 1.6025, + "num_input_tokens_seen": 560233576, + "step": 3241 + }, + { + "epoch": 1.2412868632707774, + "loss": 1.7795209884643555, + "loss_ce": 0.22966763377189636, + "loss_iou": 0.6916441917419434, + "loss_num": 0.033447265625, + "loss_xval": 1.5498533248901367, + "num_input_tokens_seen": 560233576, + "step": 3241 + }, + { + "epoch": 1.2416698582918422, + "grad_norm": 154.84085934143445, + "learning_rate": 5e-06, + "loss": 1.5263, + "num_input_tokens_seen": 560406488, + "step": 3242 + }, + { + "epoch": 1.2416698582918422, + "loss": 1.4058425426483154, + "loss_ce": 0.22820329666137695, + "loss_iou": 0.513380229473114, + "loss_num": 0.0301513671875, + "loss_xval": 1.1776392459869385, + "num_input_tokens_seen": 560406488, + "step": 3242 + }, + { + "epoch": 1.242052853312907, + "grad_norm": 115.73958391415185, + "learning_rate": 5e-06, + "loss": 2.1085, + "num_input_tokens_seen": 560579680, + "step": 3243 + }, + { + "epoch": 1.242052853312907, + "loss": 2.232184410095215, + "loss_ce": 0.25877219438552856, + "loss_iou": 0.8758053183555603, + "loss_num": 0.04443359375, + "loss_xval": 1.9734123945236206, + "num_input_tokens_seen": 560579680, + "step": 3243 + }, + { + "epoch": 1.2424358483339717, + "grad_norm": 136.69204076216332, + "learning_rate": 5e-06, + "loss": 1.5209, + "num_input_tokens_seen": 560752720, + "step": 3244 + }, + { + "epoch": 1.2424358483339717, + "loss": 1.6917695999145508, + "loss_ce": 0.3209766447544098, + "loss_iou": 0.6305564045906067, + "loss_num": 0.02197265625, + "loss_xval": 1.3707929849624634, + "num_input_tokens_seen": 560752720, + "step": 3244 + }, + { + "epoch": 1.2428188433550365, + "grad_norm": 135.49102560240664, + "learning_rate": 5e-06, + "loss": 1.8771, + "num_input_tokens_seen": 560925560, + "step": 3245 + }, + { + "epoch": 1.2428188433550365, + "loss": 1.9464309215545654, + "loss_ce": 0.2025822401046753, + "loss_iou": 0.7859258055686951, + "loss_num": 0.034423828125, + "loss_xval": 1.7438486814498901, + "num_input_tokens_seen": 560925560, + "step": 3245 + }, + { + "epoch": 1.243201838376101, + "grad_norm": 135.75860197753198, + "learning_rate": 5e-06, + "loss": 1.6662, + "num_input_tokens_seen": 561098104, + "step": 3246 + }, + { + "epoch": 1.243201838376101, + "loss": 1.75368332862854, + "loss_ce": 0.23724700510501862, + "loss_iou": 0.6847623586654663, + "loss_num": 0.0294189453125, + "loss_xval": 1.5164363384246826, + "num_input_tokens_seen": 561098104, + "step": 3246 + }, + { + "epoch": 1.2435848333971657, + "grad_norm": 165.5008227522788, + "learning_rate": 5e-06, + "loss": 1.658, + "num_input_tokens_seen": 561271096, + "step": 3247 + }, + { + "epoch": 1.2435848333971657, + "loss": 1.527578353881836, + "loss_ce": 0.22737956047058105, + "loss_iou": 0.6089922785758972, + "loss_num": 0.0164794921875, + "loss_xval": 1.3001989126205444, + "num_input_tokens_seen": 561271096, + "step": 3247 + }, + { + "epoch": 1.2439678284182305, + "grad_norm": 174.5125200656963, + "learning_rate": 5e-06, + "loss": 2.3443, + "num_input_tokens_seen": 561443736, + "step": 3248 + }, + { + "epoch": 1.2439678284182305, + "loss": 2.3821632862091064, + "loss_ce": 0.24301809072494507, + "loss_iou": 0.9132615327835083, + "loss_num": 0.0625, + "loss_xval": 2.1391451358795166, + "num_input_tokens_seen": 561443736, + "step": 3248 + }, + { + "epoch": 1.2443508234392953, + "grad_norm": 87.023104835008, + "learning_rate": 5e-06, + "loss": 1.9802, + "num_input_tokens_seen": 561616816, + "step": 3249 + }, + { + "epoch": 1.2443508234392953, + "loss": 2.027251720428467, + "loss_ce": 0.2520645260810852, + "loss_iou": 0.8110250234603882, + "loss_num": 0.0306396484375, + "loss_xval": 1.7751872539520264, + "num_input_tokens_seen": 561616816, + "step": 3249 + }, + { + "epoch": 1.24473381846036, + "grad_norm": 134.0405492023822, + "learning_rate": 5e-06, + "loss": 1.5023, + "num_input_tokens_seen": 561789592, + "step": 3250 + }, + { + "epoch": 1.24473381846036, + "eval_websight_new_CIoU": 0.8580086827278137, + "eval_websight_new_GIoU": 0.8578130304813385, + "eval_websight_new_IoU": 0.85853511095047, + "eval_websight_new_MAE_all": 0.017037231475114822, + "eval_websight_new_MAE_h": 0.01280592754483223, + "eval_websight_new_MAE_w": 0.035745494067668915, + "eval_websight_new_MAE_x": 0.012769540306180716, + "eval_websight_new_MAE_y": 0.0068279586266726255, + "eval_websight_new_NUM_probability": 0.022801623679697514, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 1.7849124670028687, + "eval_websight_new_loss_ce": 0.5348544716835022, + "eval_websight_new_loss_iou": 0.5845544040203094, + "eval_websight_new_loss_num": 0.020751953125, + "eval_websight_new_loss_xval": 1.272868573665619, + "eval_websight_new_runtime": 59.4868, + "eval_websight_new_samples_per_second": 0.841, + "eval_websight_new_steps_per_second": 0.034, + "num_input_tokens_seen": 561789592, + "step": 3250 + }, + { + "epoch": 1.24473381846036, + "eval_seeclick_CIoU": 0.5733348727226257, + "eval_seeclick_GIoU": 0.5819355547428131, + "eval_seeclick_IoU": 0.6091125011444092, + "eval_seeclick_MAE_all": 0.06609445624053478, + "eval_seeclick_MAE_h": 0.0467582605779171, + "eval_seeclick_MAE_w": 0.1045493446290493, + "eval_seeclick_MAE_x": 0.0733401719480753, + "eval_seeclick_MAE_y": 0.0397300636395812, + "eval_seeclick_NUM_probability": 0.06769441440701485, + "eval_seeclick_inside_bbox": 0.8819444477558136, + "eval_seeclick_loss": 2.5507171154022217, + "eval_seeclick_loss_ce": 0.4472598135471344, + "eval_seeclick_loss_iou": 0.9121057689189911, + "eval_seeclick_loss_num": 0.0566864013671875, + "eval_seeclick_loss_xval": 2.107689380645752, + "eval_seeclick_runtime": 83.6077, + "eval_seeclick_samples_per_second": 0.598, + "eval_seeclick_steps_per_second": 0.024, + "num_input_tokens_seen": 561789592, + "step": 3250 + }, + { + "epoch": 1.24473381846036, + "eval_icons_CIoU": 0.8252148926258087, + "eval_icons_GIoU": 0.8298048079013824, + "eval_icons_IoU": 0.8399739861488342, + "eval_icons_MAE_all": 0.028438608162105083, + "eval_icons_MAE_h": 0.017280051484704018, + "eval_icons_MAE_w": 0.04606501758098602, + "eval_icons_MAE_x": 0.037893764674663544, + "eval_icons_MAE_y": 0.012515589827671647, + "eval_icons_NUM_probability": 0.23579960316419601, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 2.475114345550537, + "eval_icons_loss_ce": 0.41409504413604736, + "eval_icons_loss_iou": 0.9459198415279388, + "eval_icons_loss_num": 0.016490936279296875, + "eval_icons_loss_xval": 1.9742524027824402, + "eval_icons_runtime": 82.7171, + "eval_icons_samples_per_second": 0.604, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 561789592, + "step": 3250 + }, + { + "epoch": 1.24473381846036, + "loss": 2.4445314407348633, + "loss_ce": 0.41270190477371216, + "loss_iou": 0.9729459285736084, + "loss_num": 0.0172119140625, + "loss_xval": 2.031829357147217, + "num_input_tokens_seen": 561789592, + "step": 3250 + }, + { + "epoch": 1.2451168134814248, + "grad_norm": 228.77569955935664, + "learning_rate": 5e-06, + "loss": 2.2008, + "num_input_tokens_seen": 561963064, + "step": 3251 + }, + { + "epoch": 1.2451168134814248, + "loss": 2.1139419078826904, + "loss_ce": 0.2600467801094055, + "loss_iou": 0.8336858749389648, + "loss_num": 0.037353515625, + "loss_xval": 1.8538951873779297, + "num_input_tokens_seen": 561963064, + "step": 3251 + }, + { + "epoch": 1.2454998085024895, + "grad_norm": 110.26617648983085, + "learning_rate": 5e-06, + "loss": 2.0558, + "num_input_tokens_seen": 562136384, + "step": 3252 + }, + { + "epoch": 1.2454998085024895, + "loss": 2.0282206535339355, + "loss_ce": 0.2218882441520691, + "loss_iou": 0.7741989493370056, + "loss_num": 0.0517578125, + "loss_xval": 1.8063324689865112, + "num_input_tokens_seen": 562136384, + "step": 3252 + }, + { + "epoch": 1.2458828035235543, + "grad_norm": 92.51480806821526, + "learning_rate": 5e-06, + "loss": 1.7312, + "num_input_tokens_seen": 562309504, + "step": 3253 + }, + { + "epoch": 1.2458828035235543, + "loss": 1.6316841840744019, + "loss_ce": 0.27195367217063904, + "loss_iou": 0.5550483465194702, + "loss_num": 0.0498046875, + "loss_xval": 1.3597304821014404, + "num_input_tokens_seen": 562309504, + "step": 3253 + }, + { + "epoch": 1.2462657985446188, + "grad_norm": 145.45499441622874, + "learning_rate": 5e-06, + "loss": 1.8745, + "num_input_tokens_seen": 562482232, + "step": 3254 + }, + { + "epoch": 1.2462657985446188, + "loss": 1.952561616897583, + "loss_ce": 0.26895102858543396, + "loss_iou": 0.7191856503486633, + "loss_num": 0.049072265625, + "loss_xval": 1.6836105585098267, + "num_input_tokens_seen": 562482232, + "step": 3254 + }, + { + "epoch": 1.2466487935656836, + "grad_norm": 161.5191585467992, + "learning_rate": 5e-06, + "loss": 2.3314, + "num_input_tokens_seen": 562655464, + "step": 3255 + }, + { + "epoch": 1.2466487935656836, + "loss": 2.4464144706726074, + "loss_ce": 0.22417005896568298, + "loss_iou": 1.0176469087600708, + "loss_num": 0.037353515625, + "loss_xval": 2.2222445011138916, + "num_input_tokens_seen": 562655464, + "step": 3255 + }, + { + "epoch": 1.2470317885867483, + "grad_norm": 89.43310852326586, + "learning_rate": 5e-06, + "loss": 2.0123, + "num_input_tokens_seen": 562828528, + "step": 3256 + }, + { + "epoch": 1.2470317885867483, + "loss": 1.9258875846862793, + "loss_ce": 0.25422924757003784, + "loss_iou": 0.7314590215682983, + "loss_num": 0.041748046875, + "loss_xval": 1.6355254650115967, + "num_input_tokens_seen": 562828528, + "step": 3256 + }, + { + "epoch": 1.247414783607813, + "grad_norm": 84.93228700629363, + "learning_rate": 5e-06, + "loss": 1.9054, + "num_input_tokens_seen": 563001416, + "step": 3257 + }, + { + "epoch": 1.247414783607813, + "loss": 1.9564383029937744, + "loss_ce": 0.28483855724334717, + "loss_iou": 0.7379299402236938, + "loss_num": 0.0390625, + "loss_xval": 1.6715996265411377, + "num_input_tokens_seen": 563001416, + "step": 3257 + }, + { + "epoch": 1.2477977786288779, + "grad_norm": 97.46715560652545, + "learning_rate": 5e-06, + "loss": 1.9426, + "num_input_tokens_seen": 563174360, + "step": 3258 + }, + { + "epoch": 1.2477977786288779, + "loss": 2.0292422771453857, + "loss_ce": 0.318520724773407, + "loss_iou": 0.749098539352417, + "loss_num": 0.04248046875, + "loss_xval": 1.710721492767334, + "num_input_tokens_seen": 563174360, + "step": 3258 + }, + { + "epoch": 1.2481807736499426, + "grad_norm": 91.15667847541916, + "learning_rate": 5e-06, + "loss": 1.6545, + "num_input_tokens_seen": 563347680, + "step": 3259 + }, + { + "epoch": 1.2481807736499426, + "loss": 1.6798195838928223, + "loss_ce": 0.2844923138618469, + "loss_iou": 0.6500409245491028, + "loss_num": 0.01904296875, + "loss_xval": 1.3953272104263306, + "num_input_tokens_seen": 563347680, + "step": 3259 + }, + { + "epoch": 1.2485637686710072, + "grad_norm": 95.97606407986159, + "learning_rate": 5e-06, + "loss": 1.5331, + "num_input_tokens_seen": 563520864, + "step": 3260 + }, + { + "epoch": 1.2485637686710072, + "loss": 1.5802524089813232, + "loss_ce": 0.25654444098472595, + "loss_iou": 0.6218760013580322, + "loss_num": 0.0159912109375, + "loss_xval": 1.3237080574035645, + "num_input_tokens_seen": 563520864, + "step": 3260 + }, + { + "epoch": 1.248946763692072, + "grad_norm": 165.89110254922696, + "learning_rate": 5e-06, + "loss": 1.9111, + "num_input_tokens_seen": 563693664, + "step": 3261 + }, + { + "epoch": 1.248946763692072, + "loss": 1.8390283584594727, + "loss_ce": 0.2378949224948883, + "loss_iou": 0.7517080307006836, + "loss_num": 0.01953125, + "loss_xval": 1.6011333465576172, + "num_input_tokens_seen": 563693664, + "step": 3261 + }, + { + "epoch": 1.2493297587131367, + "grad_norm": 189.87879349929213, + "learning_rate": 5e-06, + "loss": 1.706, + "num_input_tokens_seen": 563866592, + "step": 3262 + }, + { + "epoch": 1.2493297587131367, + "loss": 1.602787971496582, + "loss_ce": 0.239871546626091, + "loss_iou": 0.6360023021697998, + "loss_num": 0.0181884765625, + "loss_xval": 1.3629164695739746, + "num_input_tokens_seen": 563866592, + "step": 3262 + }, + { + "epoch": 1.2497127537342014, + "grad_norm": 136.87018797305842, + "learning_rate": 5e-06, + "loss": 1.7997, + "num_input_tokens_seen": 564039712, + "step": 3263 + }, + { + "epoch": 1.2497127537342014, + "loss": 1.919539213180542, + "loss_ce": 0.24399109184741974, + "loss_iou": 0.7718561291694641, + "loss_num": 0.0263671875, + "loss_xval": 1.6755481958389282, + "num_input_tokens_seen": 564039712, + "step": 3263 + }, + { + "epoch": 1.2500957487552662, + "grad_norm": 145.61809969199768, + "learning_rate": 5e-06, + "loss": 1.5724, + "num_input_tokens_seen": 564212800, + "step": 3264 + }, + { + "epoch": 1.2500957487552662, + "loss": 1.554048776626587, + "loss_ce": 0.28291055560112, + "loss_iou": 0.5928444862365723, + "loss_num": 0.01708984375, + "loss_xval": 1.2711381912231445, + "num_input_tokens_seen": 564212800, + "step": 3264 + }, + { + "epoch": 1.250478743776331, + "grad_norm": 304.4632897757661, + "learning_rate": 5e-06, + "loss": 2.0111, + "num_input_tokens_seen": 564385776, + "step": 3265 + }, + { + "epoch": 1.250478743776331, + "loss": 2.1204137802124023, + "loss_ce": 0.21181172132492065, + "loss_iou": 0.8808146715164185, + "loss_num": 0.0294189453125, + "loss_xval": 1.908601999282837, + "num_input_tokens_seen": 564385776, + "step": 3265 + }, + { + "epoch": 1.2508617387973957, + "grad_norm": 120.73757773626873, + "learning_rate": 5e-06, + "loss": 2.014, + "num_input_tokens_seen": 564558968, + "step": 3266 + }, + { + "epoch": 1.2508617387973957, + "loss": 1.9903008937835693, + "loss_ce": 0.27592355012893677, + "loss_iou": 0.7686266899108887, + "loss_num": 0.035400390625, + "loss_xval": 1.7143774032592773, + "num_input_tokens_seen": 564558968, + "step": 3266 + }, + { + "epoch": 1.2512447338184605, + "grad_norm": 118.76541949402854, + "learning_rate": 5e-06, + "loss": 1.5929, + "num_input_tokens_seen": 564732080, + "step": 3267 + }, + { + "epoch": 1.2512447338184605, + "loss": 1.749008297920227, + "loss_ce": 0.26510366797447205, + "loss_iou": 0.6826719045639038, + "loss_num": 0.023681640625, + "loss_xval": 1.4839046001434326, + "num_input_tokens_seen": 564732080, + "step": 3267 + }, + { + "epoch": 1.2516277288395252, + "grad_norm": 166.23823548579298, + "learning_rate": 5e-06, + "loss": 1.8225, + "num_input_tokens_seen": 564905120, + "step": 3268 + }, + { + "epoch": 1.2516277288395252, + "loss": 1.8056085109710693, + "loss_ce": 0.2737082839012146, + "loss_iou": 0.683491587638855, + "loss_num": 0.032958984375, + "loss_xval": 1.53190016746521, + "num_input_tokens_seen": 564905120, + "step": 3268 + }, + { + "epoch": 1.2520107238605898, + "grad_norm": 192.34750726691107, + "learning_rate": 5e-06, + "loss": 1.6959, + "num_input_tokens_seen": 565078176, + "step": 3269 + }, + { + "epoch": 1.2520107238605898, + "loss": 1.9059031009674072, + "loss_ce": 0.29611802101135254, + "loss_iou": 0.7298498153686523, + "loss_num": 0.030029296875, + "loss_xval": 1.6097850799560547, + "num_input_tokens_seen": 565078176, + "step": 3269 + }, + { + "epoch": 1.2523937188816545, + "grad_norm": 61.299248319254446, + "learning_rate": 5e-06, + "loss": 1.9484, + "num_input_tokens_seen": 565250432, + "step": 3270 + }, + { + "epoch": 1.2523937188816545, + "loss": 2.06618070602417, + "loss_ce": 0.22253888845443726, + "loss_iou": 0.789618730545044, + "loss_num": 0.052978515625, + "loss_xval": 1.843641757965088, + "num_input_tokens_seen": 565250432, + "step": 3270 + }, + { + "epoch": 1.2527767139027193, + "grad_norm": 156.37870387188582, + "learning_rate": 5e-06, + "loss": 2.0245, + "num_input_tokens_seen": 565423752, + "step": 3271 + }, + { + "epoch": 1.2527767139027193, + "loss": 2.005537509918213, + "loss_ce": 0.19682714343070984, + "loss_iou": 0.8074923157691956, + "loss_num": 0.038818359375, + "loss_xval": 1.8087102174758911, + "num_input_tokens_seen": 565423752, + "step": 3271 + }, + { + "epoch": 1.253159708923784, + "grad_norm": 196.4282815454342, + "learning_rate": 5e-06, + "loss": 1.6765, + "num_input_tokens_seen": 565596736, + "step": 3272 + }, + { + "epoch": 1.253159708923784, + "loss": 1.6703040599822998, + "loss_ce": 0.20657575130462646, + "loss_iou": 0.6687232255935669, + "loss_num": 0.0252685546875, + "loss_xval": 1.4637281894683838, + "num_input_tokens_seen": 565596736, + "step": 3272 + }, + { + "epoch": 1.2535427039448488, + "grad_norm": 336.0777691554218, + "learning_rate": 5e-06, + "loss": 1.6791, + "num_input_tokens_seen": 565769752, + "step": 3273 + }, + { + "epoch": 1.2535427039448488, + "loss": 1.6296412944793701, + "loss_ce": 0.2718214988708496, + "loss_iou": 0.6263586282730103, + "loss_num": 0.02099609375, + "loss_xval": 1.3578197956085205, + "num_input_tokens_seen": 565769752, + "step": 3273 + }, + { + "epoch": 1.2539256989659133, + "grad_norm": 125.3995037519553, + "learning_rate": 5e-06, + "loss": 1.7241, + "num_input_tokens_seen": 565942672, + "step": 3274 + }, + { + "epoch": 1.2539256989659133, + "loss": 1.6929802894592285, + "loss_ce": 0.2351137101650238, + "loss_iou": 0.6609401702880859, + "loss_num": 0.0272216796875, + "loss_xval": 1.4578666687011719, + "num_input_tokens_seen": 565942672, + "step": 3274 + }, + { + "epoch": 1.254308693986978, + "grad_norm": 159.5506450773098, + "learning_rate": 5e-06, + "loss": 1.5594, + "num_input_tokens_seen": 566115584, + "step": 3275 + }, + { + "epoch": 1.254308693986978, + "loss": 1.4766712188720703, + "loss_ce": 0.17369934916496277, + "loss_iou": 0.5988736152648926, + "loss_num": 0.02099609375, + "loss_xval": 1.2925348281860352, + "num_input_tokens_seen": 566115584, + "step": 3275 + }, + { + "epoch": 1.2546916890080428, + "grad_norm": 169.4832582524888, + "learning_rate": 5e-06, + "loss": 1.7684, + "num_input_tokens_seen": 566288688, + "step": 3276 + }, + { + "epoch": 1.2546916890080428, + "loss": 1.7170007228851318, + "loss_ce": 0.2073092758655548, + "loss_iou": 0.6677485704421997, + "loss_num": 0.034912109375, + "loss_xval": 1.5096914768218994, + "num_input_tokens_seen": 566288688, + "step": 3276 + }, + { + "epoch": 1.2550746840291076, + "grad_norm": 442.3508245704993, + "learning_rate": 5e-06, + "loss": 2.0786, + "num_input_tokens_seen": 566461920, + "step": 3277 + }, + { + "epoch": 1.2550746840291076, + "loss": 1.9707210063934326, + "loss_ce": 0.29288074374198914, + "loss_iou": 0.7639079093933105, + "loss_num": 0.030029296875, + "loss_xval": 1.677840232849121, + "num_input_tokens_seen": 566461920, + "step": 3277 + }, + { + "epoch": 1.2554576790501724, + "grad_norm": 76.01274657392398, + "learning_rate": 5e-06, + "loss": 2.1564, + "num_input_tokens_seen": 566634792, + "step": 3278 + }, + { + "epoch": 1.2554576790501724, + "loss": 2.133183240890503, + "loss_ce": 0.20118530094623566, + "loss_iou": 0.8348344564437866, + "loss_num": 0.052490234375, + "loss_xval": 1.9319980144500732, + "num_input_tokens_seen": 566634792, + "step": 3278 + }, + { + "epoch": 1.2558406740712371, + "grad_norm": 237.51501335104388, + "learning_rate": 5e-06, + "loss": 2.0103, + "num_input_tokens_seen": 566808032, + "step": 3279 + }, + { + "epoch": 1.2558406740712371, + "loss": 1.9694063663482666, + "loss_ce": 0.22570320963859558, + "loss_iou": 0.6653085947036743, + "loss_num": 0.08251953125, + "loss_xval": 1.7437031269073486, + "num_input_tokens_seen": 566808032, + "step": 3279 + }, + { + "epoch": 1.2562236690923019, + "grad_norm": 154.0387832752544, + "learning_rate": 5e-06, + "loss": 1.588, + "num_input_tokens_seen": 566980776, + "step": 3280 + }, + { + "epoch": 1.2562236690923019, + "loss": 1.5974631309509277, + "loss_ce": 0.25681233406066895, + "loss_iou": 0.6179877519607544, + "loss_num": 0.02099609375, + "loss_xval": 1.3284437656402588, + "num_input_tokens_seen": 566980776, + "step": 3280 + }, + { + "epoch": 1.2566066641133666, + "grad_norm": 228.31667108048424, + "learning_rate": 5e-06, + "loss": 2.1985, + "num_input_tokens_seen": 567153464, + "step": 3281 + }, + { + "epoch": 1.2566066641133666, + "loss": 2.1468119621276855, + "loss_ce": 0.2339622676372528, + "loss_iou": 0.8826028108596802, + "loss_num": 0.029541015625, + "loss_xval": 1.9128496646881104, + "num_input_tokens_seen": 567153464, + "step": 3281 + }, + { + "epoch": 1.2569896591344312, + "grad_norm": 95.64891904610043, + "learning_rate": 5e-06, + "loss": 2.2372, + "num_input_tokens_seen": 567326832, + "step": 3282 + }, + { + "epoch": 1.2569896591344312, + "loss": 2.37243390083313, + "loss_ce": 0.25682705640792847, + "loss_iou": 0.9149811267852783, + "loss_num": 0.05712890625, + "loss_xval": 2.1156067848205566, + "num_input_tokens_seen": 567326832, + "step": 3282 + }, + { + "epoch": 1.257372654155496, + "grad_norm": 91.41657487146114, + "learning_rate": 5e-06, + "loss": 1.6062, + "num_input_tokens_seen": 567499528, + "step": 3283 + }, + { + "epoch": 1.257372654155496, + "loss": 1.4798704385757446, + "loss_ce": 0.25309568643569946, + "loss_iou": 0.54359370470047, + "loss_num": 0.0279541015625, + "loss_xval": 1.22677481174469, + "num_input_tokens_seen": 567499528, + "step": 3283 + }, + { + "epoch": 1.2577556491765607, + "grad_norm": 238.74094096290244, + "learning_rate": 5e-06, + "loss": 1.9775, + "num_input_tokens_seen": 567672440, + "step": 3284 + }, + { + "epoch": 1.2577556491765607, + "loss": 1.8096680641174316, + "loss_ce": 0.24874097108840942, + "loss_iou": 0.7278817892074585, + "loss_num": 0.02099609375, + "loss_xval": 1.560927152633667, + "num_input_tokens_seen": 567672440, + "step": 3284 + }, + { + "epoch": 1.2581386441976254, + "grad_norm": 159.01997976887932, + "learning_rate": 5e-06, + "loss": 1.7992, + "num_input_tokens_seen": 567845352, + "step": 3285 + }, + { + "epoch": 1.2581386441976254, + "loss": 1.9142980575561523, + "loss_ce": 0.26271456480026245, + "loss_iou": 0.7631086111068726, + "loss_num": 0.0250244140625, + "loss_xval": 1.6515834331512451, + "num_input_tokens_seen": 567845352, + "step": 3285 + }, + { + "epoch": 1.2585216392186902, + "grad_norm": 181.6217377870929, + "learning_rate": 5e-06, + "loss": 1.6094, + "num_input_tokens_seen": 568018296, + "step": 3286 + }, + { + "epoch": 1.2585216392186902, + "loss": 1.5701494216918945, + "loss_ce": 0.26261407136917114, + "loss_iou": 0.5949907898902893, + "loss_num": 0.0235595703125, + "loss_xval": 1.3075352907180786, + "num_input_tokens_seen": 568018296, + "step": 3286 + }, + { + "epoch": 1.258904634239755, + "grad_norm": 106.04391570828672, + "learning_rate": 5e-06, + "loss": 1.8732, + "num_input_tokens_seen": 568191448, + "step": 3287 + }, + { + "epoch": 1.258904634239755, + "loss": 1.8496975898742676, + "loss_ce": 0.2655804753303528, + "loss_iou": 0.7483574151992798, + "loss_num": 0.0174560546875, + "loss_xval": 1.5841171741485596, + "num_input_tokens_seen": 568191448, + "step": 3287 + }, + { + "epoch": 1.2592876292608195, + "grad_norm": 77.93709908838132, + "learning_rate": 5e-06, + "loss": 1.4416, + "num_input_tokens_seen": 568364240, + "step": 3288 + }, + { + "epoch": 1.2592876292608195, + "loss": 1.5831642150878906, + "loss_ce": 0.28663915395736694, + "loss_iou": 0.5807881355285645, + "loss_num": 0.0269775390625, + "loss_xval": 1.296525001525879, + "num_input_tokens_seen": 568364240, + "step": 3288 + }, + { + "epoch": 1.2596706242818843, + "grad_norm": 92.30320114278938, + "learning_rate": 5e-06, + "loss": 1.5375, + "num_input_tokens_seen": 568537336, + "step": 3289 + }, + { + "epoch": 1.2596706242818843, + "loss": 1.4855799674987793, + "loss_ce": 0.258095383644104, + "loss_iou": 0.5723146796226501, + "loss_num": 0.0166015625, + "loss_xval": 1.2274845838546753, + "num_input_tokens_seen": 568537336, + "step": 3289 + }, + { + "epoch": 1.260053619302949, + "grad_norm": 82.06844638151883, + "learning_rate": 5e-06, + "loss": 1.6525, + "num_input_tokens_seen": 568710128, + "step": 3290 + }, + { + "epoch": 1.260053619302949, + "loss": 1.7507693767547607, + "loss_ce": 0.2482771873474121, + "loss_iou": 0.6878000497817993, + "loss_num": 0.025390625, + "loss_xval": 1.5024921894073486, + "num_input_tokens_seen": 568710128, + "step": 3290 + }, + { + "epoch": 1.2604366143240138, + "grad_norm": 222.3930668374789, + "learning_rate": 5e-06, + "loss": 1.6031, + "num_input_tokens_seen": 568883000, + "step": 3291 + }, + { + "epoch": 1.2604366143240138, + "loss": 1.6394113302230835, + "loss_ce": 0.18138174712657928, + "loss_iou": 0.6749376654624939, + "loss_num": 0.0216064453125, + "loss_xval": 1.4580296277999878, + "num_input_tokens_seen": 568883000, + "step": 3291 + }, + { + "epoch": 1.2608196093450785, + "grad_norm": 114.01998218747941, + "learning_rate": 5e-06, + "loss": 1.867, + "num_input_tokens_seen": 569056168, + "step": 3292 + }, + { + "epoch": 1.2608196093450785, + "loss": 1.9098315238952637, + "loss_ce": 0.24824270606040955, + "loss_iou": 0.7440940141677856, + "loss_num": 0.03466796875, + "loss_xval": 1.6615889072418213, + "num_input_tokens_seen": 569056168, + "step": 3292 + }, + { + "epoch": 1.2612026043661433, + "grad_norm": 92.28780702393767, + "learning_rate": 5e-06, + "loss": 1.5238, + "num_input_tokens_seen": 569229304, + "step": 3293 + }, + { + "epoch": 1.2612026043661433, + "loss": 1.4576985836029053, + "loss_ce": 0.23735719919204712, + "loss_iou": 0.549181342124939, + "loss_num": 0.0244140625, + "loss_xval": 1.220341444015503, + "num_input_tokens_seen": 569229304, + "step": 3293 + }, + { + "epoch": 1.261585599387208, + "grad_norm": 158.6336337533615, + "learning_rate": 5e-06, + "loss": 1.5514, + "num_input_tokens_seen": 569402488, + "step": 3294 + }, + { + "epoch": 1.261585599387208, + "loss": 1.596184492111206, + "loss_ce": 0.21201471984386444, + "loss_iou": 0.6427226662635803, + "loss_num": 0.019775390625, + "loss_xval": 1.3841696977615356, + "num_input_tokens_seen": 569402488, + "step": 3294 + }, + { + "epoch": 1.2619685944082728, + "grad_norm": 157.9328062451583, + "learning_rate": 5e-06, + "loss": 1.9171, + "num_input_tokens_seen": 569575648, + "step": 3295 + }, + { + "epoch": 1.2619685944082728, + "loss": 1.813136100769043, + "loss_ce": 0.2165515124797821, + "loss_iou": 0.739820659160614, + "loss_num": 0.0233154296875, + "loss_xval": 1.596584677696228, + "num_input_tokens_seen": 569575648, + "step": 3295 + }, + { + "epoch": 1.2623515894293373, + "grad_norm": 71.1308307861241, + "learning_rate": 5e-06, + "loss": 2.1265, + "num_input_tokens_seen": 569748648, + "step": 3296 + }, + { + "epoch": 1.2623515894293373, + "loss": 2.096177339553833, + "loss_ce": 0.23352748155593872, + "loss_iou": 0.80034339427948, + "loss_num": 0.05224609375, + "loss_xval": 1.86264967918396, + "num_input_tokens_seen": 569748648, + "step": 3296 + }, + { + "epoch": 1.262734584450402, + "grad_norm": 307.3840565641425, + "learning_rate": 5e-06, + "loss": 1.9823, + "num_input_tokens_seen": 569921568, + "step": 3297 + }, + { + "epoch": 1.262734584450402, + "loss": 2.0598111152648926, + "loss_ce": 0.2661011815071106, + "loss_iou": 0.8003278374671936, + "loss_num": 0.03857421875, + "loss_xval": 1.7937098741531372, + "num_input_tokens_seen": 569921568, + "step": 3297 + }, + { + "epoch": 1.2631175794714669, + "grad_norm": 116.75184479233158, + "learning_rate": 5e-06, + "loss": 1.894, + "num_input_tokens_seen": 570094896, + "step": 3298 + }, + { + "epoch": 1.2631175794714669, + "loss": 1.9589283466339111, + "loss_ce": 0.22966188192367554, + "loss_iou": 0.7824494242668152, + "loss_num": 0.032958984375, + "loss_xval": 1.7292665243148804, + "num_input_tokens_seen": 570094896, + "step": 3298 + }, + { + "epoch": 1.2635005744925316, + "grad_norm": 202.01678384155286, + "learning_rate": 5e-06, + "loss": 1.8331, + "num_input_tokens_seen": 570267528, + "step": 3299 + }, + { + "epoch": 1.2635005744925316, + "loss": 1.9015049934387207, + "loss_ce": 0.2262372225522995, + "loss_iou": 0.7774227261543274, + "loss_num": 0.0240478515625, + "loss_xval": 1.6752678155899048, + "num_input_tokens_seen": 570267528, + "step": 3299 + }, + { + "epoch": 1.2638835695135964, + "grad_norm": 152.66790830149523, + "learning_rate": 5e-06, + "loss": 1.6035, + "num_input_tokens_seen": 570440568, + "step": 3300 + }, + { + "epoch": 1.2638835695135964, + "loss": 1.636314034461975, + "loss_ce": 0.24354144930839539, + "loss_iou": 0.6540737152099609, + "loss_num": 0.0169677734375, + "loss_xval": 1.3927726745605469, + "num_input_tokens_seen": 570440568, + "step": 3300 + }, + { + "epoch": 1.2642665645346611, + "grad_norm": 425.4893270942091, + "learning_rate": 5e-06, + "loss": 1.8108, + "num_input_tokens_seen": 570613776, + "step": 3301 + }, + { + "epoch": 1.2642665645346611, + "loss": 1.7876484394073486, + "loss_ce": 0.22738194465637207, + "loss_iou": 0.7047243118286133, + "loss_num": 0.0301513671875, + "loss_xval": 1.5602664947509766, + "num_input_tokens_seen": 570613776, + "step": 3301 + }, + { + "epoch": 1.2646495595557257, + "grad_norm": 79.07818183845008, + "learning_rate": 5e-06, + "loss": 2.1274, + "num_input_tokens_seen": 570787008, + "step": 3302 + }, + { + "epoch": 1.2646495595557257, + "loss": 2.1811628341674805, + "loss_ce": 0.20051544904708862, + "loss_iou": 0.8656898736953735, + "loss_num": 0.0498046875, + "loss_xval": 1.980647325515747, + "num_input_tokens_seen": 570787008, + "step": 3302 + }, + { + "epoch": 1.2650325545767904, + "grad_norm": 112.437851888758, + "learning_rate": 5e-06, + "loss": 1.5488, + "num_input_tokens_seen": 570956728, + "step": 3303 + }, + { + "epoch": 1.2650325545767904, + "loss": 1.6475396156311035, + "loss_ce": 0.2554776966571808, + "loss_iou": 0.6174481511116028, + "loss_num": 0.031494140625, + "loss_xval": 1.3920618295669556, + "num_input_tokens_seen": 570956728, + "step": 3303 + }, + { + "epoch": 1.2654155495978552, + "grad_norm": 169.52040758555097, + "learning_rate": 5e-06, + "loss": 2.4792, + "num_input_tokens_seen": 571130008, + "step": 3304 + }, + { + "epoch": 1.2654155495978552, + "loss": 2.4361348152160645, + "loss_ce": 0.28582724928855896, + "loss_iou": 0.9875988960266113, + "loss_num": 0.03515625, + "loss_xval": 2.1503076553344727, + "num_input_tokens_seen": 571130008, + "step": 3304 + }, + { + "epoch": 1.26579854461892, + "grad_norm": 83.45002335813479, + "learning_rate": 5e-06, + "loss": 2.1598, + "num_input_tokens_seen": 571302904, + "step": 3305 + }, + { + "epoch": 1.26579854461892, + "loss": 2.2095844745635986, + "loss_ce": 0.222993403673172, + "loss_iou": 0.8178194761276245, + "loss_num": 0.0703125, + "loss_xval": 1.9865909814834595, + "num_input_tokens_seen": 571302904, + "step": 3305 + }, + { + "epoch": 1.2661815396399847, + "grad_norm": 75.82920681965383, + "learning_rate": 5e-06, + "loss": 1.6185, + "num_input_tokens_seen": 571475680, + "step": 3306 + }, + { + "epoch": 1.2661815396399847, + "loss": 1.6119019985198975, + "loss_ce": 0.2990841865539551, + "loss_iou": 0.5760866403579712, + "loss_num": 0.031982421875, + "loss_xval": 1.3128178119659424, + "num_input_tokens_seen": 571475680, + "step": 3306 + }, + { + "epoch": 1.2665645346610495, + "grad_norm": 143.68126024410492, + "learning_rate": 5e-06, + "loss": 1.8082, + "num_input_tokens_seen": 571648600, + "step": 3307 + }, + { + "epoch": 1.2665645346610495, + "loss": 2.061361789703369, + "loss_ce": 0.3056067228317261, + "loss_iou": 0.8106932044029236, + "loss_num": 0.02685546875, + "loss_xval": 1.7557553052902222, + "num_input_tokens_seen": 571648600, + "step": 3307 + }, + { + "epoch": 1.2669475296821142, + "grad_norm": 179.8805817421556, + "learning_rate": 5e-06, + "loss": 1.7957, + "num_input_tokens_seen": 571821520, + "step": 3308 + }, + { + "epoch": 1.2669475296821142, + "loss": 1.7377934455871582, + "loss_ce": 0.19629667699337006, + "loss_iou": 0.709514856338501, + "loss_num": 0.0245361328125, + "loss_xval": 1.506584644317627, + "num_input_tokens_seen": 571821520, + "step": 3308 + }, + { + "epoch": 1.267330524703179, + "grad_norm": 84.08926843473174, + "learning_rate": 5e-06, + "loss": 1.6271, + "num_input_tokens_seen": 571994152, + "step": 3309 + }, + { + "epoch": 1.267330524703179, + "loss": 1.6579076051712036, + "loss_ce": 0.2033272236585617, + "loss_iou": 0.6503553986549377, + "loss_num": 0.03076171875, + "loss_xval": 1.4545804262161255, + "num_input_tokens_seen": 571994152, + "step": 3309 + }, + { + "epoch": 1.2677135197242435, + "grad_norm": 216.91039107143519, + "learning_rate": 5e-06, + "loss": 1.6531, + "num_input_tokens_seen": 572166848, + "step": 3310 + }, + { + "epoch": 1.2677135197242435, + "loss": 1.5964694023132324, + "loss_ce": 0.20418967306613922, + "loss_iou": 0.658084511756897, + "loss_num": 0.01519775390625, + "loss_xval": 1.392279863357544, + "num_input_tokens_seen": 572166848, + "step": 3310 + }, + { + "epoch": 1.2680965147453083, + "grad_norm": 109.1279957353923, + "learning_rate": 5e-06, + "loss": 2.0814, + "num_input_tokens_seen": 572340224, + "step": 3311 + }, + { + "epoch": 1.2680965147453083, + "loss": 1.9804760217666626, + "loss_ce": 0.26399368047714233, + "loss_iou": 0.762598991394043, + "loss_num": 0.038330078125, + "loss_xval": 1.716482162475586, + "num_input_tokens_seen": 572340224, + "step": 3311 + }, + { + "epoch": 1.268479509766373, + "grad_norm": 107.37991499709248, + "learning_rate": 5e-06, + "loss": 1.7606, + "num_input_tokens_seen": 572513376, + "step": 3312 + }, + { + "epoch": 1.268479509766373, + "loss": 1.807293176651001, + "loss_ce": 0.22050227224826813, + "loss_iou": 0.7271722555160522, + "loss_num": 0.0264892578125, + "loss_xval": 1.5867908000946045, + "num_input_tokens_seen": 572513376, + "step": 3312 + }, + { + "epoch": 1.2688625047874378, + "grad_norm": 91.16603981539338, + "learning_rate": 5e-06, + "loss": 1.9189, + "num_input_tokens_seen": 572686184, + "step": 3313 + }, + { + "epoch": 1.2688625047874378, + "loss": 2.00659441947937, + "loss_ce": 0.21515393257141113, + "loss_iou": 0.8299853801727295, + "loss_num": 0.0262451171875, + "loss_xval": 1.791440486907959, + "num_input_tokens_seen": 572686184, + "step": 3313 + }, + { + "epoch": 1.2692454998085025, + "grad_norm": 101.56326843579461, + "learning_rate": 5e-06, + "loss": 1.8273, + "num_input_tokens_seen": 572859016, + "step": 3314 + }, + { + "epoch": 1.2692454998085025, + "loss": 1.8420684337615967, + "loss_ce": 0.21937984228134155, + "loss_iou": 0.7437478303909302, + "loss_num": 0.027099609375, + "loss_xval": 1.6226885318756104, + "num_input_tokens_seen": 572859016, + "step": 3314 + }, + { + "epoch": 1.269628494829567, + "grad_norm": 102.3069855191529, + "learning_rate": 5e-06, + "loss": 1.3796, + "num_input_tokens_seen": 573031664, + "step": 3315 + }, + { + "epoch": 1.269628494829567, + "loss": 1.4531071186065674, + "loss_ce": 0.252878874540329, + "loss_iou": 0.5692150592803955, + "loss_num": 0.0123291015625, + "loss_xval": 1.200228214263916, + "num_input_tokens_seen": 573031664, + "step": 3315 + }, + { + "epoch": 1.2700114898506318, + "grad_norm": 126.41855887441481, + "learning_rate": 5e-06, + "loss": 1.44, + "num_input_tokens_seen": 573204672, + "step": 3316 + }, + { + "epoch": 1.2700114898506318, + "loss": 1.4367523193359375, + "loss_ce": 0.21155793964862823, + "loss_iou": 0.5507381558418274, + "loss_num": 0.0247802734375, + "loss_xval": 1.2251945734024048, + "num_input_tokens_seen": 573204672, + "step": 3316 + }, + { + "epoch": 1.2703944848716966, + "grad_norm": 133.43197200117166, + "learning_rate": 5e-06, + "loss": 1.6787, + "num_input_tokens_seen": 573377680, + "step": 3317 + }, + { + "epoch": 1.2703944848716966, + "loss": 1.887868046760559, + "loss_ce": 0.2615564167499542, + "loss_iou": 0.7499233484268188, + "loss_num": 0.0252685546875, + "loss_xval": 1.6263115406036377, + "num_input_tokens_seen": 573377680, + "step": 3317 + }, + { + "epoch": 1.2707774798927614, + "grad_norm": 112.2828628960354, + "learning_rate": 5e-06, + "loss": 2.0386, + "num_input_tokens_seen": 573550792, + "step": 3318 + }, + { + "epoch": 1.2707774798927614, + "loss": 2.0267248153686523, + "loss_ce": 0.15854592621326447, + "loss_iou": 0.8536450862884521, + "loss_num": 0.0322265625, + "loss_xval": 1.8681788444519043, + "num_input_tokens_seen": 573550792, + "step": 3318 + }, + { + "epoch": 1.271160474913826, + "grad_norm": 80.14634241609927, + "learning_rate": 5e-06, + "loss": 1.6105, + "num_input_tokens_seen": 573723624, + "step": 3319 + }, + { + "epoch": 1.271160474913826, + "loss": 1.638106346130371, + "loss_ce": 0.24392704665660858, + "loss_iou": 0.616645336151123, + "loss_num": 0.0322265625, + "loss_xval": 1.394179344177246, + "num_input_tokens_seen": 573723624, + "step": 3319 + }, + { + "epoch": 1.2715434699348909, + "grad_norm": 231.3540922648395, + "learning_rate": 5e-06, + "loss": 1.6655, + "num_input_tokens_seen": 573896544, + "step": 3320 + }, + { + "epoch": 1.2715434699348909, + "loss": 1.5085893869400024, + "loss_ce": 0.2365088164806366, + "loss_iou": 0.595192551612854, + "loss_num": 0.016357421875, + "loss_xval": 1.272080659866333, + "num_input_tokens_seen": 573896544, + "step": 3320 + }, + { + "epoch": 1.2719264649559556, + "grad_norm": 72.31407858217699, + "learning_rate": 5e-06, + "loss": 1.9471, + "num_input_tokens_seen": 574069736, + "step": 3321 + }, + { + "epoch": 1.2719264649559556, + "loss": 1.8264158964157104, + "loss_ce": 0.23415064811706543, + "loss_iou": 0.7034813165664673, + "loss_num": 0.037109375, + "loss_xval": 1.5922653675079346, + "num_input_tokens_seen": 574069736, + "step": 3321 + }, + { + "epoch": 1.2723094599770204, + "grad_norm": 100.37074009002036, + "learning_rate": 5e-06, + "loss": 1.6824, + "num_input_tokens_seen": 574242672, + "step": 3322 + }, + { + "epoch": 1.2723094599770204, + "loss": 1.4854986667633057, + "loss_ce": 0.24368014931678772, + "loss_iou": 0.5532212257385254, + "loss_num": 0.027099609375, + "loss_xval": 1.2418184280395508, + "num_input_tokens_seen": 574242672, + "step": 3322 + }, + { + "epoch": 1.2726924549980851, + "grad_norm": 122.5845039333213, + "learning_rate": 5e-06, + "loss": 2.0082, + "num_input_tokens_seen": 574415792, + "step": 3323 + }, + { + "epoch": 1.2726924549980851, + "loss": 2.0704798698425293, + "loss_ce": 0.2315565049648285, + "loss_iou": 0.858945369720459, + "loss_num": 0.024169921875, + "loss_xval": 1.838923454284668, + "num_input_tokens_seen": 574415792, + "step": 3323 + }, + { + "epoch": 1.2730754500191497, + "grad_norm": 90.9971326969193, + "learning_rate": 5e-06, + "loss": 1.9792, + "num_input_tokens_seen": 574588576, + "step": 3324 + }, + { + "epoch": 1.2730754500191497, + "loss": 2.1510653495788574, + "loss_ce": 0.21998129785060883, + "loss_iou": 0.8659937381744385, + "loss_num": 0.039794921875, + "loss_xval": 1.9310842752456665, + "num_input_tokens_seen": 574588576, + "step": 3324 + }, + { + "epoch": 1.2734584450402144, + "grad_norm": 161.08256716104233, + "learning_rate": 5e-06, + "loss": 1.4981, + "num_input_tokens_seen": 574761496, + "step": 3325 + }, + { + "epoch": 1.2734584450402144, + "loss": 1.5489988327026367, + "loss_ce": 0.26540493965148926, + "loss_iou": 0.5177735090255737, + "loss_num": 0.049560546875, + "loss_xval": 1.2835938930511475, + "num_input_tokens_seen": 574761496, + "step": 3325 + }, + { + "epoch": 1.2738414400612792, + "grad_norm": 81.783766369408, + "learning_rate": 5e-06, + "loss": 1.9854, + "num_input_tokens_seen": 574934272, + "step": 3326 + }, + { + "epoch": 1.2738414400612792, + "loss": 1.9096903800964355, + "loss_ce": 0.1965281367301941, + "loss_iou": 0.7584366202354431, + "loss_num": 0.039306640625, + "loss_xval": 1.7131623029708862, + "num_input_tokens_seen": 574934272, + "step": 3326 + }, + { + "epoch": 1.274224435082344, + "grad_norm": 161.39022465012826, + "learning_rate": 5e-06, + "loss": 1.5728, + "num_input_tokens_seen": 575107376, + "step": 3327 + }, + { + "epoch": 1.274224435082344, + "loss": 1.6352365016937256, + "loss_ce": 0.22083421051502228, + "loss_iou": 0.6329213380813599, + "loss_num": 0.0296630859375, + "loss_xval": 1.4144022464752197, + "num_input_tokens_seen": 575107376, + "step": 3327 + }, + { + "epoch": 1.2746074301034087, + "grad_norm": 66.53598461945988, + "learning_rate": 5e-06, + "loss": 1.9659, + "num_input_tokens_seen": 575280264, + "step": 3328 + }, + { + "epoch": 1.2746074301034087, + "loss": 2.036944627761841, + "loss_ce": 0.22707851231098175, + "loss_iou": 0.7948256731033325, + "loss_num": 0.0439453125, + "loss_xval": 1.809866189956665, + "num_input_tokens_seen": 575280264, + "step": 3328 + }, + { + "epoch": 1.2749904251244732, + "grad_norm": 77.72813265415147, + "learning_rate": 5e-06, + "loss": 1.4719, + "num_input_tokens_seen": 575453096, + "step": 3329 + }, + { + "epoch": 1.2749904251244732, + "loss": 1.4825770854949951, + "loss_ce": 0.22733338177204132, + "loss_iou": 0.5647861957550049, + "loss_num": 0.025146484375, + "loss_xval": 1.2451729774475098, + "num_input_tokens_seen": 575453096, + "step": 3329 + }, + { + "epoch": 1.275373420145538, + "grad_norm": 211.22215426560996, + "learning_rate": 5e-06, + "loss": 2.1258, + "num_input_tokens_seen": 575626136, + "step": 3330 + }, + { + "epoch": 1.275373420145538, + "loss": 2.1490392684936523, + "loss_ce": 0.27700331807136536, + "loss_iou": 0.8870066404342651, + "loss_num": 0.0196533203125, + "loss_xval": 1.8720357418060303, + "num_input_tokens_seen": 575626136, + "step": 3330 + }, + { + "epoch": 1.2757564151666028, + "grad_norm": 109.66363871140655, + "learning_rate": 5e-06, + "loss": 2.2011, + "num_input_tokens_seen": 575799360, + "step": 3331 + }, + { + "epoch": 1.2757564151666028, + "loss": 2.222154378890991, + "loss_ce": 0.2608831822872162, + "loss_iou": 0.8851766586303711, + "loss_num": 0.038330078125, + "loss_xval": 1.9612712860107422, + "num_input_tokens_seen": 575799360, + "step": 3331 + }, + { + "epoch": 1.2761394101876675, + "grad_norm": 115.06326247553235, + "learning_rate": 5e-06, + "loss": 1.7082, + "num_input_tokens_seen": 575971848, + "step": 3332 + }, + { + "epoch": 1.2761394101876675, + "loss": 1.5471818447113037, + "loss_ce": 0.21208080649375916, + "loss_iou": 0.5933317542076111, + "loss_num": 0.0296630859375, + "loss_xval": 1.3138607740402222, + "num_input_tokens_seen": 575971848, + "step": 3332 + }, + { + "epoch": 1.2765224052087323, + "grad_norm": 125.10481966929216, + "learning_rate": 5e-06, + "loss": 1.718, + "num_input_tokens_seen": 576144984, + "step": 3333 + }, + { + "epoch": 1.2765224052087323, + "loss": 1.7597615718841553, + "loss_ce": 0.22273939847946167, + "loss_iou": 0.7130453586578369, + "loss_num": 0.022216796875, + "loss_xval": 1.5370221138000488, + "num_input_tokens_seen": 576144984, + "step": 3333 + }, + { + "epoch": 1.276905400229797, + "grad_norm": 98.92262775558656, + "learning_rate": 5e-06, + "loss": 1.7493, + "num_input_tokens_seen": 576318296, + "step": 3334 + }, + { + "epoch": 1.276905400229797, + "loss": 1.7959173917770386, + "loss_ce": 0.25960248708724976, + "loss_iou": 0.694945752620697, + "loss_num": 0.029296875, + "loss_xval": 1.536314845085144, + "num_input_tokens_seen": 576318296, + "step": 3334 + }, + { + "epoch": 1.2772883952508618, + "grad_norm": 98.34931008554716, + "learning_rate": 5e-06, + "loss": 1.5485, + "num_input_tokens_seen": 576491080, + "step": 3335 + }, + { + "epoch": 1.2772883952508618, + "loss": 1.4789702892303467, + "loss_ce": 0.2304389774799347, + "loss_iou": 0.5721111297607422, + "loss_num": 0.0208740234375, + "loss_xval": 1.2485313415527344, + "num_input_tokens_seen": 576491080, + "step": 3335 + }, + { + "epoch": 1.2776713902719266, + "grad_norm": 105.37604160624177, + "learning_rate": 5e-06, + "loss": 1.9593, + "num_input_tokens_seen": 576663952, + "step": 3336 + }, + { + "epoch": 1.2776713902719266, + "loss": 2.047152280807495, + "loss_ce": 0.26648885011672974, + "loss_iou": 0.8120236396789551, + "loss_num": 0.03125, + "loss_xval": 1.7806634902954102, + "num_input_tokens_seen": 576663952, + "step": 3336 + }, + { + "epoch": 1.2780543852929913, + "grad_norm": 98.1583425890488, + "learning_rate": 5e-06, + "loss": 1.3958, + "num_input_tokens_seen": 576837200, + "step": 3337 + }, + { + "epoch": 1.2780543852929913, + "loss": 1.3965688943862915, + "loss_ce": 0.24463441967964172, + "loss_iou": 0.5312895178794861, + "loss_num": 0.017822265625, + "loss_xval": 1.1519345045089722, + "num_input_tokens_seen": 576837200, + "step": 3337 + }, + { + "epoch": 1.2784373803140558, + "grad_norm": 178.8829176594653, + "learning_rate": 5e-06, + "loss": 1.7012, + "num_input_tokens_seen": 577010496, + "step": 3338 + }, + { + "epoch": 1.2784373803140558, + "loss": 1.8009527921676636, + "loss_ce": 0.2655678987503052, + "loss_iou": 0.7156294584274292, + "loss_num": 0.0208740234375, + "loss_xval": 1.5353848934173584, + "num_input_tokens_seen": 577010496, + "step": 3338 + }, + { + "epoch": 1.2788203753351206, + "grad_norm": 99.67392432700711, + "learning_rate": 5e-06, + "loss": 1.878, + "num_input_tokens_seen": 577183768, + "step": 3339 + }, + { + "epoch": 1.2788203753351206, + "loss": 1.8287030458450317, + "loss_ce": 0.244386687874794, + "loss_iou": 0.6971874237060547, + "loss_num": 0.0380859375, + "loss_xval": 1.5843162536621094, + "num_input_tokens_seen": 577183768, + "step": 3339 + }, + { + "epoch": 1.2792033703561854, + "grad_norm": 107.38374897880072, + "learning_rate": 5e-06, + "loss": 1.6041, + "num_input_tokens_seen": 577356544, + "step": 3340 + }, + { + "epoch": 1.2792033703561854, + "loss": 1.550626277923584, + "loss_ce": 0.26853227615356445, + "loss_iou": 0.5984139442443848, + "loss_num": 0.01708984375, + "loss_xval": 1.2820940017700195, + "num_input_tokens_seen": 577356544, + "step": 3340 + }, + { + "epoch": 1.2795863653772501, + "grad_norm": 123.60648613766934, + "learning_rate": 5e-06, + "loss": 1.9738, + "num_input_tokens_seen": 577529592, + "step": 3341 + }, + { + "epoch": 1.2795863653772501, + "loss": 2.078113555908203, + "loss_ce": 0.2186749279499054, + "loss_iou": 0.8510449528694153, + "loss_num": 0.031494140625, + "loss_xval": 1.859438419342041, + "num_input_tokens_seen": 577529592, + "step": 3341 + }, + { + "epoch": 1.2799693603983149, + "grad_norm": 93.47068258699876, + "learning_rate": 5e-06, + "loss": 1.9349, + "num_input_tokens_seen": 577702624, + "step": 3342 + }, + { + "epoch": 1.2799693603983149, + "loss": 1.8729562759399414, + "loss_ce": 0.2133171111345291, + "loss_iou": 0.7257546782493591, + "loss_num": 0.041748046875, + "loss_xval": 1.6596392393112183, + "num_input_tokens_seen": 577702624, + "step": 3342 + }, + { + "epoch": 1.2803523554193794, + "grad_norm": 94.29295943043381, + "learning_rate": 5e-06, + "loss": 1.3165, + "num_input_tokens_seen": 577875264, + "step": 3343 + }, + { + "epoch": 1.2803523554193794, + "loss": 1.3596270084381104, + "loss_ce": 0.26244881749153137, + "loss_iou": 0.5014546513557434, + "loss_num": 0.0189208984375, + "loss_xval": 1.0971781015396118, + "num_input_tokens_seen": 577875264, + "step": 3343 + }, + { + "epoch": 1.2807353504404442, + "grad_norm": 124.71385888265097, + "learning_rate": 5e-06, + "loss": 1.6864, + "num_input_tokens_seen": 578047864, + "step": 3344 + }, + { + "epoch": 1.2807353504404442, + "loss": 1.8556559085845947, + "loss_ce": 0.21935562789440155, + "loss_iou": 0.7627912759780884, + "loss_num": 0.0220947265625, + "loss_xval": 1.6363003253936768, + "num_input_tokens_seen": 578047864, + "step": 3344 + }, + { + "epoch": 1.281118345461509, + "grad_norm": 151.5891353046151, + "learning_rate": 5e-06, + "loss": 1.8347, + "num_input_tokens_seen": 578220520, + "step": 3345 + }, + { + "epoch": 1.281118345461509, + "loss": 1.7800143957138062, + "loss_ce": 0.19940626621246338, + "loss_iou": 0.7452296018600464, + "loss_num": 0.01806640625, + "loss_xval": 1.5806081295013428, + "num_input_tokens_seen": 578220520, + "step": 3345 + }, + { + "epoch": 1.2815013404825737, + "grad_norm": 105.68129864653623, + "learning_rate": 5e-06, + "loss": 1.9297, + "num_input_tokens_seen": 578393304, + "step": 3346 + }, + { + "epoch": 1.2815013404825737, + "loss": 1.8328579664230347, + "loss_ce": 0.23113152384757996, + "loss_iou": 0.7155971527099609, + "loss_num": 0.0341796875, + "loss_xval": 1.6017265319824219, + "num_input_tokens_seen": 578393304, + "step": 3346 + }, + { + "epoch": 1.2818843355036385, + "grad_norm": 106.32825988455923, + "learning_rate": 5e-06, + "loss": 1.4914, + "num_input_tokens_seen": 578566488, + "step": 3347 + }, + { + "epoch": 1.2818843355036385, + "loss": 1.635674238204956, + "loss_ce": 0.21635141968727112, + "loss_iou": 0.6693781614303589, + "loss_num": 0.01611328125, + "loss_xval": 1.4193227291107178, + "num_input_tokens_seen": 578566488, + "step": 3347 + }, + { + "epoch": 1.2822673305247032, + "grad_norm": 119.87350863033323, + "learning_rate": 5e-06, + "loss": 1.5509, + "num_input_tokens_seen": 578739608, + "step": 3348 + }, + { + "epoch": 1.2822673305247032, + "loss": 1.4294211864471436, + "loss_ce": 0.180038183927536, + "loss_iou": 0.5733305215835571, + "loss_num": 0.0205078125, + "loss_xval": 1.2493832111358643, + "num_input_tokens_seen": 578739608, + "step": 3348 + }, + { + "epoch": 1.282650325545768, + "grad_norm": 102.46857820913375, + "learning_rate": 5e-06, + "loss": 1.5167, + "num_input_tokens_seen": 578912640, + "step": 3349 + }, + { + "epoch": 1.282650325545768, + "loss": 1.6376855373382568, + "loss_ce": 0.18948471546173096, + "loss_iou": 0.6668798923492432, + "loss_num": 0.02294921875, + "loss_xval": 1.4482008218765259, + "num_input_tokens_seen": 578912640, + "step": 3349 + }, + { + "epoch": 1.2830333205668327, + "grad_norm": 84.18815580132912, + "learning_rate": 5e-06, + "loss": 1.4077, + "num_input_tokens_seen": 579085336, + "step": 3350 + }, + { + "epoch": 1.2830333205668327, + "loss": 1.3755912780761719, + "loss_ce": 0.19753962755203247, + "loss_iou": 0.5309813618659973, + "loss_num": 0.023193359375, + "loss_xval": 1.1780515909194946, + "num_input_tokens_seen": 579085336, + "step": 3350 + }, + { + "epoch": 1.2834163155878975, + "grad_norm": 118.6917879357888, + "learning_rate": 5e-06, + "loss": 1.5693, + "num_input_tokens_seen": 579258376, + "step": 3351 + }, + { + "epoch": 1.2834163155878975, + "loss": 1.615788221359253, + "loss_ce": 0.24710959196090698, + "loss_iou": 0.6457650661468506, + "loss_num": 0.015380859375, + "loss_xval": 1.3686785697937012, + "num_input_tokens_seen": 579258376, + "step": 3351 + }, + { + "epoch": 1.283799310608962, + "grad_norm": 175.36351041767077, + "learning_rate": 5e-06, + "loss": 1.8253, + "num_input_tokens_seen": 579431512, + "step": 3352 + }, + { + "epoch": 1.283799310608962, + "loss": 1.7172226905822754, + "loss_ce": 0.24279974400997162, + "loss_iou": 0.6783125400543213, + "loss_num": 0.0235595703125, + "loss_xval": 1.4744229316711426, + "num_input_tokens_seen": 579431512, + "step": 3352 + }, + { + "epoch": 1.2841823056300268, + "grad_norm": 138.4932788465756, + "learning_rate": 5e-06, + "loss": 2.0092, + "num_input_tokens_seen": 579604624, + "step": 3353 + }, + { + "epoch": 1.2841823056300268, + "loss": 1.8588067293167114, + "loss_ce": 0.2468595653772354, + "loss_iou": 0.7251935005187988, + "loss_num": 0.0322265625, + "loss_xval": 1.6119470596313477, + "num_input_tokens_seen": 579604624, + "step": 3353 + }, + { + "epoch": 1.2845653006510915, + "grad_norm": 81.77329426208514, + "learning_rate": 5e-06, + "loss": 1.5477, + "num_input_tokens_seen": 579777960, + "step": 3354 + }, + { + "epoch": 1.2845653006510915, + "loss": 1.4595190286636353, + "loss_ce": 0.24423342943191528, + "loss_iou": 0.5725475549697876, + "loss_num": 0.0140380859375, + "loss_xval": 1.2152855396270752, + "num_input_tokens_seen": 579777960, + "step": 3354 + }, + { + "epoch": 1.2849482956721563, + "grad_norm": 152.47406169663037, + "learning_rate": 5e-06, + "loss": 1.5219, + "num_input_tokens_seen": 579950984, + "step": 3355 + }, + { + "epoch": 1.2849482956721563, + "loss": 1.4859492778778076, + "loss_ce": 0.27817896008491516, + "loss_iou": 0.5744203925132751, + "loss_num": 0.01177978515625, + "loss_xval": 1.2077702283859253, + "num_input_tokens_seen": 579950984, + "step": 3355 + }, + { + "epoch": 1.285331290693221, + "grad_norm": 100.04572783802985, + "learning_rate": 5e-06, + "loss": 1.8967, + "num_input_tokens_seen": 580123816, + "step": 3356 + }, + { + "epoch": 1.285331290693221, + "loss": 1.830329179763794, + "loss_ce": 0.24495810270309448, + "loss_iou": 0.7337560653686523, + "loss_num": 0.0235595703125, + "loss_xval": 1.5853710174560547, + "num_input_tokens_seen": 580123816, + "step": 3356 + }, + { + "epoch": 1.2857142857142856, + "grad_norm": 118.56857953203593, + "learning_rate": 5e-06, + "loss": 1.4661, + "num_input_tokens_seen": 580296592, + "step": 3357 + }, + { + "epoch": 1.2857142857142856, + "loss": 1.46898353099823, + "loss_ce": 0.2476564347743988, + "loss_iou": 0.5652838945388794, + "loss_num": 0.0181884765625, + "loss_xval": 1.2213270664215088, + "num_input_tokens_seen": 580296592, + "step": 3357 + }, + { + "epoch": 1.2860972807353503, + "grad_norm": 385.29104544462143, + "learning_rate": 5e-06, + "loss": 1.7356, + "num_input_tokens_seen": 580469512, + "step": 3358 + }, + { + "epoch": 1.2860972807353503, + "loss": 1.827427625656128, + "loss_ce": 0.22973480820655823, + "loss_iou": 0.7317382097244263, + "loss_num": 0.02685546875, + "loss_xval": 1.5976927280426025, + "num_input_tokens_seen": 580469512, + "step": 3358 + }, + { + "epoch": 1.286480275756415, + "grad_norm": 117.23976088897443, + "learning_rate": 5e-06, + "loss": 2.1095, + "num_input_tokens_seen": 580642488, + "step": 3359 + }, + { + "epoch": 1.286480275756415, + "loss": 2.1356539726257324, + "loss_ce": 0.2708384692668915, + "loss_iou": 0.845737874507904, + "loss_num": 0.03466796875, + "loss_xval": 1.864815592765808, + "num_input_tokens_seen": 580642488, + "step": 3359 + }, + { + "epoch": 1.2868632707774799, + "grad_norm": 195.45524028811684, + "learning_rate": 5e-06, + "loss": 1.5106, + "num_input_tokens_seen": 580815576, + "step": 3360 + }, + { + "epoch": 1.2868632707774799, + "loss": 1.553452968597412, + "loss_ce": 0.23564180731773376, + "loss_iou": 0.6180273294448853, + "loss_num": 0.016357421875, + "loss_xval": 1.3178112506866455, + "num_input_tokens_seen": 580815576, + "step": 3360 + }, + { + "epoch": 1.2872462657985446, + "grad_norm": 183.7944958876849, + "learning_rate": 5e-06, + "loss": 1.8716, + "num_input_tokens_seen": 580988736, + "step": 3361 + }, + { + "epoch": 1.2872462657985446, + "loss": 1.8165233135223389, + "loss_ce": 0.23417851328849792, + "loss_iou": 0.7233012914657593, + "loss_num": 0.027099609375, + "loss_xval": 1.5823447704315186, + "num_input_tokens_seen": 580988736, + "step": 3361 + }, + { + "epoch": 1.2876292608196094, + "grad_norm": 320.58422365920035, + "learning_rate": 5e-06, + "loss": 1.9082, + "num_input_tokens_seen": 581161720, + "step": 3362 + }, + { + "epoch": 1.2876292608196094, + "loss": 2.008775234222412, + "loss_ce": 0.24376258254051208, + "loss_iou": 0.8073720335960388, + "loss_num": 0.030029296875, + "loss_xval": 1.7650126218795776, + "num_input_tokens_seen": 581161720, + "step": 3362 + }, + { + "epoch": 1.2880122558406741, + "grad_norm": 84.63246045541894, + "learning_rate": 5e-06, + "loss": 1.683, + "num_input_tokens_seen": 581334952, + "step": 3363 + }, + { + "epoch": 1.2880122558406741, + "loss": 1.6573615074157715, + "loss_ce": 0.25077182054519653, + "loss_iou": 0.6485462784767151, + "loss_num": 0.0218505859375, + "loss_xval": 1.4065896272659302, + "num_input_tokens_seen": 581334952, + "step": 3363 + }, + { + "epoch": 1.288395250861739, + "grad_norm": 138.0620839987321, + "learning_rate": 5e-06, + "loss": 1.9789, + "num_input_tokens_seen": 581507880, + "step": 3364 + }, + { + "epoch": 1.288395250861739, + "loss": 1.7434158325195312, + "loss_ce": 0.21551012992858887, + "loss_iou": 0.6336733102798462, + "loss_num": 0.05224609375, + "loss_xval": 1.5279057025909424, + "num_input_tokens_seen": 581507880, + "step": 3364 + }, + { + "epoch": 1.2887782458828037, + "grad_norm": 88.13174218950272, + "learning_rate": 5e-06, + "loss": 2.0411, + "num_input_tokens_seen": 581680552, + "step": 3365 + }, + { + "epoch": 1.2887782458828037, + "loss": 2.142244577407837, + "loss_ce": 0.246106818318367, + "loss_iou": 0.8491919636726379, + "loss_num": 0.03955078125, + "loss_xval": 1.8961378335952759, + "num_input_tokens_seen": 581680552, + "step": 3365 + }, + { + "epoch": 1.2891612409038682, + "grad_norm": 106.17229374306666, + "learning_rate": 5e-06, + "loss": 1.5407, + "num_input_tokens_seen": 581853712, + "step": 3366 + }, + { + "epoch": 1.2891612409038682, + "loss": 1.7057932615280151, + "loss_ce": 0.174179345369339, + "loss_iou": 0.6264026165008545, + "loss_num": 0.0556640625, + "loss_xval": 1.531613826751709, + "num_input_tokens_seen": 581853712, + "step": 3366 + }, + { + "epoch": 1.289544235924933, + "grad_norm": 112.54860255656129, + "learning_rate": 5e-06, + "loss": 1.8566, + "num_input_tokens_seen": 582026704, + "step": 3367 + }, + { + "epoch": 1.289544235924933, + "loss": 1.845980167388916, + "loss_ce": 0.24146392941474915, + "loss_iou": 0.6961179971694946, + "loss_num": 0.04248046875, + "loss_xval": 1.6045162677764893, + "num_input_tokens_seen": 582026704, + "step": 3367 + }, + { + "epoch": 1.2899272309459977, + "grad_norm": 129.4485495674371, + "learning_rate": 5e-06, + "loss": 1.4022, + "num_input_tokens_seen": 582199240, + "step": 3368 + }, + { + "epoch": 1.2899272309459977, + "loss": 1.3163137435913086, + "loss_ce": 0.18781515955924988, + "loss_iou": 0.5210669040679932, + "loss_num": 0.0172119140625, + "loss_xval": 1.1202588081359863, + "num_input_tokens_seen": 582199240, + "step": 3368 + }, + { + "epoch": 1.2903102259670625, + "grad_norm": 154.53419435302868, + "learning_rate": 5e-06, + "loss": 2.0915, + "num_input_tokens_seen": 582372680, + "step": 3369 + }, + { + "epoch": 1.2903102259670625, + "loss": 2.0044426918029785, + "loss_ce": 0.2393362671136856, + "loss_iou": 0.7790987491607666, + "loss_num": 0.04150390625, + "loss_xval": 1.7651066780090332, + "num_input_tokens_seen": 582372680, + "step": 3369 + }, + { + "epoch": 1.2906932209881272, + "grad_norm": 204.9802556331962, + "learning_rate": 5e-06, + "loss": 1.5666, + "num_input_tokens_seen": 582545696, + "step": 3370 + }, + { + "epoch": 1.2906932209881272, + "loss": 1.6414077281951904, + "loss_ce": 0.26562219858169556, + "loss_iou": 0.6418416500091553, + "loss_num": 0.0184326171875, + "loss_xval": 1.3757853507995605, + "num_input_tokens_seen": 582545696, + "step": 3370 + }, + { + "epoch": 1.2910762160091918, + "grad_norm": 174.45829575674773, + "learning_rate": 5e-06, + "loss": 2.6819, + "num_input_tokens_seen": 582718384, + "step": 3371 + }, + { + "epoch": 1.2910762160091918, + "loss": 2.629523277282715, + "loss_ce": 0.21946796774864197, + "loss_iou": 0.9732161164283752, + "loss_num": 0.0927734375, + "loss_xval": 2.41005539894104, + "num_input_tokens_seen": 582718384, + "step": 3371 + }, + { + "epoch": 1.2914592110302565, + "grad_norm": 161.16437623061037, + "learning_rate": 5e-06, + "loss": 1.7993, + "num_input_tokens_seen": 582891496, + "step": 3372 + }, + { + "epoch": 1.2914592110302565, + "loss": 1.6719965934753418, + "loss_ce": 0.20556679368019104, + "loss_iou": 0.6563715934753418, + "loss_num": 0.03076171875, + "loss_xval": 1.4664297103881836, + "num_input_tokens_seen": 582891496, + "step": 3372 + }, + { + "epoch": 1.2918422060513213, + "grad_norm": 183.23312031668578, + "learning_rate": 5e-06, + "loss": 1.7636, + "num_input_tokens_seen": 583064400, + "step": 3373 + }, + { + "epoch": 1.2918422060513213, + "loss": 1.8405812978744507, + "loss_ce": 0.3046218454837799, + "loss_iou": 0.7224475145339966, + "loss_num": 0.0181884765625, + "loss_xval": 1.5359594821929932, + "num_input_tokens_seen": 583064400, + "step": 3373 + }, + { + "epoch": 1.292225201072386, + "grad_norm": 281.53897382838215, + "learning_rate": 5e-06, + "loss": 1.7992, + "num_input_tokens_seen": 583237280, + "step": 3374 + }, + { + "epoch": 1.292225201072386, + "loss": 1.7347018718719482, + "loss_ce": 0.23590445518493652, + "loss_iou": 0.6990141868591309, + "loss_num": 0.0201416015625, + "loss_xval": 1.4987974166870117, + "num_input_tokens_seen": 583237280, + "step": 3374 + }, + { + "epoch": 1.2926081960934508, + "grad_norm": 302.85637195824273, + "learning_rate": 5e-06, + "loss": 2.3102, + "num_input_tokens_seen": 583410136, + "step": 3375 + }, + { + "epoch": 1.2926081960934508, + "loss": 2.5609192848205566, + "loss_ce": 0.26040640473365784, + "loss_iou": 1.0801881551742554, + "loss_num": 0.028076171875, + "loss_xval": 2.3005130290985107, + "num_input_tokens_seen": 583410136, + "step": 3375 + }, + { + "epoch": 1.2929911911145155, + "grad_norm": 130.0003850790514, + "learning_rate": 5e-06, + "loss": 2.2914, + "num_input_tokens_seen": 583583152, + "step": 3376 + }, + { + "epoch": 1.2929911911145155, + "loss": 2.368964195251465, + "loss_ce": 0.2561070919036865, + "loss_iou": 0.8890701532363892, + "loss_num": 0.06689453125, + "loss_xval": 2.1128571033477783, + "num_input_tokens_seen": 583583152, + "step": 3376 + }, + { + "epoch": 1.2933741861355803, + "grad_norm": 73.73597806623299, + "learning_rate": 5e-06, + "loss": 1.6875, + "num_input_tokens_seen": 583756336, + "step": 3377 + }, + { + "epoch": 1.2933741861355803, + "loss": 1.8240755796432495, + "loss_ce": 0.21413224935531616, + "loss_iou": 0.7477816939353943, + "loss_num": 0.02294921875, + "loss_xval": 1.6099432706832886, + "num_input_tokens_seen": 583756336, + "step": 3377 + }, + { + "epoch": 1.293757181156645, + "grad_norm": 156.63117108922665, + "learning_rate": 5e-06, + "loss": 1.6359, + "num_input_tokens_seen": 583928928, + "step": 3378 + }, + { + "epoch": 1.293757181156645, + "loss": 1.6808727979660034, + "loss_ce": 0.27408987283706665, + "loss_iou": 0.650596022605896, + "loss_num": 0.0211181640625, + "loss_xval": 1.406782865524292, + "num_input_tokens_seen": 583928928, + "step": 3378 + }, + { + "epoch": 1.2941401761777098, + "grad_norm": 157.78362428064494, + "learning_rate": 5e-06, + "loss": 1.6835, + "num_input_tokens_seen": 584101776, + "step": 3379 + }, + { + "epoch": 1.2941401761777098, + "loss": 1.5322535037994385, + "loss_ce": 0.24172645807266235, + "loss_iou": 0.6080930829048157, + "loss_num": 0.014892578125, + "loss_xval": 1.2905269861221313, + "num_input_tokens_seen": 584101776, + "step": 3379 + }, + { + "epoch": 1.2945231711987744, + "grad_norm": 340.747508174026, + "learning_rate": 5e-06, + "loss": 1.8243, + "num_input_tokens_seen": 584274624, + "step": 3380 + }, + { + "epoch": 1.2945231711987744, + "loss": 1.675611138343811, + "loss_ce": 0.22095349431037903, + "loss_iou": 0.6779208779335022, + "loss_num": 0.019775390625, + "loss_xval": 1.4546576738357544, + "num_input_tokens_seen": 584274624, + "step": 3380 + }, + { + "epoch": 1.2949061662198391, + "grad_norm": 129.25041796196015, + "learning_rate": 5e-06, + "loss": 1.75, + "num_input_tokens_seen": 584447240, + "step": 3381 + }, + { + "epoch": 1.2949061662198391, + "loss": 1.8822615146636963, + "loss_ce": 0.2786910831928253, + "loss_iou": 0.7368133068084717, + "loss_num": 0.0260009765625, + "loss_xval": 1.6035704612731934, + "num_input_tokens_seen": 584447240, + "step": 3381 + }, + { + "epoch": 1.2952891612409039, + "grad_norm": 118.61187800027115, + "learning_rate": 5e-06, + "loss": 1.3366, + "num_input_tokens_seen": 584620000, + "step": 3382 + }, + { + "epoch": 1.2952891612409039, + "loss": 1.2266054153442383, + "loss_ce": 0.1901278793811798, + "loss_iou": 0.48373866081237793, + "loss_num": 0.0137939453125, + "loss_xval": 1.0364775657653809, + "num_input_tokens_seen": 584620000, + "step": 3382 + }, + { + "epoch": 1.2956721562619686, + "grad_norm": 149.1785933503822, + "learning_rate": 5e-06, + "loss": 1.7609, + "num_input_tokens_seen": 584792928, + "step": 3383 + }, + { + "epoch": 1.2956721562619686, + "loss": 1.6310421228408813, + "loss_ce": 0.22532498836517334, + "loss_iou": 0.6436392068862915, + "loss_num": 0.023681640625, + "loss_xval": 1.405717134475708, + "num_input_tokens_seen": 584792928, + "step": 3383 + }, + { + "epoch": 1.2960551512830334, + "grad_norm": 103.98641929599162, + "learning_rate": 5e-06, + "loss": 1.3192, + "num_input_tokens_seen": 584965776, + "step": 3384 + }, + { + "epoch": 1.2960551512830334, + "loss": 1.3010835647583008, + "loss_ce": 0.20399756729602814, + "loss_iou": 0.5086259841918945, + "loss_num": 0.0159912109375, + "loss_xval": 1.0970860719680786, + "num_input_tokens_seen": 584965776, + "step": 3384 + }, + { + "epoch": 1.296438146304098, + "grad_norm": 152.74040832798303, + "learning_rate": 5e-06, + "loss": 1.4629, + "num_input_tokens_seen": 585138608, + "step": 3385 + }, + { + "epoch": 1.296438146304098, + "loss": 1.4023289680480957, + "loss_ce": 0.2729048728942871, + "loss_iou": 0.5235438346862793, + "loss_num": 0.0164794921875, + "loss_xval": 1.1294240951538086, + "num_input_tokens_seen": 585138608, + "step": 3385 + }, + { + "epoch": 1.2968211413251627, + "grad_norm": 189.98002443436982, + "learning_rate": 5e-06, + "loss": 1.6322, + "num_input_tokens_seen": 585311384, + "step": 3386 + }, + { + "epoch": 1.2968211413251627, + "loss": 1.6417934894561768, + "loss_ce": 0.2782427966594696, + "loss_iou": 0.6331761479377747, + "loss_num": 0.0194091796875, + "loss_xval": 1.3635507822036743, + "num_input_tokens_seen": 585311384, + "step": 3386 + }, + { + "epoch": 1.2972041363462274, + "grad_norm": 125.16948817146444, + "learning_rate": 5e-06, + "loss": 1.5475, + "num_input_tokens_seen": 585484344, + "step": 3387 + }, + { + "epoch": 1.2972041363462274, + "loss": 1.6016695499420166, + "loss_ce": 0.18500235676765442, + "loss_iou": 0.6527305245399475, + "loss_num": 0.022216796875, + "loss_xval": 1.416667103767395, + "num_input_tokens_seen": 585484344, + "step": 3387 + }, + { + "epoch": 1.2975871313672922, + "grad_norm": 183.59067421613585, + "learning_rate": 5e-06, + "loss": 1.5959, + "num_input_tokens_seen": 585657600, + "step": 3388 + }, + { + "epoch": 1.2975871313672922, + "loss": 1.695542573928833, + "loss_ce": 0.19205079972743988, + "loss_iou": 0.7017734050750732, + "loss_num": 0.02001953125, + "loss_xval": 1.5034918785095215, + "num_input_tokens_seen": 585657600, + "step": 3388 + }, + { + "epoch": 1.297970126388357, + "grad_norm": 247.7912259115244, + "learning_rate": 5e-06, + "loss": 2.0144, + "num_input_tokens_seen": 585830392, + "step": 3389 + }, + { + "epoch": 1.297970126388357, + "loss": 2.0455682277679443, + "loss_ce": 0.2454589456319809, + "loss_iou": 0.8351743221282959, + "loss_num": 0.0260009765625, + "loss_xval": 1.8001093864440918, + "num_input_tokens_seen": 585830392, + "step": 3389 + }, + { + "epoch": 1.2983531214094217, + "grad_norm": 76.46328648680853, + "learning_rate": 5e-06, + "loss": 1.7439, + "num_input_tokens_seen": 586003496, + "step": 3390 + }, + { + "epoch": 1.2983531214094217, + "loss": 1.6645066738128662, + "loss_ce": 0.22745877504348755, + "loss_iou": 0.6660947799682617, + "loss_num": 0.02099609375, + "loss_xval": 1.4370479583740234, + "num_input_tokens_seen": 586003496, + "step": 3390 + }, + { + "epoch": 1.2987361164304865, + "grad_norm": 153.52668657699834, + "learning_rate": 5e-06, + "loss": 1.4076, + "num_input_tokens_seen": 586176648, + "step": 3391 + }, + { + "epoch": 1.2987361164304865, + "loss": 1.3777530193328857, + "loss_ce": 0.22161175310611725, + "loss_iou": 0.5349493026733398, + "loss_num": 0.0172119140625, + "loss_xval": 1.1561412811279297, + "num_input_tokens_seen": 586176648, + "step": 3391 + }, + { + "epoch": 1.2991191114515512, + "grad_norm": 195.1283230852289, + "learning_rate": 5e-06, + "loss": 1.6028, + "num_input_tokens_seen": 586349664, + "step": 3392 + }, + { + "epoch": 1.2991191114515512, + "loss": 1.6521891355514526, + "loss_ce": 0.2032175064086914, + "loss_iou": 0.6701949834823608, + "loss_num": 0.021728515625, + "loss_xval": 1.4489715099334717, + "num_input_tokens_seen": 586349664, + "step": 3392 + }, + { + "epoch": 1.299502106472616, + "grad_norm": 227.96607768799603, + "learning_rate": 5e-06, + "loss": 1.4066, + "num_input_tokens_seen": 586522800, + "step": 3393 + }, + { + "epoch": 1.299502106472616, + "loss": 1.3554717302322388, + "loss_ce": 0.24635626375675201, + "loss_iou": 0.5075300931930542, + "loss_num": 0.018798828125, + "loss_xval": 1.1091153621673584, + "num_input_tokens_seen": 586522800, + "step": 3393 + }, + { + "epoch": 1.2998851014936805, + "grad_norm": 77.66038341172509, + "learning_rate": 5e-06, + "loss": 1.7276, + "num_input_tokens_seen": 586695656, + "step": 3394 + }, + { + "epoch": 1.2998851014936805, + "loss": 1.7297232151031494, + "loss_ce": 0.1546655297279358, + "loss_iou": 0.7195356488227844, + "loss_num": 0.0272216796875, + "loss_xval": 1.5750576257705688, + "num_input_tokens_seen": 586695656, + "step": 3394 + }, + { + "epoch": 1.3002680965147453, + "grad_norm": 94.67778664875233, + "learning_rate": 5e-06, + "loss": 1.3643, + "num_input_tokens_seen": 586868424, + "step": 3395 + }, + { + "epoch": 1.3002680965147453, + "loss": 1.3454577922821045, + "loss_ce": 0.242027148604393, + "loss_iou": 0.48811671137809753, + "loss_num": 0.0255126953125, + "loss_xval": 1.1034307479858398, + "num_input_tokens_seen": 586868424, + "step": 3395 + }, + { + "epoch": 1.30065109153581, + "grad_norm": 107.85203258493827, + "learning_rate": 5e-06, + "loss": 1.7604, + "num_input_tokens_seen": 587041720, + "step": 3396 + }, + { + "epoch": 1.30065109153581, + "loss": 1.859063744544983, + "loss_ce": 0.29286590218544006, + "loss_iou": 0.7240779399871826, + "loss_num": 0.0235595703125, + "loss_xval": 1.5661978721618652, + "num_input_tokens_seen": 587041720, + "step": 3396 + }, + { + "epoch": 1.3010340865568748, + "grad_norm": 139.59650402931635, + "learning_rate": 5e-06, + "loss": 1.5741, + "num_input_tokens_seen": 587214208, + "step": 3397 + }, + { + "epoch": 1.3010340865568748, + "loss": 1.5014113187789917, + "loss_ce": 0.22343674302101135, + "loss_iou": 0.5911662578582764, + "loss_num": 0.0191650390625, + "loss_xval": 1.2779746055603027, + "num_input_tokens_seen": 587214208, + "step": 3397 + }, + { + "epoch": 1.3014170815779396, + "grad_norm": 138.43404940968887, + "learning_rate": 5e-06, + "loss": 1.6001, + "num_input_tokens_seen": 587387416, + "step": 3398 + }, + { + "epoch": 1.3014170815779396, + "loss": 1.6280248165130615, + "loss_ce": 0.28513529896736145, + "loss_iou": 0.6276062726974487, + "loss_num": 0.017578125, + "loss_xval": 1.3428895473480225, + "num_input_tokens_seen": 587387416, + "step": 3398 + }, + { + "epoch": 1.301800076599004, + "grad_norm": 169.81339489368256, + "learning_rate": 5e-06, + "loss": 1.3702, + "num_input_tokens_seen": 587560144, + "step": 3399 + }, + { + "epoch": 1.301800076599004, + "loss": 1.4205771684646606, + "loss_ce": 0.2325984686613083, + "loss_iou": 0.5562238693237305, + "loss_num": 0.01507568359375, + "loss_xval": 1.187978744506836, + "num_input_tokens_seen": 587560144, + "step": 3399 + }, + { + "epoch": 1.3021830716200689, + "grad_norm": 272.7182510346355, + "learning_rate": 5e-06, + "loss": 2.0319, + "num_input_tokens_seen": 587733352, + "step": 3400 + }, + { + "epoch": 1.3021830716200689, + "loss": 2.091144561767578, + "loss_ce": 0.22405028343200684, + "loss_iou": 0.8771965503692627, + "loss_num": 0.0225830078125, + "loss_xval": 1.8670945167541504, + "num_input_tokens_seen": 587733352, + "step": 3400 + }, + { + "epoch": 1.3025660666411336, + "grad_norm": 74.46317501421858, + "learning_rate": 5e-06, + "loss": 2.1574, + "num_input_tokens_seen": 587906696, + "step": 3401 + }, + { + "epoch": 1.3025660666411336, + "loss": 2.1836647987365723, + "loss_ce": 0.2143188863992691, + "loss_iou": 0.8512501120567322, + "loss_num": 0.053466796875, + "loss_xval": 1.9693459272384644, + "num_input_tokens_seen": 587906696, + "step": 3401 + }, + { + "epoch": 1.3029490616621984, + "grad_norm": 123.36731959699017, + "learning_rate": 5e-06, + "loss": 1.6449, + "num_input_tokens_seen": 588079416, + "step": 3402 + }, + { + "epoch": 1.3029490616621984, + "loss": 1.690931797027588, + "loss_ce": 0.22199325263500214, + "loss_iou": 0.6643704175949097, + "loss_num": 0.028076171875, + "loss_xval": 1.4689385890960693, + "num_input_tokens_seen": 588079416, + "step": 3402 + }, + { + "epoch": 1.3033320566832631, + "grad_norm": 99.49372580534332, + "learning_rate": 5e-06, + "loss": 1.7657, + "num_input_tokens_seen": 588251944, + "step": 3403 + }, + { + "epoch": 1.3033320566832631, + "loss": 1.8089815378189087, + "loss_ce": 0.20827698707580566, + "loss_iou": 0.7316877841949463, + "loss_num": 0.0274658203125, + "loss_xval": 1.6007046699523926, + "num_input_tokens_seen": 588251944, + "step": 3403 + }, + { + "epoch": 1.3037150517043279, + "grad_norm": 95.52033263317679, + "learning_rate": 5e-06, + "loss": 1.3348, + "num_input_tokens_seen": 588424848, + "step": 3404 + }, + { + "epoch": 1.3037150517043279, + "loss": 1.454200267791748, + "loss_ce": 0.2225445806980133, + "loss_iou": 0.5643294453620911, + "loss_num": 0.0206298828125, + "loss_xval": 1.2316557168960571, + "num_input_tokens_seen": 588424848, + "step": 3404 + }, + { + "epoch": 1.3040980467253926, + "grad_norm": 104.31693829184277, + "learning_rate": 5e-06, + "loss": 1.5788, + "num_input_tokens_seen": 588597792, + "step": 3405 + }, + { + "epoch": 1.3040980467253926, + "loss": 1.558342456817627, + "loss_ce": 0.18813736736774445, + "loss_iou": 0.6328411102294922, + "loss_num": 0.0208740234375, + "loss_xval": 1.3702049255371094, + "num_input_tokens_seen": 588597792, + "step": 3405 + }, + { + "epoch": 1.3044810417464574, + "grad_norm": 131.06770403703226, + "learning_rate": 5e-06, + "loss": 1.8656, + "num_input_tokens_seen": 588770904, + "step": 3406 + }, + { + "epoch": 1.3044810417464574, + "loss": 1.9378714561462402, + "loss_ce": 0.23714855313301086, + "loss_iou": 0.8048138618469238, + "loss_num": 0.0181884765625, + "loss_xval": 1.7007226943969727, + "num_input_tokens_seen": 588770904, + "step": 3406 + }, + { + "epoch": 1.304864036767522, + "grad_norm": 98.80346222293404, + "learning_rate": 5e-06, + "loss": 1.6852, + "num_input_tokens_seen": 588941032, + "step": 3407 + }, + { + "epoch": 1.304864036767522, + "loss": 1.6783905029296875, + "loss_ce": 0.24246475100517273, + "loss_iou": 0.6664491891860962, + "loss_num": 0.0206298828125, + "loss_xval": 1.4359257221221924, + "num_input_tokens_seen": 588941032, + "step": 3407 + }, + { + "epoch": 1.3052470317885867, + "grad_norm": 70.35809587279714, + "learning_rate": 5e-06, + "loss": 1.8113, + "num_input_tokens_seen": 589113768, + "step": 3408 + }, + { + "epoch": 1.3052470317885867, + "loss": 1.8585872650146484, + "loss_ce": 0.19603464007377625, + "loss_iou": 0.7664875388145447, + "loss_num": 0.02587890625, + "loss_xval": 1.6625527143478394, + "num_input_tokens_seen": 589113768, + "step": 3408 + }, + { + "epoch": 1.3056300268096515, + "grad_norm": 98.41693683214676, + "learning_rate": 5e-06, + "loss": 1.4478, + "num_input_tokens_seen": 589286864, + "step": 3409 + }, + { + "epoch": 1.3056300268096515, + "loss": 1.317795991897583, + "loss_ce": 0.23353098332881927, + "loss_iou": 0.49277031421661377, + "loss_num": 0.019775390625, + "loss_xval": 1.0842649936676025, + "num_input_tokens_seen": 589286864, + "step": 3409 + }, + { + "epoch": 1.3060130218307162, + "grad_norm": 245.15268466888702, + "learning_rate": 5e-06, + "loss": 1.6307, + "num_input_tokens_seen": 589459896, + "step": 3410 + }, + { + "epoch": 1.3060130218307162, + "loss": 1.6969010829925537, + "loss_ce": 0.25010430812835693, + "loss_iou": 0.6713963747024536, + "loss_num": 0.020751953125, + "loss_xval": 1.4467966556549072, + "num_input_tokens_seen": 589459896, + "step": 3410 + }, + { + "epoch": 1.306396016851781, + "grad_norm": 108.56488167085968, + "learning_rate": 5e-06, + "loss": 1.4357, + "num_input_tokens_seen": 589632448, + "step": 3411 + }, + { + "epoch": 1.306396016851781, + "loss": 1.6918354034423828, + "loss_ce": 0.2020936906337738, + "loss_iou": 0.6973243951797485, + "loss_num": 0.01904296875, + "loss_xval": 1.489741563796997, + "num_input_tokens_seen": 589632448, + "step": 3411 + }, + { + "epoch": 1.3067790118728457, + "grad_norm": 93.64761251480384, + "learning_rate": 5e-06, + "loss": 1.6644, + "num_input_tokens_seen": 589805568, + "step": 3412 + }, + { + "epoch": 1.3067790118728457, + "loss": 1.5487308502197266, + "loss_ce": 0.21271872520446777, + "loss_iou": 0.6254035234451294, + "loss_num": 0.01708984375, + "loss_xval": 1.3360121250152588, + "num_input_tokens_seen": 589805568, + "step": 3412 + }, + { + "epoch": 1.3071620068939103, + "grad_norm": 165.7201224858923, + "learning_rate": 5e-06, + "loss": 1.5928, + "num_input_tokens_seen": 589978696, + "step": 3413 + }, + { + "epoch": 1.3071620068939103, + "loss": 1.587674856185913, + "loss_ce": 0.24202266335487366, + "loss_iou": 0.6225178837776184, + "loss_num": 0.0201416015625, + "loss_xval": 1.3456522226333618, + "num_input_tokens_seen": 589978696, + "step": 3413 + }, + { + "epoch": 1.307545001914975, + "grad_norm": 128.49288898734875, + "learning_rate": 5e-06, + "loss": 1.9588, + "num_input_tokens_seen": 590151728, + "step": 3414 + }, + { + "epoch": 1.307545001914975, + "loss": 2.0160014629364014, + "loss_ce": 0.20754095911979675, + "loss_iou": 0.8112126588821411, + "loss_num": 0.037109375, + "loss_xval": 1.8084604740142822, + "num_input_tokens_seen": 590151728, + "step": 3414 + }, + { + "epoch": 1.3079279969360398, + "grad_norm": 81.97583984885038, + "learning_rate": 5e-06, + "loss": 1.5687, + "num_input_tokens_seen": 590324792, + "step": 3415 + }, + { + "epoch": 1.3079279969360398, + "loss": 1.5454689264297485, + "loss_ce": 0.2742193043231964, + "loss_iou": 0.592549204826355, + "loss_num": 0.0172119140625, + "loss_xval": 1.271249532699585, + "num_input_tokens_seen": 590324792, + "step": 3415 + }, + { + "epoch": 1.3083109919571045, + "grad_norm": 89.25642109482489, + "learning_rate": 5e-06, + "loss": 1.6331, + "num_input_tokens_seen": 590497752, + "step": 3416 + }, + { + "epoch": 1.3083109919571045, + "loss": 1.6145857572555542, + "loss_ce": 0.2385154664516449, + "loss_iou": 0.6140300631523132, + "loss_num": 0.029541015625, + "loss_xval": 1.3760703802108765, + "num_input_tokens_seen": 590497752, + "step": 3416 + }, + { + "epoch": 1.3086939869781693, + "grad_norm": 99.03178678196664, + "learning_rate": 5e-06, + "loss": 1.5583, + "num_input_tokens_seen": 590670808, + "step": 3417 + }, + { + "epoch": 1.3086939869781693, + "loss": 1.5358097553253174, + "loss_ce": 0.2650596499443054, + "loss_iou": 0.5914602279663086, + "loss_num": 0.017578125, + "loss_xval": 1.2707500457763672, + "num_input_tokens_seen": 590670808, + "step": 3417 + }, + { + "epoch": 1.309076981999234, + "grad_norm": 128.51842962470516, + "learning_rate": 5e-06, + "loss": 1.3123, + "num_input_tokens_seen": 590843984, + "step": 3418 + }, + { + "epoch": 1.309076981999234, + "loss": 1.1670749187469482, + "loss_ce": 0.19401878118515015, + "loss_iou": 0.46426552534103394, + "loss_num": 0.0089111328125, + "loss_xval": 0.9730561971664429, + "num_input_tokens_seen": 590843984, + "step": 3418 + }, + { + "epoch": 1.3094599770202988, + "grad_norm": 124.19813938301355, + "learning_rate": 5e-06, + "loss": 1.6318, + "num_input_tokens_seen": 591016968, + "step": 3419 + }, + { + "epoch": 1.3094599770202988, + "loss": 1.706656813621521, + "loss_ce": 0.1851901412010193, + "loss_iou": 0.7119662761688232, + "loss_num": 0.01953125, + "loss_xval": 1.5214667320251465, + "num_input_tokens_seen": 591016968, + "step": 3419 + }, + { + "epoch": 1.3098429720413636, + "grad_norm": 131.65719949900188, + "learning_rate": 5e-06, + "loss": 1.5282, + "num_input_tokens_seen": 591189720, + "step": 3420 + }, + { + "epoch": 1.3098429720413636, + "loss": 1.5373845100402832, + "loss_ce": 0.19379600882530212, + "loss_iou": 0.6266740560531616, + "loss_num": 0.01806640625, + "loss_xval": 1.3435885906219482, + "num_input_tokens_seen": 591189720, + "step": 3420 + }, + { + "epoch": 1.310225967062428, + "grad_norm": 128.56683467176094, + "learning_rate": 5e-06, + "loss": 1.9242, + "num_input_tokens_seen": 591362656, + "step": 3421 + }, + { + "epoch": 1.310225967062428, + "loss": 1.9553898572921753, + "loss_ce": 0.2091091424226761, + "loss_iou": 0.7950153350830078, + "loss_num": 0.03125, + "loss_xval": 1.7462806701660156, + "num_input_tokens_seen": 591362656, + "step": 3421 + }, + { + "epoch": 1.3106089620834929, + "grad_norm": 103.29049215670747, + "learning_rate": 5e-06, + "loss": 1.5314, + "num_input_tokens_seen": 591535368, + "step": 3422 + }, + { + "epoch": 1.3106089620834929, + "loss": 1.6691319942474365, + "loss_ce": 0.17890393733978271, + "loss_iou": 0.6813018321990967, + "loss_num": 0.0255126953125, + "loss_xval": 1.4902281761169434, + "num_input_tokens_seen": 591535368, + "step": 3422 + }, + { + "epoch": 1.3109919571045576, + "grad_norm": 167.01470859700683, + "learning_rate": 5e-06, + "loss": 2.5699, + "num_input_tokens_seen": 591708368, + "step": 3423 + }, + { + "epoch": 1.3109919571045576, + "loss": 2.400451183319092, + "loss_ce": 0.251986563205719, + "loss_iou": 0.961805522441864, + "loss_num": 0.044921875, + "loss_xval": 2.1484644412994385, + "num_input_tokens_seen": 591708368, + "step": 3423 + }, + { + "epoch": 1.3113749521256224, + "grad_norm": 77.31027702011308, + "learning_rate": 5e-06, + "loss": 1.735, + "num_input_tokens_seen": 591881200, + "step": 3424 + }, + { + "epoch": 1.3113749521256224, + "loss": 1.7121155261993408, + "loss_ce": 0.23263107240200043, + "loss_iou": 0.6526145935058594, + "loss_num": 0.034912109375, + "loss_xval": 1.4794845581054688, + "num_input_tokens_seen": 591881200, + "step": 3424 + }, + { + "epoch": 1.3117579471466871, + "grad_norm": 111.6612291674844, + "learning_rate": 5e-06, + "loss": 1.6015, + "num_input_tokens_seen": 592054368, + "step": 3425 + }, + { + "epoch": 1.3117579471466871, + "loss": 1.5268713235855103, + "loss_ce": 0.30413222312927246, + "loss_iou": 0.5636705756187439, + "loss_num": 0.01904296875, + "loss_xval": 1.2227391004562378, + "num_input_tokens_seen": 592054368, + "step": 3425 + }, + { + "epoch": 1.312140942167752, + "grad_norm": 191.7757606302062, + "learning_rate": 5e-06, + "loss": 1.5402, + "num_input_tokens_seen": 592227528, + "step": 3426 + }, + { + "epoch": 1.312140942167752, + "loss": 1.368982195854187, + "loss_ce": 0.18858899176120758, + "loss_iou": 0.5364398956298828, + "loss_num": 0.021484375, + "loss_xval": 1.1803932189941406, + "num_input_tokens_seen": 592227528, + "step": 3426 + }, + { + "epoch": 1.3125239371888164, + "grad_norm": 261.1235860404239, + "learning_rate": 5e-06, + "loss": 2.0906, + "num_input_tokens_seen": 592400528, + "step": 3427 + }, + { + "epoch": 1.3125239371888164, + "loss": 1.9674935340881348, + "loss_ce": 0.20730724930763245, + "loss_iou": 0.8112759590148926, + "loss_num": 0.0274658203125, + "loss_xval": 1.7601861953735352, + "num_input_tokens_seen": 592400528, + "step": 3427 + }, + { + "epoch": 1.3129069322098812, + "grad_norm": 108.63903059318454, + "learning_rate": 5e-06, + "loss": 1.7872, + "num_input_tokens_seen": 592573792, + "step": 3428 + }, + { + "epoch": 1.3129069322098812, + "loss": 1.8051846027374268, + "loss_ce": 0.20400851964950562, + "loss_iou": 0.7238973379135132, + "loss_num": 0.0306396484375, + "loss_xval": 1.6011760234832764, + "num_input_tokens_seen": 592573792, + "step": 3428 + }, + { + "epoch": 1.313289927230946, + "grad_norm": 123.06938863619305, + "learning_rate": 5e-06, + "loss": 1.4081, + "num_input_tokens_seen": 592746480, + "step": 3429 + }, + { + "epoch": 1.313289927230946, + "loss": 1.3916993141174316, + "loss_ce": 0.19342002272605896, + "loss_iou": 0.5483736991882324, + "loss_num": 0.020263671875, + "loss_xval": 1.1982793807983398, + "num_input_tokens_seen": 592746480, + "step": 3429 + }, + { + "epoch": 1.3136729222520107, + "grad_norm": 103.83790948129145, + "learning_rate": 5e-06, + "loss": 1.8334, + "num_input_tokens_seen": 592919472, + "step": 3430 + }, + { + "epoch": 1.3136729222520107, + "loss": 1.760236144065857, + "loss_ce": 0.21684814989566803, + "loss_iou": 0.6988484859466553, + "loss_num": 0.0291748046875, + "loss_xval": 1.5433878898620605, + "num_input_tokens_seen": 592919472, + "step": 3430 + }, + { + "epoch": 1.3140559172730755, + "grad_norm": 122.655449245257, + "learning_rate": 5e-06, + "loss": 1.3296, + "num_input_tokens_seen": 593092216, + "step": 3431 + }, + { + "epoch": 1.3140559172730755, + "loss": 1.393941879272461, + "loss_ce": 0.23210212588310242, + "loss_iou": 0.5376459956169128, + "loss_num": 0.017333984375, + "loss_xval": 1.1618398427963257, + "num_input_tokens_seen": 593092216, + "step": 3431 + }, + { + "epoch": 1.3144389122941402, + "grad_norm": 94.77904336321453, + "learning_rate": 5e-06, + "loss": 1.6082, + "num_input_tokens_seen": 593264800, + "step": 3432 + }, + { + "epoch": 1.3144389122941402, + "loss": 1.6623306274414062, + "loss_ce": 0.20671777427196503, + "loss_iou": 0.6692584753036499, + "loss_num": 0.0234375, + "loss_xval": 1.4556128978729248, + "num_input_tokens_seen": 593264800, + "step": 3432 + }, + { + "epoch": 1.314821907315205, + "grad_norm": 147.30436807819416, + "learning_rate": 5e-06, + "loss": 1.5344, + "num_input_tokens_seen": 593437800, + "step": 3433 + }, + { + "epoch": 1.314821907315205, + "loss": 1.4200994968414307, + "loss_ce": 0.231092631816864, + "loss_iou": 0.5567836761474609, + "loss_num": 0.01507568359375, + "loss_xval": 1.1890068054199219, + "num_input_tokens_seen": 593437800, + "step": 3433 + }, + { + "epoch": 1.3152049023362697, + "grad_norm": 106.637022094441, + "learning_rate": 5e-06, + "loss": 1.9204, + "num_input_tokens_seen": 593610720, + "step": 3434 + }, + { + "epoch": 1.3152049023362697, + "loss": 2.031893730163574, + "loss_ce": 0.20427463948726654, + "loss_iou": 0.7924715876579285, + "loss_num": 0.048583984375, + "loss_xval": 1.8276190757751465, + "num_input_tokens_seen": 593610720, + "step": 3434 + }, + { + "epoch": 1.3155878973573343, + "grad_norm": 128.15138920085224, + "learning_rate": 5e-06, + "loss": 1.4556, + "num_input_tokens_seen": 593783784, + "step": 3435 + }, + { + "epoch": 1.3155878973573343, + "loss": 1.2844393253326416, + "loss_ce": 0.23994150757789612, + "loss_iou": 0.47250521183013916, + "loss_num": 0.0198974609375, + "loss_xval": 1.0444977283477783, + "num_input_tokens_seen": 593783784, + "step": 3435 + }, + { + "epoch": 1.315970892378399, + "grad_norm": 199.4011930943759, + "learning_rate": 5e-06, + "loss": 1.8581, + "num_input_tokens_seen": 593956616, + "step": 3436 + }, + { + "epoch": 1.315970892378399, + "loss": 1.9263231754302979, + "loss_ce": 0.19064660370349884, + "loss_iou": 0.8233740925788879, + "loss_num": 0.017822265625, + "loss_xval": 1.7356765270233154, + "num_input_tokens_seen": 593956616, + "step": 3436 + }, + { + "epoch": 1.3163538873994638, + "grad_norm": 91.87156513692791, + "learning_rate": 5e-06, + "loss": 1.9686, + "num_input_tokens_seen": 594129520, + "step": 3437 + }, + { + "epoch": 1.3163538873994638, + "loss": 2.0934529304504395, + "loss_ce": 0.20075491070747375, + "loss_iou": 0.871367335319519, + "loss_num": 0.030029296875, + "loss_xval": 1.892698049545288, + "num_input_tokens_seen": 594129520, + "step": 3437 + }, + { + "epoch": 1.3167368824205286, + "grad_norm": 71.4230553026647, + "learning_rate": 5e-06, + "loss": 1.5046, + "num_input_tokens_seen": 594302296, + "step": 3438 + }, + { + "epoch": 1.3167368824205286, + "loss": 1.48215651512146, + "loss_ce": 0.2117040455341339, + "loss_iou": 0.5868406295776367, + "loss_num": 0.0194091796875, + "loss_xval": 1.2704524993896484, + "num_input_tokens_seen": 594302296, + "step": 3438 + }, + { + "epoch": 1.3171198774415933, + "grad_norm": 232.85164408634435, + "learning_rate": 5e-06, + "loss": 1.4754, + "num_input_tokens_seen": 594475336, + "step": 3439 + }, + { + "epoch": 1.3171198774415933, + "loss": 1.3627231121063232, + "loss_ce": 0.22045598924160004, + "loss_iou": 0.5019044280052185, + "loss_num": 0.0277099609375, + "loss_xval": 1.142267107963562, + "num_input_tokens_seen": 594475336, + "step": 3439 + }, + { + "epoch": 1.3175028724626578, + "grad_norm": 125.46307331108713, + "learning_rate": 5e-06, + "loss": 1.8846, + "num_input_tokens_seen": 594648088, + "step": 3440 + }, + { + "epoch": 1.3175028724626578, + "loss": 1.8807228803634644, + "loss_ce": 0.22862456738948822, + "loss_iou": 0.7542718052864075, + "loss_num": 0.0286865234375, + "loss_xval": 1.652098298072815, + "num_input_tokens_seen": 594648088, + "step": 3440 + }, + { + "epoch": 1.3178858674837226, + "grad_norm": 140.29810763456942, + "learning_rate": 5e-06, + "loss": 1.6031, + "num_input_tokens_seen": 594821080, + "step": 3441 + }, + { + "epoch": 1.3178858674837226, + "loss": 1.528963327407837, + "loss_ce": 0.2620272934436798, + "loss_iou": 0.5841668844223022, + "loss_num": 0.0196533203125, + "loss_xval": 1.2669360637664795, + "num_input_tokens_seen": 594821080, + "step": 3441 + }, + { + "epoch": 1.3182688625047874, + "grad_norm": 152.95364929288792, + "learning_rate": 5e-06, + "loss": 1.6388, + "num_input_tokens_seen": 594993744, + "step": 3442 + }, + { + "epoch": 1.3182688625047874, + "loss": 1.4266736507415771, + "loss_ce": 0.18095433712005615, + "loss_iou": 0.5637319087982178, + "loss_num": 0.023681640625, + "loss_xval": 1.2457194328308105, + "num_input_tokens_seen": 594993744, + "step": 3442 + }, + { + "epoch": 1.3186518575258521, + "grad_norm": 230.79285770032826, + "learning_rate": 5e-06, + "loss": 1.9245, + "num_input_tokens_seen": 595166472, + "step": 3443 + }, + { + "epoch": 1.3186518575258521, + "loss": 2.0874247550964355, + "loss_ce": 0.19721028208732605, + "loss_iou": 0.8709800243377686, + "loss_num": 0.0296630859375, + "loss_xval": 1.890214443206787, + "num_input_tokens_seen": 595166472, + "step": 3443 + }, + { + "epoch": 1.3190348525469169, + "grad_norm": 165.79366498689848, + "learning_rate": 5e-06, + "loss": 1.5884, + "num_input_tokens_seen": 595339312, + "step": 3444 + }, + { + "epoch": 1.3190348525469169, + "loss": 1.4338977336883545, + "loss_ce": 0.14951470494270325, + "loss_iou": 0.5644022226333618, + "loss_num": 0.0311279296875, + "loss_xval": 1.2704670429229736, + "num_input_tokens_seen": 595339312, + "step": 3444 + }, + { + "epoch": 1.3194178475679816, + "grad_norm": 159.2525834189813, + "learning_rate": 5e-06, + "loss": 1.8316, + "num_input_tokens_seen": 595512416, + "step": 3445 + }, + { + "epoch": 1.3194178475679816, + "loss": 1.8100316524505615, + "loss_ce": 0.19420024752616882, + "loss_iou": 0.7189875245094299, + "loss_num": 0.03564453125, + "loss_xval": 1.6158314943313599, + "num_input_tokens_seen": 595512416, + "step": 3445 + }, + { + "epoch": 1.3198008425890464, + "grad_norm": 150.4904805822265, + "learning_rate": 5e-06, + "loss": 1.4687, + "num_input_tokens_seen": 595685248, + "step": 3446 + }, + { + "epoch": 1.3198008425890464, + "loss": 1.5566127300262451, + "loss_ce": 0.22897005081176758, + "loss_iou": 0.6195403337478638, + "loss_num": 0.0177001953125, + "loss_xval": 1.3276426792144775, + "num_input_tokens_seen": 595685248, + "step": 3446 + }, + { + "epoch": 1.3201838376101112, + "grad_norm": 337.42542280897067, + "learning_rate": 5e-06, + "loss": 1.8937, + "num_input_tokens_seen": 595858256, + "step": 3447 + }, + { + "epoch": 1.3201838376101112, + "loss": 1.8821277618408203, + "loss_ce": 0.19640478491783142, + "loss_iou": 0.792202353477478, + "loss_num": 0.020263671875, + "loss_xval": 1.685723066329956, + "num_input_tokens_seen": 595858256, + "step": 3447 + }, + { + "epoch": 1.320566832631176, + "grad_norm": 138.67140759043002, + "learning_rate": 5e-06, + "loss": 2.2424, + "num_input_tokens_seen": 596031208, + "step": 3448 + }, + { + "epoch": 1.320566832631176, + "loss": 2.335386276245117, + "loss_ce": 0.22995525598526, + "loss_iou": 0.913860559463501, + "loss_num": 0.0556640625, + "loss_xval": 2.105431079864502, + "num_input_tokens_seen": 596031208, + "step": 3448 + }, + { + "epoch": 1.3209498276522404, + "grad_norm": 133.25394069902987, + "learning_rate": 5e-06, + "loss": 1.7018, + "num_input_tokens_seen": 596204200, + "step": 3449 + }, + { + "epoch": 1.3209498276522404, + "loss": 1.6877968311309814, + "loss_ce": 0.22173988819122314, + "loss_iou": 0.6771659851074219, + "loss_num": 0.0223388671875, + "loss_xval": 1.4660568237304688, + "num_input_tokens_seen": 596204200, + "step": 3449 + }, + { + "epoch": 1.3213328226733052, + "grad_norm": 155.64299683467604, + "learning_rate": 5e-06, + "loss": 2.1258, + "num_input_tokens_seen": 596377008, + "step": 3450 + }, + { + "epoch": 1.3213328226733052, + "loss": 1.9931901693344116, + "loss_ce": 0.1854073405265808, + "loss_iou": 0.8141697645187378, + "loss_num": 0.035888671875, + "loss_xval": 1.8077828884124756, + "num_input_tokens_seen": 596377008, + "step": 3450 + }, + { + "epoch": 1.32171581769437, + "grad_norm": 207.4083988067701, + "learning_rate": 5e-06, + "loss": 1.8708, + "num_input_tokens_seen": 596550160, + "step": 3451 + }, + { + "epoch": 1.32171581769437, + "loss": 1.7659146785736084, + "loss_ce": 0.22494441270828247, + "loss_iou": 0.6785661578178406, + "loss_num": 0.036865234375, + "loss_xval": 1.5409702062606812, + "num_input_tokens_seen": 596550160, + "step": 3451 + }, + { + "epoch": 1.3220988127154347, + "grad_norm": 187.2132726586493, + "learning_rate": 5e-06, + "loss": 1.6678, + "num_input_tokens_seen": 596723184, + "step": 3452 + }, + { + "epoch": 1.3220988127154347, + "loss": 1.6605591773986816, + "loss_ce": 0.20885691046714783, + "loss_iou": 0.6607265472412109, + "loss_num": 0.0260009765625, + "loss_xval": 1.4517021179199219, + "num_input_tokens_seen": 596723184, + "step": 3452 + }, + { + "epoch": 1.3224818077364995, + "grad_norm": 90.99317747748806, + "learning_rate": 5e-06, + "loss": 1.5969, + "num_input_tokens_seen": 596896464, + "step": 3453 + }, + { + "epoch": 1.3224818077364995, + "loss": 1.510523796081543, + "loss_ce": 0.22982454299926758, + "loss_iou": 0.5661919713020325, + "loss_num": 0.0296630859375, + "loss_xval": 1.280699372291565, + "num_input_tokens_seen": 596896464, + "step": 3453 + }, + { + "epoch": 1.322864802757564, + "grad_norm": 141.30134918580436, + "learning_rate": 5e-06, + "loss": 1.2841, + "num_input_tokens_seen": 597069352, + "step": 3454 + }, + { + "epoch": 1.322864802757564, + "loss": 1.2556250095367432, + "loss_ce": 0.24805593490600586, + "loss_iou": 0.46800267696380615, + "loss_num": 0.0142822265625, + "loss_xval": 0.9946906566619873, + "num_input_tokens_seen": 597069352, + "step": 3454 + }, + { + "epoch": 1.3232477977786288, + "grad_norm": 148.12058201644876, + "learning_rate": 5e-06, + "loss": 1.5733, + "num_input_tokens_seen": 597242064, + "step": 3455 + }, + { + "epoch": 1.3232477977786288, + "loss": 1.5438635349273682, + "loss_ce": 0.22933542728424072, + "loss_iou": 0.590186357498169, + "loss_num": 0.02685546875, + "loss_xval": 1.314527988433838, + "num_input_tokens_seen": 597242064, + "step": 3455 + }, + { + "epoch": 1.3236307927996935, + "grad_norm": 86.24126150448357, + "learning_rate": 5e-06, + "loss": 1.415, + "num_input_tokens_seen": 597414864, + "step": 3456 + }, + { + "epoch": 1.3236307927996935, + "loss": 1.4794440269470215, + "loss_ce": 0.19419018924236298, + "loss_iou": 0.598406970500946, + "loss_num": 0.0177001953125, + "loss_xval": 1.285253882408142, + "num_input_tokens_seen": 597414864, + "step": 3456 + }, + { + "epoch": 1.3240137878207583, + "grad_norm": 205.26520536273173, + "learning_rate": 5e-06, + "loss": 1.5919, + "num_input_tokens_seen": 597587928, + "step": 3457 + }, + { + "epoch": 1.3240137878207583, + "loss": 1.6292989253997803, + "loss_ce": 0.18911823630332947, + "loss_iou": 0.678128719329834, + "loss_num": 0.016845703125, + "loss_xval": 1.440180778503418, + "num_input_tokens_seen": 597587928, + "step": 3457 + }, + { + "epoch": 1.324396782841823, + "grad_norm": 148.95097772210877, + "learning_rate": 5e-06, + "loss": 1.5373, + "num_input_tokens_seen": 597760640, + "step": 3458 + }, + { + "epoch": 1.324396782841823, + "loss": 1.580047845840454, + "loss_ce": 0.1832701712846756, + "loss_iou": 0.6467683911323547, + "loss_num": 0.0206298828125, + "loss_xval": 1.3967777490615845, + "num_input_tokens_seen": 597760640, + "step": 3458 + }, + { + "epoch": 1.3247797778628878, + "grad_norm": 212.87837915686478, + "learning_rate": 5e-06, + "loss": 2.0722, + "num_input_tokens_seen": 597933448, + "step": 3459 + }, + { + "epoch": 1.3247797778628878, + "loss": 2.0425939559936523, + "loss_ce": 0.26647183299064636, + "loss_iou": 0.8155207633972168, + "loss_num": 0.029052734375, + "loss_xval": 1.7761220932006836, + "num_input_tokens_seen": 597933448, + "step": 3459 + }, + { + "epoch": 1.3251627728839526, + "grad_norm": 164.9439321869532, + "learning_rate": 5e-06, + "loss": 1.7587, + "num_input_tokens_seen": 598106576, + "step": 3460 + }, + { + "epoch": 1.3251627728839526, + "loss": 1.8102011680603027, + "loss_ce": 0.23557034134864807, + "loss_iou": 0.7101669311523438, + "loss_num": 0.0308837890625, + "loss_xval": 1.5746307373046875, + "num_input_tokens_seen": 598106576, + "step": 3460 + }, + { + "epoch": 1.3255457679050173, + "grad_norm": 216.54728902785226, + "learning_rate": 5e-06, + "loss": 1.4584, + "num_input_tokens_seen": 598279824, + "step": 3461 + }, + { + "epoch": 1.3255457679050173, + "loss": 1.3819681406021118, + "loss_ce": 0.19735556840896606, + "loss_iou": 0.558477520942688, + "loss_num": 0.0135498046875, + "loss_xval": 1.184612512588501, + "num_input_tokens_seen": 598279824, + "step": 3461 + }, + { + "epoch": 1.325928762926082, + "grad_norm": 131.5791298294782, + "learning_rate": 5e-06, + "loss": 1.9053, + "num_input_tokens_seen": 598452728, + "step": 3462 + }, + { + "epoch": 1.325928762926082, + "loss": 1.9645386934280396, + "loss_ce": 0.1781918704509735, + "loss_iou": 0.8089753985404968, + "loss_num": 0.03369140625, + "loss_xval": 1.7863469123840332, + "num_input_tokens_seen": 598452728, + "step": 3462 + }, + { + "epoch": 1.3263117579471466, + "grad_norm": 102.69471251638501, + "learning_rate": 5e-06, + "loss": 1.3905, + "num_input_tokens_seen": 598625568, + "step": 3463 + }, + { + "epoch": 1.3263117579471466, + "loss": 1.3003264665603638, + "loss_ce": 0.1388418972492218, + "loss_iou": 0.5353016257286072, + "loss_num": 0.0181884765625, + "loss_xval": 1.1614845991134644, + "num_input_tokens_seen": 598625568, + "step": 3463 + }, + { + "epoch": 1.3266947529682114, + "grad_norm": 285.2463484476111, + "learning_rate": 5e-06, + "loss": 1.6416, + "num_input_tokens_seen": 598798448, + "step": 3464 + }, + { + "epoch": 1.3266947529682114, + "loss": 1.7376794815063477, + "loss_ce": 0.2819916605949402, + "loss_iou": 0.6811367869377136, + "loss_num": 0.0186767578125, + "loss_xval": 1.4556878805160522, + "num_input_tokens_seen": 598798448, + "step": 3464 + }, + { + "epoch": 1.3270777479892761, + "grad_norm": 86.81365492022437, + "learning_rate": 5e-06, + "loss": 2.2049, + "num_input_tokens_seen": 598971512, + "step": 3465 + }, + { + "epoch": 1.3270777479892761, + "loss": 2.11521053314209, + "loss_ce": 0.23786184191703796, + "loss_iou": 0.8401026129722595, + "loss_num": 0.03955078125, + "loss_xval": 1.877348780632019, + "num_input_tokens_seen": 598971512, + "step": 3465 + }, + { + "epoch": 1.327460743010341, + "grad_norm": 184.49532333398986, + "learning_rate": 5e-06, + "loss": 1.7217, + "num_input_tokens_seen": 599144320, + "step": 3466 + }, + { + "epoch": 1.327460743010341, + "loss": 1.6120097637176514, + "loss_ce": 0.2162342518568039, + "loss_iou": 0.6482966542243958, + "loss_num": 0.0198974609375, + "loss_xval": 1.3957754373550415, + "num_input_tokens_seen": 599144320, + "step": 3466 + }, + { + "epoch": 1.3278437380314057, + "grad_norm": 198.4854520617887, + "learning_rate": 5e-06, + "loss": 1.721, + "num_input_tokens_seen": 599313952, + "step": 3467 + }, + { + "epoch": 1.3278437380314057, + "loss": 1.8033454418182373, + "loss_ce": 0.25241461396217346, + "loss_iou": 0.7142319679260254, + "loss_num": 0.0245361328125, + "loss_xval": 1.5509309768676758, + "num_input_tokens_seen": 599313952, + "step": 3467 + }, + { + "epoch": 1.3282267330524702, + "grad_norm": 136.42639742425405, + "learning_rate": 5e-06, + "loss": 1.9464, + "num_input_tokens_seen": 599487080, + "step": 3468 + }, + { + "epoch": 1.3282267330524702, + "loss": 1.9296438694000244, + "loss_ce": 0.2463741898536682, + "loss_iou": 0.7575284242630005, + "loss_num": 0.03369140625, + "loss_xval": 1.683269739151001, + "num_input_tokens_seen": 599487080, + "step": 3468 + }, + { + "epoch": 1.328609728073535, + "grad_norm": 124.98895467778436, + "learning_rate": 5e-06, + "loss": 1.5272, + "num_input_tokens_seen": 599660120, + "step": 3469 + }, + { + "epoch": 1.328609728073535, + "loss": 1.6587231159210205, + "loss_ce": 0.24159637093544006, + "loss_iou": 0.6685395240783691, + "loss_num": 0.0159912109375, + "loss_xval": 1.4171266555786133, + "num_input_tokens_seen": 599660120, + "step": 3469 + }, + { + "epoch": 1.3289927230945997, + "grad_norm": 180.30626110863463, + "learning_rate": 5e-06, + "loss": 1.9557, + "num_input_tokens_seen": 599833224, + "step": 3470 + }, + { + "epoch": 1.3289927230945997, + "loss": 1.9767966270446777, + "loss_ce": 0.22852396965026855, + "loss_iou": 0.8138030171394348, + "loss_num": 0.024169921875, + "loss_xval": 1.7482725381851196, + "num_input_tokens_seen": 599833224, + "step": 3470 + }, + { + "epoch": 1.3293757181156645, + "grad_norm": 89.62017521644816, + "learning_rate": 5e-06, + "loss": 1.7182, + "num_input_tokens_seen": 600005984, + "step": 3471 + }, + { + "epoch": 1.3293757181156645, + "loss": 1.600099802017212, + "loss_ce": 0.22685885429382324, + "loss_iou": 0.6239984035491943, + "loss_num": 0.0250244140625, + "loss_xval": 1.3732409477233887, + "num_input_tokens_seen": 600005984, + "step": 3471 + }, + { + "epoch": 1.3297587131367292, + "grad_norm": 149.46918114341312, + "learning_rate": 5e-06, + "loss": 1.5313, + "num_input_tokens_seen": 600178728, + "step": 3472 + }, + { + "epoch": 1.3297587131367292, + "loss": 1.872377634048462, + "loss_ce": 0.21394246816635132, + "loss_iou": 0.7716308832168579, + "loss_num": 0.0230712890625, + "loss_xval": 1.6584351062774658, + "num_input_tokens_seen": 600178728, + "step": 3472 + }, + { + "epoch": 1.330141708157794, + "grad_norm": 126.44145986486113, + "learning_rate": 5e-06, + "loss": 1.7678, + "num_input_tokens_seen": 600351824, + "step": 3473 + }, + { + "epoch": 1.330141708157794, + "loss": 1.6964324712753296, + "loss_ce": 0.20465189218521118, + "loss_iou": 0.6850992441177368, + "loss_num": 0.0242919921875, + "loss_xval": 1.4917805194854736, + "num_input_tokens_seen": 600351824, + "step": 3473 + }, + { + "epoch": 1.3305247031788587, + "grad_norm": 128.1187390726551, + "learning_rate": 5e-06, + "loss": 1.6612, + "num_input_tokens_seen": 600524840, + "step": 3474 + }, + { + "epoch": 1.3305247031788587, + "loss": 1.6536750793457031, + "loss_ce": 0.2202017456293106, + "loss_iou": 0.6750801205635071, + "loss_num": 0.0166015625, + "loss_xval": 1.4334732294082642, + "num_input_tokens_seen": 600524840, + "step": 3474 + }, + { + "epoch": 1.3309076981999235, + "grad_norm": 149.5298696161023, + "learning_rate": 5e-06, + "loss": 1.7771, + "num_input_tokens_seen": 600697480, + "step": 3475 + }, + { + "epoch": 1.3309076981999235, + "loss": 1.8149094581604004, + "loss_ce": 0.1902473270893097, + "loss_iou": 0.7272480130195618, + "loss_num": 0.033935546875, + "loss_xval": 1.6246620416641235, + "num_input_tokens_seen": 600697480, + "step": 3475 + }, + { + "epoch": 1.3312906932209883, + "grad_norm": 113.263239695, + "learning_rate": 5e-06, + "loss": 1.6474, + "num_input_tokens_seen": 600870552, + "step": 3476 + }, + { + "epoch": 1.3312906932209883, + "loss": 1.6476244926452637, + "loss_ce": 0.2178780734539032, + "loss_iou": 0.668364405632019, + "loss_num": 0.0186767578125, + "loss_xval": 1.429746389389038, + "num_input_tokens_seen": 600870552, + "step": 3476 + }, + { + "epoch": 1.3316736882420528, + "grad_norm": 215.23388526200247, + "learning_rate": 5e-06, + "loss": 1.65, + "num_input_tokens_seen": 601043640, + "step": 3477 + }, + { + "epoch": 1.3316736882420528, + "loss": 1.5874621868133545, + "loss_ce": 0.25152575969696045, + "loss_iou": 0.6199029684066772, + "loss_num": 0.019287109375, + "loss_xval": 1.3359363079071045, + "num_input_tokens_seen": 601043640, + "step": 3477 + }, + { + "epoch": 1.3320566832631175, + "grad_norm": 145.5701925704748, + "learning_rate": 5e-06, + "loss": 1.7505, + "num_input_tokens_seen": 601216648, + "step": 3478 + }, + { + "epoch": 1.3320566832631175, + "loss": 1.7719186544418335, + "loss_ce": 0.2581689655780792, + "loss_iou": 0.6935508251190186, + "loss_num": 0.025390625, + "loss_xval": 1.513749599456787, + "num_input_tokens_seen": 601216648, + "step": 3478 + }, + { + "epoch": 1.3324396782841823, + "grad_norm": 99.1289875141019, + "learning_rate": 5e-06, + "loss": 1.3603, + "num_input_tokens_seen": 601389632, + "step": 3479 + }, + { + "epoch": 1.3324396782841823, + "loss": 1.467305064201355, + "loss_ce": 0.17460595071315765, + "loss_iou": 0.6101099252700806, + "loss_num": 0.0145263671875, + "loss_xval": 1.2926990985870361, + "num_input_tokens_seen": 601389632, + "step": 3479 + }, + { + "epoch": 1.332822673305247, + "grad_norm": 158.04506298297707, + "learning_rate": 5e-06, + "loss": 1.5577, + "num_input_tokens_seen": 601562648, + "step": 3480 + }, + { + "epoch": 1.332822673305247, + "loss": 1.596683144569397, + "loss_ce": 0.2407524287700653, + "loss_iou": 0.6262075901031494, + "loss_num": 0.020751953125, + "loss_xval": 1.3559308052062988, + "num_input_tokens_seen": 601562648, + "step": 3480 + }, + { + "epoch": 1.3332056683263118, + "grad_norm": 186.04044373935972, + "learning_rate": 5e-06, + "loss": 1.6341, + "num_input_tokens_seen": 601735416, + "step": 3481 + }, + { + "epoch": 1.3332056683263118, + "loss": 1.6800787448883057, + "loss_ce": 0.22159579396247864, + "loss_iou": 0.6769953966140747, + "loss_num": 0.0208740234375, + "loss_xval": 1.4584829807281494, + "num_input_tokens_seen": 601735416, + "step": 3481 + }, + { + "epoch": 1.3335886633473764, + "grad_norm": 78.26524965179112, + "learning_rate": 5e-06, + "loss": 2.0157, + "num_input_tokens_seen": 601908736, + "step": 3482 + }, + { + "epoch": 1.3335886633473764, + "loss": 2.0095057487487793, + "loss_ce": 0.2427942454814911, + "loss_iou": 0.8008362054824829, + "loss_num": 0.032958984375, + "loss_xval": 1.7667114734649658, + "num_input_tokens_seen": 601908736, + "step": 3482 + }, + { + "epoch": 1.3339716583684411, + "grad_norm": 58.68459387284598, + "learning_rate": 5e-06, + "loss": 1.4854, + "num_input_tokens_seen": 602081568, + "step": 3483 + }, + { + "epoch": 1.3339716583684411, + "loss": 1.6807550191879272, + "loss_ce": 0.20905466377735138, + "loss_iou": 0.6858318448066711, + "loss_num": 0.02001953125, + "loss_xval": 1.4717003107070923, + "num_input_tokens_seen": 602081568, + "step": 3483 + }, + { + "epoch": 1.3343546533895059, + "grad_norm": 225.15046392943285, + "learning_rate": 5e-06, + "loss": 1.4289, + "num_input_tokens_seen": 602254184, + "step": 3484 + }, + { + "epoch": 1.3343546533895059, + "loss": 1.378389596939087, + "loss_ce": 0.2561156153678894, + "loss_iou": 0.5120189189910889, + "loss_num": 0.0196533203125, + "loss_xval": 1.1222739219665527, + "num_input_tokens_seen": 602254184, + "step": 3484 + }, + { + "epoch": 1.3347376484105706, + "grad_norm": 149.07422323153412, + "learning_rate": 5e-06, + "loss": 1.6976, + "num_input_tokens_seen": 602426936, + "step": 3485 + }, + { + "epoch": 1.3347376484105706, + "loss": 1.577803134918213, + "loss_ce": 0.18569129705429077, + "loss_iou": 0.6507525444030762, + "loss_num": 0.01806640625, + "loss_xval": 1.3921117782592773, + "num_input_tokens_seen": 602426936, + "step": 3485 + }, + { + "epoch": 1.3351206434316354, + "grad_norm": 145.89055009931414, + "learning_rate": 5e-06, + "loss": 1.558, + "num_input_tokens_seen": 602599912, + "step": 3486 + }, + { + "epoch": 1.3351206434316354, + "loss": 1.4918200969696045, + "loss_ce": 0.2002352774143219, + "loss_iou": 0.5969338417053223, + "loss_num": 0.01953125, + "loss_xval": 1.2915849685668945, + "num_input_tokens_seen": 602599912, + "step": 3486 + }, + { + "epoch": 1.3355036384527001, + "grad_norm": 131.9695778630955, + "learning_rate": 5e-06, + "loss": 1.9589, + "num_input_tokens_seen": 602772952, + "step": 3487 + }, + { + "epoch": 1.3355036384527001, + "loss": 1.8470755815505981, + "loss_ce": 0.21932345628738403, + "loss_iou": 0.7450588941574097, + "loss_num": 0.027587890625, + "loss_xval": 1.6277520656585693, + "num_input_tokens_seen": 602772952, + "step": 3487 + }, + { + "epoch": 1.335886633473765, + "grad_norm": 94.05160146535559, + "learning_rate": 5e-06, + "loss": 1.4983, + "num_input_tokens_seen": 602945816, + "step": 3488 + }, + { + "epoch": 1.335886633473765, + "loss": 1.445399284362793, + "loss_ce": 0.20202450454235077, + "loss_iou": 0.5794968008995056, + "loss_num": 0.016845703125, + "loss_xval": 1.2433747053146362, + "num_input_tokens_seen": 602945816, + "step": 3488 + }, + { + "epoch": 1.3362696284948297, + "grad_norm": 102.22866160437519, + "learning_rate": 5e-06, + "loss": 1.3725, + "num_input_tokens_seen": 603118896, + "step": 3489 + }, + { + "epoch": 1.3362696284948297, + "loss": 1.4227652549743652, + "loss_ce": 0.19586127996444702, + "loss_iou": 0.5797452926635742, + "loss_num": 0.01348876953125, + "loss_xval": 1.2269039154052734, + "num_input_tokens_seen": 603118896, + "step": 3489 + }, + { + "epoch": 1.3366526235158944, + "grad_norm": 111.9340649797218, + "learning_rate": 5e-06, + "loss": 1.565, + "num_input_tokens_seen": 603291552, + "step": 3490 + }, + { + "epoch": 1.3366526235158944, + "loss": 1.350496768951416, + "loss_ce": 0.20106390118598938, + "loss_iou": 0.526208758354187, + "loss_num": 0.0194091796875, + "loss_xval": 1.149432897567749, + "num_input_tokens_seen": 603291552, + "step": 3490 + }, + { + "epoch": 1.337035618536959, + "grad_norm": 123.09725427670344, + "learning_rate": 5e-06, + "loss": 1.695, + "num_input_tokens_seen": 603460576, + "step": 3491 + }, + { + "epoch": 1.337035618536959, + "loss": 1.5950605869293213, + "loss_ce": 0.1703254133462906, + "loss_iou": 0.6679187417030334, + "loss_num": 0.017822265625, + "loss_xval": 1.424735188484192, + "num_input_tokens_seen": 603460576, + "step": 3491 + }, + { + "epoch": 1.3374186135580237, + "grad_norm": 260.9762500876592, + "learning_rate": 5e-06, + "loss": 2.0278, + "num_input_tokens_seen": 603633856, + "step": 3492 + }, + { + "epoch": 1.3374186135580237, + "loss": 1.98429274559021, + "loss_ce": 0.2618187665939331, + "loss_iou": 0.8138126134872437, + "loss_num": 0.0189208984375, + "loss_xval": 1.7224738597869873, + "num_input_tokens_seen": 603633856, + "step": 3492 + }, + { + "epoch": 1.3378016085790885, + "grad_norm": 128.98469840714696, + "learning_rate": 5e-06, + "loss": 1.9122, + "num_input_tokens_seen": 603806712, + "step": 3493 + }, + { + "epoch": 1.3378016085790885, + "loss": 1.9915841817855835, + "loss_ce": 0.19809076189994812, + "loss_iou": 0.8212156891822815, + "loss_num": 0.0302734375, + "loss_xval": 1.793493390083313, + "num_input_tokens_seen": 603806712, + "step": 3493 + }, + { + "epoch": 1.3381846036001532, + "grad_norm": 112.9546501008244, + "learning_rate": 5e-06, + "loss": 1.5566, + "num_input_tokens_seen": 603979632, + "step": 3494 + }, + { + "epoch": 1.3381846036001532, + "loss": 1.6332077980041504, + "loss_ce": 0.1553523987531662, + "loss_iou": 0.6758478283882141, + "loss_num": 0.0252685546875, + "loss_xval": 1.4778553247451782, + "num_input_tokens_seen": 603979632, + "step": 3494 + }, + { + "epoch": 1.338567598621218, + "grad_norm": 112.76004031268579, + "learning_rate": 5e-06, + "loss": 2.0918, + "num_input_tokens_seen": 604152776, + "step": 3495 + }, + { + "epoch": 1.338567598621218, + "loss": 2.122227191925049, + "loss_ce": 0.20239807665348053, + "loss_iou": 0.8473047018051147, + "loss_num": 0.044921875, + "loss_xval": 1.919829249382019, + "num_input_tokens_seen": 604152776, + "step": 3495 + }, + { + "epoch": 1.3389505936422825, + "grad_norm": 61.13943662348016, + "learning_rate": 5e-06, + "loss": 1.2617, + "num_input_tokens_seen": 604325528, + "step": 3496 + }, + { + "epoch": 1.3389505936422825, + "loss": 1.3334009647369385, + "loss_ce": 0.2177307903766632, + "loss_iou": 0.5026745796203613, + "loss_num": 0.0220947265625, + "loss_xval": 1.1156702041625977, + "num_input_tokens_seen": 604325528, + "step": 3496 + }, + { + "epoch": 1.3393335886633473, + "grad_norm": 125.2813718635993, + "learning_rate": 5e-06, + "loss": 1.6588, + "num_input_tokens_seen": 604498376, + "step": 3497 + }, + { + "epoch": 1.3393335886633473, + "loss": 1.6947588920593262, + "loss_ce": 0.1737125813961029, + "loss_iou": 0.6697334051132202, + "loss_num": 0.036376953125, + "loss_xval": 1.5210464000701904, + "num_input_tokens_seen": 604498376, + "step": 3497 + }, + { + "epoch": 1.339716583684412, + "grad_norm": 110.09947659583729, + "learning_rate": 5e-06, + "loss": 1.9428, + "num_input_tokens_seen": 604671360, + "step": 3498 + }, + { + "epoch": 1.339716583684412, + "loss": 1.9898093938827515, + "loss_ce": 0.21323680877685547, + "loss_iou": 0.806773841381073, + "loss_num": 0.03271484375, + "loss_xval": 1.776572585105896, + "num_input_tokens_seen": 604671360, + "step": 3498 + }, + { + "epoch": 1.3400995787054768, + "grad_norm": 249.006305925835, + "learning_rate": 5e-06, + "loss": 1.37, + "num_input_tokens_seen": 604844368, + "step": 3499 + }, + { + "epoch": 1.3400995787054768, + "loss": 1.4726142883300781, + "loss_ce": 0.1997637003660202, + "loss_iou": 0.5933191776275635, + "loss_num": 0.0172119140625, + "loss_xval": 1.272850513458252, + "num_input_tokens_seen": 604844368, + "step": 3499 + }, + { + "epoch": 1.3404825737265416, + "grad_norm": 140.0807669408355, + "learning_rate": 5e-06, + "loss": 2.0479, + "num_input_tokens_seen": 605017096, + "step": 3500 + }, + { + "epoch": 1.3404825737265416, + "eval_websight_new_CIoU": 0.9405237138271332, + "eval_websight_new_GIoU": 0.940310925245285, + "eval_websight_new_IoU": 0.9406976103782654, + "eval_websight_new_MAE_all": 0.006035956786945462, + "eval_websight_new_MAE_h": 0.004176952177658677, + "eval_websight_new_MAE_w": 0.009054332505911589, + "eval_websight_new_MAE_x": 0.005822476930916309, + "eval_websight_new_MAE_y": 0.005090065533295274, + "eval_websight_new_NUM_probability": 0.04790673404932022, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 1.0469005107879639, + "eval_websight_new_loss_ce": 0.49881765246391296, + "eval_websight_new_loss_iou": 0.2625816911458969, + "eval_websight_new_loss_num": 0.00641632080078125, + "eval_websight_new_loss_xval": 0.5572678744792938, + "eval_websight_new_runtime": 56.503, + "eval_websight_new_samples_per_second": 0.885, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 605017096, + "step": 3500 + }, + { + "epoch": 1.3404825737265416, + "eval_seeclick_CIoU": 0.5847286581993103, + "eval_seeclick_GIoU": 0.6049769222736359, + "eval_seeclick_IoU": 0.6383702158927917, + "eval_seeclick_MAE_all": 0.07808250188827515, + "eval_seeclick_MAE_h": 0.029664406552910805, + "eval_seeclick_MAE_w": 0.1290624961256981, + "eval_seeclick_MAE_x": 0.11963751167058945, + "eval_seeclick_MAE_y": 0.03396560251712799, + "eval_seeclick_NUM_probability": 0.24144858866930008, + "eval_seeclick_inside_bbox": 0.8142361044883728, + "eval_seeclick_loss": 1.9916496276855469, + "eval_seeclick_loss_ce": 0.33574724197387695, + "eval_seeclick_loss_iou": 0.6812882423400879, + "eval_seeclick_loss_num": 0.0514373779296875, + "eval_seeclick_loss_xval": 1.6200228333473206, + "eval_seeclick_runtime": 92.076, + "eval_seeclick_samples_per_second": 0.543, + "eval_seeclick_steps_per_second": 0.022, + "num_input_tokens_seen": 605017096, + "step": 3500 + }, + { + "epoch": 1.3404825737265416, + "eval_icons_CIoU": 0.8462036848068237, + "eval_icons_GIoU": 0.8470831513404846, + "eval_icons_IoU": 0.8618262708187103, + "eval_icons_MAE_all": 0.027814035303890705, + "eval_icons_MAE_h": 0.014042175374925137, + "eval_icons_MAE_w": 0.04157224856317043, + "eval_icons_MAE_x": 0.04165832698345184, + "eval_icons_MAE_y": 0.01398338982835412, + "eval_icons_NUM_probability": 0.32583674788475037, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 1.869412899017334, + "eval_icons_loss_ce": 0.40531717240810394, + "eval_icons_loss_iou": 0.6782236099243164, + "eval_icons_loss_num": 0.013891220092773438, + "eval_icons_loss_xval": 1.425851821899414, + "eval_icons_runtime": 84.4096, + "eval_icons_samples_per_second": 0.592, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 605017096, + "step": 3500 + }, + { + "epoch": 1.3404825737265416, + "loss": 1.8371400833129883, + "loss_ce": 0.40471842885017395, + "loss_iou": 0.6813368797302246, + "loss_num": 0.01397705078125, + "loss_xval": 1.4324216842651367, + "num_input_tokens_seen": 605017096, + "step": 3500 + }, + { + "epoch": 1.3408655687476063, + "grad_norm": 112.9518999820373, + "learning_rate": 5e-06, + "loss": 1.4466, + "num_input_tokens_seen": 605189832, + "step": 3501 + }, + { + "epoch": 1.3408655687476063, + "loss": 1.4242026805877686, + "loss_ce": 0.2152501493692398, + "loss_iou": 0.5491173267364502, + "loss_num": 0.022216796875, + "loss_xval": 1.2089524269104004, + "num_input_tokens_seen": 605189832, + "step": 3501 + }, + { + "epoch": 1.341248563768671, + "grad_norm": 93.35543209952594, + "learning_rate": 5e-06, + "loss": 1.6746, + "num_input_tokens_seen": 605362848, + "step": 3502 + }, + { + "epoch": 1.341248563768671, + "loss": 1.530531883239746, + "loss_ce": 0.20783312618732452, + "loss_iou": 0.6111785173416138, + "loss_num": 0.0201416015625, + "loss_xval": 1.3226988315582275, + "num_input_tokens_seen": 605362848, + "step": 3502 + }, + { + "epoch": 1.3416315587897358, + "grad_norm": 144.6148396653385, + "learning_rate": 5e-06, + "loss": 1.3291, + "num_input_tokens_seen": 605535600, + "step": 3503 + }, + { + "epoch": 1.3416315587897358, + "loss": 1.2425280809402466, + "loss_ce": 0.15439146757125854, + "loss_iou": 0.49774259328842163, + "loss_num": 0.0185546875, + "loss_xval": 1.0881365537643433, + "num_input_tokens_seen": 605535600, + "step": 3503 + }, + { + "epoch": 1.3420145538108006, + "grad_norm": 108.5974686234195, + "learning_rate": 5e-06, + "loss": 1.8874, + "num_input_tokens_seen": 605708712, + "step": 3504 + }, + { + "epoch": 1.3420145538108006, + "loss": 1.9352554082870483, + "loss_ce": 0.2253366857767105, + "loss_iou": 0.7844027280807495, + "loss_num": 0.0281982421875, + "loss_xval": 1.709918737411499, + "num_input_tokens_seen": 605708712, + "step": 3504 + }, + { + "epoch": 1.3423975488318651, + "grad_norm": 87.69758816123331, + "learning_rate": 5e-06, + "loss": 1.356, + "num_input_tokens_seen": 605881576, + "step": 3505 + }, + { + "epoch": 1.3423975488318651, + "loss": 1.414637565612793, + "loss_ce": 0.2152813822031021, + "loss_iou": 0.544441282749176, + "loss_num": 0.0220947265625, + "loss_xval": 1.199356198310852, + "num_input_tokens_seen": 605881576, + "step": 3505 + }, + { + "epoch": 1.3427805438529299, + "grad_norm": 187.31954750786883, + "learning_rate": 5e-06, + "loss": 1.5291, + "num_input_tokens_seen": 606054360, + "step": 3506 + }, + { + "epoch": 1.3427805438529299, + "loss": 1.408973217010498, + "loss_ce": 0.17048245668411255, + "loss_iou": 0.5761851072311401, + "loss_num": 0.0172119140625, + "loss_xval": 1.2384908199310303, + "num_input_tokens_seen": 606054360, + "step": 3506 + }, + { + "epoch": 1.3431635388739946, + "grad_norm": 128.05965769265785, + "learning_rate": 5e-06, + "loss": 1.7629, + "num_input_tokens_seen": 606227248, + "step": 3507 + }, + { + "epoch": 1.3431635388739946, + "loss": 1.6909637451171875, + "loss_ce": 0.22392970323562622, + "loss_iou": 0.665676474571228, + "loss_num": 0.027099609375, + "loss_xval": 1.467034101486206, + "num_input_tokens_seen": 606227248, + "step": 3507 + }, + { + "epoch": 1.3435465338950594, + "grad_norm": 126.99911627210503, + "learning_rate": 5e-06, + "loss": 1.4575, + "num_input_tokens_seen": 606400064, + "step": 3508 + }, + { + "epoch": 1.3435465338950594, + "loss": 1.6847248077392578, + "loss_ce": 0.22748036682605743, + "loss_iou": 0.6699368953704834, + "loss_num": 0.0234375, + "loss_xval": 1.4572443962097168, + "num_input_tokens_seen": 606400064, + "step": 3508 + }, + { + "epoch": 1.3439295289161242, + "grad_norm": 140.8328950124996, + "learning_rate": 5e-06, + "loss": 1.9108, + "num_input_tokens_seen": 606573192, + "step": 3509 + }, + { + "epoch": 1.3439295289161242, + "loss": 1.89664626121521, + "loss_ce": 0.19021235406398773, + "loss_iou": 0.7913883328437805, + "loss_num": 0.0247802734375, + "loss_xval": 1.706433892250061, + "num_input_tokens_seen": 606573192, + "step": 3509 + }, + { + "epoch": 1.3443125239371887, + "grad_norm": 101.83869826114956, + "learning_rate": 5e-06, + "loss": 1.6246, + "num_input_tokens_seen": 606745992, + "step": 3510 + }, + { + "epoch": 1.3443125239371887, + "loss": 1.769139051437378, + "loss_ce": 0.1718544363975525, + "loss_iou": 0.7396976351737976, + "loss_num": 0.0235595703125, + "loss_xval": 1.5972846746444702, + "num_input_tokens_seen": 606745992, + "step": 3510 + }, + { + "epoch": 1.3446955189582535, + "grad_norm": 140.4556739205744, + "learning_rate": 5e-06, + "loss": 1.4881, + "num_input_tokens_seen": 606919272, + "step": 3511 + }, + { + "epoch": 1.3446955189582535, + "loss": 1.4090797901153564, + "loss_ce": 0.22030985355377197, + "loss_iou": 0.5447786450386047, + "loss_num": 0.0198974609375, + "loss_xval": 1.1887699365615845, + "num_input_tokens_seen": 606919272, + "step": 3511 + }, + { + "epoch": 1.3450785139793182, + "grad_norm": 113.42709916370082, + "learning_rate": 5e-06, + "loss": 1.9464, + "num_input_tokens_seen": 607092528, + "step": 3512 + }, + { + "epoch": 1.3450785139793182, + "loss": 1.916111946105957, + "loss_ce": 0.25201815366744995, + "loss_iou": 0.7657628059387207, + "loss_num": 0.0264892578125, + "loss_xval": 1.6640939712524414, + "num_input_tokens_seen": 607092528, + "step": 3512 + }, + { + "epoch": 1.345461509000383, + "grad_norm": 91.05770661450043, + "learning_rate": 5e-06, + "loss": 1.4648, + "num_input_tokens_seen": 607265624, + "step": 3513 + }, + { + "epoch": 1.345461509000383, + "loss": 1.5231677293777466, + "loss_ce": 0.2110317200422287, + "loss_iou": 0.5995036959648132, + "loss_num": 0.022705078125, + "loss_xval": 1.3121360540390015, + "num_input_tokens_seen": 607265624, + "step": 3513 + }, + { + "epoch": 1.3458445040214477, + "grad_norm": 191.65907520626683, + "learning_rate": 5e-06, + "loss": 1.4242, + "num_input_tokens_seen": 607438648, + "step": 3514 + }, + { + "epoch": 1.3458445040214477, + "loss": 1.2999374866485596, + "loss_ce": 0.1912992000579834, + "loss_iou": 0.5133645534515381, + "loss_num": 0.016357421875, + "loss_xval": 1.1086382865905762, + "num_input_tokens_seen": 607438648, + "step": 3514 + }, + { + "epoch": 1.3462274990425125, + "grad_norm": 115.8155073206714, + "learning_rate": 5e-06, + "loss": 2.0663, + "num_input_tokens_seen": 607611496, + "step": 3515 + }, + { + "epoch": 1.3462274990425125, + "loss": 1.9306249618530273, + "loss_ce": 0.15561552345752716, + "loss_iou": 0.7690355181694031, + "loss_num": 0.04736328125, + "loss_xval": 1.7486423254013062, + "num_input_tokens_seen": 607611496, + "step": 3515 + }, + { + "epoch": 1.3466104940635772, + "grad_norm": 185.59225077752072, + "learning_rate": 5e-06, + "loss": 1.6295, + "num_input_tokens_seen": 607784448, + "step": 3516 + }, + { + "epoch": 1.3466104940635772, + "loss": 1.791547417640686, + "loss_ce": 0.2342563271522522, + "loss_iou": 0.7256364822387695, + "loss_num": 0.021240234375, + "loss_xval": 1.557291030883789, + "num_input_tokens_seen": 607784448, + "step": 3516 + }, + { + "epoch": 1.346993489084642, + "grad_norm": 248.29660682207296, + "learning_rate": 5e-06, + "loss": 2.5236, + "num_input_tokens_seen": 607957392, + "step": 3517 + }, + { + "epoch": 1.346993489084642, + "loss": 2.4775795936584473, + "loss_ce": 0.18136361241340637, + "loss_iou": 1.0829529762268066, + "loss_num": 0.0260009765625, + "loss_xval": 2.2962160110473633, + "num_input_tokens_seen": 607957392, + "step": 3517 + }, + { + "epoch": 1.3473764841057068, + "grad_norm": 125.59973971151662, + "learning_rate": 5e-06, + "loss": 1.7813, + "num_input_tokens_seen": 608130072, + "step": 3518 + }, + { + "epoch": 1.3473764841057068, + "loss": 1.811166524887085, + "loss_ce": 0.20386719703674316, + "loss_iou": 0.7005002498626709, + "loss_num": 0.041259765625, + "loss_xval": 1.6072993278503418, + "num_input_tokens_seen": 608130072, + "step": 3518 + }, + { + "epoch": 1.3477594791267713, + "grad_norm": 343.13462323871494, + "learning_rate": 5e-06, + "loss": 1.9953, + "num_input_tokens_seen": 608303304, + "step": 3519 + }, + { + "epoch": 1.3477594791267713, + "loss": 2.134481430053711, + "loss_ce": 0.213484525680542, + "loss_iou": 0.8866459727287292, + "loss_num": 0.029541015625, + "loss_xval": 1.9209970235824585, + "num_input_tokens_seen": 608303304, + "step": 3519 + }, + { + "epoch": 1.348142474147836, + "grad_norm": 124.55212862480187, + "learning_rate": 5e-06, + "loss": 1.9489, + "num_input_tokens_seen": 608476184, + "step": 3520 + }, + { + "epoch": 1.348142474147836, + "loss": 1.8672987222671509, + "loss_ce": 0.25148433446884155, + "loss_iou": 0.723404049873352, + "loss_num": 0.03369140625, + "loss_xval": 1.615814447402954, + "num_input_tokens_seen": 608476184, + "step": 3520 + }, + { + "epoch": 1.3485254691689008, + "grad_norm": 289.87028842325446, + "learning_rate": 5e-06, + "loss": 1.78, + "num_input_tokens_seen": 608649008, + "step": 3521 + }, + { + "epoch": 1.3485254691689008, + "loss": 1.7762246131896973, + "loss_ce": 0.18558359146118164, + "loss_iou": 0.7254962921142578, + "loss_num": 0.0279541015625, + "loss_xval": 1.5906410217285156, + "num_input_tokens_seen": 608649008, + "step": 3521 + }, + { + "epoch": 1.3489084641899656, + "grad_norm": 88.59708559388797, + "learning_rate": 5e-06, + "loss": 1.5324, + "num_input_tokens_seen": 608821992, + "step": 3522 + }, + { + "epoch": 1.3489084641899656, + "loss": 1.530510663986206, + "loss_ce": 0.22984153032302856, + "loss_iou": 0.5942737460136414, + "loss_num": 0.0224609375, + "loss_xval": 1.3006690740585327, + "num_input_tokens_seen": 608821992, + "step": 3522 + }, + { + "epoch": 1.3492914592110303, + "grad_norm": 100.5229129843677, + "learning_rate": 5e-06, + "loss": 1.496, + "num_input_tokens_seen": 608994904, + "step": 3523 + }, + { + "epoch": 1.3492914592110303, + "loss": 1.6989405155181885, + "loss_ce": 0.18340282142162323, + "loss_iou": 0.6874868869781494, + "loss_num": 0.028076171875, + "loss_xval": 1.5155377388000488, + "num_input_tokens_seen": 608994904, + "step": 3523 + }, + { + "epoch": 1.3496744542320949, + "grad_norm": 112.6623939901179, + "learning_rate": 5e-06, + "loss": 1.8221, + "num_input_tokens_seen": 609168032, + "step": 3524 + }, + { + "epoch": 1.3496744542320949, + "loss": 1.9208285808563232, + "loss_ce": 0.21432477235794067, + "loss_iou": 0.7663684487342834, + "loss_num": 0.03466796875, + "loss_xval": 1.706503987312317, + "num_input_tokens_seen": 609168032, + "step": 3524 + }, + { + "epoch": 1.3500574492531596, + "grad_norm": 95.7038487178071, + "learning_rate": 5e-06, + "loss": 1.6282, + "num_input_tokens_seen": 609340992, + "step": 3525 + }, + { + "epoch": 1.3500574492531596, + "loss": 1.6250360012054443, + "loss_ce": 0.2213936150074005, + "loss_iou": 0.6471489667892456, + "loss_num": 0.0218505859375, + "loss_xval": 1.4036424160003662, + "num_input_tokens_seen": 609340992, + "step": 3525 + }, + { + "epoch": 1.3504404442742244, + "grad_norm": 222.96820677305527, + "learning_rate": 5e-06, + "loss": 1.5, + "num_input_tokens_seen": 609513624, + "step": 3526 + }, + { + "epoch": 1.3504404442742244, + "loss": 1.4050097465515137, + "loss_ce": 0.17917197942733765, + "loss_iou": 0.5576211214065552, + "loss_num": 0.0220947265625, + "loss_xval": 1.2258379459381104, + "num_input_tokens_seen": 609513624, + "step": 3526 + }, + { + "epoch": 1.3508234392952891, + "grad_norm": 156.76325591007046, + "learning_rate": 5e-06, + "loss": 1.7096, + "num_input_tokens_seen": 609686208, + "step": 3527 + }, + { + "epoch": 1.3508234392952891, + "loss": 1.935443639755249, + "loss_ce": 0.1976446509361267, + "loss_iou": 0.7885772585868835, + "loss_num": 0.0322265625, + "loss_xval": 1.737799048423767, + "num_input_tokens_seen": 609686208, + "step": 3527 + }, + { + "epoch": 1.351206434316354, + "grad_norm": 290.8758488378372, + "learning_rate": 5e-06, + "loss": 2.0351, + "num_input_tokens_seen": 609859128, + "step": 3528 + }, + { + "epoch": 1.351206434316354, + "loss": 1.888675332069397, + "loss_ce": 0.22578385472297668, + "loss_iou": 0.7497501373291016, + "loss_num": 0.03271484375, + "loss_xval": 1.6628913879394531, + "num_input_tokens_seen": 609859128, + "step": 3528 + }, + { + "epoch": 1.3515894293374187, + "grad_norm": 183.54240686583393, + "learning_rate": 5e-06, + "loss": 1.5176, + "num_input_tokens_seen": 610032096, + "step": 3529 + }, + { + "epoch": 1.3515894293374187, + "loss": 1.494391918182373, + "loss_ce": 0.1818937063217163, + "loss_iou": 0.6103659272193909, + "loss_num": 0.018310546875, + "loss_xval": 1.3124982118606567, + "num_input_tokens_seen": 610032096, + "step": 3529 + }, + { + "epoch": 1.3519724243584834, + "grad_norm": 416.0503668947494, + "learning_rate": 5e-06, + "loss": 2.2382, + "num_input_tokens_seen": 610201696, + "step": 3530 + }, + { + "epoch": 1.3519724243584834, + "loss": 2.271575450897217, + "loss_ce": 0.20846495032310486, + "loss_iou": 0.9695436954498291, + "loss_num": 0.0247802734375, + "loss_xval": 2.063110828399658, + "num_input_tokens_seen": 610201696, + "step": 3530 + }, + { + "epoch": 1.3523554193795482, + "grad_norm": 130.61666940566212, + "learning_rate": 5e-06, + "loss": 1.8439, + "num_input_tokens_seen": 610374552, + "step": 3531 + }, + { + "epoch": 1.3523554193795482, + "loss": 1.9753737449645996, + "loss_ce": 0.18173204362392426, + "loss_iou": 0.8015450239181519, + "loss_num": 0.0380859375, + "loss_xval": 1.7936418056488037, + "num_input_tokens_seen": 610374552, + "step": 3531 + }, + { + "epoch": 1.3527384144006127, + "grad_norm": 194.02855345479304, + "learning_rate": 5e-06, + "loss": 1.6476, + "num_input_tokens_seen": 610547976, + "step": 3532 + }, + { + "epoch": 1.3527384144006127, + "loss": 1.765662670135498, + "loss_ce": 0.24570322036743164, + "loss_iou": 0.7102513313293457, + "loss_num": 0.0198974609375, + "loss_xval": 1.5199594497680664, + "num_input_tokens_seen": 610547976, + "step": 3532 + }, + { + "epoch": 1.3531214094216775, + "grad_norm": 325.080996320109, + "learning_rate": 5e-06, + "loss": 2.3593, + "num_input_tokens_seen": 610721000, + "step": 3533 + }, + { + "epoch": 1.3531214094216775, + "loss": 2.304840326309204, + "loss_ce": 0.21709582209587097, + "loss_iou": 0.9870485067367554, + "loss_num": 0.022705078125, + "loss_xval": 2.0877444744110107, + "num_input_tokens_seen": 610721000, + "step": 3533 + }, + { + "epoch": 1.3535044044427422, + "grad_norm": 83.10097665107112, + "learning_rate": 5e-06, + "loss": 1.6988, + "num_input_tokens_seen": 610893856, + "step": 3534 + }, + { + "epoch": 1.3535044044427422, + "loss": 1.6446635723114014, + "loss_ce": 0.15993928909301758, + "loss_iou": 0.6867896318435669, + "loss_num": 0.022216796875, + "loss_xval": 1.4847242832183838, + "num_input_tokens_seen": 610893856, + "step": 3534 + }, + { + "epoch": 1.353887399463807, + "grad_norm": 101.77705256899391, + "learning_rate": 5e-06, + "loss": 1.4157, + "num_input_tokens_seen": 611066984, + "step": 3535 + }, + { + "epoch": 1.353887399463807, + "loss": 1.5131068229675293, + "loss_ce": 0.1964910626411438, + "loss_iou": 0.6061533093452454, + "loss_num": 0.0208740234375, + "loss_xval": 1.3166157007217407, + "num_input_tokens_seen": 611066984, + "step": 3535 + }, + { + "epoch": 1.3542703944848717, + "grad_norm": 204.98252957985457, + "learning_rate": 5e-06, + "loss": 1.644, + "num_input_tokens_seen": 611240120, + "step": 3536 + }, + { + "epoch": 1.3542703944848717, + "loss": 1.608548641204834, + "loss_ce": 0.22869566082954407, + "loss_iou": 0.6464694738388062, + "loss_num": 0.017333984375, + "loss_xval": 1.3798530101776123, + "num_input_tokens_seen": 611240120, + "step": 3536 + }, + { + "epoch": 1.3546533895059365, + "grad_norm": 109.11836657217839, + "learning_rate": 5e-06, + "loss": 1.7624, + "num_input_tokens_seen": 611412856, + "step": 3537 + }, + { + "epoch": 1.3546533895059365, + "loss": 1.5386110544204712, + "loss_ce": 0.19527839124202728, + "loss_iou": 0.6114704608917236, + "loss_num": 0.0240478515625, + "loss_xval": 1.3433327674865723, + "num_input_tokens_seen": 611412856, + "step": 3537 + }, + { + "epoch": 1.355036384527001, + "grad_norm": 108.76088799857489, + "learning_rate": 5e-06, + "loss": 1.5263, + "num_input_tokens_seen": 611585592, + "step": 3538 + }, + { + "epoch": 1.355036384527001, + "loss": 1.7183423042297363, + "loss_ce": 0.28218644857406616, + "loss_iou": 0.6774896383285522, + "loss_num": 0.0162353515625, + "loss_xval": 1.4361560344696045, + "num_input_tokens_seen": 611585592, + "step": 3538 + }, + { + "epoch": 1.3554193795480658, + "grad_norm": 103.06026257785365, + "learning_rate": 5e-06, + "loss": 1.5056, + "num_input_tokens_seen": 611758768, + "step": 3539 + }, + { + "epoch": 1.3554193795480658, + "loss": 1.5339665412902832, + "loss_ce": 0.19743913412094116, + "loss_iou": 0.6244862079620361, + "loss_num": 0.0174560546875, + "loss_xval": 1.3365273475646973, + "num_input_tokens_seen": 611758768, + "step": 3539 + }, + { + "epoch": 1.3558023745691306, + "grad_norm": 117.67632754300494, + "learning_rate": 5e-06, + "loss": 1.6944, + "num_input_tokens_seen": 611928664, + "step": 3540 + }, + { + "epoch": 1.3558023745691306, + "loss": 1.6984690427780151, + "loss_ce": 0.22155040502548218, + "loss_iou": 0.6877696514129639, + "loss_num": 0.020263671875, + "loss_xval": 1.4769186973571777, + "num_input_tokens_seen": 611928664, + "step": 3540 + }, + { + "epoch": 1.3561853695901953, + "grad_norm": 86.88476948794022, + "learning_rate": 5e-06, + "loss": 1.3772, + "num_input_tokens_seen": 612101776, + "step": 3541 + }, + { + "epoch": 1.3561853695901953, + "loss": 1.4097684621810913, + "loss_ce": 0.1925264596939087, + "loss_iou": 0.5699399709701538, + "loss_num": 0.0155029296875, + "loss_xval": 1.2172420024871826, + "num_input_tokens_seen": 612101776, + "step": 3541 + }, + { + "epoch": 1.35656836461126, + "grad_norm": 104.28539292758965, + "learning_rate": 5e-06, + "loss": 1.4779, + "num_input_tokens_seen": 612275072, + "step": 3542 + }, + { + "epoch": 1.35656836461126, + "loss": 1.4341868162155151, + "loss_ce": 0.18426361680030823, + "loss_iou": 0.5773999691009521, + "loss_num": 0.01904296875, + "loss_xval": 1.2499232292175293, + "num_input_tokens_seen": 612275072, + "step": 3542 + }, + { + "epoch": 1.3569513596323248, + "grad_norm": 137.64803241159876, + "learning_rate": 5e-06, + "loss": 1.6254, + "num_input_tokens_seen": 612447672, + "step": 3543 + }, + { + "epoch": 1.3569513596323248, + "loss": 1.5099307298660278, + "loss_ce": 0.15565082430839539, + "loss_iou": 0.6298681497573853, + "loss_num": 0.0189208984375, + "loss_xval": 1.3542797565460205, + "num_input_tokens_seen": 612447672, + "step": 3543 + }, + { + "epoch": 1.3573343546533896, + "grad_norm": 203.08852558116658, + "learning_rate": 5e-06, + "loss": 1.8637, + "num_input_tokens_seen": 612616752, + "step": 3544 + }, + { + "epoch": 1.3573343546533896, + "loss": 1.918691635131836, + "loss_ce": 0.1849869042634964, + "loss_iou": 0.7682195901870728, + "loss_num": 0.03955078125, + "loss_xval": 1.7337048053741455, + "num_input_tokens_seen": 612616752, + "step": 3544 + }, + { + "epoch": 1.3577173496744543, + "grad_norm": 158.2249014581109, + "learning_rate": 5e-06, + "loss": 1.4388, + "num_input_tokens_seen": 612789736, + "step": 3545 + }, + { + "epoch": 1.3577173496744543, + "loss": 1.4531426429748535, + "loss_ce": 0.12079142779111862, + "loss_iou": 0.6085736751556396, + "loss_num": 0.0230712890625, + "loss_xval": 1.3323512077331543, + "num_input_tokens_seen": 612789736, + "step": 3545 + }, + { + "epoch": 1.3581003446955189, + "grad_norm": 152.06662030528923, + "learning_rate": 5e-06, + "loss": 1.4821, + "num_input_tokens_seen": 612962848, + "step": 3546 + }, + { + "epoch": 1.3581003446955189, + "loss": 1.4804325103759766, + "loss_ce": 0.20803119242191315, + "loss_iou": 0.5983589291572571, + "loss_num": 0.01513671875, + "loss_xval": 1.2724014520645142, + "num_input_tokens_seen": 612962848, + "step": 3546 + }, + { + "epoch": 1.3584833397165836, + "grad_norm": 275.75166928757216, + "learning_rate": 5e-06, + "loss": 1.5002, + "num_input_tokens_seen": 613135504, + "step": 3547 + }, + { + "epoch": 1.3584833397165836, + "loss": 1.507258653640747, + "loss_ce": 0.19036146998405457, + "loss_iou": 0.6261304616928101, + "loss_num": 0.012939453125, + "loss_xval": 1.3168971538543701, + "num_input_tokens_seen": 613135504, + "step": 3547 + }, + { + "epoch": 1.3588663347376484, + "grad_norm": 123.4670584102338, + "learning_rate": 5e-06, + "loss": 1.8353, + "num_input_tokens_seen": 613308312, + "step": 3548 + }, + { + "epoch": 1.3588663347376484, + "loss": 1.8402364253997803, + "loss_ce": 0.21213197708129883, + "loss_iou": 0.7385822534561157, + "loss_num": 0.0301513671875, + "loss_xval": 1.6281044483184814, + "num_input_tokens_seen": 613308312, + "step": 3548 + }, + { + "epoch": 1.3592493297587132, + "grad_norm": 166.29377812510188, + "learning_rate": 5e-06, + "loss": 1.646, + "num_input_tokens_seen": 613481424, + "step": 3549 + }, + { + "epoch": 1.3592493297587132, + "loss": 1.6446444988250732, + "loss_ce": 0.15899218618869781, + "loss_iou": 0.6780067682266235, + "loss_num": 0.02587890625, + "loss_xval": 1.485652208328247, + "num_input_tokens_seen": 613481424, + "step": 3549 + }, + { + "epoch": 1.359632324779778, + "grad_norm": 160.32427315708424, + "learning_rate": 5e-06, + "loss": 1.5357, + "num_input_tokens_seen": 613654432, + "step": 3550 + }, + { + "epoch": 1.359632324779778, + "loss": 1.657616138458252, + "loss_ce": 0.2261035144329071, + "loss_iou": 0.6774415969848633, + "loss_num": 0.01531982421875, + "loss_xval": 1.4315128326416016, + "num_input_tokens_seen": 613654432, + "step": 3550 + }, + { + "epoch": 1.3600153198008427, + "grad_norm": 130.92150090516574, + "learning_rate": 5e-06, + "loss": 1.5814, + "num_input_tokens_seen": 613827304, + "step": 3551 + }, + { + "epoch": 1.3600153198008427, + "loss": 1.6296170949935913, + "loss_ce": 0.2307167500257492, + "loss_iou": 0.6356074213981628, + "loss_num": 0.0255126953125, + "loss_xval": 1.3989003896713257, + "num_input_tokens_seen": 613827304, + "step": 3551 + }, + { + "epoch": 1.3603983148219072, + "grad_norm": 145.42628896473127, + "learning_rate": 5e-06, + "loss": 1.4659, + "num_input_tokens_seen": 614000248, + "step": 3552 + }, + { + "epoch": 1.3603983148219072, + "loss": 1.3170982599258423, + "loss_ce": 0.16705623269081116, + "loss_iou": 0.5328152179718018, + "loss_num": 0.016845703125, + "loss_xval": 1.1439690589904785, + "num_input_tokens_seen": 614000248, + "step": 3552 + }, + { + "epoch": 1.360781309842972, + "grad_norm": 121.05505468908999, + "learning_rate": 5e-06, + "loss": 1.6945, + "num_input_tokens_seen": 614173160, + "step": 3553 + }, + { + "epoch": 1.360781309842972, + "loss": 1.5743227005004883, + "loss_ce": 0.16809436678886414, + "loss_iou": 0.6442152857780457, + "loss_num": 0.0235595703125, + "loss_xval": 1.4062284231185913, + "num_input_tokens_seen": 614173160, + "step": 3553 + }, + { + "epoch": 1.3611643048640367, + "grad_norm": 112.7108650207361, + "learning_rate": 5e-06, + "loss": 1.3712, + "num_input_tokens_seen": 614346064, + "step": 3554 + }, + { + "epoch": 1.3611643048640367, + "loss": 1.3476276397705078, + "loss_ce": 0.20763450860977173, + "loss_iou": 0.5259749293327332, + "loss_num": 0.017578125, + "loss_xval": 1.1399930715560913, + "num_input_tokens_seen": 614346064, + "step": 3554 + }, + { + "epoch": 1.3615472998851015, + "grad_norm": 330.86850954956566, + "learning_rate": 5e-06, + "loss": 1.7237, + "num_input_tokens_seen": 614518840, + "step": 3555 + }, + { + "epoch": 1.3615472998851015, + "loss": 1.6065523624420166, + "loss_ce": 0.16759878396987915, + "loss_iou": 0.6765080690383911, + "loss_num": 0.0172119140625, + "loss_xval": 1.4389536380767822, + "num_input_tokens_seen": 614518840, + "step": 3555 + }, + { + "epoch": 1.3619302949061662, + "grad_norm": 142.18084009906164, + "learning_rate": 5e-06, + "loss": 1.7091, + "num_input_tokens_seen": 614691528, + "step": 3556 + }, + { + "epoch": 1.3619302949061662, + "loss": 1.593044400215149, + "loss_ce": 0.16852545738220215, + "loss_iou": 0.6348363757133484, + "loss_num": 0.031005859375, + "loss_xval": 1.4245189428329468, + "num_input_tokens_seen": 614691528, + "step": 3556 + }, + { + "epoch": 1.362313289927231, + "grad_norm": 200.13824810397716, + "learning_rate": 5e-06, + "loss": 1.6684, + "num_input_tokens_seen": 614864280, + "step": 3557 + }, + { + "epoch": 1.362313289927231, + "loss": 1.684021234512329, + "loss_ce": 0.19837597012519836, + "loss_iou": 0.6903018951416016, + "loss_num": 0.02099609375, + "loss_xval": 1.4856452941894531, + "num_input_tokens_seen": 614864280, + "step": 3557 + }, + { + "epoch": 1.3626962849482958, + "grad_norm": 136.52859958092625, + "learning_rate": 5e-06, + "loss": 2.1012, + "num_input_tokens_seen": 615037440, + "step": 3558 + }, + { + "epoch": 1.3626962849482958, + "loss": 2.1489267349243164, + "loss_ce": 0.19121010601520538, + "loss_iou": 0.885474443435669, + "loss_num": 0.037353515625, + "loss_xval": 1.957716464996338, + "num_input_tokens_seen": 615037440, + "step": 3558 + }, + { + "epoch": 1.3630792799693605, + "grad_norm": 95.58481875319109, + "learning_rate": 5e-06, + "loss": 1.6488, + "num_input_tokens_seen": 615209168, + "step": 3559 + }, + { + "epoch": 1.3630792799693605, + "loss": 1.5920864343643188, + "loss_ce": 0.19161845743656158, + "loss_iou": 0.6444936394691467, + "loss_num": 0.0223388671875, + "loss_xval": 1.4004679918289185, + "num_input_tokens_seen": 615209168, + "step": 3559 + }, + { + "epoch": 1.363462274990425, + "grad_norm": 199.02495995648124, + "learning_rate": 5e-06, + "loss": 2.1887, + "num_input_tokens_seen": 615382208, + "step": 3560 + }, + { + "epoch": 1.363462274990425, + "loss": 2.1514439582824707, + "loss_ce": 0.19695159792900085, + "loss_iou": 0.898175060749054, + "loss_num": 0.03173828125, + "loss_xval": 1.954492211341858, + "num_input_tokens_seen": 615382208, + "step": 3560 + }, + { + "epoch": 1.3638452700114898, + "grad_norm": 112.0891784952754, + "learning_rate": 5e-06, + "loss": 1.705, + "num_input_tokens_seen": 615555544, + "step": 3561 + }, + { + "epoch": 1.3638452700114898, + "loss": 1.7380800247192383, + "loss_ce": 0.20713254809379578, + "loss_iou": 0.7163404226303101, + "loss_num": 0.0196533203125, + "loss_xval": 1.5309474468231201, + "num_input_tokens_seen": 615555544, + "step": 3561 + }, + { + "epoch": 1.3642282650325546, + "grad_norm": 192.29869113246352, + "learning_rate": 5e-06, + "loss": 1.8479, + "num_input_tokens_seen": 615728920, + "step": 3562 + }, + { + "epoch": 1.3642282650325546, + "loss": 1.7795076370239258, + "loss_ce": 0.2079392671585083, + "loss_iou": 0.7436088919639587, + "loss_num": 0.016845703125, + "loss_xval": 1.5715683698654175, + "num_input_tokens_seen": 615728920, + "step": 3562 + }, + { + "epoch": 1.3646112600536193, + "grad_norm": 68.36400261160614, + "learning_rate": 5e-06, + "loss": 1.9486, + "num_input_tokens_seen": 615901720, + "step": 3563 + }, + { + "epoch": 1.3646112600536193, + "loss": 1.926328182220459, + "loss_ce": 0.20747782289981842, + "loss_iou": 0.7692152261734009, + "loss_num": 0.0361328125, + "loss_xval": 1.7188503742218018, + "num_input_tokens_seen": 615901720, + "step": 3563 + }, + { + "epoch": 1.364994255074684, + "grad_norm": 112.75874750994122, + "learning_rate": 5e-06, + "loss": 1.5633, + "num_input_tokens_seen": 616074560, + "step": 3564 + }, + { + "epoch": 1.364994255074684, + "loss": 1.5410892963409424, + "loss_ce": 0.1854608654975891, + "loss_iou": 0.627856969833374, + "loss_num": 0.02001953125, + "loss_xval": 1.355628490447998, + "num_input_tokens_seen": 616074560, + "step": 3564 + }, + { + "epoch": 1.3653772500957486, + "grad_norm": 175.84719838322135, + "learning_rate": 5e-06, + "loss": 2.109, + "num_input_tokens_seen": 616247752, + "step": 3565 + }, + { + "epoch": 1.3653772500957486, + "loss": 1.9987949132919312, + "loss_ce": 0.18364642560482025, + "loss_iou": 0.8309751152992249, + "loss_num": 0.0306396484375, + "loss_xval": 1.8151484727859497, + "num_input_tokens_seen": 616247752, + "step": 3565 + }, + { + "epoch": 1.3657602451168134, + "grad_norm": 71.38045533359247, + "learning_rate": 5e-06, + "loss": 1.7527, + "num_input_tokens_seen": 616420520, + "step": 3566 + }, + { + "epoch": 1.3657602451168134, + "loss": 1.66278076171875, + "loss_ce": 0.19346889853477478, + "loss_iou": 0.6478638648986816, + "loss_num": 0.03466796875, + "loss_xval": 1.4693117141723633, + "num_input_tokens_seen": 616420520, + "step": 3566 + }, + { + "epoch": 1.3661432401378781, + "grad_norm": 67.2068048753262, + "learning_rate": 5e-06, + "loss": 1.6118, + "num_input_tokens_seen": 616593520, + "step": 3567 + }, + { + "epoch": 1.3661432401378781, + "loss": 1.6439144611358643, + "loss_ce": 0.29087749123573303, + "loss_iou": 0.6355944275856018, + "loss_num": 0.016357421875, + "loss_xval": 1.3530369997024536, + "num_input_tokens_seen": 616593520, + "step": 3567 + }, + { + "epoch": 1.366526235158943, + "grad_norm": 89.2511966558542, + "learning_rate": 5e-06, + "loss": 1.4504, + "num_input_tokens_seen": 616766360, + "step": 3568 + }, + { + "epoch": 1.366526235158943, + "loss": 1.5952684879302979, + "loss_ce": 0.2199249565601349, + "loss_iou": 0.6187782883644104, + "loss_num": 0.027587890625, + "loss_xval": 1.3753434419631958, + "num_input_tokens_seen": 616766360, + "step": 3568 + }, + { + "epoch": 1.3669092301800077, + "grad_norm": 164.71363153291628, + "learning_rate": 5e-06, + "loss": 2.0781, + "num_input_tokens_seen": 616939432, + "step": 3569 + }, + { + "epoch": 1.3669092301800077, + "loss": 2.082994222640991, + "loss_ce": 0.23589718341827393, + "loss_iou": 0.8618725538253784, + "loss_num": 0.024658203125, + "loss_xval": 1.8470971584320068, + "num_input_tokens_seen": 616939432, + "step": 3569 + }, + { + "epoch": 1.3672922252010724, + "grad_norm": 144.59929639130402, + "learning_rate": 5e-06, + "loss": 1.7726, + "num_input_tokens_seen": 617112248, + "step": 3570 + }, + { + "epoch": 1.3672922252010724, + "loss": 1.602302074432373, + "loss_ce": 0.18006178736686707, + "loss_iou": 0.6458430290222168, + "loss_num": 0.026123046875, + "loss_xval": 1.4081411361694336, + "num_input_tokens_seen": 617112248, + "step": 3570 + }, + { + "epoch": 1.3676752202221372, + "grad_norm": 141.1230383930963, + "learning_rate": 5e-06, + "loss": 1.5116, + "num_input_tokens_seen": 617285568, + "step": 3571 + }, + { + "epoch": 1.3676752202221372, + "loss": 1.4005942344665527, + "loss_ce": 0.19415241479873657, + "loss_iou": 0.5592755675315857, + "loss_num": 0.017578125, + "loss_xval": 1.2064417600631714, + "num_input_tokens_seen": 617285568, + "step": 3571 + }, + { + "epoch": 1.368058215243202, + "grad_norm": 84.54980833831, + "learning_rate": 5e-06, + "loss": 1.6988, + "num_input_tokens_seen": 617458336, + "step": 3572 + }, + { + "epoch": 1.368058215243202, + "loss": 1.5296988487243652, + "loss_ce": 0.1943495273590088, + "loss_iou": 0.614039957523346, + "loss_num": 0.021484375, + "loss_xval": 1.335349202156067, + "num_input_tokens_seen": 617458336, + "step": 3572 + }, + { + "epoch": 1.3684412102642667, + "grad_norm": 184.85652314277982, + "learning_rate": 5e-06, + "loss": 1.6501, + "num_input_tokens_seen": 617631352, + "step": 3573 + }, + { + "epoch": 1.3684412102642667, + "loss": 1.5000786781311035, + "loss_ce": 0.2294812798500061, + "loss_iou": 0.5807638168334961, + "loss_num": 0.0218505859375, + "loss_xval": 1.2705974578857422, + "num_input_tokens_seen": 617631352, + "step": 3573 + }, + { + "epoch": 1.3688242052853312, + "grad_norm": 183.29486314610384, + "learning_rate": 5e-06, + "loss": 1.6727, + "num_input_tokens_seen": 617804456, + "step": 3574 + }, + { + "epoch": 1.3688242052853312, + "loss": 1.6086540222167969, + "loss_ce": 0.21161183714866638, + "loss_iou": 0.6385082602500916, + "loss_num": 0.0240478515625, + "loss_xval": 1.397042155265808, + "num_input_tokens_seen": 617804456, + "step": 3574 + }, + { + "epoch": 1.369207200306396, + "grad_norm": 84.22326668577249, + "learning_rate": 5e-06, + "loss": 2.0519, + "num_input_tokens_seen": 617977784, + "step": 3575 + }, + { + "epoch": 1.369207200306396, + "loss": 2.0773351192474365, + "loss_ce": 0.21203234791755676, + "loss_iou": 0.8519018888473511, + "loss_num": 0.0322265625, + "loss_xval": 1.8653028011322021, + "num_input_tokens_seen": 617977784, + "step": 3575 + }, + { + "epoch": 1.3695901953274607, + "grad_norm": 71.3680249380758, + "learning_rate": 5e-06, + "loss": 1.4871, + "num_input_tokens_seen": 618150600, + "step": 3576 + }, + { + "epoch": 1.3695901953274607, + "loss": 1.4333336353302002, + "loss_ce": 0.16744062304496765, + "loss_iou": -3.073839740252618e+36, + "loss_num": 0.020751953125, + "loss_xval": -7.272897730801609e+28, + "num_input_tokens_seen": 618150600, + "step": 3576 + }, + { + "epoch": 1.3699731903485255, + "grad_norm": 86.93418845876175, + "learning_rate": 5e-06, + "loss": 1.3279, + "num_input_tokens_seen": 618323584, + "step": 3577 + }, + { + "epoch": 1.3699731903485255, + "loss": 1.43703031539917, + "loss_ce": 0.22806599736213684, + "loss_iou": 0.5516409873962402, + "loss_num": 0.0211181640625, + "loss_xval": 1.2089643478393555, + "num_input_tokens_seen": 618323584, + "step": 3577 + }, + { + "epoch": 1.3703561853695903, + "grad_norm": 157.4833698628764, + "learning_rate": 5e-06, + "loss": 1.4033, + "num_input_tokens_seen": 618496408, + "step": 3578 + }, + { + "epoch": 1.3703561853695903, + "loss": 1.3804898262023926, + "loss_ce": 0.22576244175434113, + "loss_iou": 0.5269334316253662, + "loss_num": 0.0201416015625, + "loss_xval": 1.1547274589538574, + "num_input_tokens_seen": 618496408, + "step": 3578 + }, + { + "epoch": 1.3707391803906548, + "grad_norm": 138.00020927299406, + "learning_rate": 5e-06, + "loss": 1.4502, + "num_input_tokens_seen": 618668840, + "step": 3579 + }, + { + "epoch": 1.3707391803906548, + "loss": 1.4024038314819336, + "loss_ce": 0.19461020827293396, + "loss_iou": 0.5448606014251709, + "loss_num": 0.023681640625, + "loss_xval": 1.2077937126159668, + "num_input_tokens_seen": 618668840, + "step": 3579 + }, + { + "epoch": 1.3711221754117195, + "grad_norm": 113.76663493518731, + "learning_rate": 5e-06, + "loss": 1.6558, + "num_input_tokens_seen": 618841784, + "step": 3580 + }, + { + "epoch": 1.3711221754117195, + "loss": 1.6215949058532715, + "loss_ce": 0.16539452970027924, + "loss_iou": 0.6719478368759155, + "loss_num": 0.0224609375, + "loss_xval": 1.456200361251831, + "num_input_tokens_seen": 618841784, + "step": 3580 + }, + { + "epoch": 1.3715051704327843, + "grad_norm": 93.92814455177887, + "learning_rate": 5e-06, + "loss": 1.3708, + "num_input_tokens_seen": 619014496, + "step": 3581 + }, + { + "epoch": 1.3715051704327843, + "loss": 1.3997132778167725, + "loss_ce": 0.16591432690620422, + "loss_iou": 0.5728321075439453, + "loss_num": 0.017578125, + "loss_xval": 1.2337989807128906, + "num_input_tokens_seen": 619014496, + "step": 3581 + }, + { + "epoch": 1.371888165453849, + "grad_norm": 146.42374708408562, + "learning_rate": 5e-06, + "loss": 1.6331, + "num_input_tokens_seen": 619187808, + "step": 3582 + }, + { + "epoch": 1.371888165453849, + "loss": 1.6726750135421753, + "loss_ce": 0.18158861994743347, + "loss_iou": 0.6885516047477722, + "loss_num": 0.0228271484375, + "loss_xval": 1.4910863637924194, + "num_input_tokens_seen": 619187808, + "step": 3582 + }, + { + "epoch": 1.3722711604749138, + "grad_norm": 120.95380911160353, + "learning_rate": 5e-06, + "loss": 1.6599, + "num_input_tokens_seen": 619360944, + "step": 3583 + }, + { + "epoch": 1.3722711604749138, + "loss": 1.6582934856414795, + "loss_ce": 0.20701220631599426, + "loss_iou": 0.6840451955795288, + "loss_num": 0.0166015625, + "loss_xval": 1.4512813091278076, + "num_input_tokens_seen": 619360944, + "step": 3583 + }, + { + "epoch": 1.3726541554959786, + "grad_norm": 78.01497378817737, + "learning_rate": 5e-06, + "loss": 1.3103, + "num_input_tokens_seen": 619534224, + "step": 3584 + }, + { + "epoch": 1.3726541554959786, + "loss": 1.2011638879776, + "loss_ce": 0.19282567501068115, + "loss_iou": 0.4743228852748871, + "loss_num": 0.011962890625, + "loss_xval": 1.008338212966919, + "num_input_tokens_seen": 619534224, + "step": 3584 + }, + { + "epoch": 1.3730371505170433, + "grad_norm": 92.40419626817528, + "learning_rate": 5e-06, + "loss": 1.5359, + "num_input_tokens_seen": 619707352, + "step": 3585 + }, + { + "epoch": 1.3730371505170433, + "loss": 1.704606056213379, + "loss_ce": 0.21170318126678467, + "loss_iou": 0.7000647783279419, + "loss_num": 0.0185546875, + "loss_xval": 1.4929029941558838, + "num_input_tokens_seen": 619707352, + "step": 3585 + }, + { + "epoch": 1.373420145538108, + "grad_norm": 124.85981223625015, + "learning_rate": 5e-06, + "loss": 1.7413, + "num_input_tokens_seen": 619880480, + "step": 3586 + }, + { + "epoch": 1.373420145538108, + "loss": 1.7765295505523682, + "loss_ce": 0.24645628035068512, + "loss_iou": 0.7011939287185669, + "loss_num": 0.0255126953125, + "loss_xval": 1.5300734043121338, + "num_input_tokens_seen": 619880480, + "step": 3586 + }, + { + "epoch": 1.3738031405591729, + "grad_norm": 138.66393423856508, + "learning_rate": 5e-06, + "loss": 1.3644, + "num_input_tokens_seen": 620053512, + "step": 3587 + }, + { + "epoch": 1.3738031405591729, + "loss": 1.4122133255004883, + "loss_ce": 0.14615964889526367, + "loss_iou": 0.5936439037322998, + "loss_num": 0.0157470703125, + "loss_xval": 1.2660536766052246, + "num_input_tokens_seen": 620053512, + "step": 3587 + }, + { + "epoch": 1.3741861355802374, + "grad_norm": 153.9009960217307, + "learning_rate": 5e-06, + "loss": 1.4305, + "num_input_tokens_seen": 620226264, + "step": 3588 + }, + { + "epoch": 1.3741861355802374, + "loss": 1.2958606481552124, + "loss_ce": 0.13410046696662903, + "loss_iou": 0.5219964385032654, + "loss_num": 0.0235595703125, + "loss_xval": 1.1617602109909058, + "num_input_tokens_seen": 620226264, + "step": 3588 + }, + { + "epoch": 1.3745691306013021, + "grad_norm": 188.06566009943361, + "learning_rate": 5e-06, + "loss": 1.3848, + "num_input_tokens_seen": 620398928, + "step": 3589 + }, + { + "epoch": 1.3745691306013021, + "loss": 1.2425801753997803, + "loss_ce": 0.1674756109714508, + "loss_iou": 0.5067752599716187, + "loss_num": 0.0123291015625, + "loss_xval": 1.0751044750213623, + "num_input_tokens_seen": 620398928, + "step": 3589 + }, + { + "epoch": 1.374952125622367, + "grad_norm": 82.30347593001349, + "learning_rate": 5e-06, + "loss": 1.8051, + "num_input_tokens_seen": 620571976, + "step": 3590 + }, + { + "epoch": 1.374952125622367, + "loss": 1.7292916774749756, + "loss_ce": 0.20649453997612, + "loss_iou": 0.6882479190826416, + "loss_num": 0.029296875, + "loss_xval": 1.5227971076965332, + "num_input_tokens_seen": 620571976, + "step": 3590 + }, + { + "epoch": 1.3753351206434317, + "grad_norm": 101.65268624823504, + "learning_rate": 5e-06, + "loss": 1.3789, + "num_input_tokens_seen": 620744872, + "step": 3591 + }, + { + "epoch": 1.3753351206434317, + "loss": 1.3920843601226807, + "loss_ce": 0.2281237542629242, + "loss_iou": 0.5459238290786743, + "loss_num": 0.014404296875, + "loss_xval": 1.1639606952667236, + "num_input_tokens_seen": 620744872, + "step": 3591 + }, + { + "epoch": 1.3757181156644964, + "grad_norm": 177.78046060644215, + "learning_rate": 5e-06, + "loss": 1.5136, + "num_input_tokens_seen": 620918000, + "step": 3592 + }, + { + "epoch": 1.3757181156644964, + "loss": 1.5106201171875, + "loss_ce": 0.17325910925865173, + "loss_iou": 0.6299536824226379, + "loss_num": 0.0155029296875, + "loss_xval": 1.3373609781265259, + "num_input_tokens_seen": 620918000, + "step": 3592 + }, + { + "epoch": 1.376101110685561, + "grad_norm": 109.71608368807934, + "learning_rate": 5e-06, + "loss": 1.7797, + "num_input_tokens_seen": 621091064, + "step": 3593 + }, + { + "epoch": 1.376101110685561, + "loss": 1.7327550649642944, + "loss_ce": 0.22606757283210754, + "loss_iou": 0.6914541721343994, + "loss_num": 0.0247802734375, + "loss_xval": 1.5066876411437988, + "num_input_tokens_seen": 621091064, + "step": 3593 + }, + { + "epoch": 1.3764841057066257, + "grad_norm": 97.89136025517432, + "learning_rate": 5e-06, + "loss": 1.3367, + "num_input_tokens_seen": 621263880, + "step": 3594 + }, + { + "epoch": 1.3764841057066257, + "loss": 1.2630410194396973, + "loss_ce": 0.2281639277935028, + "loss_iou": 0.47805559635162354, + "loss_num": 0.0157470703125, + "loss_xval": 1.034877061843872, + "num_input_tokens_seen": 621263880, + "step": 3594 + }, + { + "epoch": 1.3768671007276905, + "grad_norm": 113.75836262361007, + "learning_rate": 5e-06, + "loss": 1.3874, + "num_input_tokens_seen": 621437032, + "step": 3595 + }, + { + "epoch": 1.3768671007276905, + "loss": 1.394885778427124, + "loss_ce": 0.18021541833877563, + "loss_iou": 0.5659228563308716, + "loss_num": 0.0166015625, + "loss_xval": 1.2146704196929932, + "num_input_tokens_seen": 621437032, + "step": 3595 + }, + { + "epoch": 1.3772500957487552, + "grad_norm": 132.61150641515562, + "learning_rate": 5e-06, + "loss": 1.466, + "num_input_tokens_seen": 621610152, + "step": 3596 + }, + { + "epoch": 1.3772500957487552, + "loss": 1.277604579925537, + "loss_ce": 0.20018568634986877, + "loss_iou": 0.5098550319671631, + "loss_num": 0.01153564453125, + "loss_xval": 1.0774188041687012, + "num_input_tokens_seen": 621610152, + "step": 3596 + }, + { + "epoch": 1.37763309076982, + "grad_norm": 197.29756768755266, + "learning_rate": 5e-06, + "loss": 1.5008, + "num_input_tokens_seen": 621782832, + "step": 3597 + }, + { + "epoch": 1.37763309076982, + "loss": 1.2304048538208008, + "loss_ce": 0.15923373401165009, + "loss_iou": -2.3966519160564962e+30, + "loss_num": 0.0167236328125, + "loss_xval": 1.2850934724873698e+35, + "num_input_tokens_seen": 621782832, + "step": 3597 + }, + { + "epoch": 1.3780160857908847, + "grad_norm": 132.8124837495467, + "learning_rate": 5e-06, + "loss": 1.6714, + "num_input_tokens_seen": 621955864, + "step": 3598 + }, + { + "epoch": 1.3780160857908847, + "loss": 1.5206103324890137, + "loss_ce": 0.22293275594711304, + "loss_iou": 0.5956466197967529, + "loss_num": 0.021240234375, + "loss_xval": 1.2976775169372559, + "num_input_tokens_seen": 621955864, + "step": 3598 + }, + { + "epoch": 1.3783990808119495, + "grad_norm": 143.7155048499848, + "learning_rate": 5e-06, + "loss": 1.5459, + "num_input_tokens_seen": 622128792, + "step": 3599 + }, + { + "epoch": 1.3783990808119495, + "loss": 1.546707272529602, + "loss_ce": 0.17243452370166779, + "loss_iou": 0.6349208950996399, + "loss_num": 0.0208740234375, + "loss_xval": 1.3742729425430298, + "num_input_tokens_seen": 622128792, + "step": 3599 + }, + { + "epoch": 1.3787820758330143, + "grad_norm": 201.95855457358925, + "learning_rate": 5e-06, + "loss": 1.5652, + "num_input_tokens_seen": 622301776, + "step": 3600 + }, + { + "epoch": 1.3787820758330143, + "loss": 1.778625726699829, + "loss_ce": 0.2336582988500595, + "loss_iou": 0.7346419095993042, + "loss_num": 0.01513671875, + "loss_xval": 1.5449674129486084, + "num_input_tokens_seen": 622301776, + "step": 3600 + }, + { + "epoch": 1.379165070854079, + "grad_norm": 374.5716709419254, + "learning_rate": 5e-06, + "loss": 2.083, + "num_input_tokens_seen": 622474968, + "step": 3601 + }, + { + "epoch": 1.379165070854079, + "loss": 2.073340892791748, + "loss_ce": 0.2615928053855896, + "loss_iou": 0.8359583020210266, + "loss_num": 0.0279541015625, + "loss_xval": 1.8117482662200928, + "num_input_tokens_seen": 622474968, + "step": 3601 + }, + { + "epoch": 1.3795480658751436, + "grad_norm": 88.0574071209228, + "learning_rate": 5e-06, + "loss": 2.4811, + "num_input_tokens_seen": 622648152, + "step": 3602 + }, + { + "epoch": 1.3795480658751436, + "loss": 2.474989891052246, + "loss_ce": 0.217107355594635, + "loss_iou": 0.9219099283218384, + "loss_num": 0.0830078125, + "loss_xval": 2.2578823566436768, + "num_input_tokens_seen": 622648152, + "step": 3602 + }, + { + "epoch": 1.3799310608962083, + "grad_norm": 214.45984808367047, + "learning_rate": 5e-06, + "loss": 1.7667, + "num_input_tokens_seen": 622821168, + "step": 3603 + }, + { + "epoch": 1.3799310608962083, + "loss": 1.8158915042877197, + "loss_ce": 0.2158391922712326, + "loss_iou": 0.7445147037506104, + "loss_num": 0.022216796875, + "loss_xval": 1.6000523567199707, + "num_input_tokens_seen": 622821168, + "step": 3603 + }, + { + "epoch": 1.380314055917273, + "grad_norm": 292.4304309739641, + "learning_rate": 5e-06, + "loss": 2.5309, + "num_input_tokens_seen": 622994408, + "step": 3604 + }, + { + "epoch": 1.380314055917273, + "loss": 2.4345271587371826, + "loss_ce": 0.23869533836841583, + "loss_iou": 1.0452425479888916, + "loss_num": 0.02099609375, + "loss_xval": 2.195831775665283, + "num_input_tokens_seen": 622994408, + "step": 3604 + }, + { + "epoch": 1.3806970509383378, + "grad_norm": 140.3299423413433, + "learning_rate": 5e-06, + "loss": 2.1769, + "num_input_tokens_seen": 623167592, + "step": 3605 + }, + { + "epoch": 1.3806970509383378, + "loss": 2.1421940326690674, + "loss_ce": 0.19230958819389343, + "loss_iou": 0.832730233669281, + "loss_num": 0.056884765625, + "loss_xval": 1.949884295463562, + "num_input_tokens_seen": 623167592, + "step": 3605 + }, + { + "epoch": 1.3810800459594026, + "grad_norm": 90.03475829845245, + "learning_rate": 5e-06, + "loss": 1.4828, + "num_input_tokens_seen": 623340672, + "step": 3606 + }, + { + "epoch": 1.3810800459594026, + "loss": 1.4579837322235107, + "loss_ce": 0.19193831086158752, + "loss_iou": 0.5905880331993103, + "loss_num": 0.0169677734375, + "loss_xval": 1.2660454511642456, + "num_input_tokens_seen": 623340672, + "step": 3606 + }, + { + "epoch": 1.3814630409804671, + "grad_norm": 255.06613070780614, + "learning_rate": 5e-06, + "loss": 1.6012, + "num_input_tokens_seen": 623510016, + "step": 3607 + }, + { + "epoch": 1.3814630409804671, + "loss": 1.664123296737671, + "loss_ce": 0.18670949339866638, + "loss_iou": 0.6796249151229858, + "loss_num": 0.0235595703125, + "loss_xval": 1.4774138927459717, + "num_input_tokens_seen": 623510016, + "step": 3607 + }, + { + "epoch": 1.3818460360015319, + "grad_norm": 97.47083310193024, + "learning_rate": 5e-06, + "loss": 1.7039, + "num_input_tokens_seen": 623682848, + "step": 3608 + }, + { + "epoch": 1.3818460360015319, + "loss": 1.6879403591156006, + "loss_ce": 0.20535466074943542, + "loss_iou": 0.6692103147506714, + "loss_num": 0.02880859375, + "loss_xval": 1.4825856685638428, + "num_input_tokens_seen": 623682848, + "step": 3608 + }, + { + "epoch": 1.3822290310225966, + "grad_norm": 335.93712642421195, + "learning_rate": 5e-06, + "loss": 1.8219, + "num_input_tokens_seen": 623855784, + "step": 3609 + }, + { + "epoch": 1.3822290310225966, + "loss": 1.798883080482483, + "loss_ce": 0.21966424584388733, + "loss_iou": 0.722043514251709, + "loss_num": 0.0269775390625, + "loss_xval": 1.579218864440918, + "num_input_tokens_seen": 623855784, + "step": 3609 + }, + { + "epoch": 1.3826120260436614, + "grad_norm": 115.76381225993897, + "learning_rate": 5e-06, + "loss": 1.9713, + "num_input_tokens_seen": 624028672, + "step": 3610 + }, + { + "epoch": 1.3826120260436614, + "loss": 2.0470962524414062, + "loss_ce": 0.20051613450050354, + "loss_iou": 0.8092153668403625, + "loss_num": 0.045654296875, + "loss_xval": 1.846580147743225, + "num_input_tokens_seen": 624028672, + "step": 3610 + }, + { + "epoch": 1.3829950210647262, + "grad_norm": 289.03065419006526, + "learning_rate": 5e-06, + "loss": 1.8144, + "num_input_tokens_seen": 624201688, + "step": 3611 + }, + { + "epoch": 1.3829950210647262, + "loss": 1.8520580530166626, + "loss_ce": 0.19172219932079315, + "loss_iou": 0.7532635927200317, + "loss_num": 0.03076171875, + "loss_xval": 1.6603357791900635, + "num_input_tokens_seen": 624201688, + "step": 3611 + }, + { + "epoch": 1.383378016085791, + "grad_norm": 126.01295749728209, + "learning_rate": 5e-06, + "loss": 1.7867, + "num_input_tokens_seen": 624374544, + "step": 3612 + }, + { + "epoch": 1.383378016085791, + "loss": 1.8940067291259766, + "loss_ce": 0.18726450204849243, + "loss_iou": 0.8059163093566895, + "loss_num": 0.0189208984375, + "loss_xval": 1.706742286682129, + "num_input_tokens_seen": 624374544, + "step": 3612 + }, + { + "epoch": 1.3837610111068557, + "grad_norm": 106.44765104110519, + "learning_rate": 5e-06, + "loss": 1.782, + "num_input_tokens_seen": 624547768, + "step": 3613 + }, + { + "epoch": 1.3837610111068557, + "loss": 1.7128829956054688, + "loss_ce": 0.20912262797355652, + "loss_iou": 0.6836427450180054, + "loss_num": 0.02734375, + "loss_xval": 1.5037600994110107, + "num_input_tokens_seen": 624547768, + "step": 3613 + }, + { + "epoch": 1.3841440061279204, + "grad_norm": 145.1517137796239, + "learning_rate": 5e-06, + "loss": 1.6311, + "num_input_tokens_seen": 624720400, + "step": 3614 + }, + { + "epoch": 1.3841440061279204, + "loss": 1.650829553604126, + "loss_ce": 0.22188735008239746, + "loss_iou": 0.6540767550468445, + "loss_num": 0.024169921875, + "loss_xval": 1.428942084312439, + "num_input_tokens_seen": 624720400, + "step": 3614 + }, + { + "epoch": 1.3845270011489852, + "grad_norm": 146.12296407478735, + "learning_rate": 5e-06, + "loss": 1.8438, + "num_input_tokens_seen": 624893424, + "step": 3615 + }, + { + "epoch": 1.3845270011489852, + "loss": 1.6571210622787476, + "loss_ce": 0.22394540905952454, + "loss_iou": 0.6682174205780029, + "loss_num": 0.0194091796875, + "loss_xval": 1.4331756830215454, + "num_input_tokens_seen": 624893424, + "step": 3615 + }, + { + "epoch": 1.3849099961700497, + "grad_norm": 69.83785985774774, + "learning_rate": 5e-06, + "loss": 1.35, + "num_input_tokens_seen": 625066432, + "step": 3616 + }, + { + "epoch": 1.3849099961700497, + "loss": 1.428694248199463, + "loss_ce": 0.2676292359828949, + "loss_iou": 0.5431942343711853, + "loss_num": 0.01495361328125, + "loss_xval": 1.161064863204956, + "num_input_tokens_seen": 625066432, + "step": 3616 + }, + { + "epoch": 1.3852929911911145, + "grad_norm": 75.65348777664539, + "learning_rate": 5e-06, + "loss": 1.2476, + "num_input_tokens_seen": 625239336, + "step": 3617 + }, + { + "epoch": 1.3852929911911145, + "loss": 1.1371839046478271, + "loss_ce": 0.18536092340946198, + "loss_iou": 0.4348958730697632, + "loss_num": 0.016357421875, + "loss_xval": 0.9518229961395264, + "num_input_tokens_seen": 625239336, + "step": 3617 + }, + { + "epoch": 1.3856759862121792, + "grad_norm": 67.4584138420159, + "learning_rate": 5e-06, + "loss": 1.3155, + "num_input_tokens_seen": 625412304, + "step": 3618 + }, + { + "epoch": 1.3856759862121792, + "loss": 1.2084017992019653, + "loss_ce": 0.18417063355445862, + "loss_iou": 0.4751892685890198, + "loss_num": 0.0147705078125, + "loss_xval": 1.0173646211624146, + "num_input_tokens_seen": 625412304, + "step": 3618 + }, + { + "epoch": 1.386058981233244, + "grad_norm": 151.97864710302989, + "learning_rate": 5e-06, + "loss": 1.5277, + "num_input_tokens_seen": 625585048, + "step": 3619 + }, + { + "epoch": 1.386058981233244, + "loss": 1.6334717273712158, + "loss_ce": 0.22242796421051025, + "loss_iou": 0.6616528630256653, + "loss_num": 0.017578125, + "loss_xval": 1.4110437631607056, + "num_input_tokens_seen": 625585048, + "step": 3619 + }, + { + "epoch": 1.3864419762543088, + "grad_norm": 114.49035974980248, + "learning_rate": 5e-06, + "loss": 1.7804, + "num_input_tokens_seen": 625758288, + "step": 3620 + }, + { + "epoch": 1.3864419762543088, + "loss": 1.7300214767456055, + "loss_ce": 0.19582955539226532, + "loss_iou": 0.6872619986534119, + "loss_num": 0.031982421875, + "loss_xval": 1.5341919660568237, + "num_input_tokens_seen": 625758288, + "step": 3620 + }, + { + "epoch": 1.3868249712753733, + "grad_norm": 97.59979423682461, + "learning_rate": 5e-06, + "loss": 1.4962, + "num_input_tokens_seen": 625931048, + "step": 3621 + }, + { + "epoch": 1.3868249712753733, + "loss": 1.4186863899230957, + "loss_ce": 0.19230541586875916, + "loss_iou": 0.5793540477752686, + "loss_num": 0.0135498046875, + "loss_xval": 1.2263808250427246, + "num_input_tokens_seen": 625931048, + "step": 3621 + }, + { + "epoch": 1.387207966296438, + "grad_norm": 146.54098911248724, + "learning_rate": 5e-06, + "loss": 1.3584, + "num_input_tokens_seen": 626103680, + "step": 3622 + }, + { + "epoch": 1.387207966296438, + "loss": 1.3940868377685547, + "loss_ce": 0.18014013767242432, + "loss_iou": 0.5690094232559204, + "loss_num": 0.01519775390625, + "loss_xval": 1.2139465808868408, + "num_input_tokens_seen": 626103680, + "step": 3622 + }, + { + "epoch": 1.3875909613175028, + "grad_norm": 120.98971428742999, + "learning_rate": 5e-06, + "loss": 1.4945, + "num_input_tokens_seen": 626276424, + "step": 3623 + }, + { + "epoch": 1.3875909613175028, + "loss": 1.5319600105285645, + "loss_ce": 0.2022242546081543, + "loss_iou": 0.6283536553382874, + "loss_num": 0.01458740234375, + "loss_xval": 1.3297358751296997, + "num_input_tokens_seen": 626276424, + "step": 3623 + }, + { + "epoch": 1.3879739563385676, + "grad_norm": 97.84442574480609, + "learning_rate": 5e-06, + "loss": 1.5906, + "num_input_tokens_seen": 626449648, + "step": 3624 + }, + { + "epoch": 1.3879739563385676, + "loss": 1.5129203796386719, + "loss_ce": 0.21519820392131805, + "loss_iou": 0.6155816316604614, + "loss_num": 0.0133056640625, + "loss_xval": 1.2977221012115479, + "num_input_tokens_seen": 626449648, + "step": 3624 + }, + { + "epoch": 1.3883569513596323, + "grad_norm": 152.8080708366177, + "learning_rate": 5e-06, + "loss": 1.6142, + "num_input_tokens_seen": 626622808, + "step": 3625 + }, + { + "epoch": 1.3883569513596323, + "loss": 1.673858880996704, + "loss_ce": 0.17594227194786072, + "loss_iou": 0.6999318599700928, + "loss_num": 0.0196533203125, + "loss_xval": 1.4979166984558105, + "num_input_tokens_seen": 626622808, + "step": 3625 + }, + { + "epoch": 1.388739946380697, + "grad_norm": 149.94144582539803, + "learning_rate": 5e-06, + "loss": 1.4942, + "num_input_tokens_seen": 626795728, + "step": 3626 + }, + { + "epoch": 1.388739946380697, + "loss": 1.3925130367279053, + "loss_ce": 0.20805421471595764, + "loss_iou": 0.5429129600524902, + "loss_num": 0.0196533203125, + "loss_xval": 1.1844587326049805, + "num_input_tokens_seen": 626795728, + "step": 3626 + }, + { + "epoch": 1.3891229414017618, + "grad_norm": 113.10008803350293, + "learning_rate": 5e-06, + "loss": 1.9643, + "num_input_tokens_seen": 626969064, + "step": 3627 + }, + { + "epoch": 1.3891229414017618, + "loss": 1.9852036237716675, + "loss_ce": 0.1441543996334076, + "loss_iou": 0.8232345581054688, + "loss_num": 0.0390625, + "loss_xval": 1.8410491943359375, + "num_input_tokens_seen": 626969064, + "step": 3627 + }, + { + "epoch": 1.3895059364228266, + "grad_norm": 76.91817878452832, + "learning_rate": 5e-06, + "loss": 1.4131, + "num_input_tokens_seen": 627141552, + "step": 3628 + }, + { + "epoch": 1.3895059364228266, + "loss": 1.406415343284607, + "loss_ce": 0.15866446495056152, + "loss_iou": 0.5848433971405029, + "loss_num": 0.015625, + "loss_xval": 1.2477507591247559, + "num_input_tokens_seen": 627141552, + "step": 3628 + }, + { + "epoch": 1.3898889314438914, + "grad_norm": 96.04041022337205, + "learning_rate": 5e-06, + "loss": 1.426, + "num_input_tokens_seen": 627314736, + "step": 3629 + }, + { + "epoch": 1.3898889314438914, + "loss": 1.3025336265563965, + "loss_ce": 0.19101479649543762, + "loss_iou": 0.5096931457519531, + "loss_num": 0.0184326171875, + "loss_xval": 1.1115188598632812, + "num_input_tokens_seen": 627314736, + "step": 3629 + }, + { + "epoch": 1.390271926464956, + "grad_norm": 145.9791382708225, + "learning_rate": 5e-06, + "loss": 1.6838, + "num_input_tokens_seen": 627487544, + "step": 3630 + }, + { + "epoch": 1.390271926464956, + "loss": 1.740018606185913, + "loss_ce": 0.15928755700588226, + "loss_iou": 0.7386229634284973, + "loss_num": 0.020751953125, + "loss_xval": 1.5807310342788696, + "num_input_tokens_seen": 627487544, + "step": 3630 + }, + { + "epoch": 1.3906549214860207, + "grad_norm": 128.38685082067695, + "learning_rate": 5e-06, + "loss": 1.6129, + "num_input_tokens_seen": 627660392, + "step": 3631 + }, + { + "epoch": 1.3906549214860207, + "loss": 1.6399121284484863, + "loss_ce": 0.18036915361881256, + "loss_iou": 0.6751756072044373, + "loss_num": 0.0218505859375, + "loss_xval": 1.4595431089401245, + "num_input_tokens_seen": 627660392, + "step": 3631 + }, + { + "epoch": 1.3910379165070854, + "grad_norm": 114.96278717059289, + "learning_rate": 5e-06, + "loss": 1.7292, + "num_input_tokens_seen": 627833672, + "step": 3632 + }, + { + "epoch": 1.3910379165070854, + "loss": 1.6817591190338135, + "loss_ce": 0.15832598507404327, + "loss_iou": 0.7013833522796631, + "loss_num": 0.024169921875, + "loss_xval": 1.5234332084655762, + "num_input_tokens_seen": 627833672, + "step": 3632 + }, + { + "epoch": 1.3914209115281502, + "grad_norm": 61.58322538749416, + "learning_rate": 5e-06, + "loss": 1.4345, + "num_input_tokens_seen": 628006720, + "step": 3633 + }, + { + "epoch": 1.3914209115281502, + "loss": 1.4161045551300049, + "loss_ce": 0.19610156118869781, + "loss_iou": 0.5703591108322144, + "loss_num": 0.015869140625, + "loss_xval": 1.2200028896331787, + "num_input_tokens_seen": 628006720, + "step": 3633 + }, + { + "epoch": 1.391803906549215, + "grad_norm": 115.21706585870986, + "learning_rate": 5e-06, + "loss": 1.3091, + "num_input_tokens_seen": 628179688, + "step": 3634 + }, + { + "epoch": 1.391803906549215, + "loss": 1.316110610961914, + "loss_ce": 0.19886082410812378, + "loss_iou": 0.5165106058120728, + "loss_num": 0.016845703125, + "loss_xval": 1.1172497272491455, + "num_input_tokens_seen": 628179688, + "step": 3634 + }, + { + "epoch": 1.3921869015702795, + "grad_norm": 149.91107409355843, + "learning_rate": 5e-06, + "loss": 1.6016, + "num_input_tokens_seen": 628352616, + "step": 3635 + }, + { + "epoch": 1.3921869015702795, + "loss": 1.632288932800293, + "loss_ce": 0.15338543057441711, + "loss_iou": 0.6840776205062866, + "loss_num": 0.0220947265625, + "loss_xval": 1.4789035320281982, + "num_input_tokens_seen": 628352616, + "step": 3635 + }, + { + "epoch": 1.3925698965913442, + "grad_norm": 122.59793314083974, + "learning_rate": 5e-06, + "loss": 1.7816, + "num_input_tokens_seen": 628525488, + "step": 3636 + }, + { + "epoch": 1.3925698965913442, + "loss": 1.639886736869812, + "loss_ce": 0.1811191737651825, + "loss_iou": 0.6472305059432983, + "loss_num": 0.032958984375, + "loss_xval": 1.4587676525115967, + "num_input_tokens_seen": 628525488, + "step": 3636 + }, + { + "epoch": 1.392952891612409, + "grad_norm": 74.43094317206129, + "learning_rate": 5e-06, + "loss": 1.4292, + "num_input_tokens_seen": 628694824, + "step": 3637 + }, + { + "epoch": 1.392952891612409, + "loss": 1.1312984228134155, + "loss_ce": 0.17416593432426453, + "loss_iou": 0.4436388611793518, + "loss_num": 0.01397705078125, + "loss_xval": 0.9571324586868286, + "num_input_tokens_seen": 628694824, + "step": 3637 + }, + { + "epoch": 1.3933358866334737, + "grad_norm": 127.56365057135433, + "learning_rate": 5e-06, + "loss": 1.7343, + "num_input_tokens_seen": 628867976, + "step": 3638 + }, + { + "epoch": 1.3933358866334737, + "loss": 1.6866893768310547, + "loss_ce": 0.1625714898109436, + "loss_iou": 0.7057845592498779, + "loss_num": 0.0224609375, + "loss_xval": 1.5241179466247559, + "num_input_tokens_seen": 628867976, + "step": 3638 + }, + { + "epoch": 1.3937188816545385, + "grad_norm": 94.22051678330423, + "learning_rate": 5e-06, + "loss": 1.9939, + "num_input_tokens_seen": 629041152, + "step": 3639 + }, + { + "epoch": 1.3937188816545385, + "loss": 1.8643584251403809, + "loss_ce": 0.2131306231021881, + "loss_iou": 0.7520970106124878, + "loss_num": 0.0294189453125, + "loss_xval": 1.6512277126312256, + "num_input_tokens_seen": 629041152, + "step": 3639 + }, + { + "epoch": 1.3941018766756033, + "grad_norm": 165.30328183868085, + "learning_rate": 5e-06, + "loss": 1.412, + "num_input_tokens_seen": 629213752, + "step": 3640 + }, + { + "epoch": 1.3941018766756033, + "loss": 1.2959129810333252, + "loss_ce": 0.18002738058567047, + "loss_iou": 0.5097403526306152, + "loss_num": 0.019287109375, + "loss_xval": 1.1040449142456055, + "num_input_tokens_seen": 629213752, + "step": 3640 + }, + { + "epoch": 1.394484871696668, + "grad_norm": 117.91491116115839, + "learning_rate": 5e-06, + "loss": 1.7211, + "num_input_tokens_seen": 629387048, + "step": 3641 + }, + { + "epoch": 1.394484871696668, + "loss": 1.7913999557495117, + "loss_ce": 0.21221789717674255, + "loss_iou": 0.7316076159477234, + "loss_num": 0.023193359375, + "loss_xval": 1.5791820287704468, + "num_input_tokens_seen": 629387048, + "step": 3641 + }, + { + "epoch": 1.3948678667177328, + "grad_norm": 205.35782887366204, + "learning_rate": 5e-06, + "loss": 1.6886, + "num_input_tokens_seen": 629560248, + "step": 3642 + }, + { + "epoch": 1.3948678667177328, + "loss": 1.7776062488555908, + "loss_ce": 0.20876279473304749, + "loss_iou": 0.7054269909858704, + "loss_num": 0.031494140625, + "loss_xval": 1.5688434839248657, + "num_input_tokens_seen": 629560248, + "step": 3642 + }, + { + "epoch": 1.3952508617387975, + "grad_norm": 146.09051892094294, + "learning_rate": 5e-06, + "loss": 1.6681, + "num_input_tokens_seen": 629733592, + "step": 3643 + }, + { + "epoch": 1.3952508617387975, + "loss": 1.5654737949371338, + "loss_ce": 0.21972210705280304, + "loss_iou": 0.6193785071372986, + "loss_num": 0.0213623046875, + "loss_xval": 1.3457516431808472, + "num_input_tokens_seen": 629733592, + "step": 3643 + }, + { + "epoch": 1.395633856759862, + "grad_norm": 111.08368485993188, + "learning_rate": 5e-06, + "loss": 1.5608, + "num_input_tokens_seen": 629906992, + "step": 3644 + }, + { + "epoch": 1.395633856759862, + "loss": 1.566825032234192, + "loss_ce": 0.1464221030473709, + "loss_iou": 0.6561853885650635, + "loss_num": 0.0216064453125, + "loss_xval": 1.420403003692627, + "num_input_tokens_seen": 629906992, + "step": 3644 + }, + { + "epoch": 1.3960168517809268, + "grad_norm": 70.18813202929198, + "learning_rate": 5e-06, + "loss": 1.4565, + "num_input_tokens_seen": 630079776, + "step": 3645 + }, + { + "epoch": 1.3960168517809268, + "loss": 1.5499849319458008, + "loss_ce": 0.2004162073135376, + "loss_iou": 0.6262614130973816, + "loss_num": 0.0194091796875, + "loss_xval": 1.3495687246322632, + "num_input_tokens_seen": 630079776, + "step": 3645 + }, + { + "epoch": 1.3963998468019916, + "grad_norm": 171.39538329360198, + "learning_rate": 5e-06, + "loss": 1.2866, + "num_input_tokens_seen": 630252768, + "step": 3646 + }, + { + "epoch": 1.3963998468019916, + "loss": 1.5590405464172363, + "loss_ce": 0.20193514227867126, + "loss_iou": 0.6220951676368713, + "loss_num": 0.0225830078125, + "loss_xval": 1.3571053743362427, + "num_input_tokens_seen": 630252768, + "step": 3646 + }, + { + "epoch": 1.3967828418230563, + "grad_norm": 126.50631427677583, + "learning_rate": 5e-06, + "loss": 1.707, + "num_input_tokens_seen": 630425480, + "step": 3647 + }, + { + "epoch": 1.3967828418230563, + "loss": 1.6378086805343628, + "loss_ce": 0.17789973318576813, + "loss_iou": 0.6760299205780029, + "loss_num": 0.0216064453125, + "loss_xval": 1.4599089622497559, + "num_input_tokens_seen": 630425480, + "step": 3647 + }, + { + "epoch": 1.397165836844121, + "grad_norm": 123.02433156337872, + "learning_rate": 5e-06, + "loss": 1.5238, + "num_input_tokens_seen": 630598432, + "step": 3648 + }, + { + "epoch": 1.397165836844121, + "loss": 1.542478084564209, + "loss_ce": 0.20391885936260223, + "loss_iou": 0.6159501075744629, + "loss_num": 0.0213623046875, + "loss_xval": 1.3385591506958008, + "num_input_tokens_seen": 630598432, + "step": 3648 + }, + { + "epoch": 1.3975488318651856, + "grad_norm": 109.74008233158122, + "learning_rate": 5e-06, + "loss": 1.4739, + "num_input_tokens_seen": 630771424, + "step": 3649 + }, + { + "epoch": 1.3975488318651856, + "loss": 1.6322606801986694, + "loss_ce": 0.20045846700668335, + "loss_iou": 0.6789138317108154, + "loss_num": 0.0147705078125, + "loss_xval": 1.4318022727966309, + "num_input_tokens_seen": 630771424, + "step": 3649 + }, + { + "epoch": 1.3979318268862504, + "grad_norm": 176.64068054854914, + "learning_rate": 5e-06, + "loss": 1.5302, + "num_input_tokens_seen": 630944536, + "step": 3650 + }, + { + "epoch": 1.3979318268862504, + "loss": 1.3865649700164795, + "loss_ce": 0.18322482705116272, + "loss_iou": 0.5652015209197998, + "loss_num": 0.01458740234375, + "loss_xval": 1.2033400535583496, + "num_input_tokens_seen": 630944536, + "step": 3650 + }, + { + "epoch": 1.3983148219073152, + "grad_norm": 127.73682160398597, + "learning_rate": 5e-06, + "loss": 1.79, + "num_input_tokens_seen": 631117648, + "step": 3651 + }, + { + "epoch": 1.3983148219073152, + "loss": 1.6050747632980347, + "loss_ce": 0.15001869201660156, + "loss_iou": 0.6679272651672363, + "loss_num": 0.0238037109375, + "loss_xval": 1.4550561904907227, + "num_input_tokens_seen": 631117648, + "step": 3651 + }, + { + "epoch": 1.39869781692838, + "grad_norm": 121.84412608839591, + "learning_rate": 5e-06, + "loss": 1.3806, + "num_input_tokens_seen": 631290560, + "step": 3652 + }, + { + "epoch": 1.39869781692838, + "loss": 1.3234338760375977, + "loss_ce": 0.16607999801635742, + "loss_iou": 0.5445430278778076, + "loss_num": 0.013671875, + "loss_xval": 1.1573538780212402, + "num_input_tokens_seen": 631290560, + "step": 3652 + }, + { + "epoch": 1.3990808119494447, + "grad_norm": 155.74749740528833, + "learning_rate": 5e-06, + "loss": 1.5854, + "num_input_tokens_seen": 631463608, + "step": 3653 + }, + { + "epoch": 1.3990808119494447, + "loss": 1.6742525100708008, + "loss_ce": 0.2013673633337021, + "loss_iou": 0.6678390502929688, + "loss_num": 0.0274658203125, + "loss_xval": 1.4728851318359375, + "num_input_tokens_seen": 631463608, + "step": 3653 + }, + { + "epoch": 1.3994638069705094, + "grad_norm": 132.56573499627265, + "learning_rate": 5e-06, + "loss": 1.3746, + "num_input_tokens_seen": 631636760, + "step": 3654 + }, + { + "epoch": 1.3994638069705094, + "loss": 1.409779667854309, + "loss_ce": 0.23161810636520386, + "loss_iou": 0.5528411269187927, + "loss_num": 0.0145263671875, + "loss_xval": 1.1781615018844604, + "num_input_tokens_seen": 631636760, + "step": 3654 + }, + { + "epoch": 1.3998468019915742, + "grad_norm": 228.82114588109818, + "learning_rate": 5e-06, + "loss": 1.7761, + "num_input_tokens_seen": 631809640, + "step": 3655 + }, + { + "epoch": 1.3998468019915742, + "loss": 1.7355780601501465, + "loss_ce": 0.16319149732589722, + "loss_iou": 0.7151482701301575, + "loss_num": 0.0284423828125, + "loss_xval": 1.572386384010315, + "num_input_tokens_seen": 631809640, + "step": 3655 + }, + { + "epoch": 1.400229797012639, + "grad_norm": 149.41649831300995, + "learning_rate": 5e-06, + "loss": 1.5748, + "num_input_tokens_seen": 631982616, + "step": 3656 + }, + { + "epoch": 1.400229797012639, + "loss": 1.7118803262710571, + "loss_ce": 0.2118891477584839, + "loss_iou": 0.6864274740219116, + "loss_num": 0.025390625, + "loss_xval": 1.4999911785125732, + "num_input_tokens_seen": 631982616, + "step": 3656 + }, + { + "epoch": 1.4006127920337035, + "grad_norm": 186.10776374164269, + "learning_rate": 5e-06, + "loss": 1.814, + "num_input_tokens_seen": 632155624, + "step": 3657 + }, + { + "epoch": 1.4006127920337035, + "loss": 1.858457088470459, + "loss_ce": 0.21200542151927948, + "loss_iou": 0.7538898587226868, + "loss_num": 0.0277099609375, + "loss_xval": 1.6464515924453735, + "num_input_tokens_seen": 632155624, + "step": 3657 + }, + { + "epoch": 1.4009957870547682, + "grad_norm": 133.89308676528069, + "learning_rate": 5e-06, + "loss": 2.081, + "num_input_tokens_seen": 632328096, + "step": 3658 + }, + { + "epoch": 1.4009957870547682, + "loss": 2.1265530586242676, + "loss_ce": 0.16438747942447662, + "loss_iou": 0.8673743009567261, + "loss_num": 0.04541015625, + "loss_xval": 1.9621655941009521, + "num_input_tokens_seen": 632328096, + "step": 3658 + }, + { + "epoch": 1.401378782075833, + "grad_norm": 117.00820295619212, + "learning_rate": 5e-06, + "loss": 1.6316, + "num_input_tokens_seen": 632501288, + "step": 3659 + }, + { + "epoch": 1.401378782075833, + "loss": 1.6250028610229492, + "loss_ce": 0.19171608984470367, + "loss_iou": 0.6683492660522461, + "loss_num": 0.019287109375, + "loss_xval": 1.4332866668701172, + "num_input_tokens_seen": 632501288, + "step": 3659 + }, + { + "epoch": 1.4017617770968978, + "grad_norm": 148.2435165289862, + "learning_rate": 5e-06, + "loss": 1.5913, + "num_input_tokens_seen": 632674584, + "step": 3660 + }, + { + "epoch": 1.4017617770968978, + "loss": 1.769160270690918, + "loss_ce": 0.2604833245277405, + "loss_iou": 0.7003681659698486, + "loss_num": 0.0216064453125, + "loss_xval": 1.5086770057678223, + "num_input_tokens_seen": 632674584, + "step": 3660 + }, + { + "epoch": 1.4021447721179625, + "grad_norm": 102.0281381022801, + "learning_rate": 5e-06, + "loss": 1.7656, + "num_input_tokens_seen": 632847376, + "step": 3661 + }, + { + "epoch": 1.4021447721179625, + "loss": 1.818744421005249, + "loss_ce": 0.20584852993488312, + "loss_iou": 0.7327175140380859, + "loss_num": 0.029541015625, + "loss_xval": 1.6128959655761719, + "num_input_tokens_seen": 632847376, + "step": 3661 + }, + { + "epoch": 1.4025277671390273, + "grad_norm": 97.21557210103879, + "learning_rate": 5e-06, + "loss": 1.3706, + "num_input_tokens_seen": 633020272, + "step": 3662 + }, + { + "epoch": 1.4025277671390273, + "loss": 1.3934794664382935, + "loss_ce": 0.21816977858543396, + "loss_iou": 0.5200889110565186, + "loss_num": 0.027099609375, + "loss_xval": 1.175309658050537, + "num_input_tokens_seen": 633020272, + "step": 3662 + }, + { + "epoch": 1.4029107621600918, + "grad_norm": 134.3171957222966, + "learning_rate": 5e-06, + "loss": 1.5424, + "num_input_tokens_seen": 633193384, + "step": 3663 + }, + { + "epoch": 1.4029107621600918, + "loss": 1.334301233291626, + "loss_ce": 0.1728307008743286, + "loss_iou": 0.5449382066726685, + "loss_num": 0.0142822265625, + "loss_xval": 1.161470651626587, + "num_input_tokens_seen": 633193384, + "step": 3663 + }, + { + "epoch": 1.4032937571811566, + "grad_norm": 105.19535281678613, + "learning_rate": 5e-06, + "loss": 1.6734, + "num_input_tokens_seen": 633366136, + "step": 3664 + }, + { + "epoch": 1.4032937571811566, + "loss": 1.624187707901001, + "loss_ce": 0.15212851762771606, + "loss_iou": 0.6565923690795898, + "loss_num": 0.03173828125, + "loss_xval": 1.4720592498779297, + "num_input_tokens_seen": 633366136, + "step": 3664 + }, + { + "epoch": 1.4036767522022213, + "grad_norm": 68.19636593146579, + "learning_rate": 5e-06, + "loss": 1.5386, + "num_input_tokens_seen": 633538880, + "step": 3665 + }, + { + "epoch": 1.4036767522022213, + "loss": 1.4059292078018188, + "loss_ce": 0.18063823878765106, + "loss_iou": 0.5717671513557434, + "loss_num": 0.016357421875, + "loss_xval": 1.2252908945083618, + "num_input_tokens_seen": 633538880, + "step": 3665 + }, + { + "epoch": 1.404059747223286, + "grad_norm": 141.4386747847512, + "learning_rate": 5e-06, + "loss": 1.4879, + "num_input_tokens_seen": 633712216, + "step": 3666 + }, + { + "epoch": 1.404059747223286, + "loss": 1.5945827960968018, + "loss_ce": 0.20796307921409607, + "loss_iou": 0.6225091218948364, + "loss_num": 0.0283203125, + "loss_xval": 1.3866198062896729, + "num_input_tokens_seen": 633712216, + "step": 3666 + }, + { + "epoch": 1.4044427422443508, + "grad_norm": 124.74239464839431, + "learning_rate": 5e-06, + "loss": 1.6612, + "num_input_tokens_seen": 633884912, + "step": 3667 + }, + { + "epoch": 1.4044427422443508, + "loss": 1.5374257564544678, + "loss_ce": 0.1949932724237442, + "loss_iou": 0.6300327181816101, + "loss_num": 0.0164794921875, + "loss_xval": 1.3424323797225952, + "num_input_tokens_seen": 633884912, + "step": 3667 + }, + { + "epoch": 1.4048257372654156, + "grad_norm": 144.58184486992087, + "learning_rate": 5e-06, + "loss": 1.5791, + "num_input_tokens_seen": 634057744, + "step": 3668 + }, + { + "epoch": 1.4048257372654156, + "loss": 1.656828761100769, + "loss_ce": 0.1751592755317688, + "loss_iou": 0.69180828332901, + "loss_num": 0.0196533203125, + "loss_xval": 1.481669545173645, + "num_input_tokens_seen": 634057744, + "step": 3668 + }, + { + "epoch": 1.4052087322864804, + "grad_norm": 108.73694816194813, + "learning_rate": 5e-06, + "loss": 1.4652, + "num_input_tokens_seen": 634230824, + "step": 3669 + }, + { + "epoch": 1.4052087322864804, + "loss": 1.6303739547729492, + "loss_ce": 0.20090125501155853, + "loss_iou": 0.6692651510238647, + "loss_num": 0.0181884765625, + "loss_xval": 1.4294726848602295, + "num_input_tokens_seen": 634230824, + "step": 3669 + }, + { + "epoch": 1.4055917273075451, + "grad_norm": 116.11450724984425, + "learning_rate": 5e-06, + "loss": 1.6504, + "num_input_tokens_seen": 634403640, + "step": 3670 + }, + { + "epoch": 1.4055917273075451, + "loss": 1.6293880939483643, + "loss_ce": 0.17838090658187866, + "loss_iou": 0.6587921380996704, + "loss_num": 0.0267333984375, + "loss_xval": 1.4510071277618408, + "num_input_tokens_seen": 634403640, + "step": 3670 + }, + { + "epoch": 1.4059747223286096, + "grad_norm": 76.9912662304742, + "learning_rate": 5e-06, + "loss": 1.4265, + "num_input_tokens_seen": 634576848, + "step": 3671 + }, + { + "epoch": 1.4059747223286096, + "loss": 1.4073896408081055, + "loss_ce": 0.157753124833107, + "loss_iou": 0.5690321326255798, + "loss_num": 0.0223388671875, + "loss_xval": 1.2496365308761597, + "num_input_tokens_seen": 634576848, + "step": 3671 + }, + { + "epoch": 1.4063577173496744, + "grad_norm": 149.5367189108514, + "learning_rate": 5e-06, + "loss": 1.5006, + "num_input_tokens_seen": 634750160, + "step": 3672 + }, + { + "epoch": 1.4063577173496744, + "loss": 1.475905418395996, + "loss_ce": 0.2115403562784195, + "loss_iou": 0.6025347113609314, + "loss_num": 0.0118408203125, + "loss_xval": 1.2643650770187378, + "num_input_tokens_seen": 634750160, + "step": 3672 + }, + { + "epoch": 1.4067407123707392, + "grad_norm": 122.68840219726172, + "learning_rate": 5e-06, + "loss": 1.647, + "num_input_tokens_seen": 634922744, + "step": 3673 + }, + { + "epoch": 1.4067407123707392, + "loss": 1.7613252401351929, + "loss_ce": 0.1777595579624176, + "loss_iou": 0.7400555610656738, + "loss_num": 0.0206298828125, + "loss_xval": 1.5835657119750977, + "num_input_tokens_seen": 634922744, + "step": 3673 + }, + { + "epoch": 1.407123707391804, + "grad_norm": 141.85612757119978, + "learning_rate": 5e-06, + "loss": 1.7861, + "num_input_tokens_seen": 635095576, + "step": 3674 + }, + { + "epoch": 1.407123707391804, + "loss": 1.7490111589431763, + "loss_ce": 0.1951323002576828, + "loss_iou": 0.7126999497413635, + "loss_num": 0.025634765625, + "loss_xval": 1.553878903388977, + "num_input_tokens_seen": 635095576, + "step": 3674 + }, + { + "epoch": 1.4075067024128687, + "grad_norm": 117.45742225619787, + "learning_rate": 5e-06, + "loss": 1.3144, + "num_input_tokens_seen": 635268088, + "step": 3675 + }, + { + "epoch": 1.4075067024128687, + "loss": 1.2747701406478882, + "loss_ce": 0.15608471632003784, + "loss_iou": 0.47549569606781006, + "loss_num": 0.033447265625, + "loss_xval": 1.1186854839324951, + "num_input_tokens_seen": 635268088, + "step": 3675 + }, + { + "epoch": 1.4078896974339334, + "grad_norm": 127.83676373850088, + "learning_rate": 5e-06, + "loss": 1.6213, + "num_input_tokens_seen": 635441208, + "step": 3676 + }, + { + "epoch": 1.4078896974339334, + "loss": 1.6106691360473633, + "loss_ce": 0.1536344438791275, + "loss_iou": 0.6680924892425537, + "loss_num": 0.024169921875, + "loss_xval": 1.4570345878601074, + "num_input_tokens_seen": 635441208, + "step": 3676 + }, + { + "epoch": 1.408272692454998, + "grad_norm": 123.81896112475852, + "learning_rate": 5e-06, + "loss": 1.3013, + "num_input_tokens_seen": 635614088, + "step": 3677 + }, + { + "epoch": 1.408272692454998, + "loss": 1.3278595209121704, + "loss_ce": 0.2005331963300705, + "loss_iou": 0.5221745371818542, + "loss_num": 0.0166015625, + "loss_xval": 1.1273263692855835, + "num_input_tokens_seen": 635614088, + "step": 3677 + }, + { + "epoch": 1.4086556874760627, + "grad_norm": 150.54447805865803, + "learning_rate": 5e-06, + "loss": 1.6948, + "num_input_tokens_seen": 635787536, + "step": 3678 + }, + { + "epoch": 1.4086556874760627, + "loss": 1.6913161277770996, + "loss_ce": 0.2019982933998108, + "loss_iou": 0.6712336540222168, + "loss_num": 0.029296875, + "loss_xval": 1.4893178939819336, + "num_input_tokens_seen": 635787536, + "step": 3678 + }, + { + "epoch": 1.4090386824971275, + "grad_norm": 177.2569650026807, + "learning_rate": 5e-06, + "loss": 1.5617, + "num_input_tokens_seen": 635960624, + "step": 3679 + }, + { + "epoch": 1.4090386824971275, + "loss": 1.7486238479614258, + "loss_ce": 0.20002582669258118, + "loss_iou": 0.7126840353012085, + "loss_num": 0.024658203125, + "loss_xval": 1.548598051071167, + "num_input_tokens_seen": 635960624, + "step": 3679 + }, + { + "epoch": 1.4094216775181923, + "grad_norm": 91.34441728373196, + "learning_rate": 5e-06, + "loss": 1.7018, + "num_input_tokens_seen": 636133752, + "step": 3680 + }, + { + "epoch": 1.4094216775181923, + "loss": 1.7471097707748413, + "loss_ce": 0.2088487446308136, + "loss_iou": 0.7028005123138428, + "loss_num": 0.0264892578125, + "loss_xval": 1.5382609367370605, + "num_input_tokens_seen": 636133752, + "step": 3680 + }, + { + "epoch": 1.409804672539257, + "grad_norm": 118.20382505311971, + "learning_rate": 5e-06, + "loss": 1.2404, + "num_input_tokens_seen": 636306808, + "step": 3681 + }, + { + "epoch": 1.409804672539257, + "loss": 1.1724213361740112, + "loss_ce": 0.23402735590934753, + "loss_iou": 0.42903590202331543, + "loss_num": 0.01611328125, + "loss_xval": 0.9383940696716309, + "num_input_tokens_seen": 636306808, + "step": 3681 + }, + { + "epoch": 1.4101876675603218, + "grad_norm": 155.07307351540132, + "learning_rate": 5e-06, + "loss": 1.5644, + "num_input_tokens_seen": 636479888, + "step": 3682 + }, + { + "epoch": 1.4101876675603218, + "loss": 1.5142014026641846, + "loss_ce": 0.16153010725975037, + "loss_iou": 0.6491597890853882, + "loss_num": 0.0108642578125, + "loss_xval": 1.3526713848114014, + "num_input_tokens_seen": 636479888, + "step": 3682 + }, + { + "epoch": 1.4105706625813865, + "grad_norm": 106.23004908580938, + "learning_rate": 5e-06, + "loss": 1.7452, + "num_input_tokens_seen": 636653024, + "step": 3683 + }, + { + "epoch": 1.4105706625813865, + "loss": 1.6639301776885986, + "loss_ce": 0.15145039558410645, + "loss_iou": 0.6784200668334961, + "loss_num": 0.0311279296875, + "loss_xval": 1.4862346649169922, + "num_input_tokens_seen": 636653024, + "step": 3683 + }, + { + "epoch": 1.4109536576024513, + "grad_norm": 92.66027418804897, + "learning_rate": 5e-06, + "loss": 1.2667, + "num_input_tokens_seen": 636825744, + "step": 3684 + }, + { + "epoch": 1.4109536576024513, + "loss": 1.0458866357803345, + "loss_ce": 0.15764087438583374, + "loss_iou": 0.420815110206604, + "loss_num": 0.00933837890625, + "loss_xval": 0.8803722858428955, + "num_input_tokens_seen": 636825744, + "step": 3684 + }, + { + "epoch": 1.4113366526235158, + "grad_norm": 110.78237425208994, + "learning_rate": 5e-06, + "loss": 1.3655, + "num_input_tokens_seen": 636998592, + "step": 3685 + }, + { + "epoch": 1.4113366526235158, + "loss": 1.4087257385253906, + "loss_ce": 0.18637892603874207, + "loss_iou": 0.5656717419624329, + "loss_num": 0.0181884765625, + "loss_xval": 1.2223469018936157, + "num_input_tokens_seen": 636998592, + "step": 3685 + }, + { + "epoch": 1.4117196476445806, + "grad_norm": 119.81532075716193, + "learning_rate": 5e-06, + "loss": 1.6419, + "num_input_tokens_seen": 637171880, + "step": 3686 + }, + { + "epoch": 1.4117196476445806, + "loss": 1.6677496433258057, + "loss_ce": 0.2320818305015564, + "loss_iou": 0.670623242855072, + "loss_num": 0.0189208984375, + "loss_xval": 1.435667872428894, + "num_input_tokens_seen": 637171880, + "step": 3686 + }, + { + "epoch": 1.4121026426656453, + "grad_norm": 131.88755969852434, + "learning_rate": 5e-06, + "loss": 1.4355, + "num_input_tokens_seen": 637341968, + "step": 3687 + }, + { + "epoch": 1.4121026426656453, + "loss": 1.3841338157653809, + "loss_ce": 0.17219631373882294, + "loss_iou": 0.5581018924713135, + "loss_num": 0.0191650390625, + "loss_xval": 1.211937427520752, + "num_input_tokens_seen": 637341968, + "step": 3687 + }, + { + "epoch": 1.41248563768671, + "grad_norm": 151.8059388176097, + "learning_rate": 5e-06, + "loss": 1.8072, + "num_input_tokens_seen": 637514528, + "step": 3688 + }, + { + "epoch": 1.41248563768671, + "loss": 1.9605388641357422, + "loss_ce": 0.19696488976478577, + "loss_iou": 0.8153807520866394, + "loss_num": 0.026611328125, + "loss_xval": 1.7635740041732788, + "num_input_tokens_seen": 637514528, + "step": 3688 + }, + { + "epoch": 1.4128686327077749, + "grad_norm": 135.8035768969732, + "learning_rate": 5e-06, + "loss": 1.6703, + "num_input_tokens_seen": 637687248, + "step": 3689 + }, + { + "epoch": 1.4128686327077749, + "loss": 1.5969858169555664, + "loss_ce": 0.19671300053596497, + "loss_iou": 0.6430990695953369, + "loss_num": 0.0228271484375, + "loss_xval": 1.4002728462219238, + "num_input_tokens_seen": 637687248, + "step": 3689 + }, + { + "epoch": 1.4132516277288394, + "grad_norm": 120.6836359932711, + "learning_rate": 5e-06, + "loss": 1.5456, + "num_input_tokens_seen": 637860232, + "step": 3690 + }, + { + "epoch": 1.4132516277288394, + "loss": 1.6012014150619507, + "loss_ce": 0.20785996317863464, + "loss_iou": 0.6385194659233093, + "loss_num": 0.0233154296875, + "loss_xval": 1.3933414220809937, + "num_input_tokens_seen": 637860232, + "step": 3690 + }, + { + "epoch": 1.4136346227499041, + "grad_norm": 92.45627459761134, + "learning_rate": 5e-06, + "loss": 1.5551, + "num_input_tokens_seen": 638033256, + "step": 3691 + }, + { + "epoch": 1.4136346227499041, + "loss": 1.7157249450683594, + "loss_ce": 0.22539356350898743, + "loss_iou": 0.6931637525558472, + "loss_num": 0.020751953125, + "loss_xval": 1.4903314113616943, + "num_input_tokens_seen": 638033256, + "step": 3691 + }, + { + "epoch": 1.414017617770969, + "grad_norm": 136.57183184362495, + "learning_rate": 5e-06, + "loss": 1.2977, + "num_input_tokens_seen": 638205808, + "step": 3692 + }, + { + "epoch": 1.414017617770969, + "loss": 1.3344258069992065, + "loss_ce": 0.18167954683303833, + "loss_iou": 0.5222960114479065, + "loss_num": 0.0216064453125, + "loss_xval": 1.152746319770813, + "num_input_tokens_seen": 638205808, + "step": 3692 + }, + { + "epoch": 1.4144006127920337, + "grad_norm": 136.52495627908385, + "learning_rate": 5e-06, + "loss": 1.9508, + "num_input_tokens_seen": 638378784, + "step": 3693 + }, + { + "epoch": 1.4144006127920337, + "loss": 1.865358829498291, + "loss_ce": 0.16259488463401794, + "loss_iou": 0.7511622309684753, + "loss_num": 0.0400390625, + "loss_xval": 1.7027639150619507, + "num_input_tokens_seen": 638378784, + "step": 3693 + }, + { + "epoch": 1.4147836078130984, + "grad_norm": 238.08665224260275, + "learning_rate": 5e-06, + "loss": 1.4559, + "num_input_tokens_seen": 638551760, + "step": 3694 + }, + { + "epoch": 1.4147836078130984, + "loss": 1.5375279188156128, + "loss_ce": 0.1788027137517929, + "loss_iou": 0.6223556995391846, + "loss_num": 0.022705078125, + "loss_xval": 1.3587250709533691, + "num_input_tokens_seen": 638551760, + "step": 3694 + }, + { + "epoch": 1.4151666028341632, + "grad_norm": 86.71693668470905, + "learning_rate": 5e-06, + "loss": 1.897, + "num_input_tokens_seen": 638724952, + "step": 3695 + }, + { + "epoch": 1.4151666028341632, + "loss": 1.8192293643951416, + "loss_ce": 0.20848700404167175, + "loss_iou": 0.7126893401145935, + "loss_num": 0.037109375, + "loss_xval": 1.6107425689697266, + "num_input_tokens_seen": 638724952, + "step": 3695 + }, + { + "epoch": 1.415549597855228, + "grad_norm": 138.74805554876042, + "learning_rate": 5e-06, + "loss": 1.3908, + "num_input_tokens_seen": 638897848, + "step": 3696 + }, + { + "epoch": 1.415549597855228, + "loss": 1.4511733055114746, + "loss_ce": 0.1650530993938446, + "loss_iou": 0.5951322317123413, + "loss_num": 0.0191650390625, + "loss_xval": 1.2861201763153076, + "num_input_tokens_seen": 638897848, + "step": 3696 + }, + { + "epoch": 1.4159325928762927, + "grad_norm": 306.11785917938215, + "learning_rate": 5e-06, + "loss": 1.7482, + "num_input_tokens_seen": 639071096, + "step": 3697 + }, + { + "epoch": 1.4159325928762927, + "loss": 1.8727173805236816, + "loss_ce": 0.21603775024414062, + "loss_iou": 0.7770397663116455, + "loss_num": 0.0205078125, + "loss_xval": 1.656679630279541, + "num_input_tokens_seen": 639071096, + "step": 3697 + }, + { + "epoch": 1.4163155878973575, + "grad_norm": 85.81341137112987, + "learning_rate": 5e-06, + "loss": 1.8047, + "num_input_tokens_seen": 639243728, + "step": 3698 + }, + { + "epoch": 1.4163155878973575, + "loss": 1.8856124877929688, + "loss_ce": 0.2053176760673523, + "loss_iou": 0.7709640264511108, + "loss_num": 0.0277099609375, + "loss_xval": 1.6802948713302612, + "num_input_tokens_seen": 639243728, + "step": 3698 + }, + { + "epoch": 1.416698582918422, + "grad_norm": 336.9428656679885, + "learning_rate": 5e-06, + "loss": 2.2322, + "num_input_tokens_seen": 639416896, + "step": 3699 + }, + { + "epoch": 1.416698582918422, + "loss": 2.191256523132324, + "loss_ce": 0.19940683245658875, + "loss_iou": 0.9435871839523315, + "loss_num": 0.02099609375, + "loss_xval": 1.991849660873413, + "num_input_tokens_seen": 639416896, + "step": 3699 + }, + { + "epoch": 1.4170815779394867, + "grad_norm": 92.35111266977904, + "learning_rate": 5e-06, + "loss": 1.8368, + "num_input_tokens_seen": 639589640, + "step": 3700 + }, + { + "epoch": 1.4170815779394867, + "loss": 1.8230584859848022, + "loss_ce": 0.16814501583576202, + "loss_iou": 0.7557098865509033, + "loss_num": 0.0286865234375, + "loss_xval": 1.6549134254455566, + "num_input_tokens_seen": 639589640, + "step": 3700 + }, + { + "epoch": 1.4174645729605515, + "grad_norm": 327.4133991835956, + "learning_rate": 5e-06, + "loss": 1.6466, + "num_input_tokens_seen": 639762656, + "step": 3701 + }, + { + "epoch": 1.4174645729605515, + "loss": 1.5542529821395874, + "loss_ce": 0.13617312908172607, + "loss_iou": 0.6641790866851807, + "loss_num": 0.0179443359375, + "loss_xval": 1.4180798530578613, + "num_input_tokens_seen": 639762656, + "step": 3701 + }, + { + "epoch": 1.4178475679816163, + "grad_norm": 62.16172672217173, + "learning_rate": 5e-06, + "loss": 1.957, + "num_input_tokens_seen": 639931984, + "step": 3702 + }, + { + "epoch": 1.4178475679816163, + "loss": 1.9768989086151123, + "loss_ce": 0.1790628433227539, + "loss_iou": 0.7930219769477844, + "loss_num": 0.04248046875, + "loss_xval": 1.7978359460830688, + "num_input_tokens_seen": 639931984, + "step": 3702 + }, + { + "epoch": 1.418230563002681, + "grad_norm": 250.41063902161702, + "learning_rate": 5e-06, + "loss": 1.9669, + "num_input_tokens_seen": 640105168, + "step": 3703 + }, + { + "epoch": 1.418230563002681, + "loss": 2.0117201805114746, + "loss_ce": 0.22301751375198364, + "loss_iou": 0.8056672215461731, + "loss_num": 0.035400390625, + "loss_xval": 1.7887026071548462, + "num_input_tokens_seen": 640105168, + "step": 3703 + }, + { + "epoch": 1.4186135580237456, + "grad_norm": 163.9289972514936, + "learning_rate": 5e-06, + "loss": 1.7896, + "num_input_tokens_seen": 640278344, + "step": 3704 + }, + { + "epoch": 1.4186135580237456, + "loss": 1.970999002456665, + "loss_ce": 0.2278166115283966, + "loss_iou": 0.8001800775527954, + "loss_num": 0.028564453125, + "loss_xval": 1.7431824207305908, + "num_input_tokens_seen": 640278344, + "step": 3704 + }, + { + "epoch": 1.4189965530448103, + "grad_norm": 249.6520301998006, + "learning_rate": 5e-06, + "loss": 2.0686, + "num_input_tokens_seen": 640451592, + "step": 3705 + }, + { + "epoch": 1.4189965530448103, + "loss": 2.0172922611236572, + "loss_ce": 0.19945912063121796, + "loss_iou": 0.8343011140823364, + "loss_num": 0.02978515625, + "loss_xval": 1.8178331851959229, + "num_input_tokens_seen": 640451592, + "step": 3705 + }, + { + "epoch": 1.419379548065875, + "grad_norm": 67.82720968672345, + "learning_rate": 5e-06, + "loss": 1.8445, + "num_input_tokens_seen": 640624408, + "step": 3706 + }, + { + "epoch": 1.419379548065875, + "loss": 1.8409152030944824, + "loss_ce": 0.15923503041267395, + "loss_iou": 0.7513015270233154, + "loss_num": 0.035888671875, + "loss_xval": 1.6816802024841309, + "num_input_tokens_seen": 640624408, + "step": 3706 + }, + { + "epoch": 1.4197625430869398, + "grad_norm": 132.1660312975049, + "learning_rate": 5e-06, + "loss": 1.4361, + "num_input_tokens_seen": 640797160, + "step": 3707 + }, + { + "epoch": 1.4197625430869398, + "loss": 1.3635417222976685, + "loss_ce": 0.19868165254592896, + "loss_iou": 0.5315419435501099, + "loss_num": 0.0203857421875, + "loss_xval": 1.1648600101470947, + "num_input_tokens_seen": 640797160, + "step": 3707 + }, + { + "epoch": 1.4201455381080046, + "grad_norm": 159.00627526237912, + "learning_rate": 5e-06, + "loss": 1.7623, + "num_input_tokens_seen": 640970472, + "step": 3708 + }, + { + "epoch": 1.4201455381080046, + "loss": 1.839440941810608, + "loss_ce": 0.23471230268478394, + "loss_iou": 0.7542381286621094, + "loss_num": 0.019287109375, + "loss_xval": 1.6047286987304688, + "num_input_tokens_seen": 640970472, + "step": 3708 + }, + { + "epoch": 1.4205285331290693, + "grad_norm": 91.5233965072586, + "learning_rate": 5e-06, + "loss": 1.5655, + "num_input_tokens_seen": 641143488, + "step": 3709 + }, + { + "epoch": 1.4205285331290693, + "loss": 1.4377386569976807, + "loss_ce": 0.18732315301895142, + "loss_iou": 0.5748385190963745, + "loss_num": 0.0201416015625, + "loss_xval": 1.250415563583374, + "num_input_tokens_seen": 641143488, + "step": 3709 + }, + { + "epoch": 1.420911528150134, + "grad_norm": 110.57612094579471, + "learning_rate": 5e-06, + "loss": 1.4309, + "num_input_tokens_seen": 641316656, + "step": 3710 + }, + { + "epoch": 1.420911528150134, + "loss": 1.3285162448883057, + "loss_ce": 0.1683727502822876, + "loss_iou": 0.5430691838264465, + "loss_num": 0.0147705078125, + "loss_xval": 1.160143494606018, + "num_input_tokens_seen": 641316656, + "step": 3710 + }, + { + "epoch": 1.4212945231711989, + "grad_norm": 212.41504148692457, + "learning_rate": 5e-06, + "loss": 1.6513, + "num_input_tokens_seen": 641489936, + "step": 3711 + }, + { + "epoch": 1.4212945231711989, + "loss": 1.6178189516067505, + "loss_ce": 0.1876608431339264, + "loss_iou": 0.6751620769500732, + "loss_num": 0.0159912109375, + "loss_xval": 1.4301581382751465, + "num_input_tokens_seen": 641489936, + "step": 3711 + }, + { + "epoch": 1.4216775181922636, + "grad_norm": 103.55674639460399, + "learning_rate": 5e-06, + "loss": 1.7225, + "num_input_tokens_seen": 641662936, + "step": 3712 + }, + { + "epoch": 1.4216775181922636, + "loss": 1.9159207344055176, + "loss_ce": 0.21713170409202576, + "loss_iou": 0.7882983684539795, + "loss_num": 0.0244140625, + "loss_xval": 1.698789119720459, + "num_input_tokens_seen": 641662936, + "step": 3712 + }, + { + "epoch": 1.4220605132133282, + "grad_norm": 83.6160439494949, + "learning_rate": 5e-06, + "loss": 1.5368, + "num_input_tokens_seen": 641835768, + "step": 3713 + }, + { + "epoch": 1.4220605132133282, + "loss": 1.5422358512878418, + "loss_ce": 0.19803684949874878, + "loss_iou": 0.6211351156234741, + "loss_num": 0.0203857421875, + "loss_xval": 1.3441989421844482, + "num_input_tokens_seen": 641835768, + "step": 3713 + }, + { + "epoch": 1.422443508234393, + "grad_norm": 156.9246201501115, + "learning_rate": 5e-06, + "loss": 1.6804, + "num_input_tokens_seen": 642008592, + "step": 3714 + }, + { + "epoch": 1.422443508234393, + "loss": 1.8472943305969238, + "loss_ce": 0.16469363868236542, + "loss_iou": 0.770102858543396, + "loss_num": 0.0284423828125, + "loss_xval": 1.682600736618042, + "num_input_tokens_seen": 642008592, + "step": 3714 + }, + { + "epoch": 1.4228265032554577, + "grad_norm": 91.33352353280192, + "learning_rate": 5e-06, + "loss": 1.774, + "num_input_tokens_seen": 642181552, + "step": 3715 + }, + { + "epoch": 1.4228265032554577, + "loss": 1.6739634275436401, + "loss_ce": 0.17115110158920288, + "loss_iou": 0.6929649710655212, + "loss_num": 0.0233154296875, + "loss_xval": 1.4878586530685425, + "num_input_tokens_seen": 642181552, + "step": 3715 + }, + { + "epoch": 1.4232094982765224, + "grad_norm": 91.89252200885332, + "learning_rate": 5e-06, + "loss": 1.4894, + "num_input_tokens_seen": 642354568, + "step": 3716 + }, + { + "epoch": 1.4232094982765224, + "loss": 1.5184659957885742, + "loss_ce": 0.1681097000837326, + "loss_iou": 0.6297527551651001, + "loss_num": 0.0181884765625, + "loss_xval": 1.3503563404083252, + "num_input_tokens_seen": 642354568, + "step": 3716 + }, + { + "epoch": 1.4235924932975872, + "grad_norm": 185.32181837068592, + "learning_rate": 5e-06, + "loss": 1.4635, + "num_input_tokens_seen": 642527512, + "step": 3717 + }, + { + "epoch": 1.4235924932975872, + "loss": 1.4110028743743896, + "loss_ce": 0.15500202775001526, + "loss_iou": 0.5823765993118286, + "loss_num": 0.0181884765625, + "loss_xval": 1.2560007572174072, + "num_input_tokens_seen": 642527512, + "step": 3717 + }, + { + "epoch": 1.4239754883186517, + "grad_norm": 95.47791705566677, + "learning_rate": 5e-06, + "loss": 1.7641, + "num_input_tokens_seen": 642700576, + "step": 3718 + }, + { + "epoch": 1.4239754883186517, + "loss": 1.8137187957763672, + "loss_ce": 0.17136678099632263, + "loss_iou": 0.7560819387435913, + "loss_num": 0.0260009765625, + "loss_xval": 1.6423518657684326, + "num_input_tokens_seen": 642700576, + "step": 3718 + }, + { + "epoch": 1.4243584833397165, + "grad_norm": 130.32200948376027, + "learning_rate": 5e-06, + "loss": 1.5196, + "num_input_tokens_seen": 642873912, + "step": 3719 + }, + { + "epoch": 1.4243584833397165, + "loss": 1.578095555305481, + "loss_ce": 0.22507821023464203, + "loss_iou": 0.6102397441864014, + "loss_num": 0.0264892578125, + "loss_xval": 1.3530173301696777, + "num_input_tokens_seen": 642873912, + "step": 3719 + }, + { + "epoch": 1.4247414783607812, + "grad_norm": 164.4305841901078, + "learning_rate": 5e-06, + "loss": 1.7802, + "num_input_tokens_seen": 643047464, + "step": 3720 + }, + { + "epoch": 1.4247414783607812, + "loss": 1.7335370779037476, + "loss_ce": 0.18880623579025269, + "loss_iou": 0.7317770719528198, + "loss_num": 0.0162353515625, + "loss_xval": 1.5447309017181396, + "num_input_tokens_seen": 643047464, + "step": 3720 + }, + { + "epoch": 1.425124473381846, + "grad_norm": 111.37483587714223, + "learning_rate": 5e-06, + "loss": 1.8079, + "num_input_tokens_seen": 643220608, + "step": 3721 + }, + { + "epoch": 1.425124473381846, + "loss": 1.8084741830825806, + "loss_ce": 0.16545546054840088, + "loss_iou": 0.7520818710327148, + "loss_num": 0.02783203125, + "loss_xval": 1.6430187225341797, + "num_input_tokens_seen": 643220608, + "step": 3721 + }, + { + "epoch": 1.4255074684029108, + "grad_norm": 70.5761506236639, + "learning_rate": 5e-06, + "loss": 1.3081, + "num_input_tokens_seen": 643393384, + "step": 3722 + }, + { + "epoch": 1.4255074684029108, + "loss": 1.4663512706756592, + "loss_ce": 0.1774357110261917, + "loss_iou": 0.5970029234886169, + "loss_num": 0.01904296875, + "loss_xval": 1.2889155149459839, + "num_input_tokens_seen": 643393384, + "step": 3722 + }, + { + "epoch": 1.4258904634239755, + "grad_norm": 174.3773016576876, + "learning_rate": 5e-06, + "loss": 1.3919, + "num_input_tokens_seen": 643565864, + "step": 3723 + }, + { + "epoch": 1.4258904634239755, + "loss": 1.5935523509979248, + "loss_ce": 0.12954799830913544, + "loss_iou": 0.6833571195602417, + "loss_num": 0.01953125, + "loss_xval": 1.4640042781829834, + "num_input_tokens_seen": 643565864, + "step": 3723 + }, + { + "epoch": 1.4262734584450403, + "grad_norm": 157.3914023402223, + "learning_rate": 5e-06, + "loss": 1.7692, + "num_input_tokens_seen": 643738968, + "step": 3724 + }, + { + "epoch": 1.4262734584450403, + "loss": 1.7137362957000732, + "loss_ce": 0.19263222813606262, + "loss_iou": 0.6818166971206665, + "loss_num": 0.031494140625, + "loss_xval": 1.521104097366333, + "num_input_tokens_seen": 643738968, + "step": 3724 + }, + { + "epoch": 1.426656453466105, + "grad_norm": 103.36857082377544, + "learning_rate": 5e-06, + "loss": 1.3669, + "num_input_tokens_seen": 643912112, + "step": 3725 + }, + { + "epoch": 1.426656453466105, + "loss": 1.345698356628418, + "loss_ce": 0.18965527415275574, + "loss_iou": 0.5394168496131897, + "loss_num": 0.01544189453125, + "loss_xval": 1.1560431718826294, + "num_input_tokens_seen": 643912112, + "step": 3725 + }, + { + "epoch": 1.4270394484871698, + "grad_norm": 93.56588767405124, + "learning_rate": 5e-06, + "loss": 1.4712, + "num_input_tokens_seen": 644085240, + "step": 3726 + }, + { + "epoch": 1.4270394484871698, + "loss": 1.3277511596679688, + "loss_ce": 0.16545361280441284, + "loss_iou": 0.5361658334732056, + "loss_num": 0.0179443359375, + "loss_xval": 1.1622974872589111, + "num_input_tokens_seen": 644085240, + "step": 3726 + }, + { + "epoch": 1.4274224435082343, + "grad_norm": 112.12544501076535, + "learning_rate": 5e-06, + "loss": 1.6564, + "num_input_tokens_seen": 644254872, + "step": 3727 + }, + { + "epoch": 1.4274224435082343, + "loss": 1.8178613185882568, + "loss_ce": 0.20174267888069153, + "loss_iou": 0.7438503503799438, + "loss_num": 0.025634765625, + "loss_xval": 1.6161186695098877, + "num_input_tokens_seen": 644254872, + "step": 3727 + }, + { + "epoch": 1.427805438529299, + "grad_norm": 73.91467844184959, + "learning_rate": 5e-06, + "loss": 1.205, + "num_input_tokens_seen": 644427824, + "step": 3728 + }, + { + "epoch": 1.427805438529299, + "loss": 1.2962121963500977, + "loss_ce": 0.24076081812381744, + "loss_iou": 0.4939274191856384, + "loss_num": 0.01348876953125, + "loss_xval": 1.0554512739181519, + "num_input_tokens_seen": 644427824, + "step": 3728 + }, + { + "epoch": 1.4281884335503638, + "grad_norm": 152.54727985440198, + "learning_rate": 5e-06, + "loss": 1.7261, + "num_input_tokens_seen": 644601480, + "step": 3729 + }, + { + "epoch": 1.4281884335503638, + "loss": 1.8054468631744385, + "loss_ce": 0.1608993113040924, + "loss_iou": 0.7548604607582092, + "loss_num": 0.0269775390625, + "loss_xval": 1.6445475816726685, + "num_input_tokens_seen": 644601480, + "step": 3729 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 82.0129507976678, + "learning_rate": 5e-06, + "loss": 2.032, + "num_input_tokens_seen": 644774432, + "step": 3730 + }, + { + "epoch": 1.4285714285714286, + "loss": 1.9827988147735596, + "loss_ce": 0.18624532222747803, + "loss_iou": 0.8012003898620605, + "loss_num": 0.038818359375, + "loss_xval": 1.796553611755371, + "num_input_tokens_seen": 644774432, + "step": 3730 + }, + { + "epoch": 1.4289544235924934, + "grad_norm": 152.8632448487534, + "learning_rate": 5e-06, + "loss": 1.5189, + "num_input_tokens_seen": 644947120, + "step": 3731 + }, + { + "epoch": 1.4289544235924934, + "loss": 1.4444881677627563, + "loss_ce": 0.16636133193969727, + "loss_iou": 0.5974984765052795, + "loss_num": 0.0166015625, + "loss_xval": 1.278126835823059, + "num_input_tokens_seen": 644947120, + "step": 3731 + }, + { + "epoch": 1.429337418613558, + "grad_norm": 283.4599789297149, + "learning_rate": 5e-06, + "loss": 2.0318, + "num_input_tokens_seen": 645120224, + "step": 3732 + }, + { + "epoch": 1.429337418613558, + "loss": 1.9308521747589111, + "loss_ce": 0.21116694808006287, + "loss_iou": 0.8041785359382629, + "loss_num": 0.022216796875, + "loss_xval": 1.7196851968765259, + "num_input_tokens_seen": 645120224, + "step": 3732 + }, + { + "epoch": 1.4297204136346227, + "grad_norm": 130.5335208854067, + "learning_rate": 5e-06, + "loss": 2.0281, + "num_input_tokens_seen": 645293208, + "step": 3733 + }, + { + "epoch": 1.4297204136346227, + "loss": 1.900571584701538, + "loss_ce": 0.21438062191009521, + "loss_iou": 0.7400986552238464, + "loss_num": 0.041259765625, + "loss_xval": 1.6861909627914429, + "num_input_tokens_seen": 645293208, + "step": 3733 + }, + { + "epoch": 1.4301034086556874, + "grad_norm": 77.59650536748586, + "learning_rate": 5e-06, + "loss": 1.7141, + "num_input_tokens_seen": 645466184, + "step": 3734 + }, + { + "epoch": 1.4301034086556874, + "loss": 1.670695185661316, + "loss_ce": 0.18912170827388763, + "loss_iou": 0.6798431277275085, + "loss_num": 0.0244140625, + "loss_xval": 1.481573462486267, + "num_input_tokens_seen": 645466184, + "step": 3734 + }, + { + "epoch": 1.4304864036767522, + "grad_norm": 210.19312482574938, + "learning_rate": 5e-06, + "loss": 1.9514, + "num_input_tokens_seen": 645639128, + "step": 3735 + }, + { + "epoch": 1.4304864036767522, + "loss": 2.1336963176727295, + "loss_ce": 0.22373518347740173, + "loss_iou": 0.8884216547012329, + "loss_num": 0.026611328125, + "loss_xval": 1.9099611043930054, + "num_input_tokens_seen": 645639128, + "step": 3735 + }, + { + "epoch": 1.430869398697817, + "grad_norm": 97.08987338223963, + "learning_rate": 5e-06, + "loss": 1.7825, + "num_input_tokens_seen": 645811568, + "step": 3736 + }, + { + "epoch": 1.430869398697817, + "loss": 1.7851035594940186, + "loss_ce": 0.21304568648338318, + "loss_iou": 0.7253295183181763, + "loss_num": 0.0242919921875, + "loss_xval": 1.5720579624176025, + "num_input_tokens_seen": 645811568, + "step": 3736 + }, + { + "epoch": 1.4312523937188817, + "grad_norm": 268.82543845535594, + "learning_rate": 5e-06, + "loss": 1.4205, + "num_input_tokens_seen": 645984384, + "step": 3737 + }, + { + "epoch": 1.4312523937188817, + "loss": 1.3121609687805176, + "loss_ce": 0.18860487639904022, + "loss_iou": 0.5270795822143555, + "loss_num": 0.01385498046875, + "loss_xval": 1.123556137084961, + "num_input_tokens_seen": 645984384, + "step": 3737 + }, + { + "epoch": 1.4316353887399464, + "grad_norm": 83.84882696903573, + "learning_rate": 5e-06, + "loss": 1.8783, + "num_input_tokens_seen": 646157008, + "step": 3738 + }, + { + "epoch": 1.4316353887399464, + "loss": 1.8196080923080444, + "loss_ce": 0.17939066886901855, + "loss_iou": 0.7516272664070129, + "loss_num": 0.02734375, + "loss_xval": 1.6402174234390259, + "num_input_tokens_seen": 646157008, + "step": 3738 + }, + { + "epoch": 1.4320183837610112, + "grad_norm": 190.26463881770857, + "learning_rate": 5e-06, + "loss": 1.6885, + "num_input_tokens_seen": 646329528, + "step": 3739 + }, + { + "epoch": 1.4320183837610112, + "loss": 1.6450743675231934, + "loss_ce": 0.23552067577838898, + "loss_iou": 0.6517372727394104, + "loss_num": 0.021240234375, + "loss_xval": 1.4095536470413208, + "num_input_tokens_seen": 646329528, + "step": 3739 + }, + { + "epoch": 1.432401378782076, + "grad_norm": 221.4220954318138, + "learning_rate": 5e-06, + "loss": 1.5916, + "num_input_tokens_seen": 646502176, + "step": 3740 + }, + { + "epoch": 1.432401378782076, + "loss": 1.6941814422607422, + "loss_ce": 0.1842448115348816, + "loss_iou": 0.7121521234512329, + "loss_num": 0.01708984375, + "loss_xval": 1.5099365711212158, + "num_input_tokens_seen": 646502176, + "step": 3740 + }, + { + "epoch": 1.4327843738031405, + "grad_norm": 97.99613947599644, + "learning_rate": 5e-06, + "loss": 1.9946, + "num_input_tokens_seen": 646675432, + "step": 3741 + }, + { + "epoch": 1.4327843738031405, + "loss": 2.0199780464172363, + "loss_ce": 0.22191393375396729, + "loss_iou": 0.8097376227378845, + "loss_num": 0.03564453125, + "loss_xval": 1.798064112663269, + "num_input_tokens_seen": 646675432, + "step": 3741 + }, + { + "epoch": 1.4331673688242053, + "grad_norm": 132.07464574854623, + "learning_rate": 5e-06, + "loss": 1.3352, + "num_input_tokens_seen": 646848008, + "step": 3742 + }, + { + "epoch": 1.4331673688242053, + "loss": 1.2995100021362305, + "loss_ce": 0.19524481892585754, + "loss_iou": 0.5238122940063477, + "loss_num": 0.01129150390625, + "loss_xval": 1.1042652130126953, + "num_input_tokens_seen": 646848008, + "step": 3742 + }, + { + "epoch": 1.43355036384527, + "grad_norm": 245.3325410425919, + "learning_rate": 5e-06, + "loss": 1.9307, + "num_input_tokens_seen": 647020960, + "step": 3743 + }, + { + "epoch": 1.43355036384527, + "loss": 1.9817793369293213, + "loss_ce": 0.20634689927101135, + "loss_iou": 0.8121546506881714, + "loss_num": 0.0302734375, + "loss_xval": 1.7754323482513428, + "num_input_tokens_seen": 647020960, + "step": 3743 + }, + { + "epoch": 1.4339333588663348, + "grad_norm": 166.79214130718145, + "learning_rate": 5e-06, + "loss": 1.7984, + "num_input_tokens_seen": 647194368, + "step": 3744 + }, + { + "epoch": 1.4339333588663348, + "loss": 1.826289415359497, + "loss_ce": 0.20446836948394775, + "loss_iou": 0.7626163959503174, + "loss_num": 0.019287109375, + "loss_xval": 1.6218209266662598, + "num_input_tokens_seen": 647194368, + "step": 3744 + }, + { + "epoch": 1.4343163538873995, + "grad_norm": 101.34890471679762, + "learning_rate": 5e-06, + "loss": 1.6824, + "num_input_tokens_seen": 647367128, + "step": 3745 + }, + { + "epoch": 1.4343163538873995, + "loss": 1.6728715896606445, + "loss_ce": 0.14931800961494446, + "loss_iou": 0.6829499006271362, + "loss_num": 0.031494140625, + "loss_xval": 1.5235536098480225, + "num_input_tokens_seen": 647367128, + "step": 3745 + }, + { + "epoch": 1.434699348908464, + "grad_norm": 101.27894614002867, + "learning_rate": 5e-06, + "loss": 1.4336, + "num_input_tokens_seen": 647540472, + "step": 3746 + }, + { + "epoch": 1.434699348908464, + "loss": 1.4333503246307373, + "loss_ce": 0.21338771283626556, + "loss_iou": 0.5676991939544678, + "loss_num": 0.0169677734375, + "loss_xval": 1.2199625968933105, + "num_input_tokens_seen": 647540472, + "step": 3746 + }, + { + "epoch": 1.4350823439295288, + "grad_norm": 135.69922529206178, + "learning_rate": 5e-06, + "loss": 1.8911, + "num_input_tokens_seen": 647713472, + "step": 3747 + }, + { + "epoch": 1.4350823439295288, + "loss": 1.9443538188934326, + "loss_ce": 0.22142541408538818, + "loss_iou": 0.7996660470962524, + "loss_num": 0.024658203125, + "loss_xval": 1.7229282855987549, + "num_input_tokens_seen": 647713472, + "step": 3747 + }, + { + "epoch": 1.4354653389505936, + "grad_norm": 101.93708093168205, + "learning_rate": 5e-06, + "loss": 1.7479, + "num_input_tokens_seen": 647886560, + "step": 3748 + }, + { + "epoch": 1.4354653389505936, + "loss": 1.609716773033142, + "loss_ce": 0.15427130460739136, + "loss_iou": 0.6694341897964478, + "loss_num": 0.0233154296875, + "loss_xval": 1.4554455280303955, + "num_input_tokens_seen": 647886560, + "step": 3748 + }, + { + "epoch": 1.4358483339716583, + "grad_norm": 75.44481130968893, + "learning_rate": 5e-06, + "loss": 1.4704, + "num_input_tokens_seen": 648059536, + "step": 3749 + }, + { + "epoch": 1.4358483339716583, + "loss": 1.5968767404556274, + "loss_ce": 0.18753957748413086, + "loss_iou": 0.6483941078186035, + "loss_num": 0.0224609375, + "loss_xval": 1.409337043762207, + "num_input_tokens_seen": 648059536, + "step": 3749 + }, + { + "epoch": 1.436231328992723, + "grad_norm": 99.53147962818682, + "learning_rate": 5e-06, + "loss": 1.3952, + "num_input_tokens_seen": 648232472, + "step": 3750 + }, + { + "epoch": 1.436231328992723, + "eval_websight_new_CIoU": 0.9285513758659363, + "eval_websight_new_GIoU": 0.9281909763813019, + "eval_websight_new_IoU": 0.9289136528968811, + "eval_websight_new_MAE_all": 0.006849759723991156, + "eval_websight_new_MAE_h": 0.004385469830594957, + "eval_websight_new_MAE_w": 0.008191783213987947, + "eval_websight_new_MAE_x": 0.008513058768585324, + "eval_websight_new_MAE_y": 0.006308725103735924, + "eval_websight_new_NUM_probability": 0.12843996286392212, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 1.0500247478485107, + "eval_websight_new_loss_ce": 0.4122988283634186, + "eval_websight_new_loss_iou": 0.30277392268180847, + "eval_websight_new_loss_num": 0.0061969757080078125, + "eval_websight_new_loss_xval": 0.6365155577659607, + "eval_websight_new_runtime": 55.9458, + "eval_websight_new_samples_per_second": 0.894, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 648232472, + "step": 3750 + }, + { + "epoch": 1.436231328992723, + "eval_seeclick_CIoU": 0.6186380684375763, + "eval_seeclick_GIoU": 0.6335676312446594, + "eval_seeclick_IoU": 0.6683100163936615, + "eval_seeclick_MAE_all": 0.07598558999598026, + "eval_seeclick_MAE_h": 0.03271046606823802, + "eval_seeclick_MAE_w": 0.11810741201043129, + "eval_seeclick_MAE_x": 0.11608804017305374, + "eval_seeclick_MAE_y": 0.03703642264008522, + "eval_seeclick_NUM_probability": 0.42897067964076996, + "eval_seeclick_inside_bbox": 0.8576388955116272, + "eval_seeclick_loss": 1.7334070205688477, + "eval_seeclick_loss_ce": 0.24777323752641678, + "eval_seeclick_loss_iou": 0.6064954698085785, + "eval_seeclick_loss_num": 0.049999237060546875, + "eval_seeclick_loss_xval": 1.463082492351532, + "eval_seeclick_runtime": 85.2465, + "eval_seeclick_samples_per_second": 0.587, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 648232472, + "step": 3750 + }, + { + "epoch": 1.436231328992723, + "eval_icons_CIoU": 0.803604245185852, + "eval_icons_GIoU": 0.8062641024589539, + "eval_icons_IoU": 0.8186492621898651, + "eval_icons_MAE_all": 0.028514167293906212, + "eval_icons_MAE_h": 0.014705082401633263, + "eval_icons_MAE_w": 0.039799148216843605, + "eval_icons_MAE_x": 0.043168528005480766, + "eval_icons_MAE_y": 0.016383902169764042, + "eval_icons_NUM_probability": 0.3404311537742615, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 1.9245898723602295, + "eval_icons_loss_ce": 0.3984464108943939, + "eval_icons_loss_iou": 0.7120104730129242, + "eval_icons_loss_num": 0.01808929443359375, + "eval_icons_loss_xval": 1.5145055651664734, + "eval_icons_runtime": 93.4996, + "eval_icons_samples_per_second": 0.535, + "eval_icons_steps_per_second": 0.021, + "num_input_tokens_seen": 648232472, + "step": 3750 + }, + { + "epoch": 1.436231328992723, + "loss": 1.9871809482574463, + "loss_ce": 0.3983060121536255, + "loss_iou": 0.7439765930175781, + "loss_num": 0.0201416015625, + "loss_xval": 1.5888748168945312, + "num_input_tokens_seen": 648232472, + "step": 3750 + }, + { + "epoch": 1.4366143240137879, + "grad_norm": 85.30457316621232, + "learning_rate": 5e-06, + "loss": 1.5212, + "num_input_tokens_seen": 648405336, + "step": 3751 + }, + { + "epoch": 1.4366143240137879, + "loss": 1.478672981262207, + "loss_ce": 0.20677736401557922, + "loss_iou": 0.5964428186416626, + "loss_num": 0.015869140625, + "loss_xval": 1.2718956470489502, + "num_input_tokens_seen": 648405336, + "step": 3751 + }, + { + "epoch": 1.4369973190348526, + "grad_norm": 113.75891963551362, + "learning_rate": 5e-06, + "loss": 1.572, + "num_input_tokens_seen": 648578376, + "step": 3752 + }, + { + "epoch": 1.4369973190348526, + "loss": 1.4738179445266724, + "loss_ce": 0.2258295714855194, + "loss_iou": 0.5844433903694153, + "loss_num": 0.015869140625, + "loss_xval": 1.2479883432388306, + "num_input_tokens_seen": 648578376, + "step": 3752 + }, + { + "epoch": 1.4373803140559174, + "grad_norm": 196.98913144134312, + "learning_rate": 5e-06, + "loss": 1.6073, + "num_input_tokens_seen": 648751344, + "step": 3753 + }, + { + "epoch": 1.4373803140559174, + "loss": 1.7447113990783691, + "loss_ce": 0.17518246173858643, + "loss_iou": 0.7425433397293091, + "loss_num": 0.016845703125, + "loss_xval": 1.5695288181304932, + "num_input_tokens_seen": 648751344, + "step": 3753 + }, + { + "epoch": 1.4377633090769821, + "grad_norm": 274.07027465792294, + "learning_rate": 5e-06, + "loss": 1.9837, + "num_input_tokens_seen": 648924448, + "step": 3754 + }, + { + "epoch": 1.4377633090769821, + "loss": 1.9940869808197021, + "loss_ce": 0.16950249671936035, + "loss_iou": 0.8452146053314209, + "loss_num": 0.02685546875, + "loss_xval": 1.8245844841003418, + "num_input_tokens_seen": 648924448, + "step": 3754 + }, + { + "epoch": 1.4381463040980467, + "grad_norm": 150.30767114356615, + "learning_rate": 5e-06, + "loss": 1.7165, + "num_input_tokens_seen": 649097424, + "step": 3755 + }, + { + "epoch": 1.4381463040980467, + "loss": 1.673203945159912, + "loss_ce": 0.18848004937171936, + "loss_iou": 0.6845005750656128, + "loss_num": 0.0230712890625, + "loss_xval": 1.4847238063812256, + "num_input_tokens_seen": 649097424, + "step": 3755 + }, + { + "epoch": 1.4385292991191114, + "grad_norm": 178.9262208328964, + "learning_rate": 5e-06, + "loss": 1.6534, + "num_input_tokens_seen": 649270264, + "step": 3756 + }, + { + "epoch": 1.4385292991191114, + "loss": 1.4100279808044434, + "loss_ce": 0.132390558719635, + "loss_iou": 0.5667972564697266, + "loss_num": 0.02880859375, + "loss_xval": 1.2776374816894531, + "num_input_tokens_seen": 649270264, + "step": 3756 + }, + { + "epoch": 1.4389122941401762, + "grad_norm": 185.56122412169145, + "learning_rate": 5e-06, + "loss": 2.2031, + "num_input_tokens_seen": 649443120, + "step": 3757 + }, + { + "epoch": 1.4389122941401762, + "loss": 2.201828718185425, + "loss_ce": 0.18113598227500916, + "loss_iou": 0.8843087553977966, + "loss_num": 0.05029296875, + "loss_xval": 2.020692825317383, + "num_input_tokens_seen": 649443120, + "step": 3757 + }, + { + "epoch": 1.439295289161241, + "grad_norm": 156.22916748801134, + "learning_rate": 5e-06, + "loss": 1.6584, + "num_input_tokens_seen": 649616224, + "step": 3758 + }, + { + "epoch": 1.439295289161241, + "loss": 1.7143478393554688, + "loss_ce": 0.2034144401550293, + "loss_iou": 0.6921732425689697, + "loss_num": 0.0252685546875, + "loss_xval": 1.5109333992004395, + "num_input_tokens_seen": 649616224, + "step": 3758 + }, + { + "epoch": 1.4396782841823057, + "grad_norm": 134.4854109687959, + "learning_rate": 5e-06, + "loss": 1.4323, + "num_input_tokens_seen": 649789256, + "step": 3759 + }, + { + "epoch": 1.4396782841823057, + "loss": 1.3913935422897339, + "loss_ce": 0.20808057487010956, + "loss_iou": 0.531201183795929, + "loss_num": 0.024169921875, + "loss_xval": 1.183313012123108, + "num_input_tokens_seen": 649789256, + "step": 3759 + }, + { + "epoch": 1.4400612792033702, + "grad_norm": 67.81917494606034, + "learning_rate": 5e-06, + "loss": 1.9246, + "num_input_tokens_seen": 649962464, + "step": 3760 + }, + { + "epoch": 1.4400612792033702, + "loss": 1.9014415740966797, + "loss_ce": 0.25480008125305176, + "loss_iou": 0.772661566734314, + "loss_num": 0.020263671875, + "loss_xval": 1.646641492843628, + "num_input_tokens_seen": 649962464, + "step": 3760 + }, + { + "epoch": 1.440444274224435, + "grad_norm": 101.11515987968119, + "learning_rate": 5e-06, + "loss": 1.4676, + "num_input_tokens_seen": 650135224, + "step": 3761 + }, + { + "epoch": 1.440444274224435, + "loss": 1.2977476119995117, + "loss_ce": 0.17146068811416626, + "loss_iou": 0.5049769282341003, + "loss_num": 0.0233154296875, + "loss_xval": 1.1262868642807007, + "num_input_tokens_seen": 650135224, + "step": 3761 + }, + { + "epoch": 1.4408272692454998, + "grad_norm": 165.17808664996986, + "learning_rate": 5e-06, + "loss": 1.5256, + "num_input_tokens_seen": 650308608, + "step": 3762 + }, + { + "epoch": 1.4408272692454998, + "loss": 1.5596181154251099, + "loss_ce": 0.19343899190425873, + "loss_iou": 0.633162796497345, + "loss_num": 0.02001953125, + "loss_xval": 1.36617910861969, + "num_input_tokens_seen": 650308608, + "step": 3762 + }, + { + "epoch": 1.4412102642665645, + "grad_norm": 120.37777033006283, + "learning_rate": 5e-06, + "loss": 1.6499, + "num_input_tokens_seen": 650481568, + "step": 3763 + }, + { + "epoch": 1.4412102642665645, + "loss": 1.596879482269287, + "loss_ce": 0.16848647594451904, + "loss_iou": 0.6603940725326538, + "loss_num": 0.021484375, + "loss_xval": 1.4283931255340576, + "num_input_tokens_seen": 650481568, + "step": 3763 + }, + { + "epoch": 1.4415932592876293, + "grad_norm": 76.98411155572275, + "learning_rate": 5e-06, + "loss": 1.2079, + "num_input_tokens_seen": 650654240, + "step": 3764 + }, + { + "epoch": 1.4415932592876293, + "loss": 1.2885901927947998, + "loss_ce": 0.1566745638847351, + "loss_iou": 0.515710711479187, + "loss_num": 0.0201416015625, + "loss_xval": 1.131915807723999, + "num_input_tokens_seen": 650654240, + "step": 3764 + }, + { + "epoch": 1.441976254308694, + "grad_norm": 105.05592545315972, + "learning_rate": 5e-06, + "loss": 1.2087, + "num_input_tokens_seen": 650827232, + "step": 3765 + }, + { + "epoch": 1.441976254308694, + "loss": 1.3063874244689941, + "loss_ce": 0.18088823556900024, + "loss_iou": 0.5067803263664246, + "loss_num": 0.0223388671875, + "loss_xval": 1.1254992485046387, + "num_input_tokens_seen": 650827232, + "step": 3765 + }, + { + "epoch": 1.4423592493297588, + "grad_norm": 84.51373154328186, + "learning_rate": 5e-06, + "loss": 1.7202, + "num_input_tokens_seen": 651000288, + "step": 3766 + }, + { + "epoch": 1.4423592493297588, + "loss": 1.9406160116195679, + "loss_ce": 0.2464371621608734, + "loss_iou": 0.7758309245109558, + "loss_num": 0.028564453125, + "loss_xval": 1.6941789388656616, + "num_input_tokens_seen": 651000288, + "step": 3766 + }, + { + "epoch": 1.4427422443508235, + "grad_norm": 70.52425073354833, + "learning_rate": 5e-06, + "loss": 1.261, + "num_input_tokens_seen": 651173392, + "step": 3767 + }, + { + "epoch": 1.4427422443508235, + "loss": 1.3253724575042725, + "loss_ce": 0.20682013034820557, + "loss_iou": 0.5058094263076782, + "loss_num": 0.0213623046875, + "loss_xval": 1.1185524463653564, + "num_input_tokens_seen": 651173392, + "step": 3767 + }, + { + "epoch": 1.4431252393718883, + "grad_norm": 247.24629844383443, + "learning_rate": 5e-06, + "loss": 1.5622, + "num_input_tokens_seen": 651346336, + "step": 3768 + }, + { + "epoch": 1.4431252393718883, + "loss": 1.6428744792938232, + "loss_ce": 0.19238018989562988, + "loss_iou": 0.6811187267303467, + "loss_num": 0.017578125, + "loss_xval": 1.4504942893981934, + "num_input_tokens_seen": 651346336, + "step": 3768 + }, + { + "epoch": 1.4435082343929528, + "grad_norm": 95.39385812224009, + "learning_rate": 5e-06, + "loss": 1.8846, + "num_input_tokens_seen": 651519320, + "step": 3769 + }, + { + "epoch": 1.4435082343929528, + "loss": 1.7897255420684814, + "loss_ce": 0.22061772644519806, + "loss_iou": 0.7338947653770447, + "loss_num": 0.020263671875, + "loss_xval": 1.5691078901290894, + "num_input_tokens_seen": 651519320, + "step": 3769 + }, + { + "epoch": 1.4438912294140176, + "grad_norm": 144.53921328265437, + "learning_rate": 5e-06, + "loss": 1.5361, + "num_input_tokens_seen": 651692672, + "step": 3770 + }, + { + "epoch": 1.4438912294140176, + "loss": 1.5159696340560913, + "loss_ce": 0.21220770478248596, + "loss_iou": 0.5900828838348389, + "loss_num": 0.0247802734375, + "loss_xval": 1.3037619590759277, + "num_input_tokens_seen": 651692672, + "step": 3770 + }, + { + "epoch": 1.4442742244350824, + "grad_norm": 135.80791640212837, + "learning_rate": 5e-06, + "loss": 1.7292, + "num_input_tokens_seen": 651865456, + "step": 3771 + }, + { + "epoch": 1.4442742244350824, + "loss": 1.7545547485351562, + "loss_ce": 0.184688538312912, + "loss_iou": 0.7362881302833557, + "loss_num": 0.0194091796875, + "loss_xval": 1.5698662996292114, + "num_input_tokens_seen": 651865456, + "step": 3771 + }, + { + "epoch": 1.4446572194561471, + "grad_norm": 94.44691510131742, + "learning_rate": 5e-06, + "loss": 1.91, + "num_input_tokens_seen": 652038360, + "step": 3772 + }, + { + "epoch": 1.4446572194561471, + "loss": 1.745355248451233, + "loss_ce": 0.18886971473693848, + "loss_iou": 0.7118974924087524, + "loss_num": 0.0264892578125, + "loss_xval": 1.5564854145050049, + "num_input_tokens_seen": 652038360, + "step": 3772 + }, + { + "epoch": 1.4450402144772119, + "grad_norm": 109.58872241046265, + "learning_rate": 5e-06, + "loss": 1.4302, + "num_input_tokens_seen": 652211344, + "step": 3773 + }, + { + "epoch": 1.4450402144772119, + "loss": 1.5201438665390015, + "loss_ce": 0.167641282081604, + "loss_iou": 0.6205872297286987, + "loss_num": 0.022216796875, + "loss_xval": 1.3525025844573975, + "num_input_tokens_seen": 652211344, + "step": 3773 + }, + { + "epoch": 1.4454232094982764, + "grad_norm": 126.77775998202408, + "learning_rate": 5e-06, + "loss": 1.5639, + "num_input_tokens_seen": 652384360, + "step": 3774 + }, + { + "epoch": 1.4454232094982764, + "loss": 1.648437261581421, + "loss_ce": 0.1750509887933731, + "loss_iou": 0.6851489543914795, + "loss_num": 0.0206298828125, + "loss_xval": 1.473386287689209, + "num_input_tokens_seen": 652384360, + "step": 3774 + }, + { + "epoch": 1.4458062045193412, + "grad_norm": 106.01905857449188, + "learning_rate": 5e-06, + "loss": 1.7414, + "num_input_tokens_seen": 652557240, + "step": 3775 + }, + { + "epoch": 1.4458062045193412, + "loss": 1.8190821409225464, + "loss_ce": 0.2274116724729538, + "loss_iou": 0.7480751872062683, + "loss_num": 0.0191650390625, + "loss_xval": 1.5916703939437866, + "num_input_tokens_seen": 652557240, + "step": 3775 + }, + { + "epoch": 1.446189199540406, + "grad_norm": 72.76302246621044, + "learning_rate": 5e-06, + "loss": 1.4197, + "num_input_tokens_seen": 652730304, + "step": 3776 + }, + { + "epoch": 1.446189199540406, + "loss": 1.3549346923828125, + "loss_ce": 0.2012617588043213, + "loss_iou": 0.5414360761642456, + "loss_num": 0.01416015625, + "loss_xval": 1.1536729335784912, + "num_input_tokens_seen": 652730304, + "step": 3776 + }, + { + "epoch": 1.4465721945614707, + "grad_norm": 121.25168514069287, + "learning_rate": 5e-06, + "loss": 1.5176, + "num_input_tokens_seen": 652903680, + "step": 3777 + }, + { + "epoch": 1.4465721945614707, + "loss": 1.3323194980621338, + "loss_ce": 0.21395091712474823, + "loss_iou": 0.5245010852813721, + "loss_num": 0.01385498046875, + "loss_xval": 1.1183686256408691, + "num_input_tokens_seen": 652903680, + "step": 3777 + }, + { + "epoch": 1.4469551895825354, + "grad_norm": 256.18857843742103, + "learning_rate": 5e-06, + "loss": 1.6278, + "num_input_tokens_seen": 653076376, + "step": 3778 + }, + { + "epoch": 1.4469551895825354, + "loss": 1.4770128726959229, + "loss_ce": 0.16997739672660828, + "loss_iou": 0.6138449311256409, + "loss_num": 0.015869140625, + "loss_xval": 1.3070355653762817, + "num_input_tokens_seen": 653076376, + "step": 3778 + }, + { + "epoch": 1.4473381846036002, + "grad_norm": 385.7979687662338, + "learning_rate": 5e-06, + "loss": 1.7794, + "num_input_tokens_seen": 653249376, + "step": 3779 + }, + { + "epoch": 1.4473381846036002, + "loss": 1.6642146110534668, + "loss_ce": 0.19940748810768127, + "loss_iou": 0.658551037311554, + "loss_num": 0.029541015625, + "loss_xval": 1.464807152748108, + "num_input_tokens_seen": 653249376, + "step": 3779 + }, + { + "epoch": 1.447721179624665, + "grad_norm": 206.30552232507333, + "learning_rate": 5e-06, + "loss": 1.7149, + "num_input_tokens_seen": 653422032, + "step": 3780 + }, + { + "epoch": 1.447721179624665, + "loss": 1.7484610080718994, + "loss_ce": 0.18937820196151733, + "loss_iou": 0.7355655431747437, + "loss_num": 0.017578125, + "loss_xval": 1.5590827465057373, + "num_input_tokens_seen": 653422032, + "step": 3780 + }, + { + "epoch": 1.4481041746457297, + "grad_norm": 134.49787199330845, + "learning_rate": 5e-06, + "loss": 1.8694, + "num_input_tokens_seen": 653595408, + "step": 3781 + }, + { + "epoch": 1.4481041746457297, + "loss": 1.9886581897735596, + "loss_ce": 0.18671278655529022, + "loss_iou": 0.8289817571640015, + "loss_num": 0.02880859375, + "loss_xval": 1.801945447921753, + "num_input_tokens_seen": 653595408, + "step": 3781 + }, + { + "epoch": 1.4484871696667942, + "grad_norm": 76.68047915267991, + "learning_rate": 5e-06, + "loss": 1.3536, + "num_input_tokens_seen": 653768352, + "step": 3782 + }, + { + "epoch": 1.4484871696667942, + "loss": 1.3922433853149414, + "loss_ce": 0.18917998671531677, + "loss_iou": 0.5656887292861938, + "loss_num": 0.01434326171875, + "loss_xval": 1.2030632495880127, + "num_input_tokens_seen": 653768352, + "step": 3782 + }, + { + "epoch": 1.448870164687859, + "grad_norm": 124.78964133467781, + "learning_rate": 5e-06, + "loss": 1.3352, + "num_input_tokens_seen": 653941408, + "step": 3783 + }, + { + "epoch": 1.448870164687859, + "loss": 1.2876272201538086, + "loss_ce": 0.19031816720962524, + "loss_iou": 0.5140933990478516, + "loss_num": 0.0137939453125, + "loss_xval": 1.0973091125488281, + "num_input_tokens_seen": 653941408, + "step": 3783 + }, + { + "epoch": 1.4492531597089238, + "grad_norm": 120.9594175394944, + "learning_rate": 5e-06, + "loss": 1.6848, + "num_input_tokens_seen": 654114296, + "step": 3784 + }, + { + "epoch": 1.4492531597089238, + "loss": 1.6132863759994507, + "loss_ce": 0.2119094878435135, + "loss_iou": 0.6478319764137268, + "loss_num": 0.0211181640625, + "loss_xval": 1.4013768434524536, + "num_input_tokens_seen": 654114296, + "step": 3784 + }, + { + "epoch": 1.4496361547299885, + "grad_norm": 105.09965336934216, + "learning_rate": 5e-06, + "loss": 1.374, + "num_input_tokens_seen": 654287312, + "step": 3785 + }, + { + "epoch": 1.4496361547299885, + "loss": 1.2698428630828857, + "loss_ce": 0.22151289880275726, + "loss_iou": 0.48902401328086853, + "loss_num": 0.0140380859375, + "loss_xval": 1.0483300685882568, + "num_input_tokens_seen": 654287312, + "step": 3785 + }, + { + "epoch": 1.4500191497510533, + "grad_norm": 119.20706807447374, + "learning_rate": 5e-06, + "loss": 1.3342, + "num_input_tokens_seen": 654456576, + "step": 3786 + }, + { + "epoch": 1.4500191497510533, + "loss": 1.2609329223632812, + "loss_ce": 0.11578431725502014, + "loss_iou": 0.5418888330459595, + "loss_num": 0.01226806640625, + "loss_xval": 1.145148515701294, + "num_input_tokens_seen": 654456576, + "step": 3786 + }, + { + "epoch": 1.450402144772118, + "grad_norm": 131.5739674704228, + "learning_rate": 5e-06, + "loss": 1.4568, + "num_input_tokens_seen": 654629664, + "step": 3787 + }, + { + "epoch": 1.450402144772118, + "loss": 1.3627843856811523, + "loss_ce": 0.22565358877182007, + "loss_iou": 0.5240402817726135, + "loss_num": 0.017822265625, + "loss_xval": 1.137130856513977, + "num_input_tokens_seen": 654629664, + "step": 3787 + }, + { + "epoch": 1.4507851397931826, + "grad_norm": 123.91765590689454, + "learning_rate": 5e-06, + "loss": 1.4768, + "num_input_tokens_seen": 654802256, + "step": 3788 + }, + { + "epoch": 1.4507851397931826, + "loss": 1.4885624647140503, + "loss_ce": 0.17179864645004272, + "loss_iou": 0.6020312309265137, + "loss_num": 0.0225830078125, + "loss_xval": 1.2755041122436523, + "num_input_tokens_seen": 654802256, + "step": 3788 + }, + { + "epoch": 1.4511681348142473, + "grad_norm": 120.84765963073131, + "learning_rate": 5e-06, + "loss": 1.411, + "num_input_tokens_seen": 654975000, + "step": 3789 + }, + { + "epoch": 1.4511681348142473, + "loss": 1.4020578861236572, + "loss_ce": 0.15986859798431396, + "loss_iou": 0.5749979019165039, + "loss_num": 0.0184326171875, + "loss_xval": 1.2421894073486328, + "num_input_tokens_seen": 654975000, + "step": 3789 + }, + { + "epoch": 1.451551129835312, + "grad_norm": 103.58734996622945, + "learning_rate": 5e-06, + "loss": 1.6362, + "num_input_tokens_seen": 655147560, + "step": 3790 + }, + { + "epoch": 1.451551129835312, + "loss": 1.6650502681732178, + "loss_ce": 0.18911117315292358, + "loss_iou": 0.6893855333328247, + "loss_num": 0.0194091796875, + "loss_xval": 1.4759390354156494, + "num_input_tokens_seen": 655147560, + "step": 3790 + }, + { + "epoch": 1.4519341248563769, + "grad_norm": 118.18186315359165, + "learning_rate": 5e-06, + "loss": 1.548, + "num_input_tokens_seen": 655320184, + "step": 3791 + }, + { + "epoch": 1.4519341248563769, + "loss": 1.671811819076538, + "loss_ce": 0.16698873043060303, + "loss_iou": 0.6983343362808228, + "loss_num": 0.0216064453125, + "loss_xval": 1.504823088645935, + "num_input_tokens_seen": 655320184, + "step": 3791 + }, + { + "epoch": 1.4523171198774416, + "grad_norm": 168.50277127678518, + "learning_rate": 5e-06, + "loss": 2.0662, + "num_input_tokens_seen": 655492912, + "step": 3792 + }, + { + "epoch": 1.4523171198774416, + "loss": 2.023236036300659, + "loss_ce": 0.15831105411052704, + "loss_iou": 0.8838174343109131, + "loss_num": 0.01953125, + "loss_xval": 1.8649249076843262, + "num_input_tokens_seen": 655492912, + "step": 3792 + }, + { + "epoch": 1.4527001148985064, + "grad_norm": 89.94589993359895, + "learning_rate": 5e-06, + "loss": 1.8428, + "num_input_tokens_seen": 655666056, + "step": 3793 + }, + { + "epoch": 1.4527001148985064, + "loss": 1.965169906616211, + "loss_ce": 0.21689045429229736, + "loss_iou": 0.791223406791687, + "loss_num": 0.033203125, + "loss_xval": 1.748279333114624, + "num_input_tokens_seen": 655666056, + "step": 3793 + }, + { + "epoch": 1.4530831099195711, + "grad_norm": 195.35134482147066, + "learning_rate": 5e-06, + "loss": 1.5465, + "num_input_tokens_seen": 655839000, + "step": 3794 + }, + { + "epoch": 1.4530831099195711, + "loss": 1.5583913326263428, + "loss_ce": 0.14241421222686768, + "loss_iou": 0.6645772457122803, + "loss_num": 0.017333984375, + "loss_xval": 1.4159770011901855, + "num_input_tokens_seen": 655839000, + "step": 3794 + }, + { + "epoch": 1.4534661049406359, + "grad_norm": 138.16568988915304, + "learning_rate": 5e-06, + "loss": 1.9311, + "num_input_tokens_seen": 656012144, + "step": 3795 + }, + { + "epoch": 1.4534661049406359, + "loss": 1.902432918548584, + "loss_ce": 0.16578739881515503, + "loss_iou": 0.8244079351425171, + "loss_num": 0.017578125, + "loss_xval": 1.7366454601287842, + "num_input_tokens_seen": 656012144, + "step": 3795 + }, + { + "epoch": 1.4538490999617004, + "grad_norm": 91.95252792391383, + "learning_rate": 5e-06, + "loss": 1.9875, + "num_input_tokens_seen": 656184864, + "step": 3796 + }, + { + "epoch": 1.4538490999617004, + "loss": 1.8245433568954468, + "loss_ce": 0.1364639848470688, + "loss_iou": 0.7680509090423584, + "loss_num": 0.0303955078125, + "loss_xval": 1.6880793571472168, + "num_input_tokens_seen": 656184864, + "step": 3796 + }, + { + "epoch": 1.4542320949827652, + "grad_norm": 93.74971054419059, + "learning_rate": 5e-06, + "loss": 1.529, + "num_input_tokens_seen": 656357800, + "step": 3797 + }, + { + "epoch": 1.4542320949827652, + "loss": 1.3864085674285889, + "loss_ce": 0.1551656872034073, + "loss_iou": 0.5659693479537964, + "loss_num": 0.0198974609375, + "loss_xval": 1.2312428951263428, + "num_input_tokens_seen": 656357800, + "step": 3797 + }, + { + "epoch": 1.45461509000383, + "grad_norm": 211.2465508525641, + "learning_rate": 5e-06, + "loss": 1.9917, + "num_input_tokens_seen": 656530216, + "step": 3798 + }, + { + "epoch": 1.45461509000383, + "loss": 1.9216835498809814, + "loss_ce": 0.14129409193992615, + "loss_iou": 0.8456543684005737, + "loss_num": 0.017822265625, + "loss_xval": 1.7803895473480225, + "num_input_tokens_seen": 656530216, + "step": 3798 + }, + { + "epoch": 1.4549980850248947, + "grad_norm": 111.48053081234072, + "learning_rate": 5e-06, + "loss": 1.7296, + "num_input_tokens_seen": 656703144, + "step": 3799 + }, + { + "epoch": 1.4549980850248947, + "loss": 1.7023835182189941, + "loss_ce": 0.17317184805870056, + "loss_iou": 0.6857789754867554, + "loss_num": 0.031494140625, + "loss_xval": 1.5292117595672607, + "num_input_tokens_seen": 656703144, + "step": 3799 + }, + { + "epoch": 1.4553810800459595, + "grad_norm": 111.11626330585243, + "learning_rate": 5e-06, + "loss": 1.2795, + "num_input_tokens_seen": 656875856, + "step": 3800 + }, + { + "epoch": 1.4553810800459595, + "loss": 1.0074357986450195, + "loss_ce": 0.1625213772058487, + "loss_iou": 0.391291081905365, + "loss_num": 0.012451171875, + "loss_xval": 0.8449143767356873, + "num_input_tokens_seen": 656875856, + "step": 3800 + }, + { + "epoch": 1.4557640750670242, + "grad_norm": 104.89400882774832, + "learning_rate": 5e-06, + "loss": 1.4837, + "num_input_tokens_seen": 657048928, + "step": 3801 + }, + { + "epoch": 1.4557640750670242, + "loss": 1.5924118757247925, + "loss_ce": 0.19961793720722198, + "loss_iou": 0.6431438326835632, + "loss_num": 0.021240234375, + "loss_xval": 1.3927940130233765, + "num_input_tokens_seen": 657048928, + "step": 3801 + }, + { + "epoch": 1.4561470700880887, + "grad_norm": 129.35849983658812, + "learning_rate": 5e-06, + "loss": 1.6337, + "num_input_tokens_seen": 657222032, + "step": 3802 + }, + { + "epoch": 1.4561470700880887, + "loss": 1.6484363079071045, + "loss_ce": 0.1564095914363861, + "loss_iou": 0.6908985376358032, + "loss_num": 0.0220947265625, + "loss_xval": 1.4920265674591064, + "num_input_tokens_seen": 657222032, + "step": 3802 + }, + { + "epoch": 1.4565300651091535, + "grad_norm": 158.4448251316895, + "learning_rate": 5e-06, + "loss": 1.6882, + "num_input_tokens_seen": 657395000, + "step": 3803 + }, + { + "epoch": 1.4565300651091535, + "loss": 1.8415119647979736, + "loss_ce": 0.23011425137519836, + "loss_iou": 0.749149739742279, + "loss_num": 0.0225830078125, + "loss_xval": 1.611397624015808, + "num_input_tokens_seen": 657395000, + "step": 3803 + }, + { + "epoch": 1.4569130601302183, + "grad_norm": 90.37963164548083, + "learning_rate": 5e-06, + "loss": 1.7413, + "num_input_tokens_seen": 657568096, + "step": 3804 + }, + { + "epoch": 1.4569130601302183, + "loss": 1.6751604080200195, + "loss_ce": 0.18919594585895538, + "loss_iou": 0.6686719655990601, + "loss_num": 0.02978515625, + "loss_xval": 1.4859645366668701, + "num_input_tokens_seen": 657568096, + "step": 3804 + }, + { + "epoch": 1.457296055151283, + "grad_norm": 97.08764336384468, + "learning_rate": 5e-06, + "loss": 1.6194, + "num_input_tokens_seen": 657741072, + "step": 3805 + }, + { + "epoch": 1.457296055151283, + "loss": 1.3500893115997314, + "loss_ce": 0.17926084995269775, + "loss_iou": 0.5503036975860596, + "loss_num": 0.0140380859375, + "loss_xval": 1.1708283424377441, + "num_input_tokens_seen": 657741072, + "step": 3805 + }, + { + "epoch": 1.4576790501723478, + "grad_norm": 92.41936563895115, + "learning_rate": 5e-06, + "loss": 1.3274, + "num_input_tokens_seen": 657914192, + "step": 3806 + }, + { + "epoch": 1.4576790501723478, + "loss": 1.2393465042114258, + "loss_ce": 0.18367427587509155, + "loss_iou": 0.4906809329986572, + "loss_num": 0.014892578125, + "loss_xval": 1.0556721687316895, + "num_input_tokens_seen": 657914192, + "step": 3806 + }, + { + "epoch": 1.4580620451934125, + "grad_norm": 272.39600150539786, + "learning_rate": 5e-06, + "loss": 1.5496, + "num_input_tokens_seen": 658087064, + "step": 3807 + }, + { + "epoch": 1.4580620451934125, + "loss": 1.6365811824798584, + "loss_ce": 0.20125389099121094, + "loss_iou": 0.6523712277412415, + "loss_num": 0.026123046875, + "loss_xval": 1.435327172279358, + "num_input_tokens_seen": 658087064, + "step": 3807 + }, + { + "epoch": 1.4584450402144773, + "grad_norm": 145.24395504305105, + "learning_rate": 5e-06, + "loss": 1.7756, + "num_input_tokens_seen": 658259712, + "step": 3808 + }, + { + "epoch": 1.4584450402144773, + "loss": 1.8197343349456787, + "loss_ce": 0.1754903495311737, + "loss_iou": 0.7533658742904663, + "loss_num": 0.0274658203125, + "loss_xval": 1.6442439556121826, + "num_input_tokens_seen": 658259712, + "step": 3808 + }, + { + "epoch": 1.458828035235542, + "grad_norm": 193.5180404049713, + "learning_rate": 5e-06, + "loss": 1.7664, + "num_input_tokens_seen": 658432736, + "step": 3809 + }, + { + "epoch": 1.458828035235542, + "loss": 1.5885456800460815, + "loss_ce": 0.20038504898548126, + "loss_iou": 0.6512947082519531, + "loss_num": 0.01708984375, + "loss_xval": 1.3881607055664062, + "num_input_tokens_seen": 658432736, + "step": 3809 + }, + { + "epoch": 1.4592110302566066, + "grad_norm": 184.14992587635396, + "learning_rate": 5e-06, + "loss": 1.5467, + "num_input_tokens_seen": 658605616, + "step": 3810 + }, + { + "epoch": 1.4592110302566066, + "loss": 1.6167919635772705, + "loss_ce": 0.16174444556236267, + "loss_iou": 0.6547240614891052, + "loss_num": 0.0291748046875, + "loss_xval": 1.4550474882125854, + "num_input_tokens_seen": 658605616, + "step": 3810 + }, + { + "epoch": 1.4595940252776713, + "grad_norm": 212.16964484122292, + "learning_rate": 5e-06, + "loss": 2.2109, + "num_input_tokens_seen": 658778496, + "step": 3811 + }, + { + "epoch": 1.4595940252776713, + "loss": 2.2612743377685547, + "loss_ce": 0.19108566641807556, + "loss_iou": 0.9135123491287231, + "loss_num": 0.048583984375, + "loss_xval": 2.0701887607574463, + "num_input_tokens_seen": 658778496, + "step": 3811 + }, + { + "epoch": 1.459977020298736, + "grad_norm": 81.01815564486779, + "learning_rate": 5e-06, + "loss": 1.8385, + "num_input_tokens_seen": 658951296, + "step": 3812 + }, + { + "epoch": 1.459977020298736, + "loss": 1.8274188041687012, + "loss_ce": 0.18849460780620575, + "loss_iou": 0.7424662113189697, + "loss_num": 0.03076171875, + "loss_xval": 1.6389241218566895, + "num_input_tokens_seen": 658951296, + "step": 3812 + }, + { + "epoch": 1.4603600153198009, + "grad_norm": 49.95469091414546, + "learning_rate": 5e-06, + "loss": 1.5201, + "num_input_tokens_seen": 659124368, + "step": 3813 + }, + { + "epoch": 1.4603600153198009, + "loss": 1.5202065706253052, + "loss_ce": 0.13053268194198608, + "loss_iou": 0.6430333852767944, + "loss_num": 0.020751953125, + "loss_xval": 1.3740489482879639, + "num_input_tokens_seen": 659124368, + "step": 3813 + }, + { + "epoch": 1.4607430103408656, + "grad_norm": 107.13977637727947, + "learning_rate": 5e-06, + "loss": 1.3482, + "num_input_tokens_seen": 659297336, + "step": 3814 + }, + { + "epoch": 1.4607430103408656, + "loss": 1.1944173574447632, + "loss_ce": 0.1378280520439148, + "loss_iou": 0.4841967523097992, + "loss_num": 0.0177001953125, + "loss_xval": 1.0565893650054932, + "num_input_tokens_seen": 659297336, + "step": 3814 + }, + { + "epoch": 1.4611260053619302, + "grad_norm": 138.38002923188432, + "learning_rate": 5e-06, + "loss": 1.7644, + "num_input_tokens_seen": 659470184, + "step": 3815 + }, + { + "epoch": 1.4611260053619302, + "loss": 1.7486977577209473, + "loss_ce": 0.20691266655921936, + "loss_iou": 0.6980165243148804, + "loss_num": 0.0291748046875, + "loss_xval": 1.5417850017547607, + "num_input_tokens_seen": 659470184, + "step": 3815 + }, + { + "epoch": 1.461509000382995, + "grad_norm": 126.91918089264476, + "learning_rate": 5e-06, + "loss": 1.701, + "num_input_tokens_seen": 659642752, + "step": 3816 + }, + { + "epoch": 1.461509000382995, + "loss": 1.7082602977752686, + "loss_ce": 0.16621346771717072, + "loss_iou": 0.7019315958023071, + "loss_num": 0.027587890625, + "loss_xval": 1.5420467853546143, + "num_input_tokens_seen": 659642752, + "step": 3816 + }, + { + "epoch": 1.4618919954040597, + "grad_norm": 160.46795186791917, + "learning_rate": 5e-06, + "loss": 1.2791, + "num_input_tokens_seen": 659815784, + "step": 3817 + }, + { + "epoch": 1.4618919954040597, + "loss": 1.3117930889129639, + "loss_ce": 0.15506237745285034, + "loss_iou": 0.5235862731933594, + "loss_num": 0.02197265625, + "loss_xval": 1.1567306518554688, + "num_input_tokens_seen": 659815784, + "step": 3817 + }, + { + "epoch": 1.4622749904251244, + "grad_norm": 143.27679106100427, + "learning_rate": 5e-06, + "loss": 1.4454, + "num_input_tokens_seen": 659988680, + "step": 3818 + }, + { + "epoch": 1.4622749904251244, + "loss": 1.5526273250579834, + "loss_ce": 0.15987583994865417, + "loss_iou": 0.6397961378097534, + "loss_num": 0.022705078125, + "loss_xval": 1.3927514553070068, + "num_input_tokens_seen": 659988680, + "step": 3818 + }, + { + "epoch": 1.4626579854461892, + "grad_norm": 240.47910652884292, + "learning_rate": 5e-06, + "loss": 2.0018, + "num_input_tokens_seen": 660161832, + "step": 3819 + }, + { + "epoch": 1.4626579854461892, + "loss": 1.9347071647644043, + "loss_ce": 0.2272402048110962, + "loss_iou": 0.7853130102157593, + "loss_num": 0.02734375, + "loss_xval": 1.7074668407440186, + "num_input_tokens_seen": 660161832, + "step": 3819 + }, + { + "epoch": 1.463040980467254, + "grad_norm": 77.54631845218896, + "learning_rate": 5e-06, + "loss": 1.425, + "num_input_tokens_seen": 660334304, + "step": 3820 + }, + { + "epoch": 1.463040980467254, + "loss": 1.4100160598754883, + "loss_ce": 0.12929484248161316, + "loss_iou": 0.5822703838348389, + "loss_num": 0.0233154296875, + "loss_xval": 1.2807211875915527, + "num_input_tokens_seen": 660334304, + "step": 3820 + }, + { + "epoch": 1.4634239754883187, + "grad_norm": 82.10061315820577, + "learning_rate": 5e-06, + "loss": 1.4941, + "num_input_tokens_seen": 660507456, + "step": 3821 + }, + { + "epoch": 1.4634239754883187, + "loss": 1.5278706550598145, + "loss_ce": 0.17169851064682007, + "loss_iou": 0.6022193431854248, + "loss_num": 0.0303955078125, + "loss_xval": 1.3561720848083496, + "num_input_tokens_seen": 660507456, + "step": 3821 + }, + { + "epoch": 1.4638069705093835, + "grad_norm": 152.86590021249606, + "learning_rate": 5e-06, + "loss": 1.5364, + "num_input_tokens_seen": 660680816, + "step": 3822 + }, + { + "epoch": 1.4638069705093835, + "loss": 1.4629637002944946, + "loss_ce": 0.1718747466802597, + "loss_iou": 0.6119294166564941, + "loss_num": 0.013427734375, + "loss_xval": 1.2910890579223633, + "num_input_tokens_seen": 660680816, + "step": 3822 + }, + { + "epoch": 1.4641899655304482, + "grad_norm": 89.8082383751445, + "learning_rate": 5e-06, + "loss": 1.3508, + "num_input_tokens_seen": 660853888, + "step": 3823 + }, + { + "epoch": 1.4641899655304482, + "loss": 1.3696964979171753, + "loss_ce": 0.19920630753040314, + "loss_iou": 0.5320376753807068, + "loss_num": 0.021240234375, + "loss_xval": 1.1704901456832886, + "num_input_tokens_seen": 660853888, + "step": 3823 + }, + { + "epoch": 1.4645729605515128, + "grad_norm": 176.87759831014753, + "learning_rate": 5e-06, + "loss": 1.3726, + "num_input_tokens_seen": 661026400, + "step": 3824 + }, + { + "epoch": 1.4645729605515128, + "loss": 1.6656455993652344, + "loss_ce": 0.11513148248195648, + "loss_iou": 0.7068977355957031, + "loss_num": 0.02734375, + "loss_xval": 1.5505142211914062, + "num_input_tokens_seen": 661026400, + "step": 3824 + }, + { + "epoch": 1.4649559555725775, + "grad_norm": 120.89378195695188, + "learning_rate": 5e-06, + "loss": 1.8561, + "num_input_tokens_seen": 661199488, + "step": 3825 + }, + { + "epoch": 1.4649559555725775, + "loss": 1.9418671131134033, + "loss_ce": 0.18025416135787964, + "loss_iou": 0.7908101081848145, + "loss_num": 0.035888671875, + "loss_xval": 1.761612892150879, + "num_input_tokens_seen": 661199488, + "step": 3825 + }, + { + "epoch": 1.4653389505936423, + "grad_norm": 122.12627774650512, + "learning_rate": 5e-06, + "loss": 1.3821, + "num_input_tokens_seen": 661372544, + "step": 3826 + }, + { + "epoch": 1.4653389505936423, + "loss": 1.5193982124328613, + "loss_ce": 0.1425769031047821, + "loss_iou": 0.6450299024581909, + "loss_num": 0.017333984375, + "loss_xval": 1.3768212795257568, + "num_input_tokens_seen": 661372544, + "step": 3826 + }, + { + "epoch": 1.465721945614707, + "grad_norm": 230.93679128858588, + "learning_rate": 5e-06, + "loss": 2.1023, + "num_input_tokens_seen": 661545160, + "step": 3827 + }, + { + "epoch": 1.465721945614707, + "loss": 2.205559253692627, + "loss_ce": 0.15959420800209045, + "loss_iou": 0.9437894225120544, + "loss_num": 0.03173828125, + "loss_xval": 2.0459651947021484, + "num_input_tokens_seen": 661545160, + "step": 3827 + }, + { + "epoch": 1.4661049406357718, + "grad_norm": 77.35587336059181, + "learning_rate": 5e-06, + "loss": 1.7795, + "num_input_tokens_seen": 661718152, + "step": 3828 + }, + { + "epoch": 1.4661049406357718, + "loss": 1.8080837726593018, + "loss_ce": 0.18644201755523682, + "loss_iou": 0.7486259937286377, + "loss_num": 0.02490234375, + "loss_xval": 1.6216416358947754, + "num_input_tokens_seen": 661718152, + "step": 3828 + }, + { + "epoch": 1.4664879356568363, + "grad_norm": 144.25956094759215, + "learning_rate": 5e-06, + "loss": 1.4247, + "num_input_tokens_seen": 661891280, + "step": 3829 + }, + { + "epoch": 1.4664879356568363, + "loss": 1.2728171348571777, + "loss_ce": 0.15106286108493805, + "loss_iou": 0.5125526189804077, + "loss_num": 0.019287109375, + "loss_xval": 1.1217544078826904, + "num_input_tokens_seen": 661891280, + "step": 3829 + }, + { + "epoch": 1.466870930677901, + "grad_norm": 170.3971935136544, + "learning_rate": 5e-06, + "loss": 1.6923, + "num_input_tokens_seen": 662064112, + "step": 3830 + }, + { + "epoch": 1.466870930677901, + "loss": 1.7541427612304688, + "loss_ce": 0.1572430282831192, + "loss_iou": 0.7572511434555054, + "loss_num": 0.0164794921875, + "loss_xval": 1.5968997478485107, + "num_input_tokens_seen": 662064112, + "step": 3830 + }, + { + "epoch": 1.4672539256989658, + "grad_norm": 97.01879089579793, + "learning_rate": 5e-06, + "loss": 1.9848, + "num_input_tokens_seen": 662237120, + "step": 3831 + }, + { + "epoch": 1.4672539256989658, + "loss": 1.905449390411377, + "loss_ce": 0.20804432034492493, + "loss_iou": 0.7706385254859924, + "loss_num": 0.03125, + "loss_xval": 1.6974049806594849, + "num_input_tokens_seen": 662237120, + "step": 3831 + }, + { + "epoch": 1.4676369207200306, + "grad_norm": 78.1377968147627, + "learning_rate": 5e-06, + "loss": 1.5092, + "num_input_tokens_seen": 662410112, + "step": 3832 + }, + { + "epoch": 1.4676369207200306, + "loss": 1.3702943325042725, + "loss_ce": 0.1734234243631363, + "loss_iou": 0.5395823121070862, + "loss_num": 0.0235595703125, + "loss_xval": 1.1968708038330078, + "num_input_tokens_seen": 662410112, + "step": 3832 + }, + { + "epoch": 1.4680199157410954, + "grad_norm": 138.67596927650027, + "learning_rate": 5e-06, + "loss": 1.4924, + "num_input_tokens_seen": 662583464, + "step": 3833 + }, + { + "epoch": 1.4680199157410954, + "loss": 1.5313942432403564, + "loss_ce": 0.1910170614719391, + "loss_iou": 0.6064525842666626, + "loss_num": 0.0255126953125, + "loss_xval": 1.3403772115707397, + "num_input_tokens_seen": 662583464, + "step": 3833 + }, + { + "epoch": 1.4684029107621601, + "grad_norm": 94.77838700294897, + "learning_rate": 5e-06, + "loss": 1.7593, + "num_input_tokens_seen": 662753336, + "step": 3834 + }, + { + "epoch": 1.4684029107621601, + "loss": 1.6682029962539673, + "loss_ce": 0.18056021630764008, + "loss_iou": 0.692002534866333, + "loss_num": 0.020751953125, + "loss_xval": 1.487642765045166, + "num_input_tokens_seen": 662753336, + "step": 3834 + }, + { + "epoch": 1.4687859057832249, + "grad_norm": 140.78705327462356, + "learning_rate": 5e-06, + "loss": 1.3048, + "num_input_tokens_seen": 662926400, + "step": 3835 + }, + { + "epoch": 1.4687859057832249, + "loss": 1.4433050155639648, + "loss_ce": 0.20680516958236694, + "loss_iou": 0.5778599977493286, + "loss_num": 0.01611328125, + "loss_xval": 1.2365000247955322, + "num_input_tokens_seen": 662926400, + "step": 3835 + }, + { + "epoch": 1.4691689008042896, + "grad_norm": 162.05062889591497, + "learning_rate": 5e-06, + "loss": 1.7313, + "num_input_tokens_seen": 663099248, + "step": 3836 + }, + { + "epoch": 1.4691689008042896, + "loss": 1.8132050037384033, + "loss_ce": 0.17610056698322296, + "loss_iou": 0.7324011325836182, + "loss_num": 0.034423828125, + "loss_xval": 1.6371045112609863, + "num_input_tokens_seen": 663099248, + "step": 3836 + }, + { + "epoch": 1.4695518958253544, + "grad_norm": 114.63206588501914, + "learning_rate": 5e-06, + "loss": 1.2958, + "num_input_tokens_seen": 663272248, + "step": 3837 + }, + { + "epoch": 1.4695518958253544, + "loss": 1.2157354354858398, + "loss_ce": 0.18062147498130798, + "loss_iou": 0.4794405400753021, + "loss_num": 0.0152587890625, + "loss_xval": 1.035114049911499, + "num_input_tokens_seen": 663272248, + "step": 3837 + }, + { + "epoch": 1.469934890846419, + "grad_norm": 143.54249613964905, + "learning_rate": 5e-06, + "loss": 1.4071, + "num_input_tokens_seen": 663445008, + "step": 3838 + }, + { + "epoch": 1.469934890846419, + "loss": 1.571700096130371, + "loss_ce": 0.15280543267726898, + "loss_iou": 0.6679586172103882, + "loss_num": 0.0166015625, + "loss_xval": 1.4188945293426514, + "num_input_tokens_seen": 663445008, + "step": 3838 + }, + { + "epoch": 1.4703178858674837, + "grad_norm": 119.08983301678559, + "learning_rate": 5e-06, + "loss": 1.3561, + "num_input_tokens_seen": 663617696, + "step": 3839 + }, + { + "epoch": 1.4703178858674837, + "loss": 1.3493022918701172, + "loss_ce": 0.16583970189094543, + "loss_iou": 0.5522873401641846, + "loss_num": 0.0157470703125, + "loss_xval": 1.1834626197814941, + "num_input_tokens_seen": 663617696, + "step": 3839 + }, + { + "epoch": 1.4707008808885484, + "grad_norm": 335.4058954399076, + "learning_rate": 5e-06, + "loss": 1.6527, + "num_input_tokens_seen": 663790488, + "step": 3840 + }, + { + "epoch": 1.4707008808885484, + "loss": 1.5318005084991455, + "loss_ce": 0.1281571388244629, + "loss_iou": 0.6288542151451111, + "loss_num": 0.0291748046875, + "loss_xval": 1.4036434888839722, + "num_input_tokens_seen": 663790488, + "step": 3840 + }, + { + "epoch": 1.4710838759096132, + "grad_norm": 187.15833963722213, + "learning_rate": 5e-06, + "loss": 1.8186, + "num_input_tokens_seen": 663963648, + "step": 3841 + }, + { + "epoch": 1.4710838759096132, + "loss": 1.7523595094680786, + "loss_ce": 0.1846073418855667, + "loss_iou": 0.7219864130020142, + "loss_num": 0.0247802734375, + "loss_xval": 1.5677522420883179, + "num_input_tokens_seen": 663963648, + "step": 3841 + }, + { + "epoch": 1.471466870930678, + "grad_norm": 276.0166430233373, + "learning_rate": 5e-06, + "loss": 2.0253, + "num_input_tokens_seen": 664136872, + "step": 3842 + }, + { + "epoch": 1.471466870930678, + "loss": 2.061936616897583, + "loss_ce": 0.1929805725812912, + "loss_iou": 0.8287040591239929, + "loss_num": 0.042236328125, + "loss_xval": 1.8689559698104858, + "num_input_tokens_seen": 664136872, + "step": 3842 + }, + { + "epoch": 1.4718498659517425, + "grad_norm": 182.32725780040136, + "learning_rate": 5e-06, + "loss": 1.4631, + "num_input_tokens_seen": 664310000, + "step": 3843 + }, + { + "epoch": 1.4718498659517425, + "loss": 1.393494963645935, + "loss_ce": 0.22394207119941711, + "loss_iou": 0.5386490821838379, + "loss_num": 0.0184326171875, + "loss_xval": 1.1695528030395508, + "num_input_tokens_seen": 664310000, + "step": 3843 + }, + { + "epoch": 1.4722328609728073, + "grad_norm": 325.14936297944143, + "learning_rate": 5e-06, + "loss": 1.7968, + "num_input_tokens_seen": 664483112, + "step": 3844 + }, + { + "epoch": 1.4722328609728073, + "loss": 1.7259163856506348, + "loss_ce": 0.16397422552108765, + "loss_iou": 0.7327837944030762, + "loss_num": 0.019287109375, + "loss_xval": 1.5619421005249023, + "num_input_tokens_seen": 664483112, + "step": 3844 + }, + { + "epoch": 1.472615855993872, + "grad_norm": 154.19209840188947, + "learning_rate": 5e-06, + "loss": 1.8539, + "num_input_tokens_seen": 664656376, + "step": 3845 + }, + { + "epoch": 1.472615855993872, + "loss": 1.9076762199401855, + "loss_ce": 0.20823287963867188, + "loss_iou": 0.7454125881195068, + "loss_num": 0.041748046875, + "loss_xval": 1.6994433403015137, + "num_input_tokens_seen": 664656376, + "step": 3845 + }, + { + "epoch": 1.4729988510149368, + "grad_norm": 167.35411849209817, + "learning_rate": 5e-06, + "loss": 1.4539, + "num_input_tokens_seen": 664829280, + "step": 3846 + }, + { + "epoch": 1.4729988510149368, + "loss": 1.5264216661453247, + "loss_ce": 0.1563391089439392, + "loss_iou": 0.6442545056343079, + "loss_num": 0.016357421875, + "loss_xval": 1.3700824975967407, + "num_input_tokens_seen": 664829280, + "step": 3846 + }, + { + "epoch": 1.4733818460360015, + "grad_norm": 146.77845846627713, + "learning_rate": 5e-06, + "loss": 2.0207, + "num_input_tokens_seen": 665002352, + "step": 3847 + }, + { + "epoch": 1.4733818460360015, + "loss": 2.0562148094177246, + "loss_ce": 0.21677184104919434, + "loss_iou": 0.8762645125389099, + "loss_num": 0.017333984375, + "loss_xval": 1.8394430875778198, + "num_input_tokens_seen": 665002352, + "step": 3847 + }, + { + "epoch": 1.4737648410570663, + "grad_norm": 93.92841077468523, + "learning_rate": 5e-06, + "loss": 1.8418, + "num_input_tokens_seen": 665174992, + "step": 3848 + }, + { + "epoch": 1.4737648410570663, + "loss": 1.8205523490905762, + "loss_ce": 0.14205557107925415, + "loss_iou": 0.7516324520111084, + "loss_num": 0.03515625, + "loss_xval": 1.6784968376159668, + "num_input_tokens_seen": 665174992, + "step": 3848 + }, + { + "epoch": 1.474147836078131, + "grad_norm": 157.33118163806148, + "learning_rate": 5e-06, + "loss": 1.5346, + "num_input_tokens_seen": 665348536, + "step": 3849 + }, + { + "epoch": 1.474147836078131, + "loss": 1.5996110439300537, + "loss_ce": 0.25675928592681885, + "loss_iou": 0.6246882677078247, + "loss_num": 0.0186767578125, + "loss_xval": 1.3428518772125244, + "num_input_tokens_seen": 665348536, + "step": 3849 + }, + { + "epoch": 1.4745308310991958, + "grad_norm": 113.91028531435974, + "learning_rate": 5e-06, + "loss": 1.7914, + "num_input_tokens_seen": 665521480, + "step": 3850 + }, + { + "epoch": 1.4745308310991958, + "loss": 1.7607181072235107, + "loss_ce": 0.19359637796878815, + "loss_iou": 0.7268286943435669, + "loss_num": 0.022705078125, + "loss_xval": 1.5671217441558838, + "num_input_tokens_seen": 665521480, + "step": 3850 + }, + { + "epoch": 1.4749138261202606, + "grad_norm": 126.63833983812442, + "learning_rate": 5e-06, + "loss": 1.667, + "num_input_tokens_seen": 665694456, + "step": 3851 + }, + { + "epoch": 1.4749138261202606, + "loss": 1.6462596654891968, + "loss_ce": 0.22160466015338898, + "loss_iou": 0.6712127327919006, + "loss_num": 0.0164794921875, + "loss_xval": 1.4246550798416138, + "num_input_tokens_seen": 665694456, + "step": 3851 + }, + { + "epoch": 1.475296821141325, + "grad_norm": 127.9277838472454, + "learning_rate": 5e-06, + "loss": 1.6844, + "num_input_tokens_seen": 665867328, + "step": 3852 + }, + { + "epoch": 1.475296821141325, + "loss": 1.7342861890792847, + "loss_ce": 0.16108569502830505, + "loss_iou": 0.7260838747024536, + "loss_num": 0.024169921875, + "loss_xval": 1.5732004642486572, + "num_input_tokens_seen": 665867328, + "step": 3852 + }, + { + "epoch": 1.4756798161623899, + "grad_norm": 82.94437218184599, + "learning_rate": 5e-06, + "loss": 1.4305, + "num_input_tokens_seen": 666040384, + "step": 3853 + }, + { + "epoch": 1.4756798161623899, + "loss": 1.351753830909729, + "loss_ce": 0.1556341052055359, + "loss_iou": 0.537299394607544, + "loss_num": 0.0242919921875, + "loss_xval": 1.196119785308838, + "num_input_tokens_seen": 666040384, + "step": 3853 + }, + { + "epoch": 1.4760628111834546, + "grad_norm": 125.65292723452029, + "learning_rate": 5e-06, + "loss": 1.4625, + "num_input_tokens_seen": 666213184, + "step": 3854 + }, + { + "epoch": 1.4760628111834546, + "loss": 1.3487107753753662, + "loss_ce": 0.14079010486602783, + "loss_iou": 0.5578787326812744, + "loss_num": 0.0184326171875, + "loss_xval": 1.2079205513000488, + "num_input_tokens_seen": 666213184, + "step": 3854 + }, + { + "epoch": 1.4764458062045194, + "grad_norm": 131.79253341079308, + "learning_rate": 5e-06, + "loss": 1.6839, + "num_input_tokens_seen": 666386208, + "step": 3855 + }, + { + "epoch": 1.4764458062045194, + "loss": 1.6640814542770386, + "loss_ce": 0.19890397787094116, + "loss_iou": 0.6766195297241211, + "loss_num": 0.0223388671875, + "loss_xval": 1.4651775360107422, + "num_input_tokens_seen": 666386208, + "step": 3855 + }, + { + "epoch": 1.4768288012255841, + "grad_norm": 136.18593025029074, + "learning_rate": 5e-06, + "loss": 1.6107, + "num_input_tokens_seen": 666559128, + "step": 3856 + }, + { + "epoch": 1.4768288012255841, + "loss": 1.6052563190460205, + "loss_ce": 0.17612627148628235, + "loss_iou": 0.6662404537200928, + "loss_num": 0.019287109375, + "loss_xval": 1.4291300773620605, + "num_input_tokens_seen": 666559128, + "step": 3856 + }, + { + "epoch": 1.4772117962466487, + "grad_norm": 150.1747582565557, + "learning_rate": 5e-06, + "loss": 1.6752, + "num_input_tokens_seen": 666731936, + "step": 3857 + }, + { + "epoch": 1.4772117962466487, + "loss": 1.6757440567016602, + "loss_ce": 0.1703186333179474, + "loss_iou": 0.6937527656555176, + "loss_num": 0.0235595703125, + "loss_xval": 1.5054254531860352, + "num_input_tokens_seen": 666731936, + "step": 3857 + }, + { + "epoch": 1.4775947912677134, + "grad_norm": 114.09954680579511, + "learning_rate": 5e-06, + "loss": 1.8521, + "num_input_tokens_seen": 666904496, + "step": 3858 + }, + { + "epoch": 1.4775947912677134, + "loss": 1.7862820625305176, + "loss_ce": 0.17286039888858795, + "loss_iou": 0.7237640619277954, + "loss_num": 0.033203125, + "loss_xval": 1.6134216785430908, + "num_input_tokens_seen": 666904496, + "step": 3858 + }, + { + "epoch": 1.4779777862887782, + "grad_norm": 169.8345223772268, + "learning_rate": 5e-06, + "loss": 1.3313, + "num_input_tokens_seen": 667077768, + "step": 3859 + }, + { + "epoch": 1.4779777862887782, + "loss": 1.3333956003189087, + "loss_ce": 0.1704695224761963, + "loss_iou": 0.5455590486526489, + "loss_num": 0.01434326171875, + "loss_xval": 1.1629259586334229, + "num_input_tokens_seen": 667077768, + "step": 3859 + }, + { + "epoch": 1.478360781309843, + "grad_norm": 147.86795164609876, + "learning_rate": 5e-06, + "loss": 1.6171, + "num_input_tokens_seen": 667250776, + "step": 3860 + }, + { + "epoch": 1.478360781309843, + "loss": 1.5118118524551392, + "loss_ce": 0.19477102160453796, + "loss_iou": 0.5991026759147644, + "loss_num": 0.0238037109375, + "loss_xval": 1.3170408010482788, + "num_input_tokens_seen": 667250776, + "step": 3860 + }, + { + "epoch": 1.4787437763309077, + "grad_norm": 102.01628569584015, + "learning_rate": 5e-06, + "loss": 1.2921, + "num_input_tokens_seen": 667423832, + "step": 3861 + }, + { + "epoch": 1.4787437763309077, + "loss": 1.2340381145477295, + "loss_ce": 0.16711857914924622, + "loss_iou": 0.49796777963638306, + "loss_num": 0.01422119140625, + "loss_xval": 1.0669194459915161, + "num_input_tokens_seen": 667423832, + "step": 3861 + }, + { + "epoch": 1.4791267713519725, + "grad_norm": 104.32723047255132, + "learning_rate": 5e-06, + "loss": 1.5295, + "num_input_tokens_seen": 667596472, + "step": 3862 + }, + { + "epoch": 1.4791267713519725, + "loss": 1.3800314664840698, + "loss_ce": 0.16500824689865112, + "loss_iou": 0.5674268007278442, + "loss_num": 0.0159912109375, + "loss_xval": 1.2150232791900635, + "num_input_tokens_seen": 667596472, + "step": 3862 + }, + { + "epoch": 1.4795097663730372, + "grad_norm": 132.21034295835113, + "learning_rate": 5e-06, + "loss": 1.4402, + "num_input_tokens_seen": 667769472, + "step": 3863 + }, + { + "epoch": 1.4795097663730372, + "loss": 1.5435000658035278, + "loss_ce": 0.1579607129096985, + "loss_iou": 0.6303611993789673, + "loss_num": 0.0250244140625, + "loss_xval": 1.3855392932891846, + "num_input_tokens_seen": 667769472, + "step": 3863 + }, + { + "epoch": 1.479892761394102, + "grad_norm": 184.56186037129535, + "learning_rate": 5e-06, + "loss": 1.6246, + "num_input_tokens_seen": 667942584, + "step": 3864 + }, + { + "epoch": 1.479892761394102, + "loss": 1.652564287185669, + "loss_ce": 0.18969173729419708, + "loss_iou": 0.6824555397033691, + "loss_num": 0.01953125, + "loss_xval": 1.4628725051879883, + "num_input_tokens_seen": 667942584, + "step": 3864 + }, + { + "epoch": 1.4802757564151667, + "grad_norm": 170.45213369292847, + "learning_rate": 5e-06, + "loss": 1.5748, + "num_input_tokens_seen": 668115696, + "step": 3865 + }, + { + "epoch": 1.4802757564151667, + "loss": 1.5227084159851074, + "loss_ce": 0.19459585845470428, + "loss_iou": 0.6224150657653809, + "loss_num": 0.0166015625, + "loss_xval": 1.3281126022338867, + "num_input_tokens_seen": 668115696, + "step": 3865 + }, + { + "epoch": 1.4806587514362313, + "grad_norm": 103.4924956997599, + "learning_rate": 5e-06, + "loss": 1.6014, + "num_input_tokens_seen": 668288672, + "step": 3866 + }, + { + "epoch": 1.4806587514362313, + "loss": 1.5472420454025269, + "loss_ce": 0.2081267535686493, + "loss_iou": 0.6122151613235474, + "loss_num": 0.02294921875, + "loss_xval": 1.3391153812408447, + "num_input_tokens_seen": 668288672, + "step": 3866 + }, + { + "epoch": 1.481041746457296, + "grad_norm": 175.38685505076648, + "learning_rate": 5e-06, + "loss": 1.7048, + "num_input_tokens_seen": 668461584, + "step": 3867 + }, + { + "epoch": 1.481041746457296, + "loss": 1.6872774362564087, + "loss_ce": 0.1981140375137329, + "loss_iou": 0.6807389259338379, + "loss_num": 0.0255126953125, + "loss_xval": 1.4891633987426758, + "num_input_tokens_seen": 668461584, + "step": 3867 + }, + { + "epoch": 1.4814247414783608, + "grad_norm": 136.4284280379348, + "learning_rate": 5e-06, + "loss": 1.7501, + "num_input_tokens_seen": 668634728, + "step": 3868 + }, + { + "epoch": 1.4814247414783608, + "loss": 1.8383429050445557, + "loss_ce": 0.13189396262168884, + "loss_iou": 0.7788836359977722, + "loss_num": 0.0296630859375, + "loss_xval": 1.7064489126205444, + "num_input_tokens_seen": 668634728, + "step": 3868 + }, + { + "epoch": 1.4818077364994255, + "grad_norm": 132.9321594877913, + "learning_rate": 5e-06, + "loss": 1.372, + "num_input_tokens_seen": 668808128, + "step": 3869 + }, + { + "epoch": 1.4818077364994255, + "loss": 1.389357328414917, + "loss_ce": 0.19434954226016998, + "loss_iou": 0.5608369708061218, + "loss_num": 0.0146484375, + "loss_xval": 1.1950076818466187, + "num_input_tokens_seen": 668808128, + "step": 3869 + }, + { + "epoch": 1.4821907315204903, + "grad_norm": 188.2283810651053, + "learning_rate": 5e-06, + "loss": 1.4836, + "num_input_tokens_seen": 668980768, + "step": 3870 + }, + { + "epoch": 1.4821907315204903, + "loss": 1.577918291091919, + "loss_ce": 0.14871646463871002, + "loss_iou": 0.6660932302474976, + "loss_num": 0.0194091796875, + "loss_xval": 1.4292018413543701, + "num_input_tokens_seen": 668980768, + "step": 3870 + }, + { + "epoch": 1.4825737265415548, + "grad_norm": 141.23940856964686, + "learning_rate": 5e-06, + "loss": 1.4481, + "num_input_tokens_seen": 669153744, + "step": 3871 + }, + { + "epoch": 1.4825737265415548, + "loss": 1.255262851715088, + "loss_ce": 0.15619684755802155, + "loss_iou": 0.5179625153541565, + "loss_num": 0.01263427734375, + "loss_xval": 1.099065899848938, + "num_input_tokens_seen": 669153744, + "step": 3871 + }, + { + "epoch": 1.4829567215626196, + "grad_norm": 90.8889940085996, + "learning_rate": 5e-06, + "loss": 1.6327, + "num_input_tokens_seen": 669326544, + "step": 3872 + }, + { + "epoch": 1.4829567215626196, + "loss": 1.4989891052246094, + "loss_ce": 0.19657351076602936, + "loss_iou": 0.5932244062423706, + "loss_num": 0.023193359375, + "loss_xval": 1.3024156093597412, + "num_input_tokens_seen": 669326544, + "step": 3872 + }, + { + "epoch": 1.4833397165836844, + "grad_norm": 106.92784849994153, + "learning_rate": 5e-06, + "loss": 1.199, + "num_input_tokens_seen": 669499320, + "step": 3873 + }, + { + "epoch": 1.4833397165836844, + "loss": 1.090671181678772, + "loss_ce": 0.17284780740737915, + "loss_iou": 0.424091100692749, + "loss_num": 0.013916015625, + "loss_xval": 0.917823314666748, + "num_input_tokens_seen": 669499320, + "step": 3873 + }, + { + "epoch": 1.4837227116047491, + "grad_norm": 119.3902921505456, + "learning_rate": 5e-06, + "loss": 1.6888, + "num_input_tokens_seen": 669672344, + "step": 3874 + }, + { + "epoch": 1.4837227116047491, + "loss": 1.724827766418457, + "loss_ce": 0.1560395061969757, + "loss_iou": 0.7323310375213623, + "loss_num": 0.0208740234375, + "loss_xval": 1.5687880516052246, + "num_input_tokens_seen": 669672344, + "step": 3874 + }, + { + "epoch": 1.4841057066258139, + "grad_norm": 141.76058799087716, + "learning_rate": 5e-06, + "loss": 1.6218, + "num_input_tokens_seen": 669845368, + "step": 3875 + }, + { + "epoch": 1.4841057066258139, + "loss": 1.6312702894210815, + "loss_ce": 0.17275990545749664, + "loss_iou": 0.6746134757995605, + "loss_num": 0.0218505859375, + "loss_xval": 1.458510398864746, + "num_input_tokens_seen": 669845368, + "step": 3875 + }, + { + "epoch": 1.4844887016468786, + "grad_norm": 129.6314489565196, + "learning_rate": 5e-06, + "loss": 1.5693, + "num_input_tokens_seen": 670018440, + "step": 3876 + }, + { + "epoch": 1.4844887016468786, + "loss": 1.6121318340301514, + "loss_ce": 0.21013782918453217, + "loss_iou": 0.6578909754753113, + "loss_num": 0.0172119140625, + "loss_xval": 1.4019941091537476, + "num_input_tokens_seen": 670018440, + "step": 3876 + }, + { + "epoch": 1.4848716966679434, + "grad_norm": 197.97396657663876, + "learning_rate": 5e-06, + "loss": 1.6026, + "num_input_tokens_seen": 670191504, + "step": 3877 + }, + { + "epoch": 1.4848716966679434, + "loss": 1.46147620677948, + "loss_ce": 0.1412167251110077, + "loss_iou": 0.6187021136283875, + "loss_num": 0.0166015625, + "loss_xval": 1.32025945186615, + "num_input_tokens_seen": 670191504, + "step": 3877 + }, + { + "epoch": 1.4852546916890081, + "grad_norm": 107.1568237957383, + "learning_rate": 5e-06, + "loss": 1.6205, + "num_input_tokens_seen": 670364448, + "step": 3878 + }, + { + "epoch": 1.4852546916890081, + "loss": 1.6485404968261719, + "loss_ce": 0.18954724073410034, + "loss_iou": 0.6764570474624634, + "loss_num": 0.021240234375, + "loss_xval": 1.4589931964874268, + "num_input_tokens_seen": 670364448, + "step": 3878 + }, + { + "epoch": 1.485637686710073, + "grad_norm": 131.64257329031273, + "learning_rate": 5e-06, + "loss": 1.325, + "num_input_tokens_seen": 670537432, + "step": 3879 + }, + { + "epoch": 1.485637686710073, + "loss": 1.3019726276397705, + "loss_ce": 0.15859565138816833, + "loss_iou": 0.5418881177902222, + "loss_num": 0.011962890625, + "loss_xval": 1.1433770656585693, + "num_input_tokens_seen": 670537432, + "step": 3879 + }, + { + "epoch": 1.4860206817311374, + "grad_norm": 95.66175125333709, + "learning_rate": 5e-06, + "loss": 1.5738, + "num_input_tokens_seen": 670710448, + "step": 3880 + }, + { + "epoch": 1.4860206817311374, + "loss": 1.641021490097046, + "loss_ce": 0.14951558411121368, + "loss_iou": 0.6779733896255493, + "loss_num": 0.027099609375, + "loss_xval": 1.4915058612823486, + "num_input_tokens_seen": 670710448, + "step": 3880 + }, + { + "epoch": 1.4864036767522022, + "grad_norm": 116.52670753883206, + "learning_rate": 5e-06, + "loss": 1.3799, + "num_input_tokens_seen": 670883072, + "step": 3881 + }, + { + "epoch": 1.4864036767522022, + "loss": 1.361303687095642, + "loss_ce": 0.15120264887809753, + "loss_iou": 0.5624327063560486, + "loss_num": 0.01708984375, + "loss_xval": 1.2101010084152222, + "num_input_tokens_seen": 670883072, + "step": 3881 + }, + { + "epoch": 1.486786671773267, + "grad_norm": 324.4629411639793, + "learning_rate": 5e-06, + "loss": 1.8486, + "num_input_tokens_seen": 671055992, + "step": 3882 + }, + { + "epoch": 1.486786671773267, + "loss": 1.8590505123138428, + "loss_ce": 0.17376533150672913, + "loss_iou": 0.784445583820343, + "loss_num": 0.0233154296875, + "loss_xval": 1.685285210609436, + "num_input_tokens_seen": 671055992, + "step": 3882 + }, + { + "epoch": 1.4871696667943317, + "grad_norm": 105.6266431676682, + "learning_rate": 5e-06, + "loss": 1.8311, + "num_input_tokens_seen": 671229032, + "step": 3883 + }, + { + "epoch": 1.4871696667943317, + "loss": 1.7354176044464111, + "loss_ce": 0.1748410016298294, + "loss_iou": 0.7163845300674438, + "loss_num": 0.025634765625, + "loss_xval": 1.5605766773223877, + "num_input_tokens_seen": 671229032, + "step": 3883 + }, + { + "epoch": 1.4875526618153965, + "grad_norm": 264.2298469536217, + "learning_rate": 5e-06, + "loss": 1.7788, + "num_input_tokens_seen": 671402112, + "step": 3884 + }, + { + "epoch": 1.4875526618153965, + "loss": 1.6878291368484497, + "loss_ce": 0.22226132452487946, + "loss_iou": 0.6703296899795532, + "loss_num": 0.0250244140625, + "loss_xval": 1.4655678272247314, + "num_input_tokens_seen": 671402112, + "step": 3884 + }, + { + "epoch": 1.487935656836461, + "grad_norm": 225.60078800531593, + "learning_rate": 5e-06, + "loss": 1.6987, + "num_input_tokens_seen": 671575032, + "step": 3885 + }, + { + "epoch": 1.487935656836461, + "loss": 1.4834569692611694, + "loss_ce": 0.17097559571266174, + "loss_iou": 0.60823655128479, + "loss_num": 0.0191650390625, + "loss_xval": 1.31248140335083, + "num_input_tokens_seen": 671575032, + "step": 3885 + }, + { + "epoch": 1.4883186518575258, + "grad_norm": 167.5257703107799, + "learning_rate": 5e-06, + "loss": 2.206, + "num_input_tokens_seen": 671747976, + "step": 3886 + }, + { + "epoch": 1.4883186518575258, + "loss": 2.038886070251465, + "loss_ce": 0.16198047995567322, + "loss_iou": 0.8290777206420898, + "loss_num": 0.043701171875, + "loss_xval": 1.8769054412841797, + "num_input_tokens_seen": 671747976, + "step": 3886 + }, + { + "epoch": 1.4887016468785905, + "grad_norm": 86.88630888341918, + "learning_rate": 5e-06, + "loss": 1.5637, + "num_input_tokens_seen": 671920688, + "step": 3887 + }, + { + "epoch": 1.4887016468785905, + "loss": 1.4317007064819336, + "loss_ce": 0.14628341794013977, + "loss_iou": 0.5837181806564331, + "loss_num": 0.0235595703125, + "loss_xval": 1.2731492519378662, + "num_input_tokens_seen": 671920688, + "step": 3887 + }, + { + "epoch": 1.4890846418996553, + "grad_norm": 158.54654743374675, + "learning_rate": 5e-06, + "loss": 1.408, + "num_input_tokens_seen": 672093936, + "step": 3888 + }, + { + "epoch": 1.4890846418996553, + "loss": 1.4867074489593506, + "loss_ce": 0.16030298173427582, + "loss_iou": 0.6173266768455505, + "loss_num": 0.018310546875, + "loss_xval": 1.3264044523239136, + "num_input_tokens_seen": 672093936, + "step": 3888 + }, + { + "epoch": 1.48946763692072, + "grad_norm": 125.28459812812459, + "learning_rate": 5e-06, + "loss": 1.4976, + "num_input_tokens_seen": 672267136, + "step": 3889 + }, + { + "epoch": 1.48946763692072, + "loss": 1.3624978065490723, + "loss_ce": 0.17049774527549744, + "loss_iou": 0.555045485496521, + "loss_num": 0.016357421875, + "loss_xval": 1.192000150680542, + "num_input_tokens_seen": 672267136, + "step": 3889 + }, + { + "epoch": 1.4898506319417848, + "grad_norm": 149.58744976740113, + "learning_rate": 5e-06, + "loss": 1.372, + "num_input_tokens_seen": 672440328, + "step": 3890 + }, + { + "epoch": 1.4898506319417848, + "loss": 1.3499786853790283, + "loss_ce": 0.1211739107966423, + "loss_iou": 0.5589977502822876, + "loss_num": 0.022216796875, + "loss_xval": 1.2288048267364502, + "num_input_tokens_seen": 672440328, + "step": 3890 + }, + { + "epoch": 1.4902336269628496, + "grad_norm": 152.0818351162049, + "learning_rate": 5e-06, + "loss": 1.5785, + "num_input_tokens_seen": 672613368, + "step": 3891 + }, + { + "epoch": 1.4902336269628496, + "loss": 1.3897818326950073, + "loss_ce": 0.17107608914375305, + "loss_iou": 0.5633171200752258, + "loss_num": 0.0184326171875, + "loss_xval": 1.2187057733535767, + "num_input_tokens_seen": 672613368, + "step": 3891 + }, + { + "epoch": 1.4906166219839143, + "grad_norm": 108.03528672017505, + "learning_rate": 5e-06, + "loss": 1.5662, + "num_input_tokens_seen": 672786296, + "step": 3892 + }, + { + "epoch": 1.4906166219839143, + "loss": 1.725993275642395, + "loss_ce": 0.17192400991916656, + "loss_iou": 0.7165030241012573, + "loss_num": 0.024169921875, + "loss_xval": 1.5540692806243896, + "num_input_tokens_seen": 672786296, + "step": 3892 + }, + { + "epoch": 1.490999617004979, + "grad_norm": 120.93625765029971, + "learning_rate": 5e-06, + "loss": 1.6048, + "num_input_tokens_seen": 672958984, + "step": 3893 + }, + { + "epoch": 1.490999617004979, + "loss": 1.6273632049560547, + "loss_ce": 0.20472627878189087, + "loss_iou": 0.6684107780456543, + "loss_num": 0.0172119140625, + "loss_xval": 1.4226369857788086, + "num_input_tokens_seen": 672958984, + "step": 3893 + }, + { + "epoch": 1.4913826120260436, + "grad_norm": 152.1381935646056, + "learning_rate": 5e-06, + "loss": 1.0851, + "num_input_tokens_seen": 673132112, + "step": 3894 + }, + { + "epoch": 1.4913826120260436, + "loss": 0.9724470376968384, + "loss_ce": 0.15467657148838043, + "loss_iou": 0.3776809573173523, + "loss_num": 0.01251220703125, + "loss_xval": 0.8177703619003296, + "num_input_tokens_seen": 673132112, + "step": 3894 + }, + { + "epoch": 1.4917656070471084, + "grad_norm": 119.91263870959762, + "learning_rate": 5e-06, + "loss": 1.7792, + "num_input_tokens_seen": 673304944, + "step": 3895 + }, + { + "epoch": 1.4917656070471084, + "loss": 1.760056972503662, + "loss_ce": 0.15842179954051971, + "loss_iou": 0.7236691117286682, + "loss_num": 0.0308837890625, + "loss_xval": 1.6016350984573364, + "num_input_tokens_seen": 673304944, + "step": 3895 + }, + { + "epoch": 1.4921486020681731, + "grad_norm": 81.29026001799502, + "learning_rate": 5e-06, + "loss": 1.4743, + "num_input_tokens_seen": 673477536, + "step": 3896 + }, + { + "epoch": 1.4921486020681731, + "loss": 1.5240058898925781, + "loss_ce": 0.195234015583992, + "loss_iou": 0.6291077136993408, + "loss_num": 0.01409912109375, + "loss_xval": 1.3287720680236816, + "num_input_tokens_seen": 673477536, + "step": 3896 + }, + { + "epoch": 1.4925315970892379, + "grad_norm": 153.74166088899742, + "learning_rate": 5e-06, + "loss": 1.6929, + "num_input_tokens_seen": 673650760, + "step": 3897 + }, + { + "epoch": 1.4925315970892379, + "loss": 1.6741621494293213, + "loss_ce": 0.16368725895881653, + "loss_iou": 0.7229956388473511, + "loss_num": 0.012939453125, + "loss_xval": 1.5104749202728271, + "num_input_tokens_seen": 673650760, + "step": 3897 + }, + { + "epoch": 1.4929145921103026, + "grad_norm": 133.10121478712492, + "learning_rate": 5e-06, + "loss": 1.7809, + "num_input_tokens_seen": 673823768, + "step": 3898 + }, + { + "epoch": 1.4929145921103026, + "loss": 1.89362633228302, + "loss_ce": 0.16937804222106934, + "loss_iou": 0.7788111567497253, + "loss_num": 0.033447265625, + "loss_xval": 1.7242482900619507, + "num_input_tokens_seen": 673823768, + "step": 3898 + }, + { + "epoch": 1.4932975871313672, + "grad_norm": 162.6035247166546, + "learning_rate": 5e-06, + "loss": 1.4498, + "num_input_tokens_seen": 673993712, + "step": 3899 + }, + { + "epoch": 1.4932975871313672, + "loss": 1.5272722244262695, + "loss_ce": 0.17096881568431854, + "loss_iou": 0.6098533868789673, + "loss_num": 0.02734375, + "loss_xval": 1.3563034534454346, + "num_input_tokens_seen": 673993712, + "step": 3899 + }, + { + "epoch": 1.493680582152432, + "grad_norm": 153.67429353808546, + "learning_rate": 5e-06, + "loss": 1.867, + "num_input_tokens_seen": 674166952, + "step": 3900 + }, + { + "epoch": 1.493680582152432, + "loss": 1.883059024810791, + "loss_ce": 0.20422764122486115, + "loss_iou": 0.7705985307693481, + "loss_num": 0.027587890625, + "loss_xval": 1.6788313388824463, + "num_input_tokens_seen": 674166952, + "step": 3900 + }, + { + "epoch": 1.4940635771734967, + "grad_norm": 60.57757712105375, + "learning_rate": 5e-06, + "loss": 1.6093, + "num_input_tokens_seen": 674340048, + "step": 3901 + }, + { + "epoch": 1.4940635771734967, + "loss": 1.677382469177246, + "loss_ce": 0.15197619795799255, + "loss_iou": 0.6813737750053406, + "loss_num": 0.032470703125, + "loss_xval": 1.5254062414169312, + "num_input_tokens_seen": 674340048, + "step": 3901 + }, + { + "epoch": 1.4944465721945615, + "grad_norm": 165.47416506905398, + "learning_rate": 5e-06, + "loss": 1.3134, + "num_input_tokens_seen": 674513152, + "step": 3902 + }, + { + "epoch": 1.4944465721945615, + "loss": 1.3706345558166504, + "loss_ce": 0.18077810108661652, + "loss_iou": 0.5623659491539001, + "loss_num": 0.01300048828125, + "loss_xval": 1.1898564100265503, + "num_input_tokens_seen": 674513152, + "step": 3902 + }, + { + "epoch": 1.4948295672156262, + "grad_norm": 141.53108056287766, + "learning_rate": 5e-06, + "loss": 1.9451, + "num_input_tokens_seen": 674685936, + "step": 3903 + }, + { + "epoch": 1.4948295672156262, + "loss": 2.033285617828369, + "loss_ce": 0.15381646156311035, + "loss_iou": 0.8622809648513794, + "loss_num": 0.031005859375, + "loss_xval": 1.8794691562652588, + "num_input_tokens_seen": 674685936, + "step": 3903 + }, + { + "epoch": 1.495212562236691, + "grad_norm": 154.93278934688692, + "learning_rate": 5e-06, + "loss": 1.5626, + "num_input_tokens_seen": 674859344, + "step": 3904 + }, + { + "epoch": 1.495212562236691, + "loss": 1.5289535522460938, + "loss_ce": 0.13078534603118896, + "loss_iou": 0.6564816236495972, + "loss_num": 0.0169677734375, + "loss_xval": 1.3981683254241943, + "num_input_tokens_seen": 674859344, + "step": 3904 + }, + { + "epoch": 1.4955955572577557, + "grad_norm": 199.77810636348377, + "learning_rate": 5e-06, + "loss": 1.3503, + "num_input_tokens_seen": 675032312, + "step": 3905 + }, + { + "epoch": 1.4955955572577557, + "loss": 1.3020457029342651, + "loss_ce": 0.2171669900417328, + "loss_iou": 0.51427161693573, + "loss_num": 0.01129150390625, + "loss_xval": 1.08487868309021, + "num_input_tokens_seen": 675032312, + "step": 3905 + }, + { + "epoch": 1.4959785522788205, + "grad_norm": 120.0415211054411, + "learning_rate": 5e-06, + "loss": 1.3756, + "num_input_tokens_seen": 675205224, + "step": 3906 + }, + { + "epoch": 1.4959785522788205, + "loss": 1.4973325729370117, + "loss_ce": 0.1475808471441269, + "loss_iou": 0.6320596933364868, + "loss_num": 0.01708984375, + "loss_xval": 1.3497517108917236, + "num_input_tokens_seen": 675205224, + "step": 3906 + }, + { + "epoch": 1.496361547299885, + "grad_norm": 116.03800266511163, + "learning_rate": 5e-06, + "loss": 1.2903, + "num_input_tokens_seen": 675378000, + "step": 3907 + }, + { + "epoch": 1.496361547299885, + "loss": 1.3025251626968384, + "loss_ce": 0.2025635540485382, + "loss_iou": 0.5094839930534363, + "loss_num": 0.0162353515625, + "loss_xval": 1.0999616384506226, + "num_input_tokens_seen": 675378000, + "step": 3907 + }, + { + "epoch": 1.4967445423209498, + "grad_norm": 124.87824766868395, + "learning_rate": 5e-06, + "loss": 1.4478, + "num_input_tokens_seen": 675551176, + "step": 3908 + }, + { + "epoch": 1.4967445423209498, + "loss": 1.451635479927063, + "loss_ce": 0.184466153383255, + "loss_iou": 0.5844513177871704, + "loss_num": 0.0196533203125, + "loss_xval": 1.2671692371368408, + "num_input_tokens_seen": 675551176, + "step": 3908 + }, + { + "epoch": 1.4971275373420145, + "grad_norm": 99.83496962957669, + "learning_rate": 5e-06, + "loss": 1.3585, + "num_input_tokens_seen": 675724448, + "step": 3909 + }, + { + "epoch": 1.4971275373420145, + "loss": 1.4900596141815186, + "loss_ce": 0.1743171215057373, + "loss_iou": 0.6139869689941406, + "loss_num": 0.017578125, + "loss_xval": 1.3157424926757812, + "num_input_tokens_seen": 675724448, + "step": 3909 + }, + { + "epoch": 1.4975105323630793, + "grad_norm": 118.73129195967844, + "learning_rate": 5e-06, + "loss": 1.6089, + "num_input_tokens_seen": 675897048, + "step": 3910 + }, + { + "epoch": 1.4975105323630793, + "loss": 1.420870065689087, + "loss_ce": 0.1739872694015503, + "loss_iou": 0.5579507350921631, + "loss_num": 0.0262451171875, + "loss_xval": 1.2468829154968262, + "num_input_tokens_seen": 675897048, + "step": 3910 + }, + { + "epoch": 1.497893527384144, + "grad_norm": 122.12571647799199, + "learning_rate": 5e-06, + "loss": 1.1813, + "num_input_tokens_seen": 676069904, + "step": 3911 + }, + { + "epoch": 1.497893527384144, + "loss": 1.2542847394943237, + "loss_ce": 0.19382768869400024, + "loss_iou": 0.48590174317359924, + "loss_num": 0.0177001953125, + "loss_xval": 1.0604569911956787, + "num_input_tokens_seen": 676069904, + "step": 3911 + }, + { + "epoch": 1.4982765224052088, + "grad_norm": 139.25997298449545, + "learning_rate": 5e-06, + "loss": 1.5783, + "num_input_tokens_seen": 676242864, + "step": 3912 + }, + { + "epoch": 1.4982765224052088, + "loss": 1.51690673828125, + "loss_ce": 0.1545480489730835, + "loss_iou": 0.6304590702056885, + "loss_num": 0.020263671875, + "loss_xval": 1.362358570098877, + "num_input_tokens_seen": 676242864, + "step": 3912 + }, + { + "epoch": 1.4986595174262733, + "grad_norm": 123.25310142689585, + "learning_rate": 5e-06, + "loss": 1.8552, + "num_input_tokens_seen": 676416160, + "step": 3913 + }, + { + "epoch": 1.4986595174262733, + "loss": 1.8524422645568848, + "loss_ce": 0.21841105818748474, + "loss_iou": 0.754271388053894, + "loss_num": 0.025146484375, + "loss_xval": 1.634031057357788, + "num_input_tokens_seen": 676416160, + "step": 3913 + }, + { + "epoch": 1.499042512447338, + "grad_norm": 107.64315769116206, + "learning_rate": 5e-06, + "loss": 1.1797, + "num_input_tokens_seen": 676588952, + "step": 3914 + }, + { + "epoch": 1.499042512447338, + "loss": 1.0533498525619507, + "loss_ce": 0.1467789113521576, + "loss_iou": 0.4129336476325989, + "loss_num": 0.01611328125, + "loss_xval": 0.9025121927261353, + "num_input_tokens_seen": 676588952, + "step": 3914 + }, + { + "epoch": 1.4994255074684029, + "grad_norm": 189.16867982905197, + "learning_rate": 5e-06, + "loss": 1.399, + "num_input_tokens_seen": 676761864, + "step": 3915 + }, + { + "epoch": 1.4994255074684029, + "loss": 1.5084915161132812, + "loss_ce": 0.16149842739105225, + "loss_iou": 0.6406902074813843, + "loss_num": 0.01312255859375, + "loss_xval": 1.3469932079315186, + "num_input_tokens_seen": 676761864, + "step": 3915 + }, + { + "epoch": 1.4998085024894676, + "grad_norm": 93.63628383723204, + "learning_rate": 5e-06, + "loss": 1.755, + "num_input_tokens_seen": 676934536, + "step": 3916 + }, + { + "epoch": 1.4998085024894676, + "loss": 1.7024974822998047, + "loss_ce": 0.17724838852882385, + "loss_iou": 0.6955468654632568, + "loss_num": 0.02685546875, + "loss_xval": 1.5252490043640137, + "num_input_tokens_seen": 676934536, + "step": 3916 + }, + { + "epoch": 1.5001914975105324, + "grad_norm": 110.46402062423184, + "learning_rate": 5e-06, + "loss": 1.1692, + "num_input_tokens_seen": 677107328, + "step": 3917 + }, + { + "epoch": 1.5001914975105324, + "loss": 1.185386300086975, + "loss_ce": 0.1652449518442154, + "loss_iou": 0.4723433256149292, + "loss_num": 0.01507568359375, + "loss_xval": 1.020141363143921, + "num_input_tokens_seen": 677107328, + "step": 3917 + }, + { + "epoch": 1.5005744925315971, + "grad_norm": 177.81936394082368, + "learning_rate": 5e-06, + "loss": 1.772, + "num_input_tokens_seen": 677280320, + "step": 3918 + }, + { + "epoch": 1.5005744925315971, + "loss": 1.663917899131775, + "loss_ce": 0.1926594078540802, + "loss_iou": 0.6747772097587585, + "loss_num": 0.0242919921875, + "loss_xval": 1.471258521080017, + "num_input_tokens_seen": 677280320, + "step": 3918 + }, + { + "epoch": 1.500957487552662, + "grad_norm": 113.27744686178022, + "learning_rate": 5e-06, + "loss": 1.5075, + "num_input_tokens_seen": 677453384, + "step": 3919 + }, + { + "epoch": 1.500957487552662, + "loss": 1.3370167016983032, + "loss_ce": 0.19517093896865845, + "loss_iou": 0.5225220322608948, + "loss_num": 0.019287109375, + "loss_xval": 1.1418458223342896, + "num_input_tokens_seen": 677453384, + "step": 3919 + }, + { + "epoch": 1.5013404825737267, + "grad_norm": 147.21865546380945, + "learning_rate": 5e-06, + "loss": 1.7438, + "num_input_tokens_seen": 677626224, + "step": 3920 + }, + { + "epoch": 1.5013404825737267, + "loss": 1.8057869672775269, + "loss_ce": 0.11913647502660751, + "loss_iou": 0.7625452280044556, + "loss_num": 0.0322265625, + "loss_xval": 1.6866505146026611, + "num_input_tokens_seen": 677626224, + "step": 3920 + }, + { + "epoch": 1.5017234775947914, + "grad_norm": 143.36489983173033, + "learning_rate": 5e-06, + "loss": 1.3252, + "num_input_tokens_seen": 677799232, + "step": 3921 + }, + { + "epoch": 1.5017234775947914, + "loss": 1.3773850202560425, + "loss_ce": 0.15254107117652893, + "loss_iou": 0.5581769943237305, + "loss_num": 0.021728515625, + "loss_xval": 1.224843978881836, + "num_input_tokens_seen": 677799232, + "step": 3921 + }, + { + "epoch": 1.502106472615856, + "grad_norm": 299.8088889611129, + "learning_rate": 5e-06, + "loss": 1.6504, + "num_input_tokens_seen": 677972304, + "step": 3922 + }, + { + "epoch": 1.502106472615856, + "loss": 1.6555671691894531, + "loss_ce": 0.21011576056480408, + "loss_iou": 0.6768578290939331, + "loss_num": 0.018310546875, + "loss_xval": 1.4454514980316162, + "num_input_tokens_seen": 677972304, + "step": 3922 + }, + { + "epoch": 1.5024894676369207, + "grad_norm": 140.5422263206731, + "learning_rate": 5e-06, + "loss": 1.7557, + "num_input_tokens_seen": 678145656, + "step": 3923 + }, + { + "epoch": 1.5024894676369207, + "loss": 1.9341998100280762, + "loss_ce": 0.2066763937473297, + "loss_iou": 0.7899091839790344, + "loss_num": 0.029541015625, + "loss_xval": 1.7275234460830688, + "num_input_tokens_seen": 678145656, + "step": 3923 + }, + { + "epoch": 1.5028724626579855, + "grad_norm": 168.56528740382507, + "learning_rate": 5e-06, + "loss": 1.2538, + "num_input_tokens_seen": 678318784, + "step": 3924 + }, + { + "epoch": 1.5028724626579855, + "loss": 1.2720829248428345, + "loss_ce": 0.18417221307754517, + "loss_iou": 0.5023903846740723, + "loss_num": 0.0166015625, + "loss_xval": 1.0879106521606445, + "num_input_tokens_seen": 678318784, + "step": 3924 + }, + { + "epoch": 1.5032554576790502, + "grad_norm": 146.38262092112728, + "learning_rate": 5e-06, + "loss": 1.3102, + "num_input_tokens_seen": 678492024, + "step": 3925 + }, + { + "epoch": 1.5032554576790502, + "loss": 1.4724841117858887, + "loss_ce": 0.16766434907913208, + "loss_iou": 0.6028035879135132, + "loss_num": 0.0198974609375, + "loss_xval": 1.3048198223114014, + "num_input_tokens_seen": 678492024, + "step": 3925 + }, + { + "epoch": 1.5036384527001148, + "grad_norm": 301.52686169574025, + "learning_rate": 5e-06, + "loss": 1.8261, + "num_input_tokens_seen": 678665008, + "step": 3926 + }, + { + "epoch": 1.5036384527001148, + "loss": 1.8101704120635986, + "loss_ce": 0.16355374455451965, + "loss_iou": 0.7610829472541809, + "loss_num": 0.02490234375, + "loss_xval": 1.6466165781021118, + "num_input_tokens_seen": 678665008, + "step": 3926 + }, + { + "epoch": 1.5040214477211795, + "grad_norm": 151.62069510106224, + "learning_rate": 5e-06, + "loss": 1.5278, + "num_input_tokens_seen": 678838120, + "step": 3927 + }, + { + "epoch": 1.5040214477211795, + "loss": 1.6126360893249512, + "loss_ce": 0.17145290970802307, + "loss_iou": 0.676081657409668, + "loss_num": 0.017822265625, + "loss_xval": 1.441183090209961, + "num_input_tokens_seen": 678838120, + "step": 3927 + }, + { + "epoch": 1.5044044427422443, + "grad_norm": 151.24627586443248, + "learning_rate": 5e-06, + "loss": 1.5012, + "num_input_tokens_seen": 679010648, + "step": 3928 + }, + { + "epoch": 1.5044044427422443, + "loss": 1.468981146812439, + "loss_ce": 0.1473751664161682, + "loss_iou": 0.6055051684379578, + "loss_num": 0.0220947265625, + "loss_xval": 1.3216060400009155, + "num_input_tokens_seen": 679010648, + "step": 3928 + }, + { + "epoch": 1.504787437763309, + "grad_norm": 117.36174820735435, + "learning_rate": 5e-06, + "loss": 1.3066, + "num_input_tokens_seen": 679183704, + "step": 3929 + }, + { + "epoch": 1.504787437763309, + "loss": 1.3060994148254395, + "loss_ce": 0.19131767749786377, + "loss_iou": 0.5271174311637878, + "loss_num": 0.0120849609375, + "loss_xval": 1.1147817373275757, + "num_input_tokens_seen": 679183704, + "step": 3929 + }, + { + "epoch": 1.5051704327843738, + "grad_norm": 159.3159302948904, + "learning_rate": 5e-06, + "loss": 1.6127, + "num_input_tokens_seen": 679356424, + "step": 3930 + }, + { + "epoch": 1.5051704327843738, + "loss": 1.5889123678207397, + "loss_ce": 0.18345588445663452, + "loss_iou": 0.659011721611023, + "loss_num": 0.0174560546875, + "loss_xval": 1.405456304550171, + "num_input_tokens_seen": 679356424, + "step": 3930 + }, + { + "epoch": 1.5055534278054385, + "grad_norm": 141.7121383308052, + "learning_rate": 5e-06, + "loss": 1.2912, + "num_input_tokens_seen": 679529184, + "step": 3931 + }, + { + "epoch": 1.5055534278054385, + "loss": 1.127032995223999, + "loss_ce": 0.1291084736585617, + "loss_iou": 0.45585617423057556, + "loss_num": 0.0172119140625, + "loss_xval": 0.9979245066642761, + "num_input_tokens_seen": 679529184, + "step": 3931 + }, + { + "epoch": 1.5059364228265033, + "grad_norm": 193.61432466420874, + "learning_rate": 5e-06, + "loss": 1.7357, + "num_input_tokens_seen": 679701784, + "step": 3932 + }, + { + "epoch": 1.5059364228265033, + "loss": 1.6995859146118164, + "loss_ce": 0.14858072996139526, + "loss_iou": 0.706441342830658, + "loss_num": 0.027587890625, + "loss_xval": 1.551005244255066, + "num_input_tokens_seen": 679701784, + "step": 3932 + }, + { + "epoch": 1.506319417847568, + "grad_norm": 148.5073978214111, + "learning_rate": 5e-06, + "loss": 1.3603, + "num_input_tokens_seen": 679874576, + "step": 3933 + }, + { + "epoch": 1.506319417847568, + "loss": 1.3367570638656616, + "loss_ce": 0.15908914804458618, + "loss_iou": 0.5338717699050903, + "loss_num": 0.02197265625, + "loss_xval": 1.1776678562164307, + "num_input_tokens_seen": 679874576, + "step": 3933 + }, + { + "epoch": 1.5067024128686328, + "grad_norm": 125.59857770780995, + "learning_rate": 5e-06, + "loss": 1.2445, + "num_input_tokens_seen": 680047504, + "step": 3934 + }, + { + "epoch": 1.5067024128686328, + "loss": 1.1900660991668701, + "loss_ce": 0.1693650484085083, + "loss_iou": 0.4734853208065033, + "loss_num": 0.0147705078125, + "loss_xval": 1.0207011699676514, + "num_input_tokens_seen": 680047504, + "step": 3934 + }, + { + "epoch": 1.5070854078896976, + "grad_norm": 216.85466740687056, + "learning_rate": 5e-06, + "loss": 1.8651, + "num_input_tokens_seen": 680220768, + "step": 3935 + }, + { + "epoch": 1.5070854078896976, + "loss": 1.8050951957702637, + "loss_ce": 0.20160922408103943, + "loss_iou": 0.7552036643028259, + "loss_num": 0.0185546875, + "loss_xval": 1.6034859418869019, + "num_input_tokens_seen": 680220768, + "step": 3935 + }, + { + "epoch": 1.5074684029107621, + "grad_norm": 104.59036594951397, + "learning_rate": 5e-06, + "loss": 1.5949, + "num_input_tokens_seen": 680393960, + "step": 3936 + }, + { + "epoch": 1.5074684029107621, + "loss": 1.5934373140335083, + "loss_ce": 0.18331730365753174, + "loss_iou": 0.6564760208129883, + "loss_num": 0.0194091796875, + "loss_xval": 1.4101200103759766, + "num_input_tokens_seen": 680393960, + "step": 3936 + }, + { + "epoch": 1.5078513979318269, + "grad_norm": 94.69427317633223, + "learning_rate": 5e-06, + "loss": 1.2119, + "num_input_tokens_seen": 680566496, + "step": 3937 + }, + { + "epoch": 1.5078513979318269, + "loss": 1.2829943895339966, + "loss_ce": 0.11970832198858261, + "loss_iou": 0.5344933867454529, + "loss_num": 0.018798828125, + "loss_xval": 1.1632860898971558, + "num_input_tokens_seen": 680566496, + "step": 3937 + }, + { + "epoch": 1.5082343929528916, + "grad_norm": 153.91357361344652, + "learning_rate": 5e-06, + "loss": 1.31, + "num_input_tokens_seen": 680739464, + "step": 3938 + }, + { + "epoch": 1.5082343929528916, + "loss": 1.3061113357543945, + "loss_ce": 0.13433563709259033, + "loss_iou": 0.5441702604293823, + "loss_num": 0.0167236328125, + "loss_xval": 1.1717755794525146, + "num_input_tokens_seen": 680739464, + "step": 3938 + }, + { + "epoch": 1.5086173879739564, + "grad_norm": 103.84133103064748, + "learning_rate": 5e-06, + "loss": 1.615, + "num_input_tokens_seen": 680912048, + "step": 3939 + }, + { + "epoch": 1.5086173879739564, + "loss": 1.4809836149215698, + "loss_ce": 0.1380067616701126, + "loss_iou": 0.6224467158317566, + "loss_num": 0.0196533203125, + "loss_xval": 1.3429769277572632, + "num_input_tokens_seen": 680912048, + "step": 3939 + }, + { + "epoch": 1.509000382995021, + "grad_norm": 121.17580221839172, + "learning_rate": 5e-06, + "loss": 1.4507, + "num_input_tokens_seen": 681084920, + "step": 3940 + }, + { + "epoch": 1.509000382995021, + "loss": 1.5148675441741943, + "loss_ce": 0.15818607807159424, + "loss_iou": 0.6258504986763, + "loss_num": 0.02099609375, + "loss_xval": 1.3566814661026, + "num_input_tokens_seen": 681084920, + "step": 3940 + }, + { + "epoch": 1.5093833780160857, + "grad_norm": 226.64884905297725, + "learning_rate": 5e-06, + "loss": 2.0001, + "num_input_tokens_seen": 681257592, + "step": 3941 + }, + { + "epoch": 1.5093833780160857, + "loss": 2.0569167137145996, + "loss_ce": 0.1741567999124527, + "loss_iou": 0.8931165337562561, + "loss_num": 0.019287109375, + "loss_xval": 1.8827601671218872, + "num_input_tokens_seen": 681257592, + "step": 3941 + }, + { + "epoch": 1.5097663730371504, + "grad_norm": 136.4010765764811, + "learning_rate": 5e-06, + "loss": 1.9591, + "num_input_tokens_seen": 681430552, + "step": 3942 + }, + { + "epoch": 1.5097663730371504, + "loss": 1.8821616172790527, + "loss_ce": 0.2154098004102707, + "loss_iou": 0.7494220733642578, + "loss_num": 0.03369140625, + "loss_xval": 1.6667518615722656, + "num_input_tokens_seen": 681430552, + "step": 3942 + }, + { + "epoch": 1.5101493680582152, + "grad_norm": 67.87533683784773, + "learning_rate": 5e-06, + "loss": 1.3768, + "num_input_tokens_seen": 681603568, + "step": 3943 + }, + { + "epoch": 1.5101493680582152, + "loss": 1.332188367843628, + "loss_ce": 0.1632273942232132, + "loss_iou": 0.5403978824615479, + "loss_num": 0.017578125, + "loss_xval": 1.1689610481262207, + "num_input_tokens_seen": 681603568, + "step": 3943 + }, + { + "epoch": 1.51053236307928, + "grad_norm": 111.59310271327914, + "learning_rate": 5e-06, + "loss": 1.3828, + "num_input_tokens_seen": 681776256, + "step": 3944 + }, + { + "epoch": 1.51053236307928, + "loss": 1.3522703647613525, + "loss_ce": 0.16833075881004333, + "loss_iou": 0.5519307255744934, + "loss_num": 0.0159912109375, + "loss_xval": 1.1839395761489868, + "num_input_tokens_seen": 681776256, + "step": 3944 + }, + { + "epoch": 1.5109153581003447, + "grad_norm": 205.43888918345615, + "learning_rate": 5e-06, + "loss": 1.5487, + "num_input_tokens_seen": 681949584, + "step": 3945 + }, + { + "epoch": 1.5109153581003447, + "loss": 1.6860454082489014, + "loss_ce": 0.18768858909606934, + "loss_iou": 0.699862003326416, + "loss_num": 0.019775390625, + "loss_xval": 1.498356819152832, + "num_input_tokens_seen": 681949584, + "step": 3945 + }, + { + "epoch": 1.5112983531214095, + "grad_norm": 127.98848796269809, + "learning_rate": 5e-06, + "loss": 1.5937, + "num_input_tokens_seen": 682122688, + "step": 3946 + }, + { + "epoch": 1.5112983531214095, + "loss": 1.4739629030227661, + "loss_ce": 0.16732311248779297, + "loss_iou": 0.6131281852722168, + "loss_num": 0.01611328125, + "loss_xval": 1.3001699447631836, + "num_input_tokens_seen": 682122688, + "step": 3946 + }, + { + "epoch": 1.5116813481424742, + "grad_norm": 154.43631041264297, + "learning_rate": 5e-06, + "loss": 1.8958, + "num_input_tokens_seen": 682295648, + "step": 3947 + }, + { + "epoch": 1.5116813481424742, + "loss": 1.7483105659484863, + "loss_ce": 0.13244858384132385, + "loss_iou": 0.7305383682250977, + "loss_num": 0.031005859375, + "loss_xval": 1.6158618927001953, + "num_input_tokens_seen": 682295648, + "step": 3947 + }, + { + "epoch": 1.512064343163539, + "grad_norm": 108.00771571125344, + "learning_rate": 5e-06, + "loss": 1.3608, + "num_input_tokens_seen": 682468688, + "step": 3948 + }, + { + "epoch": 1.512064343163539, + "loss": 1.4411499500274658, + "loss_ce": 0.18800832331180573, + "loss_iou": 0.5710746049880981, + "loss_num": 0.022216796875, + "loss_xval": 1.2531416416168213, + "num_input_tokens_seen": 682468688, + "step": 3948 + }, + { + "epoch": 1.5124473381846038, + "grad_norm": 223.0197942802195, + "learning_rate": 5e-06, + "loss": 1.6571, + "num_input_tokens_seen": 682639792, + "step": 3949 + }, + { + "epoch": 1.5124473381846038, + "loss": 1.5951857566833496, + "loss_ce": 0.18513739109039307, + "loss_iou": 0.6478037238121033, + "loss_num": 0.02294921875, + "loss_xval": 1.4100483655929565, + "num_input_tokens_seen": 682639792, + "step": 3949 + }, + { + "epoch": 1.5128303332056683, + "grad_norm": 83.44992441252616, + "learning_rate": 5e-06, + "loss": 2.0769, + "num_input_tokens_seen": 682813040, + "step": 3950 + }, + { + "epoch": 1.5128303332056683, + "loss": 2.0230278968811035, + "loss_ce": 0.21663933992385864, + "loss_iou": 0.8293722867965698, + "loss_num": 0.029541015625, + "loss_xval": 1.8063886165618896, + "num_input_tokens_seen": 682813040, + "step": 3950 + }, + { + "epoch": 1.513213328226733, + "grad_norm": 109.7736289437539, + "learning_rate": 5e-06, + "loss": 1.8202, + "num_input_tokens_seen": 682985920, + "step": 3951 + }, + { + "epoch": 1.513213328226733, + "loss": 1.589260220527649, + "loss_ce": 0.1508396863937378, + "loss_iou": 0.6564966440200806, + "loss_num": 0.025146484375, + "loss_xval": 1.4384205341339111, + "num_input_tokens_seen": 682985920, + "step": 3951 + }, + { + "epoch": 1.5135963232477978, + "grad_norm": 113.40872734827907, + "learning_rate": 5e-06, + "loss": 1.4412, + "num_input_tokens_seen": 683158960, + "step": 3952 + }, + { + "epoch": 1.5135963232477978, + "loss": 1.4061739444732666, + "loss_ce": 0.16902853548526764, + "loss_iou": 0.5697598457336426, + "loss_num": 0.01953125, + "loss_xval": 1.2096796035766602, + "num_input_tokens_seen": 683158960, + "step": 3952 + }, + { + "epoch": 1.5139793182688623, + "grad_norm": 82.42682002201437, + "learning_rate": 5e-06, + "loss": 1.8388, + "num_input_tokens_seen": 683331592, + "step": 3953 + }, + { + "epoch": 1.5139793182688623, + "loss": 2.0235116481781006, + "loss_ce": 0.14281706511974335, + "loss_iou": 0.8556303977966309, + "loss_num": 0.033935546875, + "loss_xval": 1.8806943893432617, + "num_input_tokens_seen": 683331592, + "step": 3953 + }, + { + "epoch": 1.514362313289927, + "grad_norm": 91.4057633169997, + "learning_rate": 5e-06, + "loss": 1.5258, + "num_input_tokens_seen": 683504528, + "step": 3954 + }, + { + "epoch": 1.514362313289927, + "loss": 1.494154691696167, + "loss_ce": 0.1196802407503128, + "loss_iou": 0.6352352499961853, + "loss_num": 0.020751953125, + "loss_xval": 1.3662956953048706, + "num_input_tokens_seen": 683504528, + "step": 3954 + }, + { + "epoch": 1.5147453083109919, + "grad_norm": 107.3594175956104, + "learning_rate": 5e-06, + "loss": 1.5227, + "num_input_tokens_seen": 683677384, + "step": 3955 + }, + { + "epoch": 1.5147453083109919, + "loss": 1.478843331336975, + "loss_ce": 0.16039426624774933, + "loss_iou": 0.6079397201538086, + "loss_num": 0.0205078125, + "loss_xval": 1.3184490203857422, + "num_input_tokens_seen": 683677384, + "step": 3955 + }, + { + "epoch": 1.5151283033320566, + "grad_norm": 93.84858563554947, + "learning_rate": 5e-06, + "loss": 1.3261, + "num_input_tokens_seen": 683850496, + "step": 3956 + }, + { + "epoch": 1.5151283033320566, + "loss": 1.3284571170806885, + "loss_ce": 0.22089354693889618, + "loss_iou": 0.5197699069976807, + "loss_num": 0.01361083984375, + "loss_xval": 1.1075634956359863, + "num_input_tokens_seen": 683850496, + "step": 3956 + }, + { + "epoch": 1.5155112983531214, + "grad_norm": 211.77244019223895, + "learning_rate": 5e-06, + "loss": 1.9287, + "num_input_tokens_seen": 684023272, + "step": 3957 + }, + { + "epoch": 1.5155112983531214, + "loss": 1.8714628219604492, + "loss_ce": 0.14375276863574982, + "loss_iou": 0.8195739984512329, + "loss_num": 0.0177001953125, + "loss_xval": 1.7277100086212158, + "num_input_tokens_seen": 684023272, + "step": 3957 + }, + { + "epoch": 1.5158942933741861, + "grad_norm": 254.66520592087497, + "learning_rate": 5e-06, + "loss": 1.9458, + "num_input_tokens_seen": 684196400, + "step": 3958 + }, + { + "epoch": 1.5158942933741861, + "loss": 1.9426088333129883, + "loss_ce": 0.13388551771640778, + "loss_iou": 0.8289831876754761, + "loss_num": 0.0301513671875, + "loss_xval": 1.8087232112884521, + "num_input_tokens_seen": 684196400, + "step": 3958 + }, + { + "epoch": 1.5162772883952509, + "grad_norm": 152.56221898504054, + "learning_rate": 5e-06, + "loss": 2.9065, + "num_input_tokens_seen": 684369808, + "step": 3959 + }, + { + "epoch": 1.5162772883952509, + "loss": 2.9309732913970947, + "loss_ce": 0.14469051361083984, + "loss_iou": 1.317763090133667, + "loss_num": 0.0301513671875, + "loss_xval": 2.786283016204834, + "num_input_tokens_seen": 684369808, + "step": 3959 + }, + { + "epoch": 1.5166602834163156, + "grad_norm": 81.62702327862401, + "learning_rate": 5e-06, + "loss": 2.354, + "num_input_tokens_seen": 684542680, + "step": 3960 + }, + { + "epoch": 1.5166602834163156, + "loss": 2.4589433670043945, + "loss_ce": 0.1825285255908966, + "loss_iou": 0.9466791749000549, + "loss_num": 0.07666015625, + "loss_xval": 2.2764148712158203, + "num_input_tokens_seen": 684542680, + "step": 3960 + }, + { + "epoch": 1.5170432784373804, + "grad_norm": 82.43059801788286, + "learning_rate": 5e-06, + "loss": 1.492, + "num_input_tokens_seen": 684715264, + "step": 3961 + }, + { + "epoch": 1.5170432784373804, + "loss": 1.3565406799316406, + "loss_ce": 0.10414262115955353, + "loss_iou": 0.5798122882843018, + "loss_num": 0.0185546875, + "loss_xval": 1.2523980140686035, + "num_input_tokens_seen": 684715264, + "step": 3961 + }, + { + "epoch": 1.5174262734584452, + "grad_norm": 337.9323224462057, + "learning_rate": 5e-06, + "loss": 2.3093, + "num_input_tokens_seen": 684887952, + "step": 3962 + }, + { + "epoch": 1.5174262734584452, + "loss": 2.363466262817383, + "loss_ce": 0.1540198028087616, + "loss_iou": 1.040392279624939, + "loss_num": 0.0257568359375, + "loss_xval": 2.209446668624878, + "num_input_tokens_seen": 684887952, + "step": 3962 + }, + { + "epoch": 1.51780926847951, + "grad_norm": 121.1484778544416, + "learning_rate": 5e-06, + "loss": 2.2396, + "num_input_tokens_seen": 685061208, + "step": 3963 + }, + { + "epoch": 1.51780926847951, + "loss": 2.2443549633026123, + "loss_ce": 0.18490296602249146, + "loss_iou": 0.8711566925048828, + "loss_num": 0.0634765625, + "loss_xval": 2.0594520568847656, + "num_input_tokens_seen": 685061208, + "step": 3963 + }, + { + "epoch": 1.5181922635005745, + "grad_norm": 83.59056278522132, + "learning_rate": 5e-06, + "loss": 1.9785, + "num_input_tokens_seen": 685233880, + "step": 3964 + }, + { + "epoch": 1.5181922635005745, + "loss": 1.9430819749832153, + "loss_ce": 0.16593565046787262, + "loss_iou": 0.824882984161377, + "loss_num": 0.025390625, + "loss_xval": 1.777146339416504, + "num_input_tokens_seen": 685233880, + "step": 3964 + }, + { + "epoch": 1.5185752585216392, + "grad_norm": 120.30319911187016, + "learning_rate": 5e-06, + "loss": 2.475, + "num_input_tokens_seen": 685406640, + "step": 3965 + }, + { + "epoch": 1.5185752585216392, + "loss": 2.295506477355957, + "loss_ce": 0.17737790942192078, + "loss_iou": 1.001630187034607, + "loss_num": 0.02294921875, + "loss_xval": 2.118128538131714, + "num_input_tokens_seen": 685406640, + "step": 3965 + }, + { + "epoch": 1.518958253542704, + "grad_norm": 55.87681187685328, + "learning_rate": 5e-06, + "loss": 2.176, + "num_input_tokens_seen": 685579960, + "step": 3966 + }, + { + "epoch": 1.518958253542704, + "loss": 2.278275728225708, + "loss_ce": 0.24889490008354187, + "loss_iou": 0.8973199129104614, + "loss_num": 0.046875, + "loss_xval": 2.029381036758423, + "num_input_tokens_seen": 685579960, + "step": 3966 + }, + { + "epoch": 1.5193412485637685, + "grad_norm": 71.51627517987505, + "learning_rate": 5e-06, + "loss": 1.6915, + "num_input_tokens_seen": 685749456, + "step": 3967 + }, + { + "epoch": 1.5193412485637685, + "loss": 1.5556166172027588, + "loss_ce": 0.19647814333438873, + "loss_iou": 0.6312598586082458, + "loss_num": 0.019287109375, + "loss_xval": 1.3591383695602417, + "num_input_tokens_seen": 685749456, + "step": 3967 + }, + { + "epoch": 1.5197242435848333, + "grad_norm": 199.55954187293344, + "learning_rate": 5e-06, + "loss": 1.5538, + "num_input_tokens_seen": 685922568, + "step": 3968 + }, + { + "epoch": 1.5197242435848333, + "loss": 1.4557030200958252, + "loss_ce": 0.17327332496643066, + "loss_iou": 0.5964608192443848, + "loss_num": 0.0179443359375, + "loss_xval": 1.2824296951293945, + "num_input_tokens_seen": 685922568, + "step": 3968 + }, + { + "epoch": 1.520107238605898, + "grad_norm": 214.03904571066968, + "learning_rate": 5e-06, + "loss": 1.8144, + "num_input_tokens_seen": 686095808, + "step": 3969 + }, + { + "epoch": 1.520107238605898, + "loss": 1.8164312839508057, + "loss_ce": 0.15939146280288696, + "loss_iou": 0.7656842470169067, + "loss_num": 0.025146484375, + "loss_xval": 1.6570398807525635, + "num_input_tokens_seen": 686095808, + "step": 3969 + }, + { + "epoch": 1.5204902336269628, + "grad_norm": 224.43256981929963, + "learning_rate": 5e-06, + "loss": 1.9742, + "num_input_tokens_seen": 686268848, + "step": 3970 + }, + { + "epoch": 1.5204902336269628, + "loss": 2.041718006134033, + "loss_ce": 0.14143480360507965, + "loss_iou": 0.8721081018447876, + "loss_num": 0.03125, + "loss_xval": 1.9002830982208252, + "num_input_tokens_seen": 686268848, + "step": 3970 + }, + { + "epoch": 1.5208732286480275, + "grad_norm": 172.600474175764, + "learning_rate": 5e-06, + "loss": 1.8223, + "num_input_tokens_seen": 686441752, + "step": 3971 + }, + { + "epoch": 1.5208732286480275, + "loss": 1.8466782569885254, + "loss_ce": 0.18908868730068207, + "loss_iou": 0.7544234991073608, + "loss_num": 0.02978515625, + "loss_xval": 1.6575896739959717, + "num_input_tokens_seen": 686441752, + "step": 3971 + }, + { + "epoch": 1.5212562236690923, + "grad_norm": 97.08139887337438, + "learning_rate": 5e-06, + "loss": 1.5846, + "num_input_tokens_seen": 686614816, + "step": 3972 + }, + { + "epoch": 1.5212562236690923, + "loss": 1.645388126373291, + "loss_ce": 0.1794380396604538, + "loss_iou": 0.6563454866409302, + "loss_num": 0.0306396484375, + "loss_xval": 1.4659502506256104, + "num_input_tokens_seen": 686614816, + "step": 3972 + }, + { + "epoch": 1.521639218690157, + "grad_norm": 118.64503531925449, + "learning_rate": 5e-06, + "loss": 1.4486, + "num_input_tokens_seen": 686787720, + "step": 3973 + }, + { + "epoch": 1.521639218690157, + "loss": 1.5404670238494873, + "loss_ce": 0.16298604011535645, + "loss_iou": 0.6268203854560852, + "loss_num": 0.0247802734375, + "loss_xval": 1.3774811029434204, + "num_input_tokens_seen": 686787720, + "step": 3973 + }, + { + "epoch": 1.5220222137112218, + "grad_norm": 153.00926754387302, + "learning_rate": 5e-06, + "loss": 1.7508, + "num_input_tokens_seen": 686960536, + "step": 3974 + }, + { + "epoch": 1.5220222137112218, + "loss": 1.7331939935684204, + "loss_ce": 0.17863628268241882, + "loss_iou": 0.7111777663230896, + "loss_num": 0.0264892578125, + "loss_xval": 1.5545576810836792, + "num_input_tokens_seen": 686960536, + "step": 3974 + }, + { + "epoch": 1.5224052087322866, + "grad_norm": 99.33252420757358, + "learning_rate": 5e-06, + "loss": 1.4462, + "num_input_tokens_seen": 687133816, + "step": 3975 + }, + { + "epoch": 1.5224052087322866, + "loss": 1.349708080291748, + "loss_ce": 0.1548483967781067, + "loss_iou": 0.5491968393325806, + "loss_num": 0.019287109375, + "loss_xval": 1.1948597431182861, + "num_input_tokens_seen": 687133816, + "step": 3975 + }, + { + "epoch": 1.5227882037533513, + "grad_norm": 164.65765648653823, + "learning_rate": 5e-06, + "loss": 1.4702, + "num_input_tokens_seen": 687306776, + "step": 3976 + }, + { + "epoch": 1.5227882037533513, + "loss": 1.6038367748260498, + "loss_ce": 0.18100813031196594, + "loss_iou": 0.6422615051269531, + "loss_num": 0.0277099609375, + "loss_xval": 1.4228286743164062, + "num_input_tokens_seen": 687306776, + "step": 3976 + }, + { + "epoch": 1.523171198774416, + "grad_norm": 85.26483364804754, + "learning_rate": 5e-06, + "loss": 1.3608, + "num_input_tokens_seen": 687479800, + "step": 3977 + }, + { + "epoch": 1.523171198774416, + "loss": 1.4947741031646729, + "loss_ce": 0.1709693968296051, + "loss_iou": 0.6206426620483398, + "loss_num": 0.0164794921875, + "loss_xval": 1.3238048553466797, + "num_input_tokens_seen": 687479800, + "step": 3977 + }, + { + "epoch": 1.5235541937954806, + "grad_norm": 95.08004515964755, + "learning_rate": 5e-06, + "loss": 1.4765, + "num_input_tokens_seen": 687652672, + "step": 3978 + }, + { + "epoch": 1.5235541937954806, + "loss": 1.6495649814605713, + "loss_ce": 0.19735872745513916, + "loss_iou": 0.6748487949371338, + "loss_num": 0.0205078125, + "loss_xval": 1.4522061347961426, + "num_input_tokens_seen": 687652672, + "step": 3978 + }, + { + "epoch": 1.5239371888165454, + "grad_norm": 108.5698208549644, + "learning_rate": 5e-06, + "loss": 1.4894, + "num_input_tokens_seen": 687825624, + "step": 3979 + }, + { + "epoch": 1.5239371888165454, + "loss": 1.5349109172821045, + "loss_ce": 0.14889198541641235, + "loss_iou": 0.6471720337867737, + "loss_num": 0.018310546875, + "loss_xval": 1.386018991470337, + "num_input_tokens_seen": 687825624, + "step": 3979 + }, + { + "epoch": 1.5243201838376101, + "grad_norm": 123.08096805229017, + "learning_rate": 5e-06, + "loss": 1.6522, + "num_input_tokens_seen": 687998728, + "step": 3980 + }, + { + "epoch": 1.5243201838376101, + "loss": 1.725043535232544, + "loss_ce": 0.17687951028347015, + "loss_iou": 0.7139319181442261, + "loss_num": 0.0240478515625, + "loss_xval": 1.5481641292572021, + "num_input_tokens_seen": 687998728, + "step": 3980 + }, + { + "epoch": 1.5247031788586747, + "grad_norm": 142.74838888561476, + "learning_rate": 5e-06, + "loss": 1.3553, + "num_input_tokens_seen": 688171400, + "step": 3981 + }, + { + "epoch": 1.5247031788586747, + "loss": 1.1920249462127686, + "loss_ce": 0.12986698746681213, + "loss_iou": 0.4850126802921295, + "loss_num": 0.0184326171875, + "loss_xval": 1.0621578693389893, + "num_input_tokens_seen": 688171400, + "step": 3981 + }, + { + "epoch": 1.5250861738797394, + "grad_norm": 108.84666223775466, + "learning_rate": 5e-06, + "loss": 1.3054, + "num_input_tokens_seen": 688344192, + "step": 3982 + }, + { + "epoch": 1.5250861738797394, + "loss": 1.3433678150177002, + "loss_ce": 0.16022491455078125, + "loss_iou": 0.555637001991272, + "loss_num": 0.01434326171875, + "loss_xval": 1.183142900466919, + "num_input_tokens_seen": 688344192, + "step": 3982 + }, + { + "epoch": 1.5254691689008042, + "grad_norm": 144.08404942888112, + "learning_rate": 5e-06, + "loss": 1.468, + "num_input_tokens_seen": 688517040, + "step": 3983 + }, + { + "epoch": 1.5254691689008042, + "loss": 1.5013774633407593, + "loss_ce": 0.14980170130729675, + "loss_iou": 0.6245488524436951, + "loss_num": 0.0205078125, + "loss_xval": 1.3515757322311401, + "num_input_tokens_seen": 688517040, + "step": 3983 + }, + { + "epoch": 1.525852163921869, + "grad_norm": 176.312711176238, + "learning_rate": 5e-06, + "loss": 1.6051, + "num_input_tokens_seen": 688689992, + "step": 3984 + }, + { + "epoch": 1.525852163921869, + "loss": 1.482277512550354, + "loss_ce": 0.16307860612869263, + "loss_iou": 0.6153489351272583, + "loss_num": 0.0177001953125, + "loss_xval": 1.3191988468170166, + "num_input_tokens_seen": 688689992, + "step": 3984 + }, + { + "epoch": 1.5262351589429337, + "grad_norm": 121.81468625578677, + "learning_rate": 5e-06, + "loss": 1.3592, + "num_input_tokens_seen": 688862904, + "step": 3985 + }, + { + "epoch": 1.5262351589429337, + "loss": 1.2518173456192017, + "loss_ce": 0.15622524917125702, + "loss_iou": 0.5110986828804016, + "loss_num": 0.0146484375, + "loss_xval": 1.0804554224014282, + "num_input_tokens_seen": 688862904, + "step": 3985 + }, + { + "epoch": 1.5266181539639985, + "grad_norm": 298.4313072516741, + "learning_rate": 5e-06, + "loss": 1.7817, + "num_input_tokens_seen": 689035448, + "step": 3986 + }, + { + "epoch": 1.5266181539639985, + "loss": 1.6795730590820312, + "loss_ce": 0.13823232054710388, + "loss_iou": 0.6964516043663025, + "loss_num": 0.0296630859375, + "loss_xval": 1.541340708732605, + "num_input_tokens_seen": 689035448, + "step": 3986 + }, + { + "epoch": 1.5270011489850632, + "grad_norm": 242.10426549026667, + "learning_rate": 5e-06, + "loss": 1.4475, + "num_input_tokens_seen": 689204640, + "step": 3987 + }, + { + "epoch": 1.5270011489850632, + "loss": 1.5300861597061157, + "loss_ce": 0.16784243285655975, + "loss_iou": 0.631607174873352, + "loss_num": 0.019775390625, + "loss_xval": 1.362243890762329, + "num_input_tokens_seen": 689204640, + "step": 3987 + }, + { + "epoch": 1.527384144006128, + "grad_norm": 192.50921147839483, + "learning_rate": 5e-06, + "loss": 1.7361, + "num_input_tokens_seen": 689377632, + "step": 3988 + }, + { + "epoch": 1.527384144006128, + "loss": 1.644757628440857, + "loss_ce": 0.19601956009864807, + "loss_iou": 0.6878089904785156, + "loss_num": 0.0146484375, + "loss_xval": 1.4487380981445312, + "num_input_tokens_seen": 689377632, + "step": 3988 + }, + { + "epoch": 1.5277671390271927, + "grad_norm": 111.88305398399335, + "learning_rate": 5e-06, + "loss": 1.6252, + "num_input_tokens_seen": 689550544, + "step": 3989 + }, + { + "epoch": 1.5277671390271927, + "loss": 1.6201847791671753, + "loss_ce": 0.1457895040512085, + "loss_iou": 0.6800992488861084, + "loss_num": 0.0228271484375, + "loss_xval": 1.4743952751159668, + "num_input_tokens_seen": 689550544, + "step": 3989 + }, + { + "epoch": 1.5281501340482575, + "grad_norm": 97.06904162293944, + "learning_rate": 5e-06, + "loss": 1.3497, + "num_input_tokens_seen": 689723640, + "step": 3990 + }, + { + "epoch": 1.5281501340482575, + "loss": 1.361687183380127, + "loss_ce": 0.17057424783706665, + "loss_iou": 0.5443174839019775, + "loss_num": 0.0205078125, + "loss_xval": 1.191112995147705, + "num_input_tokens_seen": 689723640, + "step": 3990 + }, + { + "epoch": 1.5285331290693223, + "grad_norm": 90.30874889339908, + "learning_rate": 5e-06, + "loss": 1.5368, + "num_input_tokens_seen": 689896488, + "step": 3991 + }, + { + "epoch": 1.5285331290693223, + "loss": 1.5996413230895996, + "loss_ce": 0.17233280837535858, + "loss_iou": 0.6681984066963196, + "loss_num": 0.0181884765625, + "loss_xval": 1.4273086786270142, + "num_input_tokens_seen": 689896488, + "step": 3991 + }, + { + "epoch": 1.5289161240903868, + "grad_norm": 147.39944798644706, + "learning_rate": 5e-06, + "loss": 1.6209, + "num_input_tokens_seen": 690069520, + "step": 3992 + }, + { + "epoch": 1.5289161240903868, + "loss": 1.534162998199463, + "loss_ce": 0.18425728380680084, + "loss_iou": 0.6326555609703064, + "loss_num": 0.0169677734375, + "loss_xval": 1.3499058485031128, + "num_input_tokens_seen": 690069520, + "step": 3992 + }, + { + "epoch": 1.5292991191114516, + "grad_norm": 167.28159313669147, + "learning_rate": 5e-06, + "loss": 1.6969, + "num_input_tokens_seen": 690242560, + "step": 3993 + }, + { + "epoch": 1.5292991191114516, + "loss": 1.727402925491333, + "loss_ce": 0.15593892335891724, + "loss_iou": 0.7414052486419678, + "loss_num": 0.0177001953125, + "loss_xval": 1.5714640617370605, + "num_input_tokens_seen": 690242560, + "step": 3993 + }, + { + "epoch": 1.5296821141325163, + "grad_norm": 216.9949331892913, + "learning_rate": 5e-06, + "loss": 1.7428, + "num_input_tokens_seen": 690415248, + "step": 3994 + }, + { + "epoch": 1.5296821141325163, + "loss": 1.61613929271698, + "loss_ce": 0.1425824761390686, + "loss_iou": 0.6704636812210083, + "loss_num": 0.0264892578125, + "loss_xval": 1.4735567569732666, + "num_input_tokens_seen": 690415248, + "step": 3994 + }, + { + "epoch": 1.5300651091535808, + "grad_norm": 93.0324383986881, + "learning_rate": 5e-06, + "loss": 1.4356, + "num_input_tokens_seen": 690588264, + "step": 3995 + }, + { + "epoch": 1.5300651091535808, + "loss": 1.479750156402588, + "loss_ce": 0.15825852751731873, + "loss_iou": 0.6298162937164307, + "loss_num": 0.01239013671875, + "loss_xval": 1.3214917182922363, + "num_input_tokens_seen": 690588264, + "step": 3995 + }, + { + "epoch": 1.5304481041746456, + "grad_norm": 106.38967492820314, + "learning_rate": 5e-06, + "loss": 1.4427, + "num_input_tokens_seen": 690761440, + "step": 3996 + }, + { + "epoch": 1.5304481041746456, + "loss": 1.488261103630066, + "loss_ce": 0.16846603155136108, + "loss_iou": 0.6170355677604675, + "loss_num": 0.01708984375, + "loss_xval": 1.31979501247406, + "num_input_tokens_seen": 690761440, + "step": 3996 + }, + { + "epoch": 1.5308310991957104, + "grad_norm": 120.89542447178839, + "learning_rate": 5e-06, + "loss": 1.7495, + "num_input_tokens_seen": 690934416, + "step": 3997 + }, + { + "epoch": 1.5308310991957104, + "loss": 1.8797813653945923, + "loss_ce": 0.18472015857696533, + "loss_iou": 0.782970666885376, + "loss_num": 0.02587890625, + "loss_xval": 1.6950613260269165, + "num_input_tokens_seen": 690934416, + "step": 3997 + }, + { + "epoch": 1.5312140942167751, + "grad_norm": 109.55889150361551, + "learning_rate": 5e-06, + "loss": 1.5846, + "num_input_tokens_seen": 691107376, + "step": 3998 + }, + { + "epoch": 1.5312140942167751, + "loss": 1.6063764095306396, + "loss_ce": 0.2207745611667633, + "loss_iou": 0.6444305777549744, + "loss_num": 0.0194091796875, + "loss_xval": 1.3856018781661987, + "num_input_tokens_seen": 691107376, + "step": 3998 + }, + { + "epoch": 1.5315970892378399, + "grad_norm": 134.8528535173368, + "learning_rate": 5e-06, + "loss": 1.4115, + "num_input_tokens_seen": 691280352, + "step": 3999 + }, + { + "epoch": 1.5315970892378399, + "loss": 1.5181658267974854, + "loss_ce": 0.18865513801574707, + "loss_iou": 0.6115785241127014, + "loss_num": 0.021240234375, + "loss_xval": 1.3295108079910278, + "num_input_tokens_seen": 691280352, + "step": 3999 + }, + { + "epoch": 1.5319800842589046, + "grad_norm": 108.508561955465, + "learning_rate": 5e-06, + "loss": 1.7889, + "num_input_tokens_seen": 691453432, + "step": 4000 + }, + { + "epoch": 1.5319800842589046, + "eval_websight_new_CIoU": 0.9476378560066223, + "eval_websight_new_GIoU": 0.9474721252918243, + "eval_websight_new_IoU": 0.9477640390396118, + "eval_websight_new_MAE_all": 0.004908971022814512, + "eval_websight_new_MAE_h": 0.0029728240333497524, + "eval_websight_new_MAE_w": 0.005848645232617855, + "eval_websight_new_MAE_x": 0.004454620066098869, + "eval_websight_new_MAE_y": 0.006359795108437538, + "eval_websight_new_NUM_probability": 0.2463129609823227, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 0.8320034742355347, + "eval_websight_new_loss_ce": 0.3229493796825409, + "eval_websight_new_loss_iou": 0.23782658576965332, + "eval_websight_new_loss_num": 0.005114555358886719, + "eval_websight_new_loss_xval": 0.5012307018041611, + "eval_websight_new_runtime": 56.7352, + "eval_websight_new_samples_per_second": 0.881, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 691453432, + "step": 4000 + }, + { + "epoch": 1.5319800842589046, + "eval_seeclick_CIoU": 0.6127658784389496, + "eval_seeclick_GIoU": 0.6294102370738983, + "eval_seeclick_IoU": 0.6650410592556, + "eval_seeclick_MAE_all": 0.07940063998103142, + "eval_seeclick_MAE_h": 0.037692490965127945, + "eval_seeclick_MAE_w": 0.12155372649431229, + "eval_seeclick_MAE_x": 0.11558730155229568, + "eval_seeclick_MAE_y": 0.04276901297271252, + "eval_seeclick_NUM_probability": 0.5139389932155609, + "eval_seeclick_inside_bbox": 0.8420138955116272, + "eval_seeclick_loss": 1.731054425239563, + "eval_seeclick_loss_ce": 0.2207578793168068, + "eval_seeclick_loss_iou": 0.6101343333721161, + "eval_seeclick_loss_num": 0.050933837890625, + "eval_seeclick_loss_xval": 1.4750294089317322, + "eval_seeclick_runtime": 86.5426, + "eval_seeclick_samples_per_second": 0.578, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 691453432, + "step": 4000 + }, + { + "epoch": 1.5319800842589046, + "eval_icons_CIoU": 0.8217889368534088, + "eval_icons_GIoU": 0.8226584494113922, + "eval_icons_IoU": 0.838074266910553, + "eval_icons_MAE_all": 0.03133867587894201, + "eval_icons_MAE_h": 0.0188470296561718, + "eval_icons_MAE_w": 0.041911233216524124, + "eval_icons_MAE_x": 0.04401544854044914, + "eval_icons_MAE_y": 0.020580998621881008, + "eval_icons_NUM_probability": 0.3711031377315521, + "eval_icons_inside_bbox": 0.9409722089767456, + "eval_icons_loss": 1.8929972648620605, + "eval_icons_loss_ce": 0.39243292808532715, + "eval_icons_loss_iou": 0.6932045519351959, + "eval_icons_loss_num": 0.02086639404296875, + "eval_icons_loss_xval": 1.4908402562141418, + "eval_icons_runtime": 86.0582, + "eval_icons_samples_per_second": 0.581, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 691453432, + "step": 4000 + }, + { + "epoch": 1.5319800842589046, + "loss": 1.930707573890686, + "loss_ce": 0.39242303371429443, + "loss_iou": 0.7135698199272156, + "loss_num": 0.022216796875, + "loss_xval": 1.5382846593856812, + "num_input_tokens_seen": 691453432, + "step": 4000 + }, + { + "epoch": 1.5323630792799694, + "grad_norm": 46.19858695867596, + "learning_rate": 5e-06, + "loss": 1.35, + "num_input_tokens_seen": 691626112, + "step": 4001 + }, + { + "epoch": 1.5323630792799694, + "loss": 1.2584831714630127, + "loss_ce": 0.1658729612827301, + "loss_iou": 0.4762977957725525, + "loss_num": 0.028076171875, + "loss_xval": 1.092610239982605, + "num_input_tokens_seen": 691626112, + "step": 4001 + }, + { + "epoch": 1.5327460743010342, + "grad_norm": 64.63637224563966, + "learning_rate": 5e-06, + "loss": 1.3761, + "num_input_tokens_seen": 691798888, + "step": 4002 + }, + { + "epoch": 1.5327460743010342, + "loss": 1.1979390382766724, + "loss_ce": 0.20775623619556427, + "loss_iou": 0.4690675437450409, + "loss_num": 0.0103759765625, + "loss_xval": 0.9901828169822693, + "num_input_tokens_seen": 691798888, + "step": 4002 + }, + { + "epoch": 1.533129069322099, + "grad_norm": 115.07410986903477, + "learning_rate": 5e-06, + "loss": 1.193, + "num_input_tokens_seen": 691971736, + "step": 4003 + }, + { + "epoch": 1.533129069322099, + "loss": 1.0701923370361328, + "loss_ce": 0.12962406873703003, + "loss_iou": 0.4435812532901764, + "loss_num": 0.01068115234375, + "loss_xval": 0.9405682682991028, + "num_input_tokens_seen": 691971736, + "step": 4003 + }, + { + "epoch": 1.5335120643431637, + "grad_norm": 170.75227991480216, + "learning_rate": 5e-06, + "loss": 1.5122, + "num_input_tokens_seen": 692144880, + "step": 4004 + }, + { + "epoch": 1.5335120643431637, + "loss": 1.5192091464996338, + "loss_ce": 0.15954117476940155, + "loss_iou": 0.6162812113761902, + "loss_num": 0.025390625, + "loss_xval": 1.3596681356430054, + "num_input_tokens_seen": 692144880, + "step": 4004 + }, + { + "epoch": 1.5338950593642284, + "grad_norm": 153.79374738053806, + "learning_rate": 5e-06, + "loss": 1.7093, + "num_input_tokens_seen": 692317432, + "step": 4005 + }, + { + "epoch": 1.5338950593642284, + "loss": 1.8067653179168701, + "loss_ce": 0.18908385932445526, + "loss_iou": 0.7398710250854492, + "loss_num": 0.027587890625, + "loss_xval": 1.6176815032958984, + "num_input_tokens_seen": 692317432, + "step": 4005 + }, + { + "epoch": 1.534278054385293, + "grad_norm": 99.87547366584295, + "learning_rate": 5e-06, + "loss": 1.2958, + "num_input_tokens_seen": 692490368, + "step": 4006 + }, + { + "epoch": 1.534278054385293, + "loss": 1.2684710025787354, + "loss_ce": 0.1644982248544693, + "loss_iou": 0.5198361873626709, + "loss_num": 0.0128173828125, + "loss_xval": 1.1039729118347168, + "num_input_tokens_seen": 692490368, + "step": 4006 + }, + { + "epoch": 1.5346610494063577, + "grad_norm": 97.69049595070976, + "learning_rate": 5e-06, + "loss": 1.3975, + "num_input_tokens_seen": 692663400, + "step": 4007 + }, + { + "epoch": 1.5346610494063577, + "loss": 1.3261613845825195, + "loss_ce": 0.19734099507331848, + "loss_iou": 0.5318784713745117, + "loss_num": 0.01300048828125, + "loss_xval": 1.1288204193115234, + "num_input_tokens_seen": 692663400, + "step": 4007 + }, + { + "epoch": 1.5350440444274225, + "grad_norm": 198.11477350605034, + "learning_rate": 5e-06, + "loss": 1.4736, + "num_input_tokens_seen": 692836616, + "step": 4008 + }, + { + "epoch": 1.5350440444274225, + "loss": 1.4241814613342285, + "loss_ce": 0.19302211701869965, + "loss_iou": 0.5766849517822266, + "loss_num": 0.01556396484375, + "loss_xval": 1.2311592102050781, + "num_input_tokens_seen": 692836616, + "step": 4008 + }, + { + "epoch": 1.535427039448487, + "grad_norm": 157.87159707061483, + "learning_rate": 5e-06, + "loss": 1.612, + "num_input_tokens_seen": 693009600, + "step": 4009 + }, + { + "epoch": 1.535427039448487, + "loss": 1.4267079830169678, + "loss_ce": 0.17157082259655, + "loss_iou": 0.5778554081916809, + "loss_num": 0.0198974609375, + "loss_xval": 1.2551370859146118, + "num_input_tokens_seen": 693009600, + "step": 4009 + }, + { + "epoch": 1.5358100344695518, + "grad_norm": 113.47818812880557, + "learning_rate": 5e-06, + "loss": 1.4546, + "num_input_tokens_seen": 693182488, + "step": 4010 + }, + { + "epoch": 1.5358100344695518, + "loss": 1.485398530960083, + "loss_ce": 0.1909884810447693, + "loss_iou": 0.6073033213615417, + "loss_num": 0.0159912109375, + "loss_xval": 1.2944101095199585, + "num_input_tokens_seen": 693182488, + "step": 4010 + }, + { + "epoch": 1.5361930294906165, + "grad_norm": 111.18114453476046, + "learning_rate": 5e-06, + "loss": 1.5617, + "num_input_tokens_seen": 693355216, + "step": 4011 + }, + { + "epoch": 1.5361930294906165, + "loss": 1.6103756427764893, + "loss_ce": 0.17208832502365112, + "loss_iou": 0.662030041217804, + "loss_num": 0.0228271484375, + "loss_xval": 1.438287377357483, + "num_input_tokens_seen": 693355216, + "step": 4011 + }, + { + "epoch": 1.5365760245116813, + "grad_norm": 78.26411281282405, + "learning_rate": 5e-06, + "loss": 1.7468, + "num_input_tokens_seen": 693527760, + "step": 4012 + }, + { + "epoch": 1.5365760245116813, + "loss": 1.6645243167877197, + "loss_ce": 0.15518814325332642, + "loss_iou": 0.6866138577461243, + "loss_num": 0.0272216796875, + "loss_xval": 1.5093361139297485, + "num_input_tokens_seen": 693527760, + "step": 4012 + }, + { + "epoch": 1.536959019532746, + "grad_norm": 63.912879554020364, + "learning_rate": 5e-06, + "loss": 1.2575, + "num_input_tokens_seen": 693700288, + "step": 4013 + }, + { + "epoch": 1.536959019532746, + "loss": 1.220572829246521, + "loss_ce": 0.14960825443267822, + "loss_iou": 0.4911249876022339, + "loss_num": 0.017822265625, + "loss_xval": 1.0709645748138428, + "num_input_tokens_seen": 693700288, + "step": 4013 + }, + { + "epoch": 1.5373420145538108, + "grad_norm": 88.04675407486323, + "learning_rate": 5e-06, + "loss": 1.2159, + "num_input_tokens_seen": 693872880, + "step": 4014 + }, + { + "epoch": 1.5373420145538108, + "loss": 1.228543996810913, + "loss_ce": 0.1612216681241989, + "loss_iou": 0.5036013126373291, + "loss_num": 0.01202392578125, + "loss_xval": 1.0673222541809082, + "num_input_tokens_seen": 693872880, + "step": 4014 + }, + { + "epoch": 1.5377250095748756, + "grad_norm": 156.12297498999627, + "learning_rate": 5e-06, + "loss": 1.5896, + "num_input_tokens_seen": 694045672, + "step": 4015 + }, + { + "epoch": 1.5377250095748756, + "loss": 1.554947853088379, + "loss_ce": 0.20863324403762817, + "loss_iou": 0.6180731058120728, + "loss_num": 0.0220947265625, + "loss_xval": 1.3463146686553955, + "num_input_tokens_seen": 694045672, + "step": 4015 + }, + { + "epoch": 1.5381080045959403, + "grad_norm": 115.45476900828294, + "learning_rate": 5e-06, + "loss": 1.743, + "num_input_tokens_seen": 694218832, + "step": 4016 + }, + { + "epoch": 1.5381080045959403, + "loss": 1.7367010116577148, + "loss_ce": 0.2145659327507019, + "loss_iou": 0.6940509676933289, + "loss_num": 0.02685546875, + "loss_xval": 1.5221351385116577, + "num_input_tokens_seen": 694218832, + "step": 4016 + }, + { + "epoch": 1.538490999617005, + "grad_norm": 57.166597421566486, + "learning_rate": 5e-06, + "loss": 1.273, + "num_input_tokens_seen": 694391936, + "step": 4017 + }, + { + "epoch": 1.538490999617005, + "loss": 1.1809477806091309, + "loss_ce": 0.15838342905044556, + "loss_iou": 0.46304917335510254, + "loss_num": 0.019287109375, + "loss_xval": 1.02256441116333, + "num_input_tokens_seen": 694391936, + "step": 4017 + }, + { + "epoch": 1.5388739946380698, + "grad_norm": 93.45333582588987, + "learning_rate": 5e-06, + "loss": 1.2499, + "num_input_tokens_seen": 694565152, + "step": 4018 + }, + { + "epoch": 1.5388739946380698, + "loss": 1.3203922510147095, + "loss_ce": 0.17759379744529724, + "loss_iou": 0.5262026786804199, + "loss_num": 0.01806640625, + "loss_xval": 1.1427984237670898, + "num_input_tokens_seen": 694565152, + "step": 4018 + }, + { + "epoch": 1.5392569896591344, + "grad_norm": 95.92580225372755, + "learning_rate": 5e-06, + "loss": 1.6361, + "num_input_tokens_seen": 694738304, + "step": 4019 + }, + { + "epoch": 1.5392569896591344, + "loss": 1.5220376253128052, + "loss_ce": 0.15661263465881348, + "loss_iou": 0.6216772794723511, + "loss_num": 0.0244140625, + "loss_xval": 1.3654248714447021, + "num_input_tokens_seen": 694738304, + "step": 4019 + }, + { + "epoch": 1.5396399846801991, + "grad_norm": 67.68619604488842, + "learning_rate": 5e-06, + "loss": 1.1718, + "num_input_tokens_seen": 694911200, + "step": 4020 + }, + { + "epoch": 1.5396399846801991, + "loss": 1.244327187538147, + "loss_ce": 0.17027240991592407, + "loss_iou": 0.5018863677978516, + "loss_num": 0.0140380859375, + "loss_xval": 1.0740547180175781, + "num_input_tokens_seen": 694911200, + "step": 4020 + }, + { + "epoch": 1.540022979701264, + "grad_norm": 182.7957511965751, + "learning_rate": 5e-06, + "loss": 1.504, + "num_input_tokens_seen": 695084080, + "step": 4021 + }, + { + "epoch": 1.540022979701264, + "loss": 1.5927867889404297, + "loss_ce": 0.1959582269191742, + "loss_iou": 0.6407970786094666, + "loss_num": 0.0230712890625, + "loss_xval": 1.396828532218933, + "num_input_tokens_seen": 695084080, + "step": 4021 + }, + { + "epoch": 1.5404059747223287, + "grad_norm": 107.9979394300168, + "learning_rate": 5e-06, + "loss": 1.705, + "num_input_tokens_seen": 695257040, + "step": 4022 + }, + { + "epoch": 1.5404059747223287, + "loss": 1.6298996210098267, + "loss_ce": 0.13876935839653015, + "loss_iou": 0.6566979289054871, + "loss_num": 0.03564453125, + "loss_xval": 1.4549974203109741, + "num_input_tokens_seen": 695257040, + "step": 4022 + }, + { + "epoch": 1.5407889697433932, + "grad_norm": 109.67021807219069, + "learning_rate": 5e-06, + "loss": 1.2811, + "num_input_tokens_seen": 695429928, + "step": 4023 + }, + { + "epoch": 1.5407889697433932, + "loss": 1.1913628578186035, + "loss_ce": 0.16078069806098938, + "loss_iou": 0.48027217388153076, + "loss_num": 0.0140380859375, + "loss_xval": 1.0305821895599365, + "num_input_tokens_seen": 695429928, + "step": 4023 + }, + { + "epoch": 1.541171964764458, + "grad_norm": 152.34037272652972, + "learning_rate": 5e-06, + "loss": 1.4865, + "num_input_tokens_seen": 695602808, + "step": 4024 + }, + { + "epoch": 1.541171964764458, + "loss": 1.4521257877349854, + "loss_ce": 0.1641998291015625, + "loss_iou": 0.6030999422073364, + "loss_num": 0.016357421875, + "loss_xval": 1.2879259586334229, + "num_input_tokens_seen": 695602808, + "step": 4024 + }, + { + "epoch": 1.5415549597855227, + "grad_norm": 119.86376995028226, + "learning_rate": 5e-06, + "loss": 1.4631, + "num_input_tokens_seen": 695775456, + "step": 4025 + }, + { + "epoch": 1.5415549597855227, + "loss": 1.4829367399215698, + "loss_ce": 0.17699642479419708, + "loss_iou": 0.6118019223213196, + "loss_num": 0.0164794921875, + "loss_xval": 1.3059402704238892, + "num_input_tokens_seen": 695775456, + "step": 4025 + }, + { + "epoch": 1.5419379548065875, + "grad_norm": 199.6008949325988, + "learning_rate": 5e-06, + "loss": 1.6634, + "num_input_tokens_seen": 695948720, + "step": 4026 + }, + { + "epoch": 1.5419379548065875, + "loss": 1.6517527103424072, + "loss_ce": 0.1972631812095642, + "loss_iou": 0.6919969320297241, + "loss_num": 0.01409912109375, + "loss_xval": 1.4544894695281982, + "num_input_tokens_seen": 695948720, + "step": 4026 + }, + { + "epoch": 1.5423209498276522, + "grad_norm": 173.84512058132393, + "learning_rate": 5e-06, + "loss": 2.1056, + "num_input_tokens_seen": 696121840, + "step": 4027 + }, + { + "epoch": 1.5423209498276522, + "loss": 2.1429200172424316, + "loss_ce": 0.20784983038902283, + "loss_iou": 0.9154415726661682, + "loss_num": 0.0208740234375, + "loss_xval": 1.9350701570510864, + "num_input_tokens_seen": 696121840, + "step": 4027 + }, + { + "epoch": 1.542703944848717, + "grad_norm": 137.5637997552979, + "learning_rate": 5e-06, + "loss": 1.9552, + "num_input_tokens_seen": 696294640, + "step": 4028 + }, + { + "epoch": 1.542703944848717, + "loss": 2.0105209350585938, + "loss_ce": 0.1763545572757721, + "loss_iou": 0.8391107320785522, + "loss_num": 0.03125, + "loss_xval": 1.8341662883758545, + "num_input_tokens_seen": 696294640, + "step": 4028 + }, + { + "epoch": 1.5430869398697817, + "grad_norm": 95.63297736079416, + "learning_rate": 5e-06, + "loss": 1.7056, + "num_input_tokens_seen": 696468072, + "step": 4029 + }, + { + "epoch": 1.5430869398697817, + "loss": 1.6168577671051025, + "loss_ce": 0.19520872831344604, + "loss_iou": 0.6636748313903809, + "loss_num": 0.0189208984375, + "loss_xval": 1.4216489791870117, + "num_input_tokens_seen": 696468072, + "step": 4029 + }, + { + "epoch": 1.5434699348908465, + "grad_norm": 104.9830239639552, + "learning_rate": 5e-06, + "loss": 1.4851, + "num_input_tokens_seen": 696641408, + "step": 4030 + }, + { + "epoch": 1.5434699348908465, + "loss": 1.4427449703216553, + "loss_ce": 0.1917538344860077, + "loss_iou": 0.5754314661026001, + "loss_num": 0.02001953125, + "loss_xval": 1.2509911060333252, + "num_input_tokens_seen": 696641408, + "step": 4030 + }, + { + "epoch": 1.5438529299119113, + "grad_norm": 112.33921601219592, + "learning_rate": 5e-06, + "loss": 1.777, + "num_input_tokens_seen": 696814304, + "step": 4031 + }, + { + "epoch": 1.5438529299119113, + "loss": 1.7711230516433716, + "loss_ce": 0.18419550359249115, + "loss_iou": 0.7331000566482544, + "loss_num": 0.024169921875, + "loss_xval": 1.5869276523590088, + "num_input_tokens_seen": 696814304, + "step": 4031 + }, + { + "epoch": 1.544235924932976, + "grad_norm": 223.81233438764835, + "learning_rate": 5e-06, + "loss": 1.3109, + "num_input_tokens_seen": 696987152, + "step": 4032 + }, + { + "epoch": 1.544235924932976, + "loss": 1.3282899856567383, + "loss_ce": 0.14142560958862305, + "loss_iou": 0.5531795024871826, + "loss_num": 0.01611328125, + "loss_xval": 1.1868643760681152, + "num_input_tokens_seen": 696987152, + "step": 4032 + }, + { + "epoch": 1.5446189199540405, + "grad_norm": 121.71460532194978, + "learning_rate": 5e-06, + "loss": 1.6271, + "num_input_tokens_seen": 697159976, + "step": 4033 + }, + { + "epoch": 1.5446189199540405, + "loss": 1.4645191431045532, + "loss_ce": 0.15616536140441895, + "loss_iou": 0.5996571779251099, + "loss_num": 0.0218505859375, + "loss_xval": 1.3083536624908447, + "num_input_tokens_seen": 697159976, + "step": 4033 + }, + { + "epoch": 1.5450019149751053, + "grad_norm": 224.39751296901443, + "learning_rate": 5e-06, + "loss": 1.9026, + "num_input_tokens_seen": 697333144, + "step": 4034 + }, + { + "epoch": 1.5450019149751053, + "loss": 1.8723915815353394, + "loss_ce": 0.18856292963027954, + "loss_iou": 0.7223464846611023, + "loss_num": 0.0478515625, + "loss_xval": 1.6838287115097046, + "num_input_tokens_seen": 697333144, + "step": 4034 + }, + { + "epoch": 1.54538490999617, + "grad_norm": 86.38781709948974, + "learning_rate": 5e-06, + "loss": 1.5271, + "num_input_tokens_seen": 697505984, + "step": 4035 + }, + { + "epoch": 1.54538490999617, + "loss": 1.4957278966903687, + "loss_ce": 0.1962394416332245, + "loss_iou": 0.5999701023101807, + "loss_num": 0.0198974609375, + "loss_xval": 1.2994885444641113, + "num_input_tokens_seen": 697505984, + "step": 4035 + }, + { + "epoch": 1.5457679050172348, + "grad_norm": 59.55226856790362, + "learning_rate": 5e-06, + "loss": 1.4834, + "num_input_tokens_seen": 697679176, + "step": 4036 + }, + { + "epoch": 1.5457679050172348, + "loss": 1.5904284715652466, + "loss_ce": 0.16325372457504272, + "loss_iou": 0.6481729745864868, + "loss_num": 0.026123046875, + "loss_xval": 1.4271748065948486, + "num_input_tokens_seen": 697679176, + "step": 4036 + }, + { + "epoch": 1.5461509000382994, + "grad_norm": 123.63629350762896, + "learning_rate": 5e-06, + "loss": 1.2917, + "num_input_tokens_seen": 697852312, + "step": 4037 + }, + { + "epoch": 1.5461509000382994, + "loss": 1.2972408533096313, + "loss_ce": 0.17061948776245117, + "loss_iou": 0.5271702408790588, + "loss_num": 0.01446533203125, + "loss_xval": 1.1266213655471802, + "num_input_tokens_seen": 697852312, + "step": 4037 + }, + { + "epoch": 1.5465338950593641, + "grad_norm": 137.7346544970482, + "learning_rate": 5e-06, + "loss": 1.3046, + "num_input_tokens_seen": 698025168, + "step": 4038 + }, + { + "epoch": 1.5465338950593641, + "loss": 1.2661736011505127, + "loss_ce": 0.14972297847270966, + "loss_iou": 0.5060555338859558, + "loss_num": 0.0208740234375, + "loss_xval": 1.1164506673812866, + "num_input_tokens_seen": 698025168, + "step": 4038 + }, + { + "epoch": 1.5469168900804289, + "grad_norm": 159.85474924022307, + "learning_rate": 5e-06, + "loss": 1.5059, + "num_input_tokens_seen": 698198272, + "step": 4039 + }, + { + "epoch": 1.5469168900804289, + "loss": 1.4872212409973145, + "loss_ce": 0.1583525836467743, + "loss_iou": 0.6273249387741089, + "loss_num": 0.01483154296875, + "loss_xval": 1.3288686275482178, + "num_input_tokens_seen": 698198272, + "step": 4039 + }, + { + "epoch": 1.5472998851014936, + "grad_norm": 103.27065357471398, + "learning_rate": 5e-06, + "loss": 1.3301, + "num_input_tokens_seen": 698371192, + "step": 4040 + }, + { + "epoch": 1.5472998851014936, + "loss": 1.3318990468978882, + "loss_ce": 0.20116597414016724, + "loss_iou": 0.5277841091156006, + "loss_num": 0.0150146484375, + "loss_xval": 1.1307330131530762, + "num_input_tokens_seen": 698371192, + "step": 4040 + }, + { + "epoch": 1.5476828801225584, + "grad_norm": 142.5808620696021, + "learning_rate": 5e-06, + "loss": 1.3475, + "num_input_tokens_seen": 698543920, + "step": 4041 + }, + { + "epoch": 1.5476828801225584, + "loss": 1.4051227569580078, + "loss_ce": 0.1359701156616211, + "loss_iou": 0.5977416038513184, + "loss_num": 0.0147705078125, + "loss_xval": 1.2691526412963867, + "num_input_tokens_seen": 698543920, + "step": 4041 + }, + { + "epoch": 1.5480658751436231, + "grad_norm": 118.78991913862968, + "learning_rate": 5e-06, + "loss": 1.5009, + "num_input_tokens_seen": 698716568, + "step": 4042 + }, + { + "epoch": 1.5480658751436231, + "loss": 1.5289782285690308, + "loss_ce": 0.17239195108413696, + "loss_iou": 0.6370944380760193, + "loss_num": 0.0164794921875, + "loss_xval": 1.3565863370895386, + "num_input_tokens_seen": 698716568, + "step": 4042 + }, + { + "epoch": 1.548448870164688, + "grad_norm": 161.89217274162206, + "learning_rate": 5e-06, + "loss": 1.6185, + "num_input_tokens_seen": 698889800, + "step": 4043 + }, + { + "epoch": 1.548448870164688, + "loss": 1.551445484161377, + "loss_ce": 0.16810211539268494, + "loss_iou": 0.6364959478378296, + "loss_num": 0.02197265625, + "loss_xval": 1.3833434581756592, + "num_input_tokens_seen": 698889800, + "step": 4043 + }, + { + "epoch": 1.5488318651857527, + "grad_norm": 532.9776921227485, + "learning_rate": 5e-06, + "loss": 1.5141, + "num_input_tokens_seen": 699062904, + "step": 4044 + }, + { + "epoch": 1.5488318651857527, + "loss": 1.4322932958602905, + "loss_ce": 0.1805468648672104, + "loss_iou": 0.574313759803772, + "loss_num": 0.0206298828125, + "loss_xval": 1.251746416091919, + "num_input_tokens_seen": 699062904, + "step": 4044 + }, + { + "epoch": 1.5492148602068174, + "grad_norm": 150.11870950442585, + "learning_rate": 5e-06, + "loss": 1.698, + "num_input_tokens_seen": 699235832, + "step": 4045 + }, + { + "epoch": 1.5492148602068174, + "loss": 1.642263412475586, + "loss_ce": 0.15046949684619904, + "loss_iou": 0.6751571893692017, + "loss_num": 0.0283203125, + "loss_xval": 1.4917938709259033, + "num_input_tokens_seen": 699235832, + "step": 4045 + }, + { + "epoch": 1.5495978552278822, + "grad_norm": 191.6479875586437, + "learning_rate": 5e-06, + "loss": 1.3066, + "num_input_tokens_seen": 699408488, + "step": 4046 + }, + { + "epoch": 1.5495978552278822, + "loss": 1.3165333271026611, + "loss_ce": 0.13863667845726013, + "loss_iou": 0.5390978455543518, + "loss_num": 0.0198974609375, + "loss_xval": 1.1778966188430786, + "num_input_tokens_seen": 699408488, + "step": 4046 + }, + { + "epoch": 1.5499808502489467, + "grad_norm": 79.09853614584158, + "learning_rate": 5e-06, + "loss": 1.8071, + "num_input_tokens_seen": 699581464, + "step": 4047 + }, + { + "epoch": 1.5499808502489467, + "loss": 1.7550569772720337, + "loss_ce": 0.14108368754386902, + "loss_iou": 0.7232463955879211, + "loss_num": 0.033447265625, + "loss_xval": 1.6139732599258423, + "num_input_tokens_seen": 699581464, + "step": 4047 + }, + { + "epoch": 1.5503638452700115, + "grad_norm": 86.21733289719229, + "learning_rate": 5e-06, + "loss": 1.2929, + "num_input_tokens_seen": 699754040, + "step": 4048 + }, + { + "epoch": 1.5503638452700115, + "loss": 1.265880823135376, + "loss_ce": 0.16111326217651367, + "loss_iou": 0.5023578405380249, + "loss_num": 0.02001953125, + "loss_xval": 1.1047675609588623, + "num_input_tokens_seen": 699754040, + "step": 4048 + }, + { + "epoch": 1.5507468402910762, + "grad_norm": 163.46510712305485, + "learning_rate": 5e-06, + "loss": 1.5917, + "num_input_tokens_seen": 699926840, + "step": 4049 + }, + { + "epoch": 1.5507468402910762, + "loss": 1.6322741508483887, + "loss_ce": 0.16537222266197205, + "loss_iou": 0.6964025497436523, + "loss_num": 0.01483154296875, + "loss_xval": 1.4669017791748047, + "num_input_tokens_seen": 699926840, + "step": 4049 + }, + { + "epoch": 1.551129835312141, + "grad_norm": 96.6453890523453, + "learning_rate": 5e-06, + "loss": 1.631, + "num_input_tokens_seen": 700099976, + "step": 4050 + }, + { + "epoch": 1.551129835312141, + "loss": 1.5915098190307617, + "loss_ce": 0.17163625359535217, + "loss_iou": 0.6488100290298462, + "loss_num": 0.0245361328125, + "loss_xval": 1.4198734760284424, + "num_input_tokens_seen": 700099976, + "step": 4050 + }, + { + "epoch": 1.5515128303332055, + "grad_norm": 62.40844671149482, + "learning_rate": 5e-06, + "loss": 1.3397, + "num_input_tokens_seen": 700272632, + "step": 4051 + }, + { + "epoch": 1.5515128303332055, + "loss": 1.4549493789672852, + "loss_ce": 0.19186116755008698, + "loss_iou": 0.5998668074607849, + "loss_num": 0.0126953125, + "loss_xval": 1.2630881071090698, + "num_input_tokens_seen": 700272632, + "step": 4051 + }, + { + "epoch": 1.5518958253542703, + "grad_norm": 126.44252936551133, + "learning_rate": 5e-06, + "loss": 1.3642, + "num_input_tokens_seen": 700445696, + "step": 4052 + }, + { + "epoch": 1.5518958253542703, + "loss": 1.4157166481018066, + "loss_ce": 0.1703338921070099, + "loss_iou": 0.5618698596954346, + "loss_num": 0.0242919921875, + "loss_xval": 1.2453827857971191, + "num_input_tokens_seen": 700445696, + "step": 4052 + }, + { + "epoch": 1.552278820375335, + "grad_norm": 122.92057155810977, + "learning_rate": 5e-06, + "loss": 1.3993, + "num_input_tokens_seen": 700618784, + "step": 4053 + }, + { + "epoch": 1.552278820375335, + "loss": 1.3907073736190796, + "loss_ce": 0.17245326936244965, + "loss_iou": 0.5662040710449219, + "loss_num": 0.0172119140625, + "loss_xval": 1.2182540893554688, + "num_input_tokens_seen": 700618784, + "step": 4053 + }, + { + "epoch": 1.5526618153963998, + "grad_norm": 148.87953002659964, + "learning_rate": 5e-06, + "loss": 1.6484, + "num_input_tokens_seen": 700792176, + "step": 4054 + }, + { + "epoch": 1.5526618153963998, + "loss": 1.5574525594711304, + "loss_ce": 0.10910694301128387, + "loss_iou": 0.6690123081207275, + "loss_num": 0.0220947265625, + "loss_xval": 1.44834566116333, + "num_input_tokens_seen": 700792176, + "step": 4054 + }, + { + "epoch": 1.5530448104174646, + "grad_norm": 106.30489665811756, + "learning_rate": 5e-06, + "loss": 1.9592, + "num_input_tokens_seen": 700964728, + "step": 4055 + }, + { + "epoch": 1.5530448104174646, + "loss": 1.8856539726257324, + "loss_ce": 0.16048921644687653, + "loss_iou": 0.7678557634353638, + "loss_num": 0.037841796875, + "loss_xval": 1.7251646518707275, + "num_input_tokens_seen": 700964728, + "step": 4055 + }, + { + "epoch": 1.5534278054385293, + "grad_norm": 192.2419175697598, + "learning_rate": 5e-06, + "loss": 1.5801, + "num_input_tokens_seen": 701138128, + "step": 4056 + }, + { + "epoch": 1.5534278054385293, + "loss": 1.5525187253952026, + "loss_ce": 0.22270707786083221, + "loss_iou": 0.6143839955329895, + "loss_num": 0.020263671875, + "loss_xval": 1.329811692237854, + "num_input_tokens_seen": 701138128, + "step": 4056 + }, + { + "epoch": 1.553810800459594, + "grad_norm": 166.7682208271423, + "learning_rate": 5e-06, + "loss": 1.7475, + "num_input_tokens_seen": 701310848, + "step": 4057 + }, + { + "epoch": 1.553810800459594, + "loss": 1.866917610168457, + "loss_ce": 0.14354538917541504, + "loss_iou": 0.807303786277771, + "loss_num": 0.021728515625, + "loss_xval": 1.723372220993042, + "num_input_tokens_seen": 701310848, + "step": 4057 + }, + { + "epoch": 1.5541937954806588, + "grad_norm": 142.03188244107125, + "learning_rate": 5e-06, + "loss": 2.334, + "num_input_tokens_seen": 701483760, + "step": 4058 + }, + { + "epoch": 1.5541937954806588, + "loss": 2.191911220550537, + "loss_ce": 0.22595535218715668, + "loss_iou": 0.8638982772827148, + "loss_num": 0.047607421875, + "loss_xval": 1.9659557342529297, + "num_input_tokens_seen": 701483760, + "step": 4058 + }, + { + "epoch": 1.5545767905017236, + "grad_norm": 88.74749877146184, + "learning_rate": 5e-06, + "loss": 1.3027, + "num_input_tokens_seen": 701656888, + "step": 4059 + }, + { + "epoch": 1.5545767905017236, + "loss": 1.2640423774719238, + "loss_ce": 0.17497318983078003, + "loss_iou": 0.5103548765182495, + "loss_num": 0.013671875, + "loss_xval": 1.089069128036499, + "num_input_tokens_seen": 701656888, + "step": 4059 + }, + { + "epoch": 1.5549597855227884, + "grad_norm": 155.0285265772275, + "learning_rate": 5e-06, + "loss": 1.5255, + "num_input_tokens_seen": 701826400, + "step": 4060 + }, + { + "epoch": 1.5549597855227884, + "loss": 1.3491778373718262, + "loss_ce": 0.15473467111587524, + "loss_iou": 0.5659410953521729, + "loss_num": 0.01251220703125, + "loss_xval": 1.1944432258605957, + "num_input_tokens_seen": 701826400, + "step": 4060 + }, + { + "epoch": 1.5553427805438529, + "grad_norm": 132.81623454348352, + "learning_rate": 5e-06, + "loss": 1.6386, + "num_input_tokens_seen": 701999288, + "step": 4061 + }, + { + "epoch": 1.5553427805438529, + "loss": 1.6399247646331787, + "loss_ce": 0.15263910591602325, + "loss_iou": 0.6787319183349609, + "loss_num": 0.02587890625, + "loss_xval": 1.4872856140136719, + "num_input_tokens_seen": 701999288, + "step": 4061 + }, + { + "epoch": 1.5557257755649176, + "grad_norm": 132.6690892951261, + "learning_rate": 5e-06, + "loss": 1.2982, + "num_input_tokens_seen": 702172496, + "step": 4062 + }, + { + "epoch": 1.5557257755649176, + "loss": 1.3524725437164307, + "loss_ce": 0.18340986967086792, + "loss_iou": 0.5478798151016235, + "loss_num": 0.0146484375, + "loss_xval": 1.169062852859497, + "num_input_tokens_seen": 702172496, + "step": 4062 + }, + { + "epoch": 1.5561087705859824, + "grad_norm": 144.18941232089801, + "learning_rate": 5e-06, + "loss": 1.7399, + "num_input_tokens_seen": 702345424, + "step": 4063 + }, + { + "epoch": 1.5561087705859824, + "loss": 1.7615002393722534, + "loss_ce": 0.1590670645236969, + "loss_iou": 0.749382495880127, + "loss_num": 0.020751953125, + "loss_xval": 1.602433204650879, + "num_input_tokens_seen": 702345424, + "step": 4063 + }, + { + "epoch": 1.5564917656070472, + "grad_norm": 168.94267630592827, + "learning_rate": 5e-06, + "loss": 1.8126, + "num_input_tokens_seen": 702518808, + "step": 4064 + }, + { + "epoch": 1.5564917656070472, + "loss": 1.8514591455459595, + "loss_ce": 0.2286132425069809, + "loss_iou": 0.7452913522720337, + "loss_num": 0.0264892578125, + "loss_xval": 1.6228458881378174, + "num_input_tokens_seen": 702518808, + "step": 4064 + }, + { + "epoch": 1.5568747606281117, + "grad_norm": 144.1567964190179, + "learning_rate": 5e-06, + "loss": 1.5602, + "num_input_tokens_seen": 702688384, + "step": 4065 + }, + { + "epoch": 1.5568747606281117, + "loss": 1.4671590328216553, + "loss_ce": 0.1535789668560028, + "loss_iou": 0.6142637729644775, + "loss_num": 0.0169677734375, + "loss_xval": 1.31358003616333, + "num_input_tokens_seen": 702688384, + "step": 4065 + }, + { + "epoch": 1.5572577556491765, + "grad_norm": 248.88925403808216, + "learning_rate": 5e-06, + "loss": 1.7117, + "num_input_tokens_seen": 702861544, + "step": 4066 + }, + { + "epoch": 1.5572577556491765, + "loss": 1.5558196306228638, + "loss_ce": 0.18789410591125488, + "loss_iou": 0.6358518600463867, + "loss_num": 0.019287109375, + "loss_xval": 1.3679256439208984, + "num_input_tokens_seen": 702861544, + "step": 4066 + }, + { + "epoch": 1.5576407506702412, + "grad_norm": 148.6132169700537, + "learning_rate": 5e-06, + "loss": 1.9878, + "num_input_tokens_seen": 703034504, + "step": 4067 + }, + { + "epoch": 1.5576407506702412, + "loss": 2.099031925201416, + "loss_ce": 0.17101332545280457, + "loss_iou": 0.8806352615356445, + "loss_num": 0.033447265625, + "loss_xval": 1.928018569946289, + "num_input_tokens_seen": 703034504, + "step": 4067 + }, + { + "epoch": 1.558023745691306, + "grad_norm": 244.41205692724475, + "learning_rate": 5e-06, + "loss": 2.0029, + "num_input_tokens_seen": 703207136, + "step": 4068 + }, + { + "epoch": 1.558023745691306, + "loss": 2.001335620880127, + "loss_ce": 0.2015422284603119, + "loss_iou": 0.8007146120071411, + "loss_num": 0.03955078125, + "loss_xval": 1.7997934818267822, + "num_input_tokens_seen": 703207136, + "step": 4068 + }, + { + "epoch": 1.5584067407123707, + "grad_norm": 265.45685510750087, + "learning_rate": 5e-06, + "loss": 1.5545, + "num_input_tokens_seen": 703380376, + "step": 4069 + }, + { + "epoch": 1.5584067407123707, + "loss": 1.5851879119873047, + "loss_ce": 0.16373172402381897, + "loss_iou": 0.6626628637313843, + "loss_num": 0.019287109375, + "loss_xval": 1.4214560985565186, + "num_input_tokens_seen": 703380376, + "step": 4069 + }, + { + "epoch": 1.5587897357334355, + "grad_norm": 127.7041350680125, + "learning_rate": 5e-06, + "loss": 1.4033, + "num_input_tokens_seen": 703553280, + "step": 4070 + }, + { + "epoch": 1.5587897357334355, + "loss": 1.444726586341858, + "loss_ce": 0.1692478358745575, + "loss_iou": 0.5924741625785828, + "loss_num": 0.01806640625, + "loss_xval": 1.275478720664978, + "num_input_tokens_seen": 703553280, + "step": 4070 + }, + { + "epoch": 1.5591727307545002, + "grad_norm": 142.4465541088045, + "learning_rate": 5e-06, + "loss": 1.594, + "num_input_tokens_seen": 703726168, + "step": 4071 + }, + { + "epoch": 1.5591727307545002, + "loss": 1.6197677850723267, + "loss_ce": 0.1557142734527588, + "loss_iou": 0.6873642802238464, + "loss_num": 0.017822265625, + "loss_xval": 1.4640535116195679, + "num_input_tokens_seen": 703726168, + "step": 4071 + }, + { + "epoch": 1.559555725775565, + "grad_norm": 127.87412145966195, + "learning_rate": 5e-06, + "loss": 1.663, + "num_input_tokens_seen": 703899496, + "step": 4072 + }, + { + "epoch": 1.559555725775565, + "loss": 1.7447503805160522, + "loss_ce": 0.23163306713104248, + "loss_iou": 0.7167637348175049, + "loss_num": 0.015869140625, + "loss_xval": 1.5131173133850098, + "num_input_tokens_seen": 703899496, + "step": 4072 + }, + { + "epoch": 1.5599387207966298, + "grad_norm": 204.54413428405158, + "learning_rate": 5e-06, + "loss": 1.6549, + "num_input_tokens_seen": 704072232, + "step": 4073 + }, + { + "epoch": 1.5599387207966298, + "loss": 1.8466553688049316, + "loss_ce": 0.1531267762184143, + "loss_iou": 0.8081443309783936, + "loss_num": 0.01544189453125, + "loss_xval": 1.693528652191162, + "num_input_tokens_seen": 704072232, + "step": 4073 + }, + { + "epoch": 1.5603217158176945, + "grad_norm": 155.9762483650282, + "learning_rate": 5e-06, + "loss": 1.7382, + "num_input_tokens_seen": 704245248, + "step": 4074 + }, + { + "epoch": 1.5603217158176945, + "loss": 1.7038124799728394, + "loss_ce": 0.16794651746749878, + "loss_iou": 0.7059518098831177, + "loss_num": 0.0247802734375, + "loss_xval": 1.5358660221099854, + "num_input_tokens_seen": 704245248, + "step": 4074 + }, + { + "epoch": 1.560704710838759, + "grad_norm": 142.34730879508822, + "learning_rate": 5e-06, + "loss": 2.0228, + "num_input_tokens_seen": 704418272, + "step": 4075 + }, + { + "epoch": 1.560704710838759, + "loss": 1.9907587766647339, + "loss_ce": 0.14460095763206482, + "loss_iou": 0.8328078985214233, + "loss_num": 0.0361328125, + "loss_xval": 1.8461577892303467, + "num_input_tokens_seen": 704418272, + "step": 4075 + }, + { + "epoch": 1.5610877058598238, + "grad_norm": 88.85904020323856, + "learning_rate": 5e-06, + "loss": 1.655, + "num_input_tokens_seen": 704591168, + "step": 4076 + }, + { + "epoch": 1.5610877058598238, + "loss": 1.9035416841506958, + "loss_ce": 0.18996544182300568, + "loss_iou": 0.777259349822998, + "loss_num": 0.03173828125, + "loss_xval": 1.713576316833496, + "num_input_tokens_seen": 704591168, + "step": 4076 + }, + { + "epoch": 1.5614707008808886, + "grad_norm": 120.86826739205884, + "learning_rate": 5e-06, + "loss": 1.2974, + "num_input_tokens_seen": 704764128, + "step": 4077 + }, + { + "epoch": 1.5614707008808886, + "loss": 1.17726469039917, + "loss_ce": 0.1245112419128418, + "loss_iou": 0.48397257924079895, + "loss_num": 0.0169677734375, + "loss_xval": 1.0527534484863281, + "num_input_tokens_seen": 704764128, + "step": 4077 + }, + { + "epoch": 1.561853695901953, + "grad_norm": 116.76087367670702, + "learning_rate": 5e-06, + "loss": 2.2354, + "num_input_tokens_seen": 704937136, + "step": 4078 + }, + { + "epoch": 1.561853695901953, + "loss": 2.2421231269836426, + "loss_ce": 0.17673224210739136, + "loss_iou": 0.9810445308685303, + "loss_num": 0.0206298828125, + "loss_xval": 2.0602335929870605, + "num_input_tokens_seen": 704937136, + "step": 4078 + }, + { + "epoch": 1.5622366909230179, + "grad_norm": 129.41712769878112, + "learning_rate": 5e-06, + "loss": 1.6554, + "num_input_tokens_seen": 705109776, + "step": 4079 + }, + { + "epoch": 1.5622366909230179, + "loss": 1.6967781782150269, + "loss_ce": 0.12966777384281158, + "loss_iou": 0.7334758639335632, + "loss_num": 0.02001953125, + "loss_xval": 1.5671104192733765, + "num_input_tokens_seen": 705109776, + "step": 4079 + }, + { + "epoch": 1.5626196859440826, + "grad_norm": 147.87306043107048, + "learning_rate": 5e-06, + "loss": 1.4896, + "num_input_tokens_seen": 705282960, + "step": 4080 + }, + { + "epoch": 1.5626196859440826, + "loss": 1.5170713663101196, + "loss_ce": 0.14922286570072174, + "loss_iou": 0.646219789981842, + "loss_num": 0.01507568359375, + "loss_xval": 1.367848515510559, + "num_input_tokens_seen": 705282960, + "step": 4080 + }, + { + "epoch": 1.5630026809651474, + "grad_norm": 227.72109178509598, + "learning_rate": 5e-06, + "loss": 2.0063, + "num_input_tokens_seen": 705455600, + "step": 4081 + }, + { + "epoch": 1.5630026809651474, + "loss": 2.1332335472106934, + "loss_ce": 0.13369309902191162, + "loss_iou": 0.9299460053443909, + "loss_num": 0.0279541015625, + "loss_xval": 1.9995403289794922, + "num_input_tokens_seen": 705455600, + "step": 4081 + }, + { + "epoch": 1.5633856759862121, + "grad_norm": 70.03848448318948, + "learning_rate": 5e-06, + "loss": 1.2402, + "num_input_tokens_seen": 705628872, + "step": 4082 + }, + { + "epoch": 1.5633856759862121, + "loss": 1.375079870223999, + "loss_ce": 0.17386046051979065, + "loss_iou": 0.5470055937767029, + "loss_num": 0.021484375, + "loss_xval": 1.2012194395065308, + "num_input_tokens_seen": 705628872, + "step": 4082 + }, + { + "epoch": 1.563768671007277, + "grad_norm": 135.75341219091302, + "learning_rate": 5e-06, + "loss": 1.1816, + "num_input_tokens_seen": 705802112, + "step": 4083 + }, + { + "epoch": 1.563768671007277, + "loss": 1.241959571838379, + "loss_ce": 0.1726619303226471, + "loss_iou": 0.5051383376121521, + "loss_num": 0.01177978515625, + "loss_xval": 1.0692976713180542, + "num_input_tokens_seen": 705802112, + "step": 4083 + }, + { + "epoch": 1.5641516660283417, + "grad_norm": 224.00194830761635, + "learning_rate": 5e-06, + "loss": 1.7552, + "num_input_tokens_seen": 705975256, + "step": 4084 + }, + { + "epoch": 1.5641516660283417, + "loss": 1.9128923416137695, + "loss_ce": 0.15855154395103455, + "loss_iou": 0.8008764982223511, + "loss_num": 0.030517578125, + "loss_xval": 1.7543408870697021, + "num_input_tokens_seen": 705975256, + "step": 4084 + }, + { + "epoch": 1.5645346610494064, + "grad_norm": 88.2324151297439, + "learning_rate": 5e-06, + "loss": 1.3487, + "num_input_tokens_seen": 706148112, + "step": 4085 + }, + { + "epoch": 1.5645346610494064, + "loss": 1.2271016836166382, + "loss_ce": 0.11906194686889648, + "loss_iou": 0.5063360929489136, + "loss_num": 0.01904296875, + "loss_xval": 1.1018750667572021, + "num_input_tokens_seen": 706148112, + "step": 4085 + }, + { + "epoch": 1.5649176560704712, + "grad_norm": 129.6185017710986, + "learning_rate": 5e-06, + "loss": 1.295, + "num_input_tokens_seen": 706320832, + "step": 4086 + }, + { + "epoch": 1.5649176560704712, + "loss": 1.3663123846054077, + "loss_ce": 0.135855033993721, + "loss_iou": 0.5584354400634766, + "loss_num": 0.022705078125, + "loss_xval": 1.2304573059082031, + "num_input_tokens_seen": 706320832, + "step": 4086 + }, + { + "epoch": 1.565300651091536, + "grad_norm": 189.63905035964822, + "learning_rate": 5e-06, + "loss": 1.812, + "num_input_tokens_seen": 706490760, + "step": 4087 + }, + { + "epoch": 1.565300651091536, + "loss": 1.9216313362121582, + "loss_ce": 0.15427327156066895, + "loss_iou": 0.8071714639663696, + "loss_num": 0.030517578125, + "loss_xval": 1.7673580646514893, + "num_input_tokens_seen": 706490760, + "step": 4087 + }, + { + "epoch": 1.5656836461126007, + "grad_norm": 112.03550902850772, + "learning_rate": 5e-06, + "loss": 1.3883, + "num_input_tokens_seen": 706663896, + "step": 4088 + }, + { + "epoch": 1.5656836461126007, + "loss": 1.2862303256988525, + "loss_ce": 0.1765056997537613, + "loss_iou": 0.5050118565559387, + "loss_num": 0.0198974609375, + "loss_xval": 1.1097246408462524, + "num_input_tokens_seen": 706663896, + "step": 4088 + }, + { + "epoch": 1.5660666411336652, + "grad_norm": 264.38863985724606, + "learning_rate": 5e-06, + "loss": 1.3277, + "num_input_tokens_seen": 706836520, + "step": 4089 + }, + { + "epoch": 1.5660666411336652, + "loss": 1.313084602355957, + "loss_ce": 0.13801221549510956, + "loss_iou": 0.5486568212509155, + "loss_num": 0.01556396484375, + "loss_xval": 1.175072431564331, + "num_input_tokens_seen": 706836520, + "step": 4089 + }, + { + "epoch": 1.56644963615473, + "grad_norm": 231.41789883023728, + "learning_rate": 5e-06, + "loss": 1.5801, + "num_input_tokens_seen": 707009448, + "step": 4090 + }, + { + "epoch": 1.56644963615473, + "loss": 1.5929927825927734, + "loss_ce": 0.17305263876914978, + "loss_iou": 0.6497131586074829, + "loss_num": 0.024169921875, + "loss_xval": 1.4199402332305908, + "num_input_tokens_seen": 707009448, + "step": 4090 + }, + { + "epoch": 1.5668326311757947, + "grad_norm": 358.92833373521785, + "learning_rate": 5e-06, + "loss": 2.3622, + "num_input_tokens_seen": 707182544, + "step": 4091 + }, + { + "epoch": 1.5668326311757947, + "loss": 2.406782865524292, + "loss_ce": 0.17092815041542053, + "loss_iou": 0.9906079769134521, + "loss_num": 0.051025390625, + "loss_xval": 2.2358546257019043, + "num_input_tokens_seen": 707182544, + "step": 4091 + }, + { + "epoch": 1.5672156261968593, + "grad_norm": 141.66751245145667, + "learning_rate": 5e-06, + "loss": 2.0203, + "num_input_tokens_seen": 707355544, + "step": 4092 + }, + { + "epoch": 1.5672156261968593, + "loss": 2.183703899383545, + "loss_ce": 0.1479608416557312, + "loss_iou": 0.8810917139053345, + "loss_num": 0.0546875, + "loss_xval": 2.035742998123169, + "num_input_tokens_seen": 707355544, + "step": 4092 + }, + { + "epoch": 1.567598621217924, + "grad_norm": 123.3936171504265, + "learning_rate": 5e-06, + "loss": 1.5455, + "num_input_tokens_seen": 707528248, + "step": 4093 + }, + { + "epoch": 1.567598621217924, + "loss": 1.55316162109375, + "loss_ce": 0.10258376598358154, + "loss_iou": 0.6512075662612915, + "loss_num": 0.0296630859375, + "loss_xval": 1.450577974319458, + "num_input_tokens_seen": 707528248, + "step": 4093 + }, + { + "epoch": 1.5679816162389888, + "grad_norm": 225.26098894949774, + "learning_rate": 5e-06, + "loss": 1.4629, + "num_input_tokens_seen": 707700984, + "step": 4094 + }, + { + "epoch": 1.5679816162389888, + "loss": 1.34732985496521, + "loss_ce": 0.1244833841919899, + "loss_iou": 0.5621830821037292, + "loss_num": 0.0196533203125, + "loss_xval": 1.2030709981918335, + "num_input_tokens_seen": 707700984, + "step": 4094 + }, + { + "epoch": 1.5683646112600536, + "grad_norm": 209.7959169229572, + "learning_rate": 5e-06, + "loss": 1.8999, + "num_input_tokens_seen": 707873912, + "step": 4095 + }, + { + "epoch": 1.5683646112600536, + "loss": 1.8382642269134521, + "loss_ce": 0.16478237509727478, + "loss_iou": 0.7574868202209473, + "loss_num": 0.03173828125, + "loss_xval": 1.6734819412231445, + "num_input_tokens_seen": 707873912, + "step": 4095 + }, + { + "epoch": 1.5687476062811183, + "grad_norm": 70.59792028901389, + "learning_rate": 5e-06, + "loss": 1.3797, + "num_input_tokens_seen": 708046896, + "step": 4096 + }, + { + "epoch": 1.5687476062811183, + "loss": 1.4648287296295166, + "loss_ce": 0.19003352522850037, + "loss_iou": 0.5990675091743469, + "loss_num": 0.01531982421875, + "loss_xval": 1.2747951745986938, + "num_input_tokens_seen": 708046896, + "step": 4096 + }, + { + "epoch": 1.569130601302183, + "grad_norm": 189.0194476326106, + "learning_rate": 5e-06, + "loss": 1.3509, + "num_input_tokens_seen": 708219976, + "step": 4097 + }, + { + "epoch": 1.569130601302183, + "loss": 1.4974284172058105, + "loss_ce": 0.16130970418453217, + "loss_iou": 0.6132650375366211, + "loss_num": 0.02197265625, + "loss_xval": 1.3361186981201172, + "num_input_tokens_seen": 708219976, + "step": 4097 + }, + { + "epoch": 1.5695135963232478, + "grad_norm": 178.25234115410692, + "learning_rate": 5e-06, + "loss": 1.6199, + "num_input_tokens_seen": 708393144, + "step": 4098 + }, + { + "epoch": 1.5695135963232478, + "loss": 1.5991699695587158, + "loss_ce": 0.23091337084770203, + "loss_iou": 0.6257786750793457, + "loss_num": 0.0233154296875, + "loss_xval": 1.3682565689086914, + "num_input_tokens_seen": 708393144, + "step": 4098 + }, + { + "epoch": 1.5698965913443126, + "grad_norm": 203.68978487678905, + "learning_rate": 5e-06, + "loss": 1.4014, + "num_input_tokens_seen": 708565960, + "step": 4099 + }, + { + "epoch": 1.5698965913443126, + "loss": 1.286609172821045, + "loss_ce": 0.15518397092819214, + "loss_iou": 0.5227895975112915, + "loss_num": 0.0172119140625, + "loss_xval": 1.131425142288208, + "num_input_tokens_seen": 708565960, + "step": 4099 + }, + { + "epoch": 1.5702795863653773, + "grad_norm": 119.15708207594575, + "learning_rate": 5e-06, + "loss": 1.3562, + "num_input_tokens_seen": 708738696, + "step": 4100 + }, + { + "epoch": 1.5702795863653773, + "loss": 1.2358953952789307, + "loss_ce": 0.15120267868041992, + "loss_iou": 0.5067323446273804, + "loss_num": 0.01422119140625, + "loss_xval": 1.0795657634735107, + "num_input_tokens_seen": 708738696, + "step": 4100 + }, + { + "epoch": 1.570662581386442, + "grad_norm": 127.40129626622009, + "learning_rate": 5e-06, + "loss": 1.3663, + "num_input_tokens_seen": 708911680, + "step": 4101 + }, + { + "epoch": 1.570662581386442, + "loss": 1.2586824893951416, + "loss_ce": 0.14269477128982544, + "loss_iou": 0.5022382140159607, + "loss_num": 0.0223388671875, + "loss_xval": 1.1159876585006714, + "num_input_tokens_seen": 708911680, + "step": 4101 + }, + { + "epoch": 1.5710455764075069, + "grad_norm": 160.54159834166944, + "learning_rate": 5e-06, + "loss": 1.4902, + "num_input_tokens_seen": 709084704, + "step": 4102 + }, + { + "epoch": 1.5710455764075069, + "loss": 1.2457003593444824, + "loss_ce": 0.15577131509780884, + "loss_iou": 0.5157363414764404, + "loss_num": 0.01165771484375, + "loss_xval": 1.0899291038513184, + "num_input_tokens_seen": 709084704, + "step": 4102 + }, + { + "epoch": 1.5714285714285714, + "grad_norm": 155.78345187403025, + "learning_rate": 5e-06, + "loss": 1.7929, + "num_input_tokens_seen": 709257568, + "step": 4103 + }, + { + "epoch": 1.5714285714285714, + "loss": 1.8913211822509766, + "loss_ce": 0.1973917931318283, + "loss_iou": 0.7813519239425659, + "loss_num": 0.0262451171875, + "loss_xval": 1.6939294338226318, + "num_input_tokens_seen": 709257568, + "step": 4103 + }, + { + "epoch": 1.5718115664496362, + "grad_norm": 81.54697218385583, + "learning_rate": 5e-06, + "loss": 1.667, + "num_input_tokens_seen": 709430528, + "step": 4104 + }, + { + "epoch": 1.5718115664496362, + "loss": 1.6795519590377808, + "loss_ce": 0.178236186504364, + "loss_iou": 0.7043932676315308, + "loss_num": 0.0185546875, + "loss_xval": 1.501315712928772, + "num_input_tokens_seen": 709430528, + "step": 4104 + }, + { + "epoch": 1.572194561470701, + "grad_norm": 97.0537341349617, + "learning_rate": 5e-06, + "loss": 1.2385, + "num_input_tokens_seen": 709603584, + "step": 4105 + }, + { + "epoch": 1.572194561470701, + "loss": 1.094093680381775, + "loss_ce": 0.1425839066505432, + "loss_iou": 0.43873709440231323, + "loss_num": 0.01483154296875, + "loss_xval": 0.9408286809921265, + "num_input_tokens_seen": 709603584, + "step": 4105 + }, + { + "epoch": 1.5725775564917654, + "grad_norm": 171.79064175114, + "learning_rate": 5e-06, + "loss": 1.3332, + "num_input_tokens_seen": 709776600, + "step": 4106 + }, + { + "epoch": 1.5725775564917654, + "loss": 1.4016001224517822, + "loss_ce": 0.14572454988956451, + "loss_iou": 0.5824055075645447, + "loss_num": 0.0181884765625, + "loss_xval": 1.2558754682540894, + "num_input_tokens_seen": 709776600, + "step": 4106 + }, + { + "epoch": 1.5729605515128302, + "grad_norm": 104.86836487044407, + "learning_rate": 5e-06, + "loss": 1.5366, + "num_input_tokens_seen": 709949312, + "step": 4107 + }, + { + "epoch": 1.5729605515128302, + "loss": 1.4513180255889893, + "loss_ce": 0.16940246522426605, + "loss_iou": 0.6044130325317383, + "loss_num": 0.01458740234375, + "loss_xval": 1.2819156646728516, + "num_input_tokens_seen": 709949312, + "step": 4107 + }, + { + "epoch": 1.573343546533895, + "grad_norm": 99.64302601390047, + "learning_rate": 5e-06, + "loss": 1.176, + "num_input_tokens_seen": 710122168, + "step": 4108 + }, + { + "epoch": 1.573343546533895, + "loss": 1.3021981716156006, + "loss_ce": 0.17345380783081055, + "loss_iou": 0.5259047746658325, + "loss_num": 0.015380859375, + "loss_xval": 1.12874436378479, + "num_input_tokens_seen": 710122168, + "step": 4108 + }, + { + "epoch": 1.5737265415549597, + "grad_norm": 165.9908599513972, + "learning_rate": 5e-06, + "loss": 1.6324, + "num_input_tokens_seen": 710294968, + "step": 4109 + }, + { + "epoch": 1.5737265415549597, + "loss": 1.7453044652938843, + "loss_ce": 0.12673524022102356, + "loss_iou": 0.7542919516563416, + "loss_num": 0.02197265625, + "loss_xval": 1.618569254875183, + "num_input_tokens_seen": 710294968, + "step": 4109 + }, + { + "epoch": 1.5741095365760245, + "grad_norm": 131.39107236290215, + "learning_rate": 5e-06, + "loss": 1.5451, + "num_input_tokens_seen": 710467552, + "step": 4110 + }, + { + "epoch": 1.5741095365760245, + "loss": 1.5330334901809692, + "loss_ce": 0.17671683430671692, + "loss_iou": 0.6343045830726624, + "loss_num": 0.017578125, + "loss_xval": 1.3563166856765747, + "num_input_tokens_seen": 710467552, + "step": 4110 + }, + { + "epoch": 1.5744925315970892, + "grad_norm": 95.10638950465642, + "learning_rate": 5e-06, + "loss": 1.2058, + "num_input_tokens_seen": 710640328, + "step": 4111 + }, + { + "epoch": 1.5744925315970892, + "loss": 1.2235019207000732, + "loss_ce": 0.13141286373138428, + "loss_iou": 0.5170376300811768, + "loss_num": 0.0115966796875, + "loss_xval": 1.0920891761779785, + "num_input_tokens_seen": 710640328, + "step": 4111 + }, + { + "epoch": 1.574875526618154, + "grad_norm": 112.59656671003388, + "learning_rate": 5e-06, + "loss": 1.2657, + "num_input_tokens_seen": 710813304, + "step": 4112 + }, + { + "epoch": 1.574875526618154, + "loss": 1.2622308731079102, + "loss_ce": 0.14622503519058228, + "loss_iou": 0.5209240913391113, + "loss_num": 0.01483154296875, + "loss_xval": 1.1160058975219727, + "num_input_tokens_seen": 710813304, + "step": 4112 + }, + { + "epoch": 1.5752585216392188, + "grad_norm": 121.46169215186669, + "learning_rate": 5e-06, + "loss": 1.563, + "num_input_tokens_seen": 710986528, + "step": 4113 + }, + { + "epoch": 1.5752585216392188, + "loss": 1.690927267074585, + "loss_ce": 0.1960771679878235, + "loss_iou": 0.7036018371582031, + "loss_num": 0.017578125, + "loss_xval": 1.4948501586914062, + "num_input_tokens_seen": 710986528, + "step": 4113 + }, + { + "epoch": 1.5756415166602835, + "grad_norm": 96.98294240494276, + "learning_rate": 5e-06, + "loss": 1.6662, + "num_input_tokens_seen": 711159728, + "step": 4114 + }, + { + "epoch": 1.5756415166602835, + "loss": 1.6186466217041016, + "loss_ce": 0.19410072267055511, + "loss_iou": 0.6570056080818176, + "loss_num": 0.0220947265625, + "loss_xval": 1.4245460033416748, + "num_input_tokens_seen": 711159728, + "step": 4114 + }, + { + "epoch": 1.5760245116813483, + "grad_norm": 95.67673641953289, + "learning_rate": 5e-06, + "loss": 1.4595, + "num_input_tokens_seen": 711332608, + "step": 4115 + }, + { + "epoch": 1.5760245116813483, + "loss": 1.4235999584197998, + "loss_ce": 0.16213397681713104, + "loss_iou": 0.5831255316734314, + "loss_num": 0.01904296875, + "loss_xval": 1.2614659070968628, + "num_input_tokens_seen": 711332608, + "step": 4115 + }, + { + "epoch": 1.576407506702413, + "grad_norm": 92.77410602492115, + "learning_rate": 5e-06, + "loss": 1.417, + "num_input_tokens_seen": 711505976, + "step": 4116 + }, + { + "epoch": 1.576407506702413, + "loss": 1.5653266906738281, + "loss_ce": 0.2093820571899414, + "loss_iou": 0.6309142112731934, + "loss_num": 0.018798828125, + "loss_xval": 1.3559446334838867, + "num_input_tokens_seen": 711505976, + "step": 4116 + }, + { + "epoch": 1.5767905017234776, + "grad_norm": 112.31256374166821, + "learning_rate": 5e-06, + "loss": 1.4461, + "num_input_tokens_seen": 711678688, + "step": 4117 + }, + { + "epoch": 1.5767905017234776, + "loss": 1.3936405181884766, + "loss_ce": 0.10780879855155945, + "loss_iou": 0.5804463624954224, + "loss_num": 0.02490234375, + "loss_xval": 1.2477457523345947, + "num_input_tokens_seen": 711678688, + "step": 4117 + }, + { + "epoch": 1.5771734967445423, + "grad_norm": 119.24830662489816, + "learning_rate": 5e-06, + "loss": 1.6079, + "num_input_tokens_seen": 711851200, + "step": 4118 + }, + { + "epoch": 1.5771734967445423, + "loss": 1.512297511100769, + "loss_ce": 0.15411420166492462, + "loss_iou": 0.6256248950958252, + "loss_num": 0.0213623046875, + "loss_xval": 1.3581833839416504, + "num_input_tokens_seen": 711851200, + "step": 4118 + }, + { + "epoch": 1.577556491765607, + "grad_norm": 136.61281502636845, + "learning_rate": 5e-06, + "loss": 1.2418, + "num_input_tokens_seen": 712024120, + "step": 4119 + }, + { + "epoch": 1.577556491765607, + "loss": 1.2185297012329102, + "loss_ce": 0.1887328326702118, + "loss_iou": 0.4820462167263031, + "loss_num": 0.01318359375, + "loss_xval": 1.029796838760376, + "num_input_tokens_seen": 712024120, + "step": 4119 + }, + { + "epoch": 1.5779394867866716, + "grad_norm": 161.70568656741355, + "learning_rate": 5e-06, + "loss": 1.4603, + "num_input_tokens_seen": 712197288, + "step": 4120 + }, + { + "epoch": 1.5779394867866716, + "loss": 1.4775609970092773, + "loss_ce": 0.1906827837228775, + "loss_iou": 0.6030033230781555, + "loss_num": 0.01611328125, + "loss_xval": 1.286878228187561, + "num_input_tokens_seen": 712197288, + "step": 4120 + }, + { + "epoch": 1.5783224818077364, + "grad_norm": 125.21855298836583, + "learning_rate": 5e-06, + "loss": 1.4454, + "num_input_tokens_seen": 712370424, + "step": 4121 + }, + { + "epoch": 1.5783224818077364, + "loss": 1.4849789142608643, + "loss_ce": 0.1686321496963501, + "loss_iou": 0.6066292524337769, + "loss_num": 0.0206298828125, + "loss_xval": 1.3163468837738037, + "num_input_tokens_seen": 712370424, + "step": 4121 + }, + { + "epoch": 1.5787054768288011, + "grad_norm": 202.20400727751363, + "learning_rate": 5e-06, + "loss": 1.707, + "num_input_tokens_seen": 712543424, + "step": 4122 + }, + { + "epoch": 1.5787054768288011, + "loss": 1.7018821239471436, + "loss_ce": 0.15344122052192688, + "loss_iou": 0.7219133377075195, + "loss_num": 0.0208740234375, + "loss_xval": 1.548440933227539, + "num_input_tokens_seen": 712543424, + "step": 4122 + }, + { + "epoch": 1.579088471849866, + "grad_norm": 90.25571528996664, + "learning_rate": 5e-06, + "loss": 1.5607, + "num_input_tokens_seen": 712716384, + "step": 4123 + }, + { + "epoch": 1.579088471849866, + "loss": 1.656998872756958, + "loss_ce": 0.16306447982788086, + "loss_iou": 0.7058295011520386, + "loss_num": 0.0164794921875, + "loss_xval": 1.4939343929290771, + "num_input_tokens_seen": 712716384, + "step": 4123 + }, + { + "epoch": 1.5794714668709307, + "grad_norm": 146.265974471142, + "learning_rate": 5e-06, + "loss": 1.2993, + "num_input_tokens_seen": 712889216, + "step": 4124 + }, + { + "epoch": 1.5794714668709307, + "loss": 1.3837032318115234, + "loss_ce": 0.13485397398471832, + "loss_iou": 0.5820052623748779, + "loss_num": 0.0169677734375, + "loss_xval": 1.2488493919372559, + "num_input_tokens_seen": 712889216, + "step": 4124 + }, + { + "epoch": 1.5798544618919954, + "grad_norm": 180.46149214467812, + "learning_rate": 5e-06, + "loss": 1.5767, + "num_input_tokens_seen": 713061896, + "step": 4125 + }, + { + "epoch": 1.5798544618919954, + "loss": 1.7555794715881348, + "loss_ce": 0.1521853655576706, + "loss_iou": 0.7463535070419312, + "loss_num": 0.0220947265625, + "loss_xval": 1.6033942699432373, + "num_input_tokens_seen": 713061896, + "step": 4125 + }, + { + "epoch": 1.5802374569130602, + "grad_norm": 144.27489392024512, + "learning_rate": 5e-06, + "loss": 1.7659, + "num_input_tokens_seen": 713234720, + "step": 4126 + }, + { + "epoch": 1.5802374569130602, + "loss": 1.6287106275558472, + "loss_ce": 0.16540609300136566, + "loss_iou": 0.6711969375610352, + "loss_num": 0.024169921875, + "loss_xval": 1.4633045196533203, + "num_input_tokens_seen": 713234720, + "step": 4126 + }, + { + "epoch": 1.580620451934125, + "grad_norm": 142.99195314920425, + "learning_rate": 5e-06, + "loss": 1.2814, + "num_input_tokens_seen": 713407728, + "step": 4127 + }, + { + "epoch": 1.580620451934125, + "loss": 1.34083092212677, + "loss_ce": 0.19812652468681335, + "loss_iou": 0.5264760851860046, + "loss_num": 0.0179443359375, + "loss_xval": 1.1427043676376343, + "num_input_tokens_seen": 713407728, + "step": 4127 + }, + { + "epoch": 1.5810034469551897, + "grad_norm": 165.02152487307984, + "learning_rate": 5e-06, + "loss": 1.5148, + "num_input_tokens_seen": 713580632, + "step": 4128 + }, + { + "epoch": 1.5810034469551897, + "loss": 1.6564905643463135, + "loss_ce": 0.21012263000011444, + "loss_iou": 0.6827177405357361, + "loss_num": 0.0162353515625, + "loss_xval": 1.4463680982589722, + "num_input_tokens_seen": 713580632, + "step": 4128 + }, + { + "epoch": 1.5813864419762544, + "grad_norm": 215.9535446466486, + "learning_rate": 5e-06, + "loss": 1.6682, + "num_input_tokens_seen": 713753704, + "step": 4129 + }, + { + "epoch": 1.5813864419762544, + "loss": 1.7163509130477905, + "loss_ce": 0.15969273447990417, + "loss_iou": 0.7143948078155518, + "loss_num": 0.025634765625, + "loss_xval": 1.5566582679748535, + "num_input_tokens_seen": 713753704, + "step": 4129 + }, + { + "epoch": 1.5817694369973192, + "grad_norm": 111.27153619210029, + "learning_rate": 5e-06, + "loss": 1.3561, + "num_input_tokens_seen": 713926688, + "step": 4130 + }, + { + "epoch": 1.5817694369973192, + "loss": 1.3754353523254395, + "loss_ce": 0.13145066797733307, + "loss_iou": 0.5646650195121765, + "loss_num": 0.02294921875, + "loss_xval": 1.243984580039978, + "num_input_tokens_seen": 713926688, + "step": 4130 + }, + { + "epoch": 1.5821524320183837, + "grad_norm": 136.51067513872, + "learning_rate": 5e-06, + "loss": 1.4769, + "num_input_tokens_seen": 714099512, + "step": 4131 + }, + { + "epoch": 1.5821524320183837, + "loss": 1.2997477054595947, + "loss_ce": 0.15273964405059814, + "loss_iou": 0.5227227210998535, + "loss_num": 0.020263671875, + "loss_xval": 1.141392707824707, + "num_input_tokens_seen": 714099512, + "step": 4131 + }, + { + "epoch": 1.5825354270394485, + "grad_norm": 155.1805329899844, + "learning_rate": 5e-06, + "loss": 1.4896, + "num_input_tokens_seen": 714272424, + "step": 4132 + }, + { + "epoch": 1.5825354270394485, + "loss": 1.3481863737106323, + "loss_ce": 0.14173448085784912, + "loss_iou": 0.5591127872467041, + "loss_num": 0.0177001953125, + "loss_xval": 1.2064518928527832, + "num_input_tokens_seen": 714272424, + "step": 4132 + }, + { + "epoch": 1.5829184220605133, + "grad_norm": 93.67945409196703, + "learning_rate": 5e-06, + "loss": 1.4453, + "num_input_tokens_seen": 714445512, + "step": 4133 + }, + { + "epoch": 1.5829184220605133, + "loss": 1.3629486560821533, + "loss_ce": 0.1463022530078888, + "loss_iou": 0.567628026008606, + "loss_num": 0.0162353515625, + "loss_xval": 1.216646432876587, + "num_input_tokens_seen": 714445512, + "step": 4133 + }, + { + "epoch": 1.5833014170815778, + "grad_norm": 176.43149146789915, + "learning_rate": 5e-06, + "loss": 1.3805, + "num_input_tokens_seen": 714618000, + "step": 4134 + }, + { + "epoch": 1.5833014170815778, + "loss": 1.3133476972579956, + "loss_ce": 0.1425570845603943, + "loss_iou": 0.539893627166748, + "loss_num": 0.0181884765625, + "loss_xval": 1.170790672302246, + "num_input_tokens_seen": 714618000, + "step": 4134 + }, + { + "epoch": 1.5836844121026425, + "grad_norm": 100.57607901972257, + "learning_rate": 5e-06, + "loss": 1.361, + "num_input_tokens_seen": 714790840, + "step": 4135 + }, + { + "epoch": 1.5836844121026425, + "loss": 1.1960337162017822, + "loss_ce": 0.20448628067970276, + "loss_iou": 0.46229591965675354, + "loss_num": 0.01336669921875, + "loss_xval": 0.9915474057197571, + "num_input_tokens_seen": 714790840, + "step": 4135 + }, + { + "epoch": 1.5840674071237073, + "grad_norm": 198.05811479622452, + "learning_rate": 5e-06, + "loss": 1.3672, + "num_input_tokens_seen": 714963856, + "step": 4136 + }, + { + "epoch": 1.5840674071237073, + "loss": 1.2496293783187866, + "loss_ce": 0.1796765923500061, + "loss_iou": 0.4971880316734314, + "loss_num": 0.01513671875, + "loss_xval": 1.0699528455734253, + "num_input_tokens_seen": 714963856, + "step": 4136 + }, + { + "epoch": 1.584450402144772, + "grad_norm": 218.06086572369287, + "learning_rate": 5e-06, + "loss": 1.684, + "num_input_tokens_seen": 715136784, + "step": 4137 + }, + { + "epoch": 1.584450402144772, + "loss": 1.6358394622802734, + "loss_ce": 0.18271474540233612, + "loss_iou": 0.6757200956344604, + "loss_num": 0.0203857421875, + "loss_xval": 1.453124761581421, + "num_input_tokens_seen": 715136784, + "step": 4137 + }, + { + "epoch": 1.5848333971658368, + "grad_norm": 139.27848894680739, + "learning_rate": 5e-06, + "loss": 1.4464, + "num_input_tokens_seen": 715310048, + "step": 4138 + }, + { + "epoch": 1.5848333971658368, + "loss": 1.282130241394043, + "loss_ce": 0.17187875509262085, + "loss_iou": 0.5068163871765137, + "loss_num": 0.019287109375, + "loss_xval": 1.1102514266967773, + "num_input_tokens_seen": 715310048, + "step": 4138 + }, + { + "epoch": 1.5852163921869016, + "grad_norm": 117.16573760868806, + "learning_rate": 5e-06, + "loss": 1.4076, + "num_input_tokens_seen": 715482880, + "step": 4139 + }, + { + "epoch": 1.5852163921869016, + "loss": 1.5313211679458618, + "loss_ce": 0.163076251745224, + "loss_iou": 0.6271766424179077, + "loss_num": 0.0228271484375, + "loss_xval": 1.3682448863983154, + "num_input_tokens_seen": 715482880, + "step": 4139 + }, + { + "epoch": 1.5855993872079663, + "grad_norm": 167.40106243290677, + "learning_rate": 5e-06, + "loss": 1.582, + "num_input_tokens_seen": 715655872, + "step": 4140 + }, + { + "epoch": 1.5855993872079663, + "loss": 1.6569092273712158, + "loss_ce": 0.15909257531166077, + "loss_iou": 0.7087472677230835, + "loss_num": 0.01611328125, + "loss_xval": 1.497816801071167, + "num_input_tokens_seen": 715655872, + "step": 4140 + }, + { + "epoch": 1.585982382229031, + "grad_norm": 242.45741736718762, + "learning_rate": 5e-06, + "loss": 1.5618, + "num_input_tokens_seen": 715825728, + "step": 4141 + }, + { + "epoch": 1.585982382229031, + "loss": 1.4228453636169434, + "loss_ce": 0.17762215435504913, + "loss_iou": 0.5792308449745178, + "loss_num": 0.017333984375, + "loss_xval": 1.2452231645584106, + "num_input_tokens_seen": 715825728, + "step": 4141 + }, + { + "epoch": 1.5863653772500959, + "grad_norm": 215.18232499518476, + "learning_rate": 5e-06, + "loss": 2.1397, + "num_input_tokens_seen": 715998824, + "step": 4142 + }, + { + "epoch": 1.5863653772500959, + "loss": 2.1633801460266113, + "loss_ce": 0.15684956312179565, + "loss_iou": 0.9566954374313354, + "loss_num": 0.0186767578125, + "loss_xval": 2.006530523300171, + "num_input_tokens_seen": 715998824, + "step": 4142 + }, + { + "epoch": 1.5867483722711606, + "grad_norm": 196.24639417525137, + "learning_rate": 5e-06, + "loss": 2.2569, + "num_input_tokens_seen": 716171632, + "step": 4143 + }, + { + "epoch": 1.5867483722711606, + "loss": 2.2805094718933105, + "loss_ce": 0.17804448306560516, + "loss_iou": 0.9064571261405945, + "loss_num": 0.057861328125, + "loss_xval": 2.1024651527404785, + "num_input_tokens_seen": 716171632, + "step": 4143 + }, + { + "epoch": 1.5871313672922251, + "grad_norm": 212.68308123228155, + "learning_rate": 5e-06, + "loss": 2.7161, + "num_input_tokens_seen": 716344312, + "step": 4144 + }, + { + "epoch": 1.5871313672922251, + "loss": 2.824314594268799, + "loss_ce": 0.2492833435535431, + "loss_iou": 1.209207534790039, + "loss_num": 0.03125, + "loss_xval": 2.575031280517578, + "num_input_tokens_seen": 716344312, + "step": 4144 + }, + { + "epoch": 1.58751436231329, + "grad_norm": 97.51620370993898, + "learning_rate": 5e-06, + "loss": 1.5787, + "num_input_tokens_seen": 716517296, + "step": 4145 + }, + { + "epoch": 1.58751436231329, + "loss": 1.413367509841919, + "loss_ce": 0.13391654193401337, + "loss_iou": 0.5669410228729248, + "loss_num": 0.0291748046875, + "loss_xval": 1.2604079246520996, + "num_input_tokens_seen": 716517296, + "step": 4145 + }, + { + "epoch": 1.5878973573343547, + "grad_norm": 84.09415924064281, + "learning_rate": 5e-06, + "loss": 1.6434, + "num_input_tokens_seen": 716690456, + "step": 4146 + }, + { + "epoch": 1.5878973573343547, + "loss": 1.9350314140319824, + "loss_ce": 0.12022538483142853, + "loss_iou": 0.8216485977172852, + "loss_num": 0.034423828125, + "loss_xval": 1.8148059844970703, + "num_input_tokens_seen": 716690456, + "step": 4146 + }, + { + "epoch": 1.5882803523554194, + "grad_norm": 131.85175873613758, + "learning_rate": 5e-06, + "loss": 2.0919, + "num_input_tokens_seen": 716863408, + "step": 4147 + }, + { + "epoch": 1.5882803523554194, + "loss": 2.241295576095581, + "loss_ce": 0.14061076939105988, + "loss_iou": 0.9912604093551636, + "loss_num": 0.023681640625, + "loss_xval": 2.100684881210327, + "num_input_tokens_seen": 716863408, + "step": 4147 + }, + { + "epoch": 1.588663347376484, + "grad_norm": 78.5322331711656, + "learning_rate": 5e-06, + "loss": 1.7065, + "num_input_tokens_seen": 717036496, + "step": 4148 + }, + { + "epoch": 1.588663347376484, + "loss": 1.801315188407898, + "loss_ce": 0.11861561238765717, + "loss_iou": 0.7705795764923096, + "loss_num": 0.0283203125, + "loss_xval": 1.6826996803283691, + "num_input_tokens_seen": 717036496, + "step": 4148 + }, + { + "epoch": 1.5890463423975487, + "grad_norm": 108.38839175352602, + "learning_rate": 5e-06, + "loss": 1.4917, + "num_input_tokens_seen": 717209536, + "step": 4149 + }, + { + "epoch": 1.5890463423975487, + "loss": 1.5406887531280518, + "loss_ce": 0.19515490531921387, + "loss_iou": 0.625128984451294, + "loss_num": 0.01904296875, + "loss_xval": 1.337751865386963, + "num_input_tokens_seen": 717209536, + "step": 4149 + }, + { + "epoch": 1.5894293374186135, + "grad_norm": 82.13063350594624, + "learning_rate": 5e-06, + "loss": 1.7176, + "num_input_tokens_seen": 717382800, + "step": 4150 + }, + { + "epoch": 1.5894293374186135, + "loss": 1.8092962503433228, + "loss_ce": 0.1871616542339325, + "loss_iou": 0.7534501552581787, + "loss_num": 0.0230712890625, + "loss_xval": 1.6221346855163574, + "num_input_tokens_seen": 717382800, + "step": 4150 + }, + { + "epoch": 1.5898123324396782, + "grad_norm": 120.62412092658283, + "learning_rate": 5e-06, + "loss": 1.5449, + "num_input_tokens_seen": 717555752, + "step": 4151 + }, + { + "epoch": 1.5898123324396782, + "loss": 1.7137246131896973, + "loss_ce": 0.21029409766197205, + "loss_iou": 0.6971803903579712, + "loss_num": 0.0218505859375, + "loss_xval": 1.5034306049346924, + "num_input_tokens_seen": 717555752, + "step": 4151 + }, + { + "epoch": 1.590195327460743, + "grad_norm": 231.08103008179881, + "learning_rate": 5e-06, + "loss": 1.8221, + "num_input_tokens_seen": 717728968, + "step": 4152 + }, + { + "epoch": 1.590195327460743, + "loss": 1.7823009490966797, + "loss_ce": 0.22162017226219177, + "loss_iou": 0.7400267124176025, + "loss_num": 0.01611328125, + "loss_xval": 1.560680866241455, + "num_input_tokens_seen": 717728968, + "step": 4152 + }, + { + "epoch": 1.5905783224818077, + "grad_norm": 148.41194264439528, + "learning_rate": 5e-06, + "loss": 1.4061, + "num_input_tokens_seen": 717901888, + "step": 4153 + }, + { + "epoch": 1.5905783224818077, + "loss": 1.4149425029754639, + "loss_ce": 0.15381485223770142, + "loss_iou": 0.5752658843994141, + "loss_num": 0.0220947265625, + "loss_xval": 1.2611274719238281, + "num_input_tokens_seen": 717901888, + "step": 4153 + }, + { + "epoch": 1.5909613175028725, + "grad_norm": 172.7051509765074, + "learning_rate": 5e-06, + "loss": 1.4777, + "num_input_tokens_seen": 718074976, + "step": 4154 + }, + { + "epoch": 1.5909613175028725, + "loss": 1.486426830291748, + "loss_ce": 0.1446625143289566, + "loss_iou": 0.6393422484397888, + "loss_num": 0.01263427734375, + "loss_xval": 1.3417643308639526, + "num_input_tokens_seen": 718074976, + "step": 4154 + }, + { + "epoch": 1.5913443125239373, + "grad_norm": 107.68109281663867, + "learning_rate": 5e-06, + "loss": 1.7517, + "num_input_tokens_seen": 718247808, + "step": 4155 + }, + { + "epoch": 1.5913443125239373, + "loss": 1.8165727853775024, + "loss_ce": 0.16311918199062347, + "loss_iou": 0.7450922727584839, + "loss_num": 0.03271484375, + "loss_xval": 1.6024281978607178, + "num_input_tokens_seen": 718247808, + "step": 4155 + }, + { + "epoch": 1.591727307545002, + "grad_norm": 109.20059029580428, + "learning_rate": 5e-06, + "loss": 1.4292, + "num_input_tokens_seen": 718420800, + "step": 4156 + }, + { + "epoch": 1.591727307545002, + "loss": 1.460224986076355, + "loss_ce": 0.13261058926582336, + "loss_iou": 0.6007121205329895, + "loss_num": 0.0252685546875, + "loss_xval": 1.327614426612854, + "num_input_tokens_seen": 718420800, + "step": 4156 + }, + { + "epoch": 1.5921103025660668, + "grad_norm": 96.94773639437696, + "learning_rate": 5e-06, + "loss": 1.2066, + "num_input_tokens_seen": 718594040, + "step": 4157 + }, + { + "epoch": 1.5921103025660668, + "loss": 1.2734310626983643, + "loss_ce": 0.1546371579170227, + "loss_iou": 0.5315343737602234, + "loss_num": 0.01116943359375, + "loss_xval": 1.1187938451766968, + "num_input_tokens_seen": 718594040, + "step": 4157 + }, + { + "epoch": 1.5924932975871313, + "grad_norm": 152.5733884973267, + "learning_rate": 5e-06, + "loss": 1.6566, + "num_input_tokens_seen": 718767408, + "step": 4158 + }, + { + "epoch": 1.5924932975871313, + "loss": 1.4709134101867676, + "loss_ce": 0.16556543111801147, + "loss_iou": 0.6193030476570129, + "loss_num": 0.01336669921875, + "loss_xval": 1.3053480386734009, + "num_input_tokens_seen": 718767408, + "step": 4158 + }, + { + "epoch": 1.592876292608196, + "grad_norm": 151.32500543632838, + "learning_rate": 5e-06, + "loss": 1.5158, + "num_input_tokens_seen": 718940264, + "step": 4159 + }, + { + "epoch": 1.592876292608196, + "loss": 1.4600410461425781, + "loss_ce": 0.18419691920280457, + "loss_iou": 0.6048715710639954, + "loss_num": 0.01318359375, + "loss_xval": 1.2758442163467407, + "num_input_tokens_seen": 718940264, + "step": 4159 + }, + { + "epoch": 1.5932592876292608, + "grad_norm": 155.07072042887276, + "learning_rate": 5e-06, + "loss": 2.0443, + "num_input_tokens_seen": 719113560, + "step": 4160 + }, + { + "epoch": 1.5932592876292608, + "loss": 2.1296849250793457, + "loss_ce": 0.15815117955207825, + "loss_iou": 0.8649172782897949, + "loss_num": 0.04833984375, + "loss_xval": 1.9715337753295898, + "num_input_tokens_seen": 719113560, + "step": 4160 + }, + { + "epoch": 1.5936422826503256, + "grad_norm": 140.57978385602607, + "learning_rate": 5e-06, + "loss": 1.2904, + "num_input_tokens_seen": 719286400, + "step": 4161 + }, + { + "epoch": 1.5936422826503256, + "loss": 1.3131370544433594, + "loss_ce": 0.16869494318962097, + "loss_iou": 0.543198823928833, + "loss_num": 0.0115966796875, + "loss_xval": 1.144442081451416, + "num_input_tokens_seen": 719286400, + "step": 4161 + }, + { + "epoch": 1.5940252776713901, + "grad_norm": 136.09479291602526, + "learning_rate": 5e-06, + "loss": 1.2916, + "num_input_tokens_seen": 719459200, + "step": 4162 + }, + { + "epoch": 1.5940252776713901, + "loss": 1.2238564491271973, + "loss_ce": 0.16852031648159027, + "loss_iou": 0.4966316819190979, + "loss_num": 0.01239013671875, + "loss_xval": 1.0553361177444458, + "num_input_tokens_seen": 719459200, + "step": 4162 + }, + { + "epoch": 1.5944082726924549, + "grad_norm": 157.00291345601957, + "learning_rate": 5e-06, + "loss": 1.6166, + "num_input_tokens_seen": 719632456, + "step": 4163 + }, + { + "epoch": 1.5944082726924549, + "loss": 1.6060909032821655, + "loss_ce": 0.18971838057041168, + "loss_iou": 0.6511489152908325, + "loss_num": 0.0228271484375, + "loss_xval": 1.416372537612915, + "num_input_tokens_seen": 719632456, + "step": 4163 + }, + { + "epoch": 1.5947912677135196, + "grad_norm": 145.2367620609835, + "learning_rate": 5e-06, + "loss": 1.6286, + "num_input_tokens_seen": 719805552, + "step": 4164 + }, + { + "epoch": 1.5947912677135196, + "loss": 1.5709772109985352, + "loss_ce": 0.13955217599868774, + "loss_iou": 0.6321249008178711, + "loss_num": 0.033447265625, + "loss_xval": 1.4314250946044922, + "num_input_tokens_seen": 719805552, + "step": 4164 + }, + { + "epoch": 1.5951742627345844, + "grad_norm": 108.52186220452043, + "learning_rate": 5e-06, + "loss": 1.4935, + "num_input_tokens_seen": 719978200, + "step": 4165 + }, + { + "epoch": 1.5951742627345844, + "loss": 1.5908496379852295, + "loss_ce": 0.18345247209072113, + "loss_iou": 0.6416716575622559, + "loss_num": 0.0247802734375, + "loss_xval": 1.4073972702026367, + "num_input_tokens_seen": 719978200, + "step": 4165 + }, + { + "epoch": 1.5955572577556492, + "grad_norm": 156.18950512363426, + "learning_rate": 5e-06, + "loss": 1.6529, + "num_input_tokens_seen": 720151256, + "step": 4166 + }, + { + "epoch": 1.5955572577556492, + "loss": 1.686915397644043, + "loss_ce": 0.1795039176940918, + "loss_iou": 0.7060678601264954, + "loss_num": 0.01904296875, + "loss_xval": 1.5074115991592407, + "num_input_tokens_seen": 720151256, + "step": 4166 + }, + { + "epoch": 1.595940252776714, + "grad_norm": 125.43865878654431, + "learning_rate": 5e-06, + "loss": 1.5317, + "num_input_tokens_seen": 720320352, + "step": 4167 + }, + { + "epoch": 1.595940252776714, + "loss": 1.6104614734649658, + "loss_ce": 0.13296779990196228, + "loss_iou": 0.6841661930084229, + "loss_num": 0.0218505859375, + "loss_xval": 1.4774937629699707, + "num_input_tokens_seen": 720320352, + "step": 4167 + }, + { + "epoch": 1.5963232477977787, + "grad_norm": 136.12731473282872, + "learning_rate": 5e-06, + "loss": 1.3777, + "num_input_tokens_seen": 720493408, + "step": 4168 + }, + { + "epoch": 1.5963232477977787, + "loss": 1.5521327257156372, + "loss_ce": 0.16964644193649292, + "loss_iou": 0.6411638259887695, + "loss_num": 0.02001953125, + "loss_xval": 1.382486343383789, + "num_input_tokens_seen": 720493408, + "step": 4168 + }, + { + "epoch": 1.5967062428188434, + "grad_norm": 132.3225138177353, + "learning_rate": 5e-06, + "loss": 1.4554, + "num_input_tokens_seen": 720666600, + "step": 4169 + }, + { + "epoch": 1.5967062428188434, + "loss": 1.4904382228851318, + "loss_ce": 0.12942801415920258, + "loss_iou": 0.6297086477279663, + "loss_num": 0.020263671875, + "loss_xval": 1.3610103130340576, + "num_input_tokens_seen": 720666600, + "step": 4169 + }, + { + "epoch": 1.5970892378399082, + "grad_norm": 115.20173630301336, + "learning_rate": 5e-06, + "loss": 1.4385, + "num_input_tokens_seen": 720839496, + "step": 4170 + }, + { + "epoch": 1.5970892378399082, + "loss": 1.5010660886764526, + "loss_ce": 0.1956491619348526, + "loss_iou": 0.5966476798057556, + "loss_num": 0.0224609375, + "loss_xval": 1.3054169416427612, + "num_input_tokens_seen": 720839496, + "step": 4170 + }, + { + "epoch": 1.597472232860973, + "grad_norm": 142.6082978665598, + "learning_rate": 5e-06, + "loss": 1.3468, + "num_input_tokens_seen": 721012352, + "step": 4171 + }, + { + "epoch": 1.597472232860973, + "loss": 1.3007220029830933, + "loss_ce": 0.1823827028274536, + "loss_iou": 0.5270956754684448, + "loss_num": 0.0128173828125, + "loss_xval": 1.1183393001556396, + "num_input_tokens_seen": 721012352, + "step": 4171 + }, + { + "epoch": 1.5978552278820375, + "grad_norm": 168.93688929215168, + "learning_rate": 5e-06, + "loss": 1.9489, + "num_input_tokens_seen": 721185152, + "step": 4172 + }, + { + "epoch": 1.5978552278820375, + "loss": 1.736716389656067, + "loss_ce": 0.16597309708595276, + "loss_iou": 0.7086504697799683, + "loss_num": 0.03076171875, + "loss_xval": 1.5707433223724365, + "num_input_tokens_seen": 721185152, + "step": 4172 + }, + { + "epoch": 1.5982382229031022, + "grad_norm": 115.08275550344064, + "learning_rate": 5e-06, + "loss": 1.5576, + "num_input_tokens_seen": 721358280, + "step": 4173 + }, + { + "epoch": 1.5982382229031022, + "loss": 1.4300172328948975, + "loss_ce": 0.16085779666900635, + "loss_iou": 0.5783511400222778, + "loss_num": 0.0224609375, + "loss_xval": 1.2691595554351807, + "num_input_tokens_seen": 721358280, + "step": 4173 + }, + { + "epoch": 1.598621217924167, + "grad_norm": 151.18544005799114, + "learning_rate": 5e-06, + "loss": 1.5803, + "num_input_tokens_seen": 721531232, + "step": 4174 + }, + { + "epoch": 1.598621217924167, + "loss": 1.5780599117279053, + "loss_ce": 0.18436744809150696, + "loss_iou": 0.6431353092193604, + "loss_num": 0.021484375, + "loss_xval": 1.3936924934387207, + "num_input_tokens_seen": 721531232, + "step": 4174 + }, + { + "epoch": 1.5990042129452318, + "grad_norm": 95.07224439462104, + "learning_rate": 5e-06, + "loss": 1.8869, + "num_input_tokens_seen": 721704312, + "step": 4175 + }, + { + "epoch": 1.5990042129452318, + "loss": 1.758817434310913, + "loss_ce": 0.13629360496997833, + "loss_iou": 0.7348763942718506, + "loss_num": 0.030517578125, + "loss_xval": 1.6225237846374512, + "num_input_tokens_seen": 721704312, + "step": 4175 + }, + { + "epoch": 1.5993872079662963, + "grad_norm": 86.37329986341791, + "learning_rate": 5e-06, + "loss": 1.2869, + "num_input_tokens_seen": 721877168, + "step": 4176 + }, + { + "epoch": 1.5993872079662963, + "loss": 1.2956174612045288, + "loss_ce": 0.17034663259983063, + "loss_iou": 0.5340862274169922, + "loss_num": 0.01141357421875, + "loss_xval": 1.1252708435058594, + "num_input_tokens_seen": 721877168, + "step": 4176 + }, + { + "epoch": 1.599770202987361, + "grad_norm": 141.03852514451134, + "learning_rate": 5e-06, + "loss": 1.2412, + "num_input_tokens_seen": 722050448, + "step": 4177 + }, + { + "epoch": 1.599770202987361, + "loss": 1.272310733795166, + "loss_ce": 0.19302532076835632, + "loss_iou": 0.5057986974716187, + "loss_num": 0.0135498046875, + "loss_xval": 1.0792853832244873, + "num_input_tokens_seen": 722050448, + "step": 4177 + }, + { + "epoch": 1.6001531980084258, + "grad_norm": 151.4463610938401, + "learning_rate": 5e-06, + "loss": 1.4196, + "num_input_tokens_seen": 722223512, + "step": 4178 + }, + { + "epoch": 1.6001531980084258, + "loss": 1.5337145328521729, + "loss_ce": 0.1342497169971466, + "loss_iou": 0.6537882089614868, + "loss_num": 0.018310546875, + "loss_xval": 1.3994648456573486, + "num_input_tokens_seen": 722223512, + "step": 4178 + }, + { + "epoch": 1.6005361930294906, + "grad_norm": 124.17179064680883, + "learning_rate": 5e-06, + "loss": 1.3181, + "num_input_tokens_seen": 722396256, + "step": 4179 + }, + { + "epoch": 1.6005361930294906, + "loss": 1.4280534982681274, + "loss_ce": 0.16537800431251526, + "loss_iou": 0.5884453058242798, + "loss_num": 0.01708984375, + "loss_xval": 1.2626755237579346, + "num_input_tokens_seen": 722396256, + "step": 4179 + }, + { + "epoch": 1.6009191880505553, + "grad_norm": 371.16790633061066, + "learning_rate": 5e-06, + "loss": 1.4541, + "num_input_tokens_seen": 722569264, + "step": 4180 + }, + { + "epoch": 1.6009191880505553, + "loss": 1.4142394065856934, + "loss_ce": 0.1499151587486267, + "loss_iou": 0.5764980912208557, + "loss_num": 0.022216796875, + "loss_xval": 1.2643243074417114, + "num_input_tokens_seen": 722569264, + "step": 4180 + }, + { + "epoch": 1.60130218307162, + "grad_norm": 158.38630178691437, + "learning_rate": 5e-06, + "loss": 1.9153, + "num_input_tokens_seen": 722742344, + "step": 4181 + }, + { + "epoch": 1.60130218307162, + "loss": 1.7647039890289307, + "loss_ce": 0.19260618090629578, + "loss_iou": 0.728462278842926, + "loss_num": 0.0230712890625, + "loss_xval": 1.572097897529602, + "num_input_tokens_seen": 722742344, + "step": 4181 + }, + { + "epoch": 1.6016851780926848, + "grad_norm": 251.06629771242092, + "learning_rate": 5e-06, + "loss": 1.7332, + "num_input_tokens_seen": 722915640, + "step": 4182 + }, + { + "epoch": 1.6016851780926848, + "loss": 1.745754599571228, + "loss_ce": 0.15105757117271423, + "loss_iou": 0.7322850227355957, + "loss_num": 0.0260009765625, + "loss_xval": 1.5946969985961914, + "num_input_tokens_seen": 722915640, + "step": 4182 + }, + { + "epoch": 1.6020681731137496, + "grad_norm": 131.27761042079712, + "learning_rate": 5e-06, + "loss": 1.6333, + "num_input_tokens_seen": 723088552, + "step": 4183 + }, + { + "epoch": 1.6020681731137496, + "loss": 1.581559658050537, + "loss_ce": 0.15056690573692322, + "loss_iou": 0.6644862294197083, + "loss_num": 0.0203857421875, + "loss_xval": 1.4309927225112915, + "num_input_tokens_seen": 723088552, + "step": 4183 + }, + { + "epoch": 1.6024511681348144, + "grad_norm": 146.93881199696452, + "learning_rate": 5e-06, + "loss": 1.6093, + "num_input_tokens_seen": 723261424, + "step": 4184 + }, + { + "epoch": 1.6024511681348144, + "loss": 1.342673897743225, + "loss_ce": 0.10359810292720795, + "loss_iou": 0.562012255191803, + "loss_num": 0.02294921875, + "loss_xval": 1.229676365852356, + "num_input_tokens_seen": 723261424, + "step": 4184 + }, + { + "epoch": 1.6028341631558791, + "grad_norm": 165.6412352146461, + "learning_rate": 5e-06, + "loss": 1.3122, + "num_input_tokens_seen": 723433976, + "step": 4185 + }, + { + "epoch": 1.6028341631558791, + "loss": 1.2699909210205078, + "loss_ce": 0.1705361157655716, + "loss_iou": 0.5034626722335815, + "loss_num": 0.0185546875, + "loss_xval": 1.099454641342163, + "num_input_tokens_seen": 723433976, + "step": 4185 + }, + { + "epoch": 1.6032171581769437, + "grad_norm": 124.46698113894391, + "learning_rate": 5e-06, + "loss": 1.4424, + "num_input_tokens_seen": 723606664, + "step": 4186 + }, + { + "epoch": 1.6032171581769437, + "loss": 1.5509071350097656, + "loss_ce": 0.16748195886611938, + "loss_iou": 0.646195650100708, + "loss_num": 0.0181884765625, + "loss_xval": 1.383425235748291, + "num_input_tokens_seen": 723606664, + "step": 4186 + }, + { + "epoch": 1.6036001531980084, + "grad_norm": 99.2164667549704, + "learning_rate": 5e-06, + "loss": 1.342, + "num_input_tokens_seen": 723779400, + "step": 4187 + }, + { + "epoch": 1.6036001531980084, + "loss": 1.476794719696045, + "loss_ce": 0.1615278273820877, + "loss_iou": 0.6092020273208618, + "loss_num": 0.0194091796875, + "loss_xval": 1.3152668476104736, + "num_input_tokens_seen": 723779400, + "step": 4187 + }, + { + "epoch": 1.6039831482190732, + "grad_norm": 150.00525951319077, + "learning_rate": 5e-06, + "loss": 1.396, + "num_input_tokens_seen": 723952440, + "step": 4188 + }, + { + "epoch": 1.6039831482190732, + "loss": 1.4182698726654053, + "loss_ce": 0.17111729085445404, + "loss_iou": 0.58997642993927, + "loss_num": 0.013427734375, + "loss_xval": 1.24715256690979, + "num_input_tokens_seen": 723952440, + "step": 4188 + }, + { + "epoch": 1.604366143240138, + "grad_norm": 193.94537393184032, + "learning_rate": 5e-06, + "loss": 1.5548, + "num_input_tokens_seen": 724125104, + "step": 4189 + }, + { + "epoch": 1.604366143240138, + "loss": 1.5976792573928833, + "loss_ce": 0.1452297568321228, + "loss_iou": 0.6719491481781006, + "loss_num": 0.021728515625, + "loss_xval": 1.4524493217468262, + "num_input_tokens_seen": 724125104, + "step": 4189 + }, + { + "epoch": 1.6047491382612025, + "grad_norm": 175.33307876378808, + "learning_rate": 5e-06, + "loss": 1.5052, + "num_input_tokens_seen": 724297976, + "step": 4190 + }, + { + "epoch": 1.6047491382612025, + "loss": 1.55488920211792, + "loss_ce": 0.18336597084999084, + "loss_iou": 0.645859956741333, + "loss_num": 0.0159912109375, + "loss_xval": 1.371523380279541, + "num_input_tokens_seen": 724297976, + "step": 4190 + }, + { + "epoch": 1.6051321332822672, + "grad_norm": 145.4086847251544, + "learning_rate": 5e-06, + "loss": 1.6218, + "num_input_tokens_seen": 724470944, + "step": 4191 + }, + { + "epoch": 1.6051321332822672, + "loss": 1.651831030845642, + "loss_ce": 0.20629402995109558, + "loss_iou": 0.6676690578460693, + "loss_num": 0.0220947265625, + "loss_xval": 1.4455370903015137, + "num_input_tokens_seen": 724470944, + "step": 4191 + }, + { + "epoch": 1.605515128303332, + "grad_norm": 83.23085303981833, + "learning_rate": 5e-06, + "loss": 1.4423, + "num_input_tokens_seen": 724643840, + "step": 4192 + }, + { + "epoch": 1.605515128303332, + "loss": 1.6921238899230957, + "loss_ce": 0.18480592966079712, + "loss_iou": 0.6849792003631592, + "loss_num": 0.0274658203125, + "loss_xval": 1.5073180198669434, + "num_input_tokens_seen": 724643840, + "step": 4192 + }, + { + "epoch": 1.6058981233243967, + "grad_norm": 235.94366271150906, + "learning_rate": 5e-06, + "loss": 1.3723, + "num_input_tokens_seen": 724816920, + "step": 4193 + }, + { + "epoch": 1.6058981233243967, + "loss": 1.2548590898513794, + "loss_ce": 0.14185810089111328, + "loss_iou": 0.5108004212379456, + "loss_num": 0.018310546875, + "loss_xval": 1.1130009889602661, + "num_input_tokens_seen": 724816920, + "step": 4193 + }, + { + "epoch": 1.6062811183454615, + "grad_norm": 153.38321902166095, + "learning_rate": 5e-06, + "loss": 1.6748, + "num_input_tokens_seen": 724989960, + "step": 4194 + }, + { + "epoch": 1.6062811183454615, + "loss": 1.7610971927642822, + "loss_ce": 0.17520743608474731, + "loss_iou": 0.7269964218139648, + "loss_num": 0.0263671875, + "loss_xval": 1.5858898162841797, + "num_input_tokens_seen": 724989960, + "step": 4194 + }, + { + "epoch": 1.6066641133665263, + "grad_norm": 251.33073815191366, + "learning_rate": 5e-06, + "loss": 1.998, + "num_input_tokens_seen": 725163144, + "step": 4195 + }, + { + "epoch": 1.6066641133665263, + "loss": 2.229393720626831, + "loss_ce": 0.21598659455776215, + "loss_iou": 0.9182636141777039, + "loss_num": 0.035400390625, + "loss_xval": 2.0134072303771973, + "num_input_tokens_seen": 725163144, + "step": 4195 + }, + { + "epoch": 1.607047108387591, + "grad_norm": 195.8249379939445, + "learning_rate": 5e-06, + "loss": 1.4789, + "num_input_tokens_seen": 725335928, + "step": 4196 + }, + { + "epoch": 1.607047108387591, + "loss": 1.4367557764053345, + "loss_ce": 0.15262383222579956, + "loss_iou": 0.5933904051780701, + "loss_num": 0.01953125, + "loss_xval": 1.2841318845748901, + "num_input_tokens_seen": 725335928, + "step": 4196 + }, + { + "epoch": 1.6074301034086558, + "grad_norm": 124.93389924958187, + "learning_rate": 5e-06, + "loss": 1.4569, + "num_input_tokens_seen": 725508936, + "step": 4197 + }, + { + "epoch": 1.6074301034086558, + "loss": 1.4103691577911377, + "loss_ce": 0.15445317327976227, + "loss_iou": 0.5805336833000183, + "loss_num": 0.0189208984375, + "loss_xval": 1.2559159994125366, + "num_input_tokens_seen": 725508936, + "step": 4197 + }, + { + "epoch": 1.6078130984297205, + "grad_norm": 134.4865770057758, + "learning_rate": 5e-06, + "loss": 1.6511, + "num_input_tokens_seen": 725682192, + "step": 4198 + }, + { + "epoch": 1.6078130984297205, + "loss": 1.6141129732131958, + "loss_ce": 0.17460237443447113, + "loss_iou": 0.6625043153762817, + "loss_num": 0.02294921875, + "loss_xval": 1.4395105838775635, + "num_input_tokens_seen": 725682192, + "step": 4198 + }, + { + "epoch": 1.6081960934507853, + "grad_norm": 124.76184771201105, + "learning_rate": 5e-06, + "loss": 1.4239, + "num_input_tokens_seen": 725855096, + "step": 4199 + }, + { + "epoch": 1.6081960934507853, + "loss": 1.4835871458053589, + "loss_ce": 0.17362385988235474, + "loss_iou": 0.6245250701904297, + "loss_num": 0.01220703125, + "loss_xval": 1.3099632263183594, + "num_input_tokens_seen": 725855096, + "step": 4199 + }, + { + "epoch": 1.6085790884718498, + "grad_norm": 113.66008530444972, + "learning_rate": 5e-06, + "loss": 1.668, + "num_input_tokens_seen": 726027752, + "step": 4200 + }, + { + "epoch": 1.6085790884718498, + "loss": 1.6983070373535156, + "loss_ce": 0.17619121074676514, + "loss_iou": 0.71070396900177, + "loss_num": 0.0201416015625, + "loss_xval": 1.52211594581604, + "num_input_tokens_seen": 726027752, + "step": 4200 + }, + { + "epoch": 1.6089620834929146, + "grad_norm": 79.37422880142995, + "learning_rate": 5e-06, + "loss": 1.1525, + "num_input_tokens_seen": 726200240, + "step": 4201 + }, + { + "epoch": 1.6089620834929146, + "loss": 1.1343028545379639, + "loss_ce": 0.1569584608078003, + "loss_iou": 0.4649295508861542, + "loss_num": 0.009521484375, + "loss_xval": 0.9773444533348083, + "num_input_tokens_seen": 726200240, + "step": 4201 + }, + { + "epoch": 1.6093450785139793, + "grad_norm": 121.0359814229709, + "learning_rate": 5e-06, + "loss": 1.1703, + "num_input_tokens_seen": 726372488, + "step": 4202 + }, + { + "epoch": 1.6093450785139793, + "loss": 1.114872694015503, + "loss_ce": 0.13682061433792114, + "loss_iou": 0.447262704372406, + "loss_num": 0.0167236328125, + "loss_xval": 0.978052020072937, + "num_input_tokens_seen": 726372488, + "step": 4202 + }, + { + "epoch": 1.6097280735350439, + "grad_norm": 152.19469348382117, + "learning_rate": 5e-06, + "loss": 1.2371, + "num_input_tokens_seen": 726545560, + "step": 4203 + }, + { + "epoch": 1.6097280735350439, + "loss": 1.3882474899291992, + "loss_ce": 0.1839291751384735, + "loss_iou": 0.5568863749504089, + "loss_num": 0.01806640625, + "loss_xval": 1.2043184041976929, + "num_input_tokens_seen": 726545560, + "step": 4203 + }, + { + "epoch": 1.6101110685561086, + "grad_norm": 152.85058899432732, + "learning_rate": 5e-06, + "loss": 1.3286, + "num_input_tokens_seen": 726718288, + "step": 4204 + }, + { + "epoch": 1.6101110685561086, + "loss": 1.3046131134033203, + "loss_ce": 0.15318046510219574, + "loss_iou": 0.5195334553718567, + "loss_num": 0.0224609375, + "loss_xval": 1.1514326333999634, + "num_input_tokens_seen": 726718288, + "step": 4204 + }, + { + "epoch": 1.6104940635771734, + "grad_norm": 102.86555761747161, + "learning_rate": 5e-06, + "loss": 1.4742, + "num_input_tokens_seen": 726891280, + "step": 4205 + }, + { + "epoch": 1.6104940635771734, + "loss": 1.4549144506454468, + "loss_ce": 0.18275438249111176, + "loss_iou": 0.5906088352203369, + "loss_num": 0.0181884765625, + "loss_xval": 1.2721600532531738, + "num_input_tokens_seen": 726891280, + "step": 4205 + }, + { + "epoch": 1.6108770585982382, + "grad_norm": 124.48242543593666, + "learning_rate": 5e-06, + "loss": 1.4113, + "num_input_tokens_seen": 727064296, + "step": 4206 + }, + { + "epoch": 1.6108770585982382, + "loss": 1.40886390209198, + "loss_ce": 0.17270991206169128, + "loss_iou": 0.5808913707733154, + "loss_num": 0.014892578125, + "loss_xval": 1.2361540794372559, + "num_input_tokens_seen": 727064296, + "step": 4206 + }, + { + "epoch": 1.611260053619303, + "grad_norm": 152.93814477278573, + "learning_rate": 5e-06, + "loss": 1.4134, + "num_input_tokens_seen": 727237248, + "step": 4207 + }, + { + "epoch": 1.611260053619303, + "loss": 1.248939037322998, + "loss_ce": 0.16090553998947144, + "loss_iou": 0.5178936719894409, + "loss_num": 0.01043701171875, + "loss_xval": 1.0880334377288818, + "num_input_tokens_seen": 727237248, + "step": 4207 + }, + { + "epoch": 1.6116430486403677, + "grad_norm": 161.91302956545493, + "learning_rate": 5e-06, + "loss": 1.6091, + "num_input_tokens_seen": 727410024, + "step": 4208 + }, + { + "epoch": 1.6116430486403677, + "loss": 1.6586511135101318, + "loss_ce": 0.1684921234846115, + "loss_iou": 0.7031330466270447, + "loss_num": 0.016845703125, + "loss_xval": 1.4901589155197144, + "num_input_tokens_seen": 727410024, + "step": 4208 + }, + { + "epoch": 1.6120260436614324, + "grad_norm": 127.17948579750434, + "learning_rate": 5e-06, + "loss": 1.2158, + "num_input_tokens_seen": 727583024, + "step": 4209 + }, + { + "epoch": 1.6120260436614324, + "loss": 1.3533885478973389, + "loss_ce": 0.1721852868795395, + "loss_iou": 0.5454966425895691, + "loss_num": 0.01806640625, + "loss_xval": 1.1812032461166382, + "num_input_tokens_seen": 727583024, + "step": 4209 + }, + { + "epoch": 1.6124090386824972, + "grad_norm": 156.97799307329777, + "learning_rate": 5e-06, + "loss": 1.4997, + "num_input_tokens_seen": 727755576, + "step": 4210 + }, + { + "epoch": 1.6124090386824972, + "loss": 1.493351697921753, + "loss_ce": 0.11826685070991516, + "loss_iou": 0.6387753486633301, + "loss_num": 0.01953125, + "loss_xval": 1.3750848770141602, + "num_input_tokens_seen": 727755576, + "step": 4210 + }, + { + "epoch": 1.612792033703562, + "grad_norm": 101.78998317340869, + "learning_rate": 5e-06, + "loss": 1.3822, + "num_input_tokens_seen": 727928264, + "step": 4211 + }, + { + "epoch": 1.612792033703562, + "loss": 1.5854644775390625, + "loss_ce": 0.1664758026599884, + "loss_iou": 0.6659457683563232, + "loss_num": 0.017333984375, + "loss_xval": 1.4189887046813965, + "num_input_tokens_seen": 727928264, + "step": 4211 + }, + { + "epoch": 1.6131750287246267, + "grad_norm": 162.3333980065569, + "learning_rate": 5e-06, + "loss": 1.6166, + "num_input_tokens_seen": 728100824, + "step": 4212 + }, + { + "epoch": 1.6131750287246267, + "loss": 1.9017621278762817, + "loss_ce": 0.1310673952102661, + "loss_iou": 0.8206501007080078, + "loss_num": 0.02587890625, + "loss_xval": 1.7706948518753052, + "num_input_tokens_seen": 728100824, + "step": 4212 + }, + { + "epoch": 1.6135580237456915, + "grad_norm": 167.7287404606955, + "learning_rate": 5e-06, + "loss": 1.478, + "num_input_tokens_seen": 728273472, + "step": 4213 + }, + { + "epoch": 1.6135580237456915, + "loss": 1.4391961097717285, + "loss_ce": 0.16294166445732117, + "loss_iou": 0.6000260710716248, + "loss_num": 0.0152587890625, + "loss_xval": 1.2762545347213745, + "num_input_tokens_seen": 728273472, + "step": 4213 + }, + { + "epoch": 1.613941018766756, + "grad_norm": 193.79280839460625, + "learning_rate": 5e-06, + "loss": 1.7893, + "num_input_tokens_seen": 728446560, + "step": 4214 + }, + { + "epoch": 1.613941018766756, + "loss": 1.788272738456726, + "loss_ce": 0.14585144817829132, + "loss_iou": 0.7796915173530579, + "loss_num": 0.0166015625, + "loss_xval": 1.6424213647842407, + "num_input_tokens_seen": 728446560, + "step": 4214 + }, + { + "epoch": 1.6143240137878208, + "grad_norm": 107.96259250437333, + "learning_rate": 5e-06, + "loss": 1.3204, + "num_input_tokens_seen": 728619168, + "step": 4215 + }, + { + "epoch": 1.6143240137878208, + "loss": 1.391005039215088, + "loss_ce": 0.16493898630142212, + "loss_iou": 0.5781972408294678, + "loss_num": 0.013916015625, + "loss_xval": 1.2260661125183105, + "num_input_tokens_seen": 728619168, + "step": 4215 + }, + { + "epoch": 1.6147070088088855, + "grad_norm": 187.0352787801529, + "learning_rate": 5e-06, + "loss": 1.4137, + "num_input_tokens_seen": 728792184, + "step": 4216 + }, + { + "epoch": 1.6147070088088855, + "loss": 1.4656509160995483, + "loss_ce": 0.14935088157653809, + "loss_iou": 0.6199725270271301, + "loss_num": 0.0152587890625, + "loss_xval": 1.3163000345230103, + "num_input_tokens_seen": 728792184, + "step": 4216 + }, + { + "epoch": 1.61509000382995, + "grad_norm": 169.04310416732002, + "learning_rate": 5e-06, + "loss": 1.663, + "num_input_tokens_seen": 728964840, + "step": 4217 + }, + { + "epoch": 1.61509000382995, + "loss": 1.6586328744888306, + "loss_ce": 0.1359909176826477, + "loss_iou": 0.699309229850769, + "loss_num": 0.0247802734375, + "loss_xval": 1.522641897201538, + "num_input_tokens_seen": 728964840, + "step": 4217 + }, + { + "epoch": 1.6154729988510148, + "grad_norm": 152.06177212909685, + "learning_rate": 5e-06, + "loss": 1.5059, + "num_input_tokens_seen": 729137720, + "step": 4218 + }, + { + "epoch": 1.6154729988510148, + "loss": 1.583996295928955, + "loss_ce": 0.17172470688819885, + "loss_iou": 0.6719104051589966, + "loss_num": 0.013671875, + "loss_xval": 1.4122717380523682, + "num_input_tokens_seen": 729137720, + "step": 4218 + }, + { + "epoch": 1.6158559938720796, + "grad_norm": 200.02509033924764, + "learning_rate": 5e-06, + "loss": 1.6775, + "num_input_tokens_seen": 729310504, + "step": 4219 + }, + { + "epoch": 1.6158559938720796, + "loss": 1.5794682502746582, + "loss_ce": 0.16939860582351685, + "loss_iou": 0.6682611703872681, + "loss_num": 0.01470947265625, + "loss_xval": 1.4100697040557861, + "num_input_tokens_seen": 729310504, + "step": 4219 + }, + { + "epoch": 1.6162389888931443, + "grad_norm": 277.57050945481717, + "learning_rate": 5e-06, + "loss": 1.9702, + "num_input_tokens_seen": 729483616, + "step": 4220 + }, + { + "epoch": 1.6162389888931443, + "loss": 1.8970720767974854, + "loss_ce": 0.13330280780792236, + "loss_iou": 0.8094968795776367, + "loss_num": 0.0289306640625, + "loss_xval": 1.7637691497802734, + "num_input_tokens_seen": 729483616, + "step": 4220 + }, + { + "epoch": 1.616621983914209, + "grad_norm": 83.35662605388745, + "learning_rate": 5e-06, + "loss": 1.9715, + "num_input_tokens_seen": 729656672, + "step": 4221 + }, + { + "epoch": 1.616621983914209, + "loss": 1.9594939947128296, + "loss_ce": 0.16559158265590668, + "loss_iou": 0.8036589622497559, + "loss_num": 0.037353515625, + "loss_xval": 1.7939023971557617, + "num_input_tokens_seen": 729656672, + "step": 4221 + }, + { + "epoch": 1.6170049789352738, + "grad_norm": 156.0426568366871, + "learning_rate": 5e-06, + "loss": 1.7302, + "num_input_tokens_seen": 729829664, + "step": 4222 + }, + { + "epoch": 1.6170049789352738, + "loss": 1.7628661394119263, + "loss_ce": 0.14224308729171753, + "loss_iou": 0.743722140789032, + "loss_num": 0.026611328125, + "loss_xval": 1.620622992515564, + "num_input_tokens_seen": 729829664, + "step": 4222 + }, + { + "epoch": 1.6173879739563386, + "grad_norm": 160.89069524338066, + "learning_rate": 5e-06, + "loss": 1.6602, + "num_input_tokens_seen": 730002456, + "step": 4223 + }, + { + "epoch": 1.6173879739563386, + "loss": 1.6575422286987305, + "loss_ce": 0.16317525506019592, + "loss_iou": 0.6911532282829285, + "loss_num": 0.0224609375, + "loss_xval": 1.494367003440857, + "num_input_tokens_seen": 730002456, + "step": 4223 + }, + { + "epoch": 1.6177709689774034, + "grad_norm": 176.03727937909886, + "learning_rate": 5e-06, + "loss": 1.9298, + "num_input_tokens_seen": 730175536, + "step": 4224 + }, + { + "epoch": 1.6177709689774034, + "loss": 1.876756191253662, + "loss_ce": 0.18384908139705658, + "loss_iou": 0.8029355406761169, + "loss_num": 0.0174560546875, + "loss_xval": 1.6929072141647339, + "num_input_tokens_seen": 730175536, + "step": 4224 + }, + { + "epoch": 1.6181539639984681, + "grad_norm": 77.70857364021485, + "learning_rate": 5e-06, + "loss": 1.91, + "num_input_tokens_seen": 730348768, + "step": 4225 + }, + { + "epoch": 1.6181539639984681, + "loss": 1.9864534139633179, + "loss_ce": 0.1839562952518463, + "loss_iou": 0.822848916053772, + "loss_num": 0.03125, + "loss_xval": 1.802497148513794, + "num_input_tokens_seen": 730348768, + "step": 4225 + }, + { + "epoch": 1.6185369590195329, + "grad_norm": 98.3553739132101, + "learning_rate": 5e-06, + "loss": 1.4728, + "num_input_tokens_seen": 730521824, + "step": 4226 + }, + { + "epoch": 1.6185369590195329, + "loss": 1.4634766578674316, + "loss_ce": 0.144941046833992, + "loss_iou": 0.6283076405525208, + "loss_num": 0.01239013671875, + "loss_xval": 1.3185354471206665, + "num_input_tokens_seen": 730521824, + "step": 4226 + }, + { + "epoch": 1.6189199540405976, + "grad_norm": 179.6512128786453, + "learning_rate": 5e-06, + "loss": 1.7384, + "num_input_tokens_seen": 730694672, + "step": 4227 + }, + { + "epoch": 1.6189199540405976, + "loss": 1.8198180198669434, + "loss_ce": 0.1349303126335144, + "loss_iou": 0.8038392066955566, + "loss_num": 0.01544189453125, + "loss_xval": 1.6848878860473633, + "num_input_tokens_seen": 730694672, + "step": 4227 + }, + { + "epoch": 1.6193029490616622, + "grad_norm": 223.12057888652353, + "learning_rate": 5e-06, + "loss": 2.0535, + "num_input_tokens_seen": 730867736, + "step": 4228 + }, + { + "epoch": 1.6193029490616622, + "loss": 2.180980920791626, + "loss_ce": 0.1341322958469391, + "loss_iou": 0.9227772951126099, + "loss_num": 0.040283203125, + "loss_xval": 2.0468485355377197, + "num_input_tokens_seen": 730867736, + "step": 4228 + }, + { + "epoch": 1.619685944082727, + "grad_norm": 114.72192326790878, + "learning_rate": 5e-06, + "loss": 1.9036, + "num_input_tokens_seen": 731041016, + "step": 4229 + }, + { + "epoch": 1.619685944082727, + "loss": 1.9049959182739258, + "loss_ce": 0.18319116532802582, + "loss_iou": 0.7841202020645142, + "loss_num": 0.03076171875, + "loss_xval": 1.7218048572540283, + "num_input_tokens_seen": 731041016, + "step": 4229 + }, + { + "epoch": 1.6200689391037917, + "grad_norm": 61.207408232360386, + "learning_rate": 5e-06, + "loss": 1.2428, + "num_input_tokens_seen": 731213896, + "step": 4230 + }, + { + "epoch": 1.6200689391037917, + "loss": 1.3117761611938477, + "loss_ce": 0.15150532126426697, + "loss_iou": 0.5487176179885864, + "loss_num": 0.0125732421875, + "loss_xval": 1.1602709293365479, + "num_input_tokens_seen": 731213896, + "step": 4230 + }, + { + "epoch": 1.6204519341248562, + "grad_norm": 98.28703306409523, + "learning_rate": 5e-06, + "loss": 1.1018, + "num_input_tokens_seen": 731386744, + "step": 4231 + }, + { + "epoch": 1.6204519341248562, + "loss": 1.0563000440597534, + "loss_ce": 0.0884590819478035, + "loss_iou": 0.45318925380706787, + "loss_num": 0.0123291015625, + "loss_xval": 0.9494082927703857, + "num_input_tokens_seen": 731386744, + "step": 4231 + }, + { + "epoch": 1.620834929145921, + "grad_norm": 165.15158453212973, + "learning_rate": 5e-06, + "loss": 1.4172, + "num_input_tokens_seen": 731559808, + "step": 4232 + }, + { + "epoch": 1.620834929145921, + "loss": 1.3487050533294678, + "loss_ce": 0.171361044049263, + "loss_iou": 0.5394319295883179, + "loss_num": 0.0196533203125, + "loss_xval": 1.1773440837860107, + "num_input_tokens_seen": 731559808, + "step": 4232 + }, + { + "epoch": 1.6212179241669857, + "grad_norm": 209.7155277261917, + "learning_rate": 5e-06, + "loss": 1.7705, + "num_input_tokens_seen": 731732840, + "step": 4233 + }, + { + "epoch": 1.6212179241669857, + "loss": 1.8195605278015137, + "loss_ce": 0.14862287044525146, + "loss_iou": 0.7685896158218384, + "loss_num": 0.0267333984375, + "loss_xval": 1.6709377765655518, + "num_input_tokens_seen": 731732840, + "step": 4233 + }, + { + "epoch": 1.6216009191880505, + "grad_norm": 135.34385611849774, + "learning_rate": 5e-06, + "loss": 1.5091, + "num_input_tokens_seen": 731905344, + "step": 4234 + }, + { + "epoch": 1.6216009191880505, + "loss": 1.689784288406372, + "loss_ce": 0.14371293783187866, + "loss_iou": 0.7203012704849243, + "loss_num": 0.0211181640625, + "loss_xval": 1.5460712909698486, + "num_input_tokens_seen": 731905344, + "step": 4234 + }, + { + "epoch": 1.6219839142091153, + "grad_norm": 297.4578435313298, + "learning_rate": 5e-06, + "loss": 1.4793, + "num_input_tokens_seen": 732078448, + "step": 4235 + }, + { + "epoch": 1.6219839142091153, + "loss": 1.3251738548278809, + "loss_ce": 0.18002194166183472, + "loss_iou": 0.529378354549408, + "loss_num": 0.017333984375, + "loss_xval": 1.145151972770691, + "num_input_tokens_seen": 732078448, + "step": 4235 + }, + { + "epoch": 1.62236690923018, + "grad_norm": 86.83817184916266, + "learning_rate": 5e-06, + "loss": 1.2281, + "num_input_tokens_seen": 732251432, + "step": 4236 + }, + { + "epoch": 1.62236690923018, + "loss": 1.221873164176941, + "loss_ce": 0.16788625717163086, + "loss_iou": 0.47956913709640503, + "loss_num": 0.01904296875, + "loss_xval": 1.05398690700531, + "num_input_tokens_seen": 732251432, + "step": 4236 + }, + { + "epoch": 1.6227499042512448, + "grad_norm": 113.9655956875708, + "learning_rate": 5e-06, + "loss": 1.5004, + "num_input_tokens_seen": 732424424, + "step": 4237 + }, + { + "epoch": 1.6227499042512448, + "loss": 1.6975293159484863, + "loss_ce": 0.14779433608055115, + "loss_iou": 0.7001604437828064, + "loss_num": 0.0299072265625, + "loss_xval": 1.5497349500656128, + "num_input_tokens_seen": 732424424, + "step": 4237 + }, + { + "epoch": 1.6231328992723095, + "grad_norm": 131.99938853002732, + "learning_rate": 5e-06, + "loss": 1.5101, + "num_input_tokens_seen": 732597216, + "step": 4238 + }, + { + "epoch": 1.6231328992723095, + "loss": 1.4549403190612793, + "loss_ce": 0.1477314680814743, + "loss_iou": 0.5885409712791443, + "loss_num": 0.0260009765625, + "loss_xval": 1.2988470792770386, + "num_input_tokens_seen": 732597216, + "step": 4238 + }, + { + "epoch": 1.6235158942933743, + "grad_norm": 116.74818789125096, + "learning_rate": 5e-06, + "loss": 1.8328, + "num_input_tokens_seen": 732770272, + "step": 4239 + }, + { + "epoch": 1.6235158942933743, + "loss": 1.7853672504425049, + "loss_ce": 0.17761091887950897, + "loss_iou": 0.7398828268051147, + "loss_num": 0.025634765625, + "loss_xval": 1.6077563762664795, + "num_input_tokens_seen": 732770272, + "step": 4239 + }, + { + "epoch": 1.623898889314439, + "grad_norm": 115.16556321758176, + "learning_rate": 5e-06, + "loss": 1.4008, + "num_input_tokens_seen": 732943208, + "step": 4240 + }, + { + "epoch": 1.623898889314439, + "loss": 1.5703788995742798, + "loss_ce": 0.1624567210674286, + "loss_iou": 0.6411864161491394, + "loss_num": 0.025146484375, + "loss_xval": 1.4079221487045288, + "num_input_tokens_seen": 732943208, + "step": 4240 + }, + { + "epoch": 1.6242818843355038, + "grad_norm": 160.31123185536194, + "learning_rate": 5e-06, + "loss": 1.5956, + "num_input_tokens_seen": 733116376, + "step": 4241 + }, + { + "epoch": 1.6242818843355038, + "loss": 1.5039113759994507, + "loss_ce": 0.14939121901988983, + "loss_iou": 0.6216417551040649, + "loss_num": 0.022216796875, + "loss_xval": 1.3545200824737549, + "num_input_tokens_seen": 733116376, + "step": 4241 + }, + { + "epoch": 1.6246648793565683, + "grad_norm": 149.68244304531984, + "learning_rate": 5e-06, + "loss": 1.7246, + "num_input_tokens_seen": 733289152, + "step": 4242 + }, + { + "epoch": 1.6246648793565683, + "loss": 1.6408194303512573, + "loss_ce": 0.14709943532943726, + "loss_iou": 0.6603731513023376, + "loss_num": 0.03466796875, + "loss_xval": 1.4937199354171753, + "num_input_tokens_seen": 733289152, + "step": 4242 + }, + { + "epoch": 1.625047874377633, + "grad_norm": 64.99801486935895, + "learning_rate": 5e-06, + "loss": 1.3936, + "num_input_tokens_seen": 733462248, + "step": 4243 + }, + { + "epoch": 1.625047874377633, + "loss": 1.3872981071472168, + "loss_ce": 0.15859389305114746, + "loss_iou": 0.5587490797042847, + "loss_num": 0.022216796875, + "loss_xval": 1.2287042140960693, + "num_input_tokens_seen": 733462248, + "step": 4243 + }, + { + "epoch": 1.6254308693986979, + "grad_norm": 104.31327966536591, + "learning_rate": 5e-06, + "loss": 1.1563, + "num_input_tokens_seen": 733635496, + "step": 4244 + }, + { + "epoch": 1.6254308693986979, + "loss": 1.0872504711151123, + "loss_ce": 0.16980624198913574, + "loss_iou": 0.4264497756958008, + "loss_num": 0.012939453125, + "loss_xval": 0.9174442291259766, + "num_input_tokens_seen": 733635496, + "step": 4244 + }, + { + "epoch": 1.6258138644197624, + "grad_norm": 120.54582933263765, + "learning_rate": 5e-06, + "loss": 1.3233, + "num_input_tokens_seen": 733808528, + "step": 4245 + }, + { + "epoch": 1.6258138644197624, + "loss": 1.3540951013565063, + "loss_ce": 0.19186602532863617, + "loss_iou": 0.5447909832000732, + "loss_num": 0.0145263671875, + "loss_xval": 1.162229061126709, + "num_input_tokens_seen": 733808528, + "step": 4245 + }, + { + "epoch": 1.6261968594408271, + "grad_norm": 155.19784512722597, + "learning_rate": 5e-06, + "loss": 1.5014, + "num_input_tokens_seen": 733981488, + "step": 4246 + }, + { + "epoch": 1.6261968594408271, + "loss": 1.5388509035110474, + "loss_ce": 0.1324869990348816, + "loss_iou": 0.6651875972747803, + "loss_num": 0.01519775390625, + "loss_xval": 1.4063639640808105, + "num_input_tokens_seen": 733981488, + "step": 4246 + }, + { + "epoch": 1.626579854461892, + "grad_norm": 166.99671583118356, + "learning_rate": 5e-06, + "loss": 1.5675, + "num_input_tokens_seen": 734154400, + "step": 4247 + }, + { + "epoch": 1.626579854461892, + "loss": 1.4532661437988281, + "loss_ce": 0.13205987215042114, + "loss_iou": 0.6123547554016113, + "loss_num": 0.019287109375, + "loss_xval": 1.3212060928344727, + "num_input_tokens_seen": 734154400, + "step": 4247 + }, + { + "epoch": 1.6269628494829567, + "grad_norm": 153.52289951618562, + "learning_rate": 5e-06, + "loss": 1.4302, + "num_input_tokens_seen": 734326904, + "step": 4248 + }, + { + "epoch": 1.6269628494829567, + "loss": 1.346290946006775, + "loss_ce": 0.15899604558944702, + "loss_iou": 0.5563092231750488, + "loss_num": 0.01495361328125, + "loss_xval": 1.1872949600219727, + "num_input_tokens_seen": 734326904, + "step": 4248 + }, + { + "epoch": 1.6273458445040214, + "grad_norm": 138.05420791491989, + "learning_rate": 5e-06, + "loss": 1.4822, + "num_input_tokens_seen": 734499984, + "step": 4249 + }, + { + "epoch": 1.6273458445040214, + "loss": 1.5413062572479248, + "loss_ce": 0.21249623596668243, + "loss_iou": 0.6194525957107544, + "loss_num": 0.0179443359375, + "loss_xval": 1.3288099765777588, + "num_input_tokens_seen": 734499984, + "step": 4249 + }, + { + "epoch": 1.6277288395250862, + "grad_norm": 100.62585858377918, + "learning_rate": 5e-06, + "loss": 1.36, + "num_input_tokens_seen": 734672824, + "step": 4250 + }, + { + "epoch": 1.6277288395250862, + "eval_websight_new_CIoU": 0.9162223041057587, + "eval_websight_new_GIoU": 0.9161066710948944, + "eval_websight_new_IoU": 0.9164418876171112, + "eval_websight_new_MAE_all": 0.009378555696457624, + "eval_websight_new_MAE_h": 0.007857543416321278, + "eval_websight_new_MAE_w": 0.015783830545842648, + "eval_websight_new_MAE_x": 0.008010117802768946, + "eval_websight_new_MAE_y": 0.005862730089575052, + "eval_websight_new_NUM_probability": 0.401885524392128, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 1.018053650856018, + "eval_websight_new_loss_ce": 0.25705206394195557, + "eval_websight_new_loss_iou": 0.3521401435136795, + "eval_websight_new_loss_num": 0.01025390625, + "eval_websight_new_loss_xval": 0.7555345594882965, + "eval_websight_new_runtime": 56.1371, + "eval_websight_new_samples_per_second": 0.891, + "eval_websight_new_steps_per_second": 0.036, + "num_input_tokens_seen": 734672824, + "step": 4250 + }, + { + "epoch": 1.6277288395250862, + "eval_seeclick_CIoU": 0.6385174989700317, + "eval_seeclick_GIoU": 0.652334988117218, + "eval_seeclick_IoU": 0.6844546496868134, + "eval_seeclick_MAE_all": 0.06909987516701221, + "eval_seeclick_MAE_h": 0.022533646784722805, + "eval_seeclick_MAE_w": 0.11771515384316444, + "eval_seeclick_MAE_x": 0.11195151880383492, + "eval_seeclick_MAE_y": 0.02419918216764927, + "eval_seeclick_NUM_probability": 0.5888088047504425, + "eval_seeclick_inside_bbox": 0.8854166567325592, + "eval_seeclick_loss": 1.6230900287628174, + "eval_seeclick_loss_ce": 0.19231893867254257, + "eval_seeclick_loss_iou": 0.5816226601600647, + "eval_seeclick_loss_num": 0.0477447509765625, + "eval_seeclick_loss_xval": 1.401816487312317, + "eval_seeclick_runtime": 88.333, + "eval_seeclick_samples_per_second": 0.566, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 734672824, + "step": 4250 + }, + { + "epoch": 1.6277288395250862, + "eval_icons_CIoU": 0.8221758902072906, + "eval_icons_GIoU": 0.8194511830806732, + "eval_icons_IoU": 0.8379125595092773, + "eval_icons_MAE_all": 0.03192135691642761, + "eval_icons_MAE_h": 0.02202616073191166, + "eval_icons_MAE_w": 0.04361097142100334, + "eval_icons_MAE_x": 0.042733339592814445, + "eval_icons_MAE_y": 0.01931495126336813, + "eval_icons_NUM_probability": 0.40384694933891296, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 1.9730126857757568, + "eval_icons_loss_ce": 0.38168513774871826, + "eval_icons_loss_iou": 0.7400006651878357, + "eval_icons_loss_num": 0.022357940673828125, + "eval_icons_loss_xval": 1.5917567014694214, + "eval_icons_runtime": 87.2141, + "eval_icons_samples_per_second": 0.573, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 734672824, + "step": 4250 + }, + { + "epoch": 1.6277288395250862, + "loss": 2.055295944213867, + "loss_ce": 0.3824533224105835, + "loss_iou": 0.7747604846954346, + "loss_num": 0.024658203125, + "loss_xval": 1.6728425025939941, + "num_input_tokens_seen": 734672824, + "step": 4250 + }, + { + "epoch": 1.628111834546151, + "grad_norm": 79.00335484482997, + "learning_rate": 5e-06, + "loss": 1.4629, + "num_input_tokens_seen": 734846360, + "step": 4251 + }, + { + "epoch": 1.628111834546151, + "loss": 1.3319406509399414, + "loss_ce": 0.14555197954177856, + "loss_iou": 0.5480893850326538, + "loss_num": 0.01806640625, + "loss_xval": 1.1863887310028076, + "num_input_tokens_seen": 734846360, + "step": 4251 + }, + { + "epoch": 1.6284948295672157, + "grad_norm": 175.7979061714301, + "learning_rate": 5e-06, + "loss": 1.355, + "num_input_tokens_seen": 735019488, + "step": 4252 + }, + { + "epoch": 1.6284948295672157, + "loss": 1.2803380489349365, + "loss_ce": 0.15754714608192444, + "loss_iou": 0.5031984448432922, + "loss_num": 0.023193359375, + "loss_xval": 1.1227909326553345, + "num_input_tokens_seen": 735019488, + "step": 4252 + }, + { + "epoch": 1.6288778245882805, + "grad_norm": 177.45530485706, + "learning_rate": 5e-06, + "loss": 1.925, + "num_input_tokens_seen": 735192456, + "step": 4253 + }, + { + "epoch": 1.6288778245882805, + "loss": 1.9332362413406372, + "loss_ce": 0.2007823884487152, + "loss_iou": 0.7958838939666748, + "loss_num": 0.0281982421875, + "loss_xval": 1.7324538230895996, + "num_input_tokens_seen": 735192456, + "step": 4253 + }, + { + "epoch": 1.6292608196093452, + "grad_norm": 88.9595650995408, + "learning_rate": 5e-06, + "loss": 1.2875, + "num_input_tokens_seen": 735365296, + "step": 4254 + }, + { + "epoch": 1.6292608196093452, + "loss": 1.3784162998199463, + "loss_ce": 0.13960078358650208, + "loss_iou": 0.5806809663772583, + "loss_num": 0.0155029296875, + "loss_xval": 1.2388155460357666, + "num_input_tokens_seen": 735365296, + "step": 4254 + }, + { + "epoch": 1.62964381463041, + "grad_norm": 236.99011132617477, + "learning_rate": 5e-06, + "loss": 1.3328, + "num_input_tokens_seen": 735538528, + "step": 4255 + }, + { + "epoch": 1.62964381463041, + "loss": 1.2332539558410645, + "loss_ce": 0.12416472285985947, + "loss_iou": 0.5107366442680359, + "loss_num": 0.017578125, + "loss_xval": 1.1090892553329468, + "num_input_tokens_seen": 735538528, + "step": 4255 + }, + { + "epoch": 1.6300268096514745, + "grad_norm": 173.3068144320145, + "learning_rate": 5e-06, + "loss": 1.8645, + "num_input_tokens_seen": 735711648, + "step": 4256 + }, + { + "epoch": 1.6300268096514745, + "loss": 1.8725402355194092, + "loss_ce": 0.1486969292163849, + "loss_iou": 0.7692092657089233, + "loss_num": 0.037109375, + "loss_xval": 1.7238433361053467, + "num_input_tokens_seen": 735711648, + "step": 4256 + }, + { + "epoch": 1.6304098046725393, + "grad_norm": 129.64462652022746, + "learning_rate": 5e-06, + "loss": 1.2881, + "num_input_tokens_seen": 735884368, + "step": 4257 + }, + { + "epoch": 1.6304098046725393, + "loss": 1.2674479484558105, + "loss_ce": 0.13030312955379486, + "loss_iou": 0.514647901058197, + "loss_num": 0.0216064453125, + "loss_xval": 1.137144923210144, + "num_input_tokens_seen": 735884368, + "step": 4257 + }, + { + "epoch": 1.630792799693604, + "grad_norm": 179.32605556331572, + "learning_rate": 5e-06, + "loss": 1.542, + "num_input_tokens_seen": 736057584, + "step": 4258 + }, + { + "epoch": 1.630792799693604, + "loss": 1.4749085903167725, + "loss_ce": 0.15508228540420532, + "loss_iou": 0.6110392808914185, + "loss_num": 0.01953125, + "loss_xval": 1.319826364517212, + "num_input_tokens_seen": 736057584, + "step": 4258 + }, + { + "epoch": 1.6311757947146686, + "grad_norm": 120.53814416384229, + "learning_rate": 5e-06, + "loss": 1.5756, + "num_input_tokens_seen": 736230544, + "step": 4259 + }, + { + "epoch": 1.6311757947146686, + "loss": 1.6546740531921387, + "loss_ce": 0.16424831748008728, + "loss_iou": 0.6956065893173218, + "loss_num": 0.0198974609375, + "loss_xval": 1.4904258251190186, + "num_input_tokens_seen": 736230544, + "step": 4259 + }, + { + "epoch": 1.6315587897357333, + "grad_norm": 74.18145854487854, + "learning_rate": 5e-06, + "loss": 1.153, + "num_input_tokens_seen": 736403896, + "step": 4260 + }, + { + "epoch": 1.6315587897357333, + "loss": 1.2199480533599854, + "loss_ce": 0.14292341470718384, + "loss_iou": 0.5010062456130981, + "loss_num": 0.0150146484375, + "loss_xval": 1.0770246982574463, + "num_input_tokens_seen": 736403896, + "step": 4260 + }, + { + "epoch": 1.631941784756798, + "grad_norm": 81.70348520697706, + "learning_rate": 5e-06, + "loss": 1.3334, + "num_input_tokens_seen": 736576688, + "step": 4261 + }, + { + "epoch": 1.631941784756798, + "loss": 1.3570826053619385, + "loss_ce": 0.159705251455307, + "loss_iou": 0.5623422265052795, + "loss_num": 0.0145263671875, + "loss_xval": 1.197377324104309, + "num_input_tokens_seen": 736576688, + "step": 4261 + }, + { + "epoch": 1.6323247797778628, + "grad_norm": 255.40016348708113, + "learning_rate": 5e-06, + "loss": 1.4284, + "num_input_tokens_seen": 736750192, + "step": 4262 + }, + { + "epoch": 1.6323247797778628, + "loss": 1.5462151765823364, + "loss_ce": 0.18443627655506134, + "loss_iou": 0.6099055409431458, + "loss_num": 0.0284423828125, + "loss_xval": 1.3617788553237915, + "num_input_tokens_seen": 736750192, + "step": 4262 + }, + { + "epoch": 1.6327077747989276, + "grad_norm": 169.70891739035935, + "learning_rate": 5e-06, + "loss": 1.7396, + "num_input_tokens_seen": 736923136, + "step": 4263 + }, + { + "epoch": 1.6327077747989276, + "loss": 1.6810753345489502, + "loss_ce": 0.13653725385665894, + "loss_iou": 0.6868503093719482, + "loss_num": 0.0341796875, + "loss_xval": 1.5445380210876465, + "num_input_tokens_seen": 736923136, + "step": 4263 + }, + { + "epoch": 1.6330907698199923, + "grad_norm": 80.76270682928212, + "learning_rate": 5e-06, + "loss": 1.3823, + "num_input_tokens_seen": 737096320, + "step": 4264 + }, + { + "epoch": 1.6330907698199923, + "loss": 1.3377436399459839, + "loss_ce": 0.20186583697795868, + "loss_iou": 0.5355597734451294, + "loss_num": 0.012939453125, + "loss_xval": 1.1358778476715088, + "num_input_tokens_seen": 737096320, + "step": 4264 + }, + { + "epoch": 1.633473764841057, + "grad_norm": 136.92582532641694, + "learning_rate": 5e-06, + "loss": 1.2552, + "num_input_tokens_seen": 737269128, + "step": 4265 + }, + { + "epoch": 1.633473764841057, + "loss": 1.2514030933380127, + "loss_ce": 0.16696761548519135, + "loss_iou": 0.5057186484336853, + "loss_num": 0.01458740234375, + "loss_xval": 1.0844353437423706, + "num_input_tokens_seen": 737269128, + "step": 4265 + }, + { + "epoch": 1.6338567598621219, + "grad_norm": 186.46948201714852, + "learning_rate": 5e-06, + "loss": 1.3126, + "num_input_tokens_seen": 737441792, + "step": 4266 + }, + { + "epoch": 1.6338567598621219, + "loss": 1.2710222005844116, + "loss_ce": 0.14832234382629395, + "loss_iou": 0.5136738419532776, + "loss_num": 0.01904296875, + "loss_xval": 1.1226998567581177, + "num_input_tokens_seen": 737441792, + "step": 4266 + }, + { + "epoch": 1.6342397548831866, + "grad_norm": 163.69091903635953, + "learning_rate": 5e-06, + "loss": 2.1152, + "num_input_tokens_seen": 737614592, + "step": 4267 + }, + { + "epoch": 1.6342397548831866, + "loss": 2.1176929473876953, + "loss_ce": 0.15432396531105042, + "loss_iou": 0.8610180616378784, + "loss_num": 0.04833984375, + "loss_xval": 1.9633691310882568, + "num_input_tokens_seen": 737614592, + "step": 4267 + }, + { + "epoch": 1.6346227499042514, + "grad_norm": 116.22006071019236, + "learning_rate": 5e-06, + "loss": 1.5089, + "num_input_tokens_seen": 737787440, + "step": 4268 + }, + { + "epoch": 1.6346227499042514, + "loss": 1.5042097568511963, + "loss_ce": 0.15562763810157776, + "loss_iou": 0.6205800771713257, + "loss_num": 0.021484375, + "loss_xval": 1.3485820293426514, + "num_input_tokens_seen": 737787440, + "step": 4268 + }, + { + "epoch": 1.635005744925316, + "grad_norm": 119.21878105702457, + "learning_rate": 5e-06, + "loss": 1.4444, + "num_input_tokens_seen": 737960080, + "step": 4269 + }, + { + "epoch": 1.635005744925316, + "loss": 1.3537085056304932, + "loss_ce": 0.1828244924545288, + "loss_iou": 0.5500721335411072, + "loss_num": 0.01416015625, + "loss_xval": 1.1708840131759644, + "num_input_tokens_seen": 737960080, + "step": 4269 + }, + { + "epoch": 1.6353887399463807, + "grad_norm": 109.5187969297078, + "learning_rate": 5e-06, + "loss": 1.7138, + "num_input_tokens_seen": 738133296, + "step": 4270 + }, + { + "epoch": 1.6353887399463807, + "loss": 1.6863322257995605, + "loss_ce": 0.14331993460655212, + "loss_iou": 0.7184818387031555, + "loss_num": 0.021240234375, + "loss_xval": 1.5430123805999756, + "num_input_tokens_seen": 738133296, + "step": 4270 + }, + { + "epoch": 1.6357717349674454, + "grad_norm": 74.76696792186888, + "learning_rate": 5e-06, + "loss": 1.264, + "num_input_tokens_seen": 738306352, + "step": 4271 + }, + { + "epoch": 1.6357717349674454, + "loss": 1.2260103225708008, + "loss_ce": 0.18773356080055237, + "loss_iou": 0.49386221170425415, + "loss_num": 0.01007080078125, + "loss_xval": 1.0382767915725708, + "num_input_tokens_seen": 738306352, + "step": 4271 + }, + { + "epoch": 1.6361547299885102, + "grad_norm": 178.4165857708238, + "learning_rate": 5e-06, + "loss": 1.4123, + "num_input_tokens_seen": 738479312, + "step": 4272 + }, + { + "epoch": 1.6361547299885102, + "loss": 1.3048121929168701, + "loss_ce": 0.1136963814496994, + "loss_iou": 0.5423809885978699, + "loss_num": 0.021240234375, + "loss_xval": 1.367012858390808, + "num_input_tokens_seen": 738479312, + "step": 4272 + }, + { + "epoch": 1.6365377250095747, + "grad_norm": 231.64930660933527, + "learning_rate": 5e-06, + "loss": 1.3932, + "num_input_tokens_seen": 738652208, + "step": 4273 + }, + { + "epoch": 1.6365377250095747, + "loss": 1.337618112564087, + "loss_ce": 0.1696353554725647, + "loss_iou": 0.5430062413215637, + "loss_num": 0.016357421875, + "loss_xval": 1.1679826974868774, + "num_input_tokens_seen": 738652208, + "step": 4273 + }, + { + "epoch": 1.6369207200306395, + "grad_norm": 269.29606389009, + "learning_rate": 5e-06, + "loss": 2.1258, + "num_input_tokens_seen": 738825200, + "step": 4274 + }, + { + "epoch": 1.6369207200306395, + "loss": 2.2030158042907715, + "loss_ce": 0.16961421072483063, + "loss_iou": 0.9405593872070312, + "loss_num": 0.0303955078125, + "loss_xval": 2.0334014892578125, + "num_input_tokens_seen": 738825200, + "step": 4274 + }, + { + "epoch": 1.6373037150517042, + "grad_norm": 148.0039806718881, + "learning_rate": 5e-06, + "loss": 1.665, + "num_input_tokens_seen": 738998392, + "step": 4275 + }, + { + "epoch": 1.6373037150517042, + "loss": 1.5252833366394043, + "loss_ce": 0.17528021335601807, + "loss_iou": 0.6302933692932129, + "loss_num": 0.0179443359375, + "loss_xval": 1.3500032424926758, + "num_input_tokens_seen": 738998392, + "step": 4275 + }, + { + "epoch": 1.637686710072769, + "grad_norm": 173.16527791033351, + "learning_rate": 5e-06, + "loss": 1.2718, + "num_input_tokens_seen": 739171272, + "step": 4276 + }, + { + "epoch": 1.637686710072769, + "loss": 1.0502935647964478, + "loss_ce": 0.14169564843177795, + "loss_iou": 0.4223317503929138, + "loss_num": 0.01275634765625, + "loss_xval": 0.9085978269577026, + "num_input_tokens_seen": 739171272, + "step": 4276 + }, + { + "epoch": 1.6380697050938338, + "grad_norm": 147.36560063091952, + "learning_rate": 5e-06, + "loss": 1.5918, + "num_input_tokens_seen": 739344272, + "step": 4277 + }, + { + "epoch": 1.6380697050938338, + "loss": 1.7093251943588257, + "loss_ce": 0.20406115055084229, + "loss_iou": 0.7129286527633667, + "loss_num": 0.015869140625, + "loss_xval": 1.5052640438079834, + "num_input_tokens_seen": 739344272, + "step": 4277 + }, + { + "epoch": 1.6384527001148985, + "grad_norm": 184.70601390030873, + "learning_rate": 5e-06, + "loss": 1.7006, + "num_input_tokens_seen": 739517024, + "step": 4278 + }, + { + "epoch": 1.6384527001148985, + "loss": 1.6746817827224731, + "loss_ce": 0.14700838923454285, + "loss_iou": 0.687878429889679, + "loss_num": 0.0303955078125, + "loss_xval": 1.527673363685608, + "num_input_tokens_seen": 739517024, + "step": 4278 + }, + { + "epoch": 1.6388356951359633, + "grad_norm": 172.35186396439505, + "learning_rate": 5e-06, + "loss": 1.3699, + "num_input_tokens_seen": 739689888, + "step": 4279 + }, + { + "epoch": 1.6388356951359633, + "loss": 1.3808223009109497, + "loss_ce": 0.17082251608371735, + "loss_iou": 0.5690044164657593, + "loss_num": 0.014404296875, + "loss_xval": 1.2099997997283936, + "num_input_tokens_seen": 739689888, + "step": 4279 + }, + { + "epoch": 1.639218690157028, + "grad_norm": 291.7120410585724, + "learning_rate": 5e-06, + "loss": 1.8506, + "num_input_tokens_seen": 739862648, + "step": 4280 + }, + { + "epoch": 1.639218690157028, + "loss": 1.9401651620864868, + "loss_ce": 0.15000417828559875, + "loss_iou": 0.8300780653953552, + "loss_num": 0.0260009765625, + "loss_xval": 1.7901610136032104, + "num_input_tokens_seen": 739862648, + "step": 4280 + }, + { + "epoch": 1.6396016851780928, + "grad_norm": 145.98182649385603, + "learning_rate": 5e-06, + "loss": 2.0004, + "num_input_tokens_seen": 740035488, + "step": 4281 + }, + { + "epoch": 1.6396016851780928, + "loss": 1.8754746913909912, + "loss_ce": 0.13285154104232788, + "loss_iou": 0.800449788570404, + "loss_num": 0.0283203125, + "loss_xval": 1.742623209953308, + "num_input_tokens_seen": 740035488, + "step": 4281 + }, + { + "epoch": 1.6399846801991576, + "grad_norm": 165.6245313970752, + "learning_rate": 5e-06, + "loss": 1.9451, + "num_input_tokens_seen": 740208512, + "step": 4282 + }, + { + "epoch": 1.6399846801991576, + "loss": 1.8908205032348633, + "loss_ce": 0.16656585037708282, + "loss_iou": 0.7740535736083984, + "loss_num": 0.03515625, + "loss_xval": 1.7242546081542969, + "num_input_tokens_seen": 740208512, + "step": 4282 + }, + { + "epoch": 1.640367675220222, + "grad_norm": 66.89123684629259, + "learning_rate": 5e-06, + "loss": 1.1958, + "num_input_tokens_seen": 740381368, + "step": 4283 + }, + { + "epoch": 1.640367675220222, + "loss": 1.2351512908935547, + "loss_ce": 0.19138486683368683, + "loss_iou": 0.4932425022125244, + "loss_num": 0.011474609375, + "loss_xval": 1.0437664985656738, + "num_input_tokens_seen": 740381368, + "step": 4283 + }, + { + "epoch": 1.6407506702412868, + "grad_norm": 117.65326730680832, + "learning_rate": 5e-06, + "loss": 1.5424, + "num_input_tokens_seen": 740554144, + "step": 4284 + }, + { + "epoch": 1.6407506702412868, + "loss": 1.3629440069198608, + "loss_ce": 0.13619455695152283, + "loss_iou": 0.5643940567970276, + "loss_num": 0.01953125, + "loss_xval": 1.2267495393753052, + "num_input_tokens_seen": 740554144, + "step": 4284 + }, + { + "epoch": 1.6411336652623516, + "grad_norm": 172.1231842524589, + "learning_rate": 5e-06, + "loss": 1.4857, + "num_input_tokens_seen": 740726976, + "step": 4285 + }, + { + "epoch": 1.6411336652623516, + "loss": 1.3251349925994873, + "loss_ce": 0.16517385840415955, + "loss_iou": 0.55124831199646, + "loss_num": 0.011474609375, + "loss_xval": 1.151782512664795, + "num_input_tokens_seen": 740726976, + "step": 4285 + }, + { + "epoch": 1.6415166602834164, + "grad_norm": 183.24079291365823, + "learning_rate": 5e-06, + "loss": 1.7292, + "num_input_tokens_seen": 740896352, + "step": 4286 + }, + { + "epoch": 1.6415166602834164, + "loss": 1.864220380783081, + "loss_ce": 0.13857772946357727, + "loss_iou": 0.7636086940765381, + "loss_num": 0.039794921875, + "loss_xval": 1.7256428003311157, + "num_input_tokens_seen": 740896352, + "step": 4286 + }, + { + "epoch": 1.641899655304481, + "grad_norm": 75.01338252661343, + "learning_rate": 5e-06, + "loss": 1.5956, + "num_input_tokens_seen": 741069256, + "step": 4287 + }, + { + "epoch": 1.641899655304481, + "loss": 1.4006035327911377, + "loss_ce": 0.1531718373298645, + "loss_iou": 0.5650457739830017, + "loss_num": 0.0234375, + "loss_xval": 1.2474316358566284, + "num_input_tokens_seen": 741069256, + "step": 4287 + }, + { + "epoch": 1.6422826503255457, + "grad_norm": 113.8831730921573, + "learning_rate": 5e-06, + "loss": 1.4835, + "num_input_tokens_seen": 741242472, + "step": 4288 + }, + { + "epoch": 1.6422826503255457, + "loss": 1.2966430187225342, + "loss_ce": 0.17165182530879974, + "loss_iou": 0.5388215780258179, + "loss_num": 0.00946044921875, + "loss_xval": 1.1249911785125732, + "num_input_tokens_seen": 741242472, + "step": 4288 + }, + { + "epoch": 1.6426656453466104, + "grad_norm": 317.2341601707341, + "learning_rate": 5e-06, + "loss": 1.3591, + "num_input_tokens_seen": 741415208, + "step": 4289 + }, + { + "epoch": 1.6426656453466104, + "loss": 1.3367316722869873, + "loss_ce": 0.14232824742794037, + "loss_iou": 0.5580628514289856, + "loss_num": 0.015625, + "loss_xval": 1.1944032907485962, + "num_input_tokens_seen": 741415208, + "step": 4289 + }, + { + "epoch": 1.6430486403676752, + "grad_norm": 131.1750460487662, + "learning_rate": 5e-06, + "loss": 1.6847, + "num_input_tokens_seen": 741588256, + "step": 4290 + }, + { + "epoch": 1.6430486403676752, + "loss": 1.8473162651062012, + "loss_ce": 0.15907804667949677, + "loss_iou": 0.7536649703979492, + "loss_num": 0.0361328125, + "loss_xval": 1.6882381439208984, + "num_input_tokens_seen": 741588256, + "step": 4290 + }, + { + "epoch": 1.64343163538874, + "grad_norm": 175.40988601634265, + "learning_rate": 5e-06, + "loss": 1.6343, + "num_input_tokens_seen": 741761416, + "step": 4291 + }, + { + "epoch": 1.64343163538874, + "loss": 1.556229591369629, + "loss_ce": 0.19933602213859558, + "loss_iou": 0.6340437531471252, + "loss_num": 0.0177001953125, + "loss_xval": 1.3568936586380005, + "num_input_tokens_seen": 741761416, + "step": 4291 + }, + { + "epoch": 1.6438146304098047, + "grad_norm": 188.56269865478197, + "learning_rate": 5e-06, + "loss": 1.5454, + "num_input_tokens_seen": 741934456, + "step": 4292 + }, + { + "epoch": 1.6438146304098047, + "loss": 1.4899033308029175, + "loss_ce": 0.20824198424816132, + "loss_iou": 0.6150586009025574, + "loss_num": 0.01031494140625, + "loss_xval": 1.2816613912582397, + "num_input_tokens_seen": 741934456, + "step": 4292 + }, + { + "epoch": 1.6441976254308694, + "grad_norm": 293.7354363788219, + "learning_rate": 5e-06, + "loss": 1.9317, + "num_input_tokens_seen": 742107264, + "step": 4293 + }, + { + "epoch": 1.6441976254308694, + "loss": 1.8700616359710693, + "loss_ce": 0.1377236396074295, + "loss_iou": 0.8095741271972656, + "loss_num": 0.0225830078125, + "loss_xval": 1.7323379516601562, + "num_input_tokens_seen": 742107264, + "step": 4293 + }, + { + "epoch": 1.6445806204519342, + "grad_norm": 155.56475849248338, + "learning_rate": 5e-06, + "loss": 1.9621, + "num_input_tokens_seen": 742280000, + "step": 4294 + }, + { + "epoch": 1.6445806204519342, + "loss": 1.9358739852905273, + "loss_ce": 0.16428327560424805, + "loss_iou": 0.8030315637588501, + "loss_num": 0.033203125, + "loss_xval": 1.7715904712677002, + "num_input_tokens_seen": 742280000, + "step": 4294 + }, + { + "epoch": 1.644963615472999, + "grad_norm": 44.49111387657106, + "learning_rate": 5e-06, + "loss": 1.7541, + "num_input_tokens_seen": 742452792, + "step": 4295 + }, + { + "epoch": 1.644963615472999, + "loss": 1.712242603302002, + "loss_ce": 0.14141297340393066, + "loss_iou": -20753281974272.0, + "loss_num": 0.02734375, + "loss_xval": 1.790556472822374e+31, + "num_input_tokens_seen": 742452792, + "step": 4295 + }, + { + "epoch": 1.6453466104940637, + "grad_norm": 140.2333146591295, + "learning_rate": 5e-06, + "loss": 1.5158, + "num_input_tokens_seen": 742625760, + "step": 4296 + }, + { + "epoch": 1.6453466104940637, + "loss": 1.5926063060760498, + "loss_ce": 0.177087664604187, + "loss_iou": 0.6723132133483887, + "loss_num": 0.01416015625, + "loss_xval": 1.4155187606811523, + "num_input_tokens_seen": 742625760, + "step": 4296 + }, + { + "epoch": 1.6457296055151283, + "grad_norm": 172.70051532883474, + "learning_rate": 5e-06, + "loss": 2.3346, + "num_input_tokens_seen": 742798680, + "step": 4297 + }, + { + "epoch": 1.6457296055151283, + "loss": 2.3730525970458984, + "loss_ce": 0.20516502857208252, + "loss_iou": 1.0251668691635132, + "loss_num": 0.0235595703125, + "loss_xval": 2.1678876876831055, + "num_input_tokens_seen": 742798680, + "step": 4297 + }, + { + "epoch": 1.646112600536193, + "grad_norm": 96.58527355825707, + "learning_rate": 5e-06, + "loss": 1.7464, + "num_input_tokens_seen": 742972056, + "step": 4298 + }, + { + "epoch": 1.646112600536193, + "loss": 1.7323307991027832, + "loss_ce": 0.16866838932037354, + "loss_iou": 0.7081312537193298, + "loss_num": 0.029541015625, + "loss_xval": 1.5636624097824097, + "num_input_tokens_seen": 742972056, + "step": 4298 + }, + { + "epoch": 1.6464955955572578, + "grad_norm": 76.79804901297256, + "learning_rate": 5e-06, + "loss": 1.3906, + "num_input_tokens_seen": 743144872, + "step": 4299 + }, + { + "epoch": 1.6464955955572578, + "loss": 1.330512523651123, + "loss_ce": 0.2305942177772522, + "loss_iou": 0.514253556728363, + "loss_num": 0.0142822265625, + "loss_xval": 1.099918246269226, + "num_input_tokens_seen": 743144872, + "step": 4299 + }, + { + "epoch": 1.6468785905783225, + "grad_norm": 122.44180625472737, + "learning_rate": 5e-06, + "loss": 1.7444, + "num_input_tokens_seen": 743318064, + "step": 4300 + }, + { + "epoch": 1.6468785905783225, + "loss": 1.6140046119689941, + "loss_ce": 0.1705130934715271, + "loss_iou": 0.6736500859260559, + "loss_num": 0.019287109375, + "loss_xval": 1.4434915781021118, + "num_input_tokens_seen": 743318064, + "step": 4300 + }, + { + "epoch": 1.647261585599387, + "grad_norm": 84.41639006897243, + "learning_rate": 5e-06, + "loss": 1.7019, + "num_input_tokens_seen": 743491320, + "step": 4301 + }, + { + "epoch": 1.647261585599387, + "loss": 1.6535019874572754, + "loss_ce": 0.19724097847938538, + "loss_iou": 0.6837274432182312, + "loss_num": 0.0177001953125, + "loss_xval": 1.4562610387802124, + "num_input_tokens_seen": 743491320, + "step": 4301 + }, + { + "epoch": 1.6476445806204518, + "grad_norm": 85.11099246075345, + "learning_rate": 5e-06, + "loss": 1.2476, + "num_input_tokens_seen": 743664160, + "step": 4302 + }, + { + "epoch": 1.6476445806204518, + "loss": 1.3284763097763062, + "loss_ce": 0.16261108219623566, + "loss_iou": 0.5181132555007935, + "loss_num": 0.02587890625, + "loss_xval": 1.165865182876587, + "num_input_tokens_seen": 743664160, + "step": 4302 + }, + { + "epoch": 1.6480275756415166, + "grad_norm": 100.86795750806613, + "learning_rate": 5e-06, + "loss": 1.5445, + "num_input_tokens_seen": 743837032, + "step": 4303 + }, + { + "epoch": 1.6480275756415166, + "loss": 1.605276107788086, + "loss_ce": 0.145803302526474, + "loss_iou": 0.6753846406936646, + "loss_num": 0.021728515625, + "loss_xval": 1.459472894668579, + "num_input_tokens_seen": 743837032, + "step": 4303 + }, + { + "epoch": 1.6484105706625813, + "grad_norm": 124.00043972417424, + "learning_rate": 5e-06, + "loss": 1.6567, + "num_input_tokens_seen": 744009816, + "step": 4304 + }, + { + "epoch": 1.6484105706625813, + "loss": 1.62785804271698, + "loss_ce": 0.16736261546611786, + "loss_iou": 0.6700212359428406, + "loss_num": 0.0240478515625, + "loss_xval": 1.4604953527450562, + "num_input_tokens_seen": 744009816, + "step": 4304 + }, + { + "epoch": 1.648793565683646, + "grad_norm": 266.6892693451771, + "learning_rate": 5e-06, + "loss": 1.5544, + "num_input_tokens_seen": 744182792, + "step": 4305 + }, + { + "epoch": 1.648793565683646, + "loss": 1.5796635150909424, + "loss_ce": 0.12979306280612946, + "loss_iou": 0.6984153985977173, + "loss_num": 0.0106201171875, + "loss_xval": 1.4498703479766846, + "num_input_tokens_seen": 744182792, + "step": 4305 + }, + { + "epoch": 1.6491765607047109, + "grad_norm": 120.91812762698265, + "learning_rate": 5e-06, + "loss": 1.5587, + "num_input_tokens_seen": 744355864, + "step": 4306 + }, + { + "epoch": 1.6491765607047109, + "loss": 1.5446631908416748, + "loss_ce": 0.17455412447452545, + "loss_iou": 0.6263539791107178, + "loss_num": 0.0234375, + "loss_xval": 1.3701090812683105, + "num_input_tokens_seen": 744355864, + "step": 4306 + }, + { + "epoch": 1.6495595557257756, + "grad_norm": 99.2804550777207, + "learning_rate": 5e-06, + "loss": 1.1902, + "num_input_tokens_seen": 744528744, + "step": 4307 + }, + { + "epoch": 1.6495595557257756, + "loss": 1.2010624408721924, + "loss_ce": 0.19453051686286926, + "loss_iou": 0.46843019127845764, + "loss_num": 0.013916015625, + "loss_xval": 1.0065319538116455, + "num_input_tokens_seen": 744528744, + "step": 4307 + }, + { + "epoch": 1.6499425507468404, + "grad_norm": 139.7206590368086, + "learning_rate": 5e-06, + "loss": 1.4275, + "num_input_tokens_seen": 744701952, + "step": 4308 + }, + { + "epoch": 1.6499425507468404, + "loss": 1.4538816213607788, + "loss_ce": 0.14945608377456665, + "loss_iou": 0.5948854684829712, + "loss_num": 0.02294921875, + "loss_xval": 1.3044254779815674, + "num_input_tokens_seen": 744701952, + "step": 4308 + }, + { + "epoch": 1.6503255457679051, + "grad_norm": 125.34830773908683, + "learning_rate": 5e-06, + "loss": 1.3716, + "num_input_tokens_seen": 744874792, + "step": 4309 + }, + { + "epoch": 1.6503255457679051, + "loss": 1.357515811920166, + "loss_ce": 0.1182800680398941, + "loss_iou": 0.589146077632904, + "loss_num": 0.01220703125, + "loss_xval": 1.239235758781433, + "num_input_tokens_seen": 744874792, + "step": 4309 + }, + { + "epoch": 1.65070854078897, + "grad_norm": 134.44926680400766, + "learning_rate": 5e-06, + "loss": 1.4559, + "num_input_tokens_seen": 745047936, + "step": 4310 + }, + { + "epoch": 1.65070854078897, + "loss": 1.4882766008377075, + "loss_ce": 0.1506461203098297, + "loss_iou": 0.6241985559463501, + "loss_num": 0.017822265625, + "loss_xval": 1.3376305103302002, + "num_input_tokens_seen": 745047936, + "step": 4310 + }, + { + "epoch": 1.6510915358100344, + "grad_norm": 180.53997717268547, + "learning_rate": 5e-06, + "loss": 1.3624, + "num_input_tokens_seen": 745220864, + "step": 4311 + }, + { + "epoch": 1.6510915358100344, + "loss": 1.1268970966339111, + "loss_ce": 0.15746453404426575, + "loss_iou": 0.4212244749069214, + "loss_num": 0.025390625, + "loss_xval": 0.9694325923919678, + "num_input_tokens_seen": 745220864, + "step": 4311 + }, + { + "epoch": 1.6514745308310992, + "grad_norm": 235.10424515817198, + "learning_rate": 5e-06, + "loss": 1.6102, + "num_input_tokens_seen": 745393592, + "step": 4312 + }, + { + "epoch": 1.6514745308310992, + "loss": 1.5685279369354248, + "loss_ce": 0.14919352531433105, + "loss_iou": 0.6671409606933594, + "loss_num": 0.0169677734375, + "loss_xval": 1.4193344116210938, + "num_input_tokens_seen": 745393592, + "step": 4312 + }, + { + "epoch": 1.651857525852164, + "grad_norm": 169.14939483295666, + "learning_rate": 5e-06, + "loss": 1.5582, + "num_input_tokens_seen": 745566184, + "step": 4313 + }, + { + "epoch": 1.651857525852164, + "loss": 1.528897762298584, + "loss_ce": 0.15899689495563507, + "loss_iou": 0.6291643381118774, + "loss_num": 0.0223388671875, + "loss_xval": 1.3699009418487549, + "num_input_tokens_seen": 745566184, + "step": 4313 + }, + { + "epoch": 1.6522405208732287, + "grad_norm": 101.44406082473364, + "learning_rate": 5e-06, + "loss": 1.4022, + "num_input_tokens_seen": 745739000, + "step": 4314 + }, + { + "epoch": 1.6522405208732287, + "loss": 1.4315533638000488, + "loss_ce": 0.17240798473358154, + "loss_iou": 0.5926769375801086, + "loss_num": 0.0147705078125, + "loss_xval": 1.2591453790664673, + "num_input_tokens_seen": 745739000, + "step": 4314 + }, + { + "epoch": 1.6526235158942932, + "grad_norm": 153.54532476078182, + "learning_rate": 5e-06, + "loss": 1.4521, + "num_input_tokens_seen": 745912176, + "step": 4315 + }, + { + "epoch": 1.6526235158942932, + "loss": 1.4062680006027222, + "loss_ce": 0.19291892647743225, + "loss_iou": 0.5603793859481812, + "loss_num": 0.0185546875, + "loss_xval": 1.2133491039276123, + "num_input_tokens_seen": 745912176, + "step": 4315 + }, + { + "epoch": 1.653006510915358, + "grad_norm": 126.15423208683085, + "learning_rate": 5e-06, + "loss": 1.3409, + "num_input_tokens_seen": 746085200, + "step": 4316 + }, + { + "epoch": 1.653006510915358, + "loss": 1.410337209701538, + "loss_ce": 0.2024148404598236, + "loss_iou": 0.5630066394805908, + "loss_num": 0.016357421875, + "loss_xval": 1.2079224586486816, + "num_input_tokens_seen": 746085200, + "step": 4316 + }, + { + "epoch": 1.6533895059364228, + "grad_norm": 113.4981658484504, + "learning_rate": 5e-06, + "loss": 1.4783, + "num_input_tokens_seen": 746258392, + "step": 4317 + }, + { + "epoch": 1.6533895059364228, + "loss": 1.333397626876831, + "loss_ce": 0.17618131637573242, + "loss_iou": 0.5437875986099243, + "loss_num": 0.013916015625, + "loss_xval": 1.1572163105010986, + "num_input_tokens_seen": 746258392, + "step": 4317 + }, + { + "epoch": 1.6537725009574875, + "grad_norm": 106.16823314010469, + "learning_rate": 5e-06, + "loss": 1.5228, + "num_input_tokens_seen": 746431328, + "step": 4318 + }, + { + "epoch": 1.6537725009574875, + "loss": 1.705797791481018, + "loss_ce": 0.16248458623886108, + "loss_iou": 0.6962934732437134, + "loss_num": 0.0301513671875, + "loss_xval": 1.5433132648468018, + "num_input_tokens_seen": 746431328, + "step": 4318 + }, + { + "epoch": 1.6541554959785523, + "grad_norm": 64.50687135872865, + "learning_rate": 5e-06, + "loss": 1.3769, + "num_input_tokens_seen": 746604360, + "step": 4319 + }, + { + "epoch": 1.6541554959785523, + "loss": 1.2541346549987793, + "loss_ce": 0.14104785025119781, + "loss_iou": 0.5109654068946838, + "loss_num": 0.0181884765625, + "loss_xval": 1.1130868196487427, + "num_input_tokens_seen": 746604360, + "step": 4319 + }, + { + "epoch": 1.654538490999617, + "grad_norm": 120.06465185779766, + "learning_rate": 5e-06, + "loss": 1.4102, + "num_input_tokens_seen": 746777384, + "step": 4320 + }, + { + "epoch": 1.654538490999617, + "loss": 1.188852071762085, + "loss_ce": 0.12857110798358917, + "loss_iou": 0.4838911294937134, + "loss_num": 0.0185546875, + "loss_xval": 1.0471584796905518, + "num_input_tokens_seen": 746777384, + "step": 4320 + }, + { + "epoch": 1.6549214860206818, + "grad_norm": 99.73864319221155, + "learning_rate": 5e-06, + "loss": 1.2341, + "num_input_tokens_seen": 746950424, + "step": 4321 + }, + { + "epoch": 1.6549214860206818, + "loss": 1.3568239212036133, + "loss_ce": 0.1628800332546234, + "loss_iou": 0.5363030433654785, + "loss_num": 0.0242919921875, + "loss_xval": 1.193943977355957, + "num_input_tokens_seen": 746950424, + "step": 4321 + }, + { + "epoch": 1.6553044810417465, + "grad_norm": 113.5453243126911, + "learning_rate": 5e-06, + "loss": 1.6964, + "num_input_tokens_seen": 747123408, + "step": 4322 + }, + { + "epoch": 1.6553044810417465, + "loss": 1.706247329711914, + "loss_ce": 0.18428021669387817, + "loss_iou": 0.6889925003051758, + "loss_num": 0.02880859375, + "loss_xval": 1.5219669342041016, + "num_input_tokens_seen": 747123408, + "step": 4322 + }, + { + "epoch": 1.6556874760628113, + "grad_norm": 128.88972982006158, + "learning_rate": 5e-06, + "loss": 1.4075, + "num_input_tokens_seen": 747296576, + "step": 4323 + }, + { + "epoch": 1.6556874760628113, + "loss": 1.39468252658844, + "loss_ce": 0.16338348388671875, + "loss_iou": 0.5695374608039856, + "loss_num": 0.0184326171875, + "loss_xval": 1.2312990427017212, + "num_input_tokens_seen": 747296576, + "step": 4323 + }, + { + "epoch": 1.656070471083876, + "grad_norm": 162.94147982802778, + "learning_rate": 5e-06, + "loss": 1.7359, + "num_input_tokens_seen": 747469320, + "step": 4324 + }, + { + "epoch": 1.656070471083876, + "loss": 2.078629732131958, + "loss_ce": 0.17305155098438263, + "loss_iou": 0.8767850399017334, + "loss_num": 0.0303955078125, + "loss_xval": 1.9055781364440918, + "num_input_tokens_seen": 747469320, + "step": 4324 + }, + { + "epoch": 1.6564534661049406, + "grad_norm": 124.06849237730506, + "learning_rate": 5e-06, + "loss": 1.3565, + "num_input_tokens_seen": 747642304, + "step": 4325 + }, + { + "epoch": 1.6564534661049406, + "loss": 1.3706270456314087, + "loss_ce": 0.1738179475069046, + "loss_iou": 0.5613104104995728, + "loss_num": 0.01483154296875, + "loss_xval": 1.1968090534210205, + "num_input_tokens_seen": 747642304, + "step": 4325 + }, + { + "epoch": 1.6568364611260054, + "grad_norm": 195.91109856325355, + "learning_rate": 5e-06, + "loss": 1.6801, + "num_input_tokens_seen": 747814768, + "step": 4326 + }, + { + "epoch": 1.6568364611260054, + "loss": 1.7964779138565063, + "loss_ce": 0.1447073221206665, + "loss_iou": 0.7687869071960449, + "loss_num": 0.0228271484375, + "loss_xval": 1.6517705917358398, + "num_input_tokens_seen": 747814768, + "step": 4326 + }, + { + "epoch": 1.6572194561470701, + "grad_norm": 121.30326584270553, + "learning_rate": 5e-06, + "loss": 1.4, + "num_input_tokens_seen": 747987728, + "step": 4327 + }, + { + "epoch": 1.6572194561470701, + "loss": 1.5141677856445312, + "loss_ce": 0.13041839003562927, + "loss_iou": 0.62269127368927, + "loss_num": 0.0277099609375, + "loss_xval": 1.38374924659729, + "num_input_tokens_seen": 747987728, + "step": 4327 + }, + { + "epoch": 1.6576024511681346, + "grad_norm": 190.22677907897673, + "learning_rate": 5e-06, + "loss": 1.3527, + "num_input_tokens_seen": 748160808, + "step": 4328 + }, + { + "epoch": 1.6576024511681346, + "loss": 1.2638424634933472, + "loss_ce": 0.17623110115528107, + "loss_iou": 0.517835259437561, + "loss_num": 0.0103759765625, + "loss_xval": 1.087611436843872, + "num_input_tokens_seen": 748160808, + "step": 4328 + }, + { + "epoch": 1.6579854461891994, + "grad_norm": 171.87342369575066, + "learning_rate": 5e-06, + "loss": 1.4965, + "num_input_tokens_seen": 748333496, + "step": 4329 + }, + { + "epoch": 1.6579854461891994, + "loss": 1.554666519165039, + "loss_ce": 0.1455255150794983, + "loss_iou": 0.638286292552948, + "loss_num": 0.0264892578125, + "loss_xval": 1.4091410636901855, + "num_input_tokens_seen": 748333496, + "step": 4329 + }, + { + "epoch": 1.6583684412102642, + "grad_norm": 109.43188793076496, + "learning_rate": 5e-06, + "loss": 1.4313, + "num_input_tokens_seen": 748506256, + "step": 4330 + }, + { + "epoch": 1.6583684412102642, + "loss": 1.4558132886886597, + "loss_ce": 0.18514424562454224, + "loss_iou": 0.5906720757484436, + "loss_num": 0.017822265625, + "loss_xval": 1.2706691026687622, + "num_input_tokens_seen": 748506256, + "step": 4330 + }, + { + "epoch": 1.658751436231329, + "grad_norm": 108.72071196031955, + "learning_rate": 5e-06, + "loss": 1.4831, + "num_input_tokens_seen": 748679320, + "step": 4331 + }, + { + "epoch": 1.658751436231329, + "loss": 1.451843500137329, + "loss_ce": 0.20245873928070068, + "loss_iou": 0.5653510093688965, + "loss_num": 0.023681640625, + "loss_xval": 1.249384880065918, + "num_input_tokens_seen": 748679320, + "step": 4331 + }, + { + "epoch": 1.6591344312523937, + "grad_norm": 107.55488067482345, + "learning_rate": 5e-06, + "loss": 1.3853, + "num_input_tokens_seen": 748852344, + "step": 4332 + }, + { + "epoch": 1.6591344312523937, + "loss": 1.4387247562408447, + "loss_ce": 0.18469186127185822, + "loss_iou": 0.5855430364608765, + "loss_num": 0.0166015625, + "loss_xval": 1.254032850265503, + "num_input_tokens_seen": 748852344, + "step": 4332 + }, + { + "epoch": 1.6595174262734584, + "grad_norm": 138.97777822884672, + "learning_rate": 5e-06, + "loss": 1.5506, + "num_input_tokens_seen": 749025504, + "step": 4333 + }, + { + "epoch": 1.6595174262734584, + "loss": 1.5302248001098633, + "loss_ce": 0.13889721035957336, + "loss_iou": 0.6157077550888062, + "loss_num": 0.031982421875, + "loss_xval": 1.3913276195526123, + "num_input_tokens_seen": 749025504, + "step": 4333 + }, + { + "epoch": 1.6599004212945232, + "grad_norm": 127.38663465582381, + "learning_rate": 5e-06, + "loss": 1.4353, + "num_input_tokens_seen": 749198056, + "step": 4334 + }, + { + "epoch": 1.6599004212945232, + "loss": 1.3108580112457275, + "loss_ce": 0.19315487146377563, + "loss_iou": 0.5166457891464233, + "loss_num": 0.016845703125, + "loss_xval": 1.1177031993865967, + "num_input_tokens_seen": 749198056, + "step": 4334 + }, + { + "epoch": 1.660283416315588, + "grad_norm": 121.81982195951672, + "learning_rate": 5e-06, + "loss": 1.6543, + "num_input_tokens_seen": 749371064, + "step": 4335 + }, + { + "epoch": 1.660283416315588, + "loss": 1.6364576816558838, + "loss_ce": 0.16774462163448334, + "loss_iou": 0.6846739053726196, + "loss_num": 0.0198974609375, + "loss_xval": 1.4687130451202393, + "num_input_tokens_seen": 749371064, + "step": 4335 + }, + { + "epoch": 1.6606664113366527, + "grad_norm": 89.15176029224519, + "learning_rate": 5e-06, + "loss": 1.4432, + "num_input_tokens_seen": 749544112, + "step": 4336 + }, + { + "epoch": 1.6606664113366527, + "loss": 1.460259199142456, + "loss_ce": 0.1678803563117981, + "loss_iou": 0.6032969951629639, + "loss_num": 0.0172119140625, + "loss_xval": 1.2923789024353027, + "num_input_tokens_seen": 749544112, + "step": 4336 + }, + { + "epoch": 1.6610494063577175, + "grad_norm": 148.71456479347296, + "learning_rate": 5e-06, + "loss": 1.0917, + "num_input_tokens_seen": 749716984, + "step": 4337 + }, + { + "epoch": 1.6610494063577175, + "loss": 1.0275170803070068, + "loss_ce": 0.11111672967672348, + "loss_iou": 0.4249817728996277, + "loss_num": 0.0133056640625, + "loss_xval": 0.9164003133773804, + "num_input_tokens_seen": 749716984, + "step": 4337 + }, + { + "epoch": 1.6614324013787822, + "grad_norm": 115.50154730509183, + "learning_rate": 5e-06, + "loss": 1.7234, + "num_input_tokens_seen": 749890016, + "step": 4338 + }, + { + "epoch": 1.6614324013787822, + "loss": 1.7582378387451172, + "loss_ce": 0.17511804401874542, + "loss_iou": 0.7226817607879639, + "loss_num": 0.027587890625, + "loss_xval": 1.5831198692321777, + "num_input_tokens_seen": 749890016, + "step": 4338 + }, + { + "epoch": 1.6618153963998468, + "grad_norm": 109.33496661169397, + "learning_rate": 5e-06, + "loss": 1.3089, + "num_input_tokens_seen": 750063056, + "step": 4339 + }, + { + "epoch": 1.6618153963998468, + "loss": 1.4102544784545898, + "loss_ce": 0.15036270022392273, + "loss_iou": 0.5823689699172974, + "loss_num": 0.01904296875, + "loss_xval": 1.2598917484283447, + "num_input_tokens_seen": 750063056, + "step": 4339 + }, + { + "epoch": 1.6621983914209115, + "grad_norm": 98.19377264488183, + "learning_rate": 5e-06, + "loss": 1.3632, + "num_input_tokens_seen": 750235864, + "step": 4340 + }, + { + "epoch": 1.6621983914209115, + "loss": 1.4982824325561523, + "loss_ce": 0.19171017408370972, + "loss_iou": 0.5953027009963989, + "loss_num": 0.023193359375, + "loss_xval": 1.3065721988677979, + "num_input_tokens_seen": 750235864, + "step": 4340 + }, + { + "epoch": 1.6625813864419763, + "grad_norm": 159.9248649242272, + "learning_rate": 5e-06, + "loss": 1.2652, + "num_input_tokens_seen": 750408744, + "step": 4341 + }, + { + "epoch": 1.6625813864419763, + "loss": 1.2764732837677002, + "loss_ce": 0.1473279893398285, + "loss_iou": 0.5065434575080872, + "loss_num": 0.023193359375, + "loss_xval": 1.1291451454162598, + "num_input_tokens_seen": 750408744, + "step": 4341 + }, + { + "epoch": 1.6629643814630408, + "grad_norm": 191.00419749506867, + "learning_rate": 5e-06, + "loss": 1.5639, + "num_input_tokens_seen": 750581496, + "step": 4342 + }, + { + "epoch": 1.6629643814630408, + "loss": 1.575960636138916, + "loss_ce": 0.20127132534980774, + "loss_iou": 0.6380282640457153, + "loss_num": 0.019775390625, + "loss_xval": 1.3746893405914307, + "num_input_tokens_seen": 750581496, + "step": 4342 + }, + { + "epoch": 1.6633473764841056, + "grad_norm": 137.39907596519382, + "learning_rate": 5e-06, + "loss": 1.4354, + "num_input_tokens_seen": 750754536, + "step": 4343 + }, + { + "epoch": 1.6633473764841056, + "loss": 1.6266783475875854, + "loss_ce": 0.17832821607589722, + "loss_iou": 0.672340989112854, + "loss_num": 0.020751953125, + "loss_xval": 1.448350191116333, + "num_input_tokens_seen": 750754536, + "step": 4343 + }, + { + "epoch": 1.6637303715051703, + "grad_norm": 109.33498349661797, + "learning_rate": 5e-06, + "loss": 1.6813, + "num_input_tokens_seen": 750927400, + "step": 4344 + }, + { + "epoch": 1.6637303715051703, + "loss": 1.6764832735061646, + "loss_ce": 0.16076403856277466, + "loss_iou": 0.7087873220443726, + "loss_num": 0.0196533203125, + "loss_xval": 1.5157191753387451, + "num_input_tokens_seen": 750927400, + "step": 4344 + }, + { + "epoch": 1.664113366526235, + "grad_norm": 132.21153686371912, + "learning_rate": 5e-06, + "loss": 1.4743, + "num_input_tokens_seen": 751100208, + "step": 4345 + }, + { + "epoch": 1.664113366526235, + "loss": 1.6191737651824951, + "loss_ce": 0.1825704425573349, + "loss_iou": 0.651269793510437, + "loss_num": 0.02685546875, + "loss_xval": 1.436603307723999, + "num_input_tokens_seen": 751100208, + "step": 4345 + }, + { + "epoch": 1.6644963615472999, + "grad_norm": 143.87108557151714, + "learning_rate": 5e-06, + "loss": 1.5951, + "num_input_tokens_seen": 751267976, + "step": 4346 + }, + { + "epoch": 1.6644963615472999, + "loss": 1.6740915775299072, + "loss_ce": 0.179146409034729, + "loss_iou": 0.6795557141304016, + "loss_num": 0.0272216796875, + "loss_xval": 1.4949451684951782, + "num_input_tokens_seen": 751267976, + "step": 4346 + }, + { + "epoch": 1.6648793565683646, + "grad_norm": 106.6810436195744, + "learning_rate": 5e-06, + "loss": 1.5367, + "num_input_tokens_seen": 751440792, + "step": 4347 + }, + { + "epoch": 1.6648793565683646, + "loss": 1.4976184368133545, + "loss_ce": 0.13274140655994415, + "loss_iou": 0.6166731119155884, + "loss_num": 0.0262451171875, + "loss_xval": 1.3648769855499268, + "num_input_tokens_seen": 751440792, + "step": 4347 + }, + { + "epoch": 1.6652623515894294, + "grad_norm": 103.23602112630653, + "learning_rate": 5e-06, + "loss": 1.398, + "num_input_tokens_seen": 751613856, + "step": 4348 + }, + { + "epoch": 1.6652623515894294, + "loss": 1.3447130918502808, + "loss_ce": 0.1880890130996704, + "loss_iou": 0.5271035432815552, + "loss_num": 0.0205078125, + "loss_xval": 1.1566240787506104, + "num_input_tokens_seen": 751613856, + "step": 4348 + }, + { + "epoch": 1.6656453466104941, + "grad_norm": 155.02316345437134, + "learning_rate": 5e-06, + "loss": 1.3166, + "num_input_tokens_seen": 751786368, + "step": 4349 + }, + { + "epoch": 1.6656453466104941, + "loss": 1.3376691341400146, + "loss_ce": 0.14817002415657043, + "loss_iou": 0.5526505708694458, + "loss_num": 0.016845703125, + "loss_xval": 1.1894991397857666, + "num_input_tokens_seen": 751786368, + "step": 4349 + }, + { + "epoch": 1.6660283416315589, + "grad_norm": 138.28528744846454, + "learning_rate": 5e-06, + "loss": 1.6043, + "num_input_tokens_seen": 751959088, + "step": 4350 + }, + { + "epoch": 1.6660283416315589, + "loss": 1.6053296327590942, + "loss_ce": 0.13853321969509125, + "loss_iou": 0.6715390682220459, + "loss_num": 0.0247802734375, + "loss_xval": 1.4667963981628418, + "num_input_tokens_seen": 751959088, + "step": 4350 + }, + { + "epoch": 1.6664113366526236, + "grad_norm": 241.1625900477056, + "learning_rate": 5e-06, + "loss": 1.5604, + "num_input_tokens_seen": 752132040, + "step": 4351 + }, + { + "epoch": 1.6664113366526236, + "loss": 1.562677264213562, + "loss_ce": 0.16798928380012512, + "loss_iou": 0.6683523654937744, + "loss_num": 0.0115966796875, + "loss_xval": 1.3946881294250488, + "num_input_tokens_seen": 752132040, + "step": 4351 + }, + { + "epoch": 1.6667943316736884, + "grad_norm": 127.74427420012312, + "learning_rate": 5e-06, + "loss": 1.8056, + "num_input_tokens_seen": 752305016, + "step": 4352 + }, + { + "epoch": 1.6667943316736884, + "loss": 1.7611594200134277, + "loss_ce": 0.1871078461408615, + "loss_iou": 0.6976702809333801, + "loss_num": 0.035888671875, + "loss_xval": 1.5740514993667603, + "num_input_tokens_seen": 752305016, + "step": 4352 + }, + { + "epoch": 1.667177326694753, + "grad_norm": 114.88384420280931, + "learning_rate": 5e-06, + "loss": 1.0786, + "num_input_tokens_seen": 752478144, + "step": 4353 + }, + { + "epoch": 1.667177326694753, + "loss": 1.088322639465332, + "loss_ce": 0.17090633511543274, + "loss_iou": 0.4382079839706421, + "loss_num": 0.0081787109375, + "loss_xval": 0.9174163341522217, + "num_input_tokens_seen": 752478144, + "step": 4353 + }, + { + "epoch": 1.6675603217158177, + "grad_norm": 214.57938217468157, + "learning_rate": 5e-06, + "loss": 1.4141, + "num_input_tokens_seen": 752651088, + "step": 4354 + }, + { + "epoch": 1.6675603217158177, + "loss": 1.4140644073486328, + "loss_ce": 0.15576159954071045, + "loss_iou": 0.568452000617981, + "loss_num": 0.0242919921875, + "loss_xval": 1.258302927017212, + "num_input_tokens_seen": 752651088, + "step": 4354 + }, + { + "epoch": 1.6679433167368825, + "grad_norm": 95.12374924026861, + "learning_rate": 5e-06, + "loss": 1.4094, + "num_input_tokens_seen": 752823896, + "step": 4355 + }, + { + "epoch": 1.6679433167368825, + "loss": 1.4674533605575562, + "loss_ce": 0.13537859916687012, + "loss_iou": 8.569318057542831e+32, + "loss_num": 0.013671875, + "loss_xval": 8.203829036485028e+35, + "num_input_tokens_seen": 752823896, + "step": 4355 + }, + { + "epoch": 1.668326311757947, + "grad_norm": 90.87505988937167, + "learning_rate": 5e-06, + "loss": 1.558, + "num_input_tokens_seen": 752996504, + "step": 4356 + }, + { + "epoch": 1.668326311757947, + "loss": 1.5766923427581787, + "loss_ce": 0.16272133588790894, + "loss_iou": 0.6496124267578125, + "loss_num": 0.02294921875, + "loss_xval": 1.413970947265625, + "num_input_tokens_seen": 752996504, + "step": 4356 + }, + { + "epoch": 1.6687093067790117, + "grad_norm": 141.6348893494254, + "learning_rate": 5e-06, + "loss": 1.3547, + "num_input_tokens_seen": 753169632, + "step": 4357 + }, + { + "epoch": 1.6687093067790117, + "loss": 1.3793840408325195, + "loss_ce": 0.16662092506885529, + "loss_iou": 0.5807315111160278, + "loss_num": 0.01025390625, + "loss_xval": 1.2127630710601807, + "num_input_tokens_seen": 753169632, + "step": 4357 + }, + { + "epoch": 1.6690923018000765, + "grad_norm": 153.06472665831689, + "learning_rate": 5e-06, + "loss": 1.5865, + "num_input_tokens_seen": 753342568, + "step": 4358 + }, + { + "epoch": 1.6690923018000765, + "loss": 1.5346765518188477, + "loss_ce": 0.16870664060115814, + "loss_iou": 0.6440293192863464, + "loss_num": 0.01556396484375, + "loss_xval": 1.3659700155258179, + "num_input_tokens_seen": 753342568, + "step": 4358 + }, + { + "epoch": 1.6694752968211413, + "grad_norm": 138.2465074214682, + "learning_rate": 5e-06, + "loss": 1.6133, + "num_input_tokens_seen": 753515872, + "step": 4359 + }, + { + "epoch": 1.6694752968211413, + "loss": 1.493098258972168, + "loss_ce": 0.18320278823375702, + "loss_iou": 0.6241936087608337, + "loss_num": 0.0123291015625, + "loss_xval": 1.309895396232605, + "num_input_tokens_seen": 753515872, + "step": 4359 + }, + { + "epoch": 1.669858291842206, + "grad_norm": 98.27732783563468, + "learning_rate": 5e-06, + "loss": 1.5559, + "num_input_tokens_seen": 753688824, + "step": 4360 + }, + { + "epoch": 1.669858291842206, + "loss": 1.5809683799743652, + "loss_ce": 0.12172377109527588, + "loss_iou": 0.6667560935020447, + "loss_num": 0.025146484375, + "loss_xval": 1.4592446088790894, + "num_input_tokens_seen": 753688824, + "step": 4360 + }, + { + "epoch": 1.6702412868632708, + "grad_norm": 102.88437547538797, + "learning_rate": 5e-06, + "loss": 1.2817, + "num_input_tokens_seen": 753861536, + "step": 4361 + }, + { + "epoch": 1.6702412868632708, + "loss": 1.1647738218307495, + "loss_ce": 0.13653725385665894, + "loss_iou": 0.4613076150417328, + "loss_num": 0.0211181640625, + "loss_xval": 1.0282366275787354, + "num_input_tokens_seen": 753861536, + "step": 4361 + }, + { + "epoch": 1.6706242818843355, + "grad_norm": 155.73607378861095, + "learning_rate": 5e-06, + "loss": 1.4204, + "num_input_tokens_seen": 754034784, + "step": 4362 + }, + { + "epoch": 1.6706242818843355, + "loss": 1.3382277488708496, + "loss_ce": 0.1748562753200531, + "loss_iou": 0.5535790920257568, + "loss_num": 0.01123046875, + "loss_xval": 1.1633715629577637, + "num_input_tokens_seen": 754034784, + "step": 4362 + }, + { + "epoch": 1.6710072769054003, + "grad_norm": 136.3262772158089, + "learning_rate": 5e-06, + "loss": 1.2393, + "num_input_tokens_seen": 754207320, + "step": 4363 + }, + { + "epoch": 1.6710072769054003, + "loss": 1.2228909730911255, + "loss_ce": 0.1309235692024231, + "loss_iou": 0.5086911916732788, + "loss_num": 0.014892578125, + "loss_xval": 1.0919673442840576, + "num_input_tokens_seen": 754207320, + "step": 4363 + }, + { + "epoch": 1.671390271926465, + "grad_norm": 268.5851211662383, + "learning_rate": 5e-06, + "loss": 1.7028, + "num_input_tokens_seen": 754379912, + "step": 4364 + }, + { + "epoch": 1.671390271926465, + "loss": 1.8059022426605225, + "loss_ce": 0.1507665067911148, + "loss_iou": 0.7666090726852417, + "loss_num": 0.0244140625, + "loss_xval": 1.6551358699798584, + "num_input_tokens_seen": 754379912, + "step": 4364 + }, + { + "epoch": 1.6717732669475298, + "grad_norm": 194.19436886467636, + "learning_rate": 5e-06, + "loss": 1.5381, + "num_input_tokens_seen": 754552584, + "step": 4365 + }, + { + "epoch": 1.6717732669475298, + "loss": 1.6535385847091675, + "loss_ce": 0.14655561745166779, + "loss_iou": 0.6829653978347778, + "loss_num": 0.0281982421875, + "loss_xval": 1.5069830417633057, + "num_input_tokens_seen": 754552584, + "step": 4365 + }, + { + "epoch": 1.6721562619685946, + "grad_norm": 363.1074922804633, + "learning_rate": 5e-06, + "loss": 1.633, + "num_input_tokens_seen": 754725384, + "step": 4366 + }, + { + "epoch": 1.6721562619685946, + "loss": 1.7063180208206177, + "loss_ce": 0.16991159319877625, + "loss_iou": 0.7248987555503845, + "loss_num": 0.017333984375, + "loss_xval": 1.536406397819519, + "num_input_tokens_seen": 754725384, + "step": 4366 + }, + { + "epoch": 1.672539256989659, + "grad_norm": 333.9474035795017, + "learning_rate": 5e-06, + "loss": 2.5983, + "num_input_tokens_seen": 754898616, + "step": 4367 + }, + { + "epoch": 1.672539256989659, + "loss": 2.56595516204834, + "loss_ce": 0.19206538796424866, + "loss_iou": 1.064325213432312, + "loss_num": 0.049072265625, + "loss_xval": 2.373889684677124, + "num_input_tokens_seen": 754898616, + "step": 4367 + }, + { + "epoch": 1.6729222520107239, + "grad_norm": 89.54363251972225, + "learning_rate": 5e-06, + "loss": 2.1643, + "num_input_tokens_seen": 755071528, + "step": 4368 + }, + { + "epoch": 1.6729222520107239, + "loss": 2.049285888671875, + "loss_ce": 0.1552119255065918, + "loss_iou": 0.8515169620513916, + "loss_num": 0.0380859375, + "loss_xval": 1.8940738439559937, + "num_input_tokens_seen": 755071528, + "step": 4368 + }, + { + "epoch": 1.6733052470317886, + "grad_norm": 288.5888479185838, + "learning_rate": 5e-06, + "loss": 1.4525, + "num_input_tokens_seen": 755244576, + "step": 4369 + }, + { + "epoch": 1.6733052470317886, + "loss": 1.488768458366394, + "loss_ce": 0.19600477814674377, + "loss_iou": 0.6057324409484863, + "loss_num": 0.0162353515625, + "loss_xval": 1.2927637100219727, + "num_input_tokens_seen": 755244576, + "step": 4369 + }, + { + "epoch": 1.6736882420528532, + "grad_norm": 175.09232969517532, + "learning_rate": 5e-06, + "loss": 1.8828, + "num_input_tokens_seen": 755417560, + "step": 4370 + }, + { + "epoch": 1.6736882420528532, + "loss": 1.8737170696258545, + "loss_ce": 0.18630965054035187, + "loss_iou": 0.7724452018737793, + "loss_num": 0.0284423828125, + "loss_xval": 1.6874074935913086, + "num_input_tokens_seen": 755417560, + "step": 4370 + }, + { + "epoch": 1.674071237073918, + "grad_norm": 157.99918288709196, + "learning_rate": 5e-06, + "loss": 1.747, + "num_input_tokens_seen": 755590256, + "step": 4371 + }, + { + "epoch": 1.674071237073918, + "loss": 1.6813737154006958, + "loss_ce": 0.19650748372077942, + "loss_iou": 0.6870436668395996, + "loss_num": 0.022216796875, + "loss_xval": 1.4848661422729492, + "num_input_tokens_seen": 755590256, + "step": 4371 + }, + { + "epoch": 1.6744542320949827, + "grad_norm": 152.377673994168, + "learning_rate": 5e-06, + "loss": 1.7661, + "num_input_tokens_seen": 755763224, + "step": 4372 + }, + { + "epoch": 1.6744542320949827, + "loss": 1.7150580883026123, + "loss_ce": 0.1900370866060257, + "loss_iou": 0.6973097324371338, + "loss_num": 0.026123046875, + "loss_xval": 1.5250210762023926, + "num_input_tokens_seen": 755763224, + "step": 4372 + }, + { + "epoch": 1.6748372271160474, + "grad_norm": 207.45305571019634, + "learning_rate": 5e-06, + "loss": 1.6515, + "num_input_tokens_seen": 755936408, + "step": 4373 + }, + { + "epoch": 1.6748372271160474, + "loss": 1.5031293630599976, + "loss_ce": 0.1421300172805786, + "loss_iou": 0.6314579248428345, + "loss_num": 0.0196533203125, + "loss_xval": 1.360999345779419, + "num_input_tokens_seen": 755936408, + "step": 4373 + }, + { + "epoch": 1.6752202221371122, + "grad_norm": 114.78800827220954, + "learning_rate": 5e-06, + "loss": 1.5075, + "num_input_tokens_seen": 756109432, + "step": 4374 + }, + { + "epoch": 1.6752202221371122, + "loss": 1.4894447326660156, + "loss_ce": 0.1648956835269928, + "loss_iou": 0.6026736497879028, + "loss_num": 0.0238037109375, + "loss_xval": 1.3245489597320557, + "num_input_tokens_seen": 756109432, + "step": 4374 + }, + { + "epoch": 1.675603217158177, + "grad_norm": 154.2731282607183, + "learning_rate": 5e-06, + "loss": 1.4073, + "num_input_tokens_seen": 756282152, + "step": 4375 + }, + { + "epoch": 1.675603217158177, + "loss": 1.337599754333496, + "loss_ce": 0.1523275226354599, + "loss_iou": 0.5486602783203125, + "loss_num": 0.017578125, + "loss_xval": 1.185272216796875, + "num_input_tokens_seen": 756282152, + "step": 4375 + }, + { + "epoch": 1.6759862121792417, + "grad_norm": 148.39229143516698, + "learning_rate": 5e-06, + "loss": 1.3415, + "num_input_tokens_seen": 756455248, + "step": 4376 + }, + { + "epoch": 1.6759862121792417, + "loss": 1.2090262174606323, + "loss_ce": 0.15305881202220917, + "loss_iou": 0.47675997018814087, + "loss_num": 0.0205078125, + "loss_xval": 1.0485211610794067, + "num_input_tokens_seen": 756455248, + "step": 4376 + }, + { + "epoch": 1.6763692072003065, + "grad_norm": 118.59006548468923, + "learning_rate": 5e-06, + "loss": 1.4575, + "num_input_tokens_seen": 756628136, + "step": 4377 + }, + { + "epoch": 1.6763692072003065, + "loss": 1.5994715690612793, + "loss_ce": 0.16459201276302338, + "loss_iou": 0.6657277345657349, + "loss_num": 0.0206298828125, + "loss_xval": 1.4348795413970947, + "num_input_tokens_seen": 756628136, + "step": 4377 + }, + { + "epoch": 1.6767522022213712, + "grad_norm": 126.86195805763037, + "learning_rate": 5e-06, + "loss": 1.5015, + "num_input_tokens_seen": 756801352, + "step": 4378 + }, + { + "epoch": 1.6767522022213712, + "loss": 1.5466210842132568, + "loss_ce": 0.15083849430084229, + "loss_iou": 0.6531525254249573, + "loss_num": 0.0179443359375, + "loss_xval": 1.3957825899124146, + "num_input_tokens_seen": 756801352, + "step": 4378 + }, + { + "epoch": 1.677135197242436, + "grad_norm": 78.38021438088244, + "learning_rate": 5e-06, + "loss": 1.2303, + "num_input_tokens_seen": 756974280, + "step": 4379 + }, + { + "epoch": 1.677135197242436, + "loss": 1.1930649280548096, + "loss_ce": 0.16140449047088623, + "loss_iou": 0.47673720121383667, + "loss_num": 0.015625, + "loss_xval": 1.0316604375839233, + "num_input_tokens_seen": 756974280, + "step": 4379 + }, + { + "epoch": 1.6775181922635007, + "grad_norm": 107.58407424952352, + "learning_rate": 5e-06, + "loss": 1.4725, + "num_input_tokens_seen": 757147352, + "step": 4380 + }, + { + "epoch": 1.6775181922635007, + "loss": 1.5374525785446167, + "loss_ce": 0.1311381757259369, + "loss_iou": 0.6446855068206787, + "loss_num": 0.0234375, + "loss_xval": 1.4063143730163574, + "num_input_tokens_seen": 757147352, + "step": 4380 + }, + { + "epoch": 1.6779011872845653, + "grad_norm": 190.88086679141256, + "learning_rate": 5e-06, + "loss": 1.4762, + "num_input_tokens_seen": 757320336, + "step": 4381 + }, + { + "epoch": 1.6779011872845653, + "loss": 1.4439189434051514, + "loss_ce": 0.17356400191783905, + "loss_iou": 0.5909727811813354, + "loss_num": 0.0177001953125, + "loss_xval": 1.270354986190796, + "num_input_tokens_seen": 757320336, + "step": 4381 + }, + { + "epoch": 1.67828418230563, + "grad_norm": 116.10727496920556, + "learning_rate": 5e-06, + "loss": 1.742, + "num_input_tokens_seen": 757492872, + "step": 4382 + }, + { + "epoch": 1.67828418230563, + "loss": 1.6558839082717896, + "loss_ce": 0.20242519676685333, + "loss_iou": 0.6721638441085815, + "loss_num": 0.0218505859375, + "loss_xval": 1.453458547592163, + "num_input_tokens_seen": 757492872, + "step": 4382 + }, + { + "epoch": 1.6786671773266948, + "grad_norm": 141.76299308346458, + "learning_rate": 5e-06, + "loss": 1.4861, + "num_input_tokens_seen": 757665824, + "step": 4383 + }, + { + "epoch": 1.6786671773266948, + "loss": 1.4049732685089111, + "loss_ce": 0.184755340218544, + "loss_iou": 0.5702530145645142, + "loss_num": 0.0159912109375, + "loss_xval": 1.2202179431915283, + "num_input_tokens_seen": 757665824, + "step": 4383 + }, + { + "epoch": 1.6790501723477593, + "grad_norm": 135.79000566704417, + "learning_rate": 5e-06, + "loss": 1.5194, + "num_input_tokens_seen": 757838728, + "step": 4384 + }, + { + "epoch": 1.6790501723477593, + "loss": 1.4411894083023071, + "loss_ce": 0.17360654473304749, + "loss_iou": 0.5963310599327087, + "loss_num": 0.0150146484375, + "loss_xval": 1.2675827741622925, + "num_input_tokens_seen": 757838728, + "step": 4384 + }, + { + "epoch": 1.679433167368824, + "grad_norm": 146.4705927634235, + "learning_rate": 5e-06, + "loss": 1.541, + "num_input_tokens_seen": 758011640, + "step": 4385 + }, + { + "epoch": 1.679433167368824, + "loss": 1.5647530555725098, + "loss_ce": 0.16160601377487183, + "loss_iou": 0.6532489061355591, + "loss_num": 0.019287109375, + "loss_xval": 1.4031469821929932, + "num_input_tokens_seen": 758011640, + "step": 4385 + }, + { + "epoch": 1.6798161623898888, + "grad_norm": 177.39243836807586, + "learning_rate": 5e-06, + "loss": 2.0011, + "num_input_tokens_seen": 758184696, + "step": 4386 + }, + { + "epoch": 1.6798161623898888, + "loss": 2.0785374641418457, + "loss_ce": 0.16670063138008118, + "loss_iou": 0.8813335299491882, + "loss_num": 0.02978515625, + "loss_xval": 1.9118369817733765, + "num_input_tokens_seen": 758184696, + "step": 4386 + }, + { + "epoch": 1.6801991574109536, + "grad_norm": 167.1618213411226, + "learning_rate": 5e-06, + "loss": 1.4763, + "num_input_tokens_seen": 758357448, + "step": 4387 + }, + { + "epoch": 1.6801991574109536, + "loss": 1.451549768447876, + "loss_ce": 0.1958613395690918, + "loss_iou": 0.5861572027206421, + "loss_num": 0.0167236328125, + "loss_xval": 1.2556884288787842, + "num_input_tokens_seen": 758357448, + "step": 4387 + }, + { + "epoch": 1.6805821524320184, + "grad_norm": 120.3318231030432, + "learning_rate": 5e-06, + "loss": 1.3509, + "num_input_tokens_seen": 758530552, + "step": 4388 + }, + { + "epoch": 1.6805821524320184, + "loss": 1.3233647346496582, + "loss_ce": 0.12391694635152817, + "loss_iou": 0.555976927280426, + "loss_num": 0.0174560546875, + "loss_xval": 1.199447751045227, + "num_input_tokens_seen": 758530552, + "step": 4388 + }, + { + "epoch": 1.6809651474530831, + "grad_norm": 133.2513019471835, + "learning_rate": 5e-06, + "loss": 1.3939, + "num_input_tokens_seen": 758703744, + "step": 4389 + }, + { + "epoch": 1.6809651474530831, + "loss": 1.524483561515808, + "loss_ce": 0.18629640340805054, + "loss_iou": 0.6356615424156189, + "loss_num": 0.01336669921875, + "loss_xval": 1.3381870985031128, + "num_input_tokens_seen": 758703744, + "step": 4389 + }, + { + "epoch": 1.6813481424741479, + "grad_norm": 126.09216545969078, + "learning_rate": 5e-06, + "loss": 1.3425, + "num_input_tokens_seen": 758876632, + "step": 4390 + }, + { + "epoch": 1.6813481424741479, + "loss": 1.4416831731796265, + "loss_ce": 0.17311879992485046, + "loss_iou": 0.5921832323074341, + "loss_num": 0.016845703125, + "loss_xval": 1.2685644626617432, + "num_input_tokens_seen": 758876632, + "step": 4390 + }, + { + "epoch": 1.6817311374952126, + "grad_norm": 119.3339984017816, + "learning_rate": 5e-06, + "loss": 1.6733, + "num_input_tokens_seen": 759049768, + "step": 4391 + }, + { + "epoch": 1.6817311374952126, + "loss": 1.6312274932861328, + "loss_ce": 0.1816640943288803, + "loss_iou": 0.6654555201530457, + "loss_num": 0.023681640625, + "loss_xval": 1.4495635032653809, + "num_input_tokens_seen": 759049768, + "step": 4391 + }, + { + "epoch": 1.6821141325162774, + "grad_norm": 87.3722891188307, + "learning_rate": 5e-06, + "loss": 1.2888, + "num_input_tokens_seen": 759222352, + "step": 4392 + }, + { + "epoch": 1.6821141325162774, + "loss": 1.141264796257019, + "loss_ce": 0.13132750988006592, + "loss_iou": 0.47223857045173645, + "loss_num": 0.01312255859375, + "loss_xval": 1.0099372863769531, + "num_input_tokens_seen": 759222352, + "step": 4392 + }, + { + "epoch": 1.6824971275373422, + "grad_norm": 94.5590831376501, + "learning_rate": 5e-06, + "loss": 1.4809, + "num_input_tokens_seen": 759395056, + "step": 4393 + }, + { + "epoch": 1.6824971275373422, + "loss": 1.6476218700408936, + "loss_ce": 0.1588333249092102, + "loss_iou": 0.6961917877197266, + "loss_num": 0.019287109375, + "loss_xval": 1.4887886047363281, + "num_input_tokens_seen": 759395056, + "step": 4393 + }, + { + "epoch": 1.6828801225584067, + "grad_norm": 87.55062154600387, + "learning_rate": 5e-06, + "loss": 1.4034, + "num_input_tokens_seen": 759568416, + "step": 4394 + }, + { + "epoch": 1.6828801225584067, + "loss": 1.526280403137207, + "loss_ce": 0.19728177785873413, + "loss_iou": 0.6122837662696838, + "loss_num": 0.0208740234375, + "loss_xval": 1.3289986848831177, + "num_input_tokens_seen": 759568416, + "step": 4394 + }, + { + "epoch": 1.6832631175794714, + "grad_norm": 169.4888444747027, + "learning_rate": 5e-06, + "loss": 1.4735, + "num_input_tokens_seen": 759741552, + "step": 4395 + }, + { + "epoch": 1.6832631175794714, + "loss": 1.3289074897766113, + "loss_ce": 0.16519808769226074, + "loss_iou": 0.5489262342453003, + "loss_num": 0.01318359375, + "loss_xval": 1.1637094020843506, + "num_input_tokens_seen": 759741552, + "step": 4395 + }, + { + "epoch": 1.6836461126005362, + "grad_norm": 171.116956366303, + "learning_rate": 5e-06, + "loss": 1.5816, + "num_input_tokens_seen": 759914608, + "step": 4396 + }, + { + "epoch": 1.6836461126005362, + "loss": 1.6261197328567505, + "loss_ce": 0.1836853176355362, + "loss_iou": 0.6743574738502502, + "loss_num": 0.018798828125, + "loss_xval": 1.4424344301223755, + "num_input_tokens_seen": 759914608, + "step": 4396 + }, + { + "epoch": 1.684029107621601, + "grad_norm": 108.58593223828433, + "learning_rate": 5e-06, + "loss": 1.4723, + "num_input_tokens_seen": 760087568, + "step": 4397 + }, + { + "epoch": 1.684029107621601, + "loss": 1.4462080001831055, + "loss_ce": 0.18597783148288727, + "loss_iou": 0.5661959648132324, + "loss_num": 0.025634765625, + "loss_xval": 1.2602300643920898, + "num_input_tokens_seen": 760087568, + "step": 4397 + }, + { + "epoch": 1.6844121026426655, + "grad_norm": 158.90865656353407, + "learning_rate": 5e-06, + "loss": 1.3799, + "num_input_tokens_seen": 760260616, + "step": 4398 + }, + { + "epoch": 1.6844121026426655, + "loss": 1.4212050437927246, + "loss_ce": 0.15659356117248535, + "loss_iou": 0.5904814004898071, + "loss_num": 0.0167236328125, + "loss_xval": 1.2646114826202393, + "num_input_tokens_seen": 760260616, + "step": 4398 + }, + { + "epoch": 1.6847950976637303, + "grad_norm": 166.7730290021102, + "learning_rate": 5e-06, + "loss": 1.3809, + "num_input_tokens_seen": 760433520, + "step": 4399 + }, + { + "epoch": 1.6847950976637303, + "loss": 1.2549279928207397, + "loss_ce": 0.12187740951776505, + "loss_iou": 0.524761974811554, + "loss_num": 0.0167236328125, + "loss_xval": 1.133050560951233, + "num_input_tokens_seen": 760433520, + "step": 4399 + }, + { + "epoch": 1.685178092684795, + "grad_norm": 221.98706377249738, + "learning_rate": 5e-06, + "loss": 1.8523, + "num_input_tokens_seen": 760606408, + "step": 4400 + }, + { + "epoch": 1.685178092684795, + "loss": 1.804720163345337, + "loss_ce": 0.17628535628318787, + "loss_iou": 0.7449730634689331, + "loss_num": 0.0277099609375, + "loss_xval": 1.6284348964691162, + "num_input_tokens_seen": 760606408, + "step": 4400 + }, + { + "epoch": 1.6855610877058598, + "grad_norm": 112.42241183740752, + "learning_rate": 5e-06, + "loss": 1.4014, + "num_input_tokens_seen": 760779048, + "step": 4401 + }, + { + "epoch": 1.6855610877058598, + "loss": 1.5267062187194824, + "loss_ce": 0.23780015110969543, + "loss_iou": 0.5960673093795776, + "loss_num": 0.019287109375, + "loss_xval": 1.2889058589935303, + "num_input_tokens_seen": 760779048, + "step": 4401 + }, + { + "epoch": 1.6859440827269245, + "grad_norm": 143.74853706173124, + "learning_rate": 5e-06, + "loss": 1.4157, + "num_input_tokens_seen": 760951968, + "step": 4402 + }, + { + "epoch": 1.6859440827269245, + "loss": 1.2913093566894531, + "loss_ce": 0.1852709800004959, + "loss_iou": 0.5213266611099243, + "loss_num": 0.0126953125, + "loss_xval": 1.1060383319854736, + "num_input_tokens_seen": 760951968, + "step": 4402 + }, + { + "epoch": 1.6863270777479893, + "grad_norm": 138.56000546703723, + "learning_rate": 5e-06, + "loss": 1.6607, + "num_input_tokens_seen": 761125120, + "step": 4403 + }, + { + "epoch": 1.6863270777479893, + "loss": 1.567737102508545, + "loss_ce": 0.15642426908016205, + "loss_iou": 0.6509536504745483, + "loss_num": 0.0218505859375, + "loss_xval": 1.4113128185272217, + "num_input_tokens_seen": 761125120, + "step": 4403 + }, + { + "epoch": 1.686710072769054, + "grad_norm": 95.27413689996389, + "learning_rate": 5e-06, + "loss": 1.371, + "num_input_tokens_seen": 761297968, + "step": 4404 + }, + { + "epoch": 1.686710072769054, + "loss": 1.440436840057373, + "loss_ce": 0.19408228993415833, + "loss_iou": 0.5803000330924988, + "loss_num": 0.01708984375, + "loss_xval": 1.2463544607162476, + "num_input_tokens_seen": 761297968, + "step": 4404 + }, + { + "epoch": 1.6870930677901188, + "grad_norm": 103.479557156628, + "learning_rate": 5e-06, + "loss": 1.2875, + "num_input_tokens_seen": 761470936, + "step": 4405 + }, + { + "epoch": 1.6870930677901188, + "loss": 1.2585489749908447, + "loss_ce": 0.16511748731136322, + "loss_iou": 0.5093622207641602, + "loss_num": 0.01495361328125, + "loss_xval": 1.0934314727783203, + "num_input_tokens_seen": 761470936, + "step": 4405 + }, + { + "epoch": 1.6874760628111836, + "grad_norm": 347.94817848014566, + "learning_rate": 5e-06, + "loss": 1.6833, + "num_input_tokens_seen": 761643976, + "step": 4406 + }, + { + "epoch": 1.6874760628111836, + "loss": 1.9606256484985352, + "loss_ce": 0.1655784398317337, + "loss_iou": 0.8379838466644287, + "loss_num": 0.0238037109375, + "loss_xval": 1.7950472831726074, + "num_input_tokens_seen": 761643976, + "step": 4406 + }, + { + "epoch": 1.6878590578322483, + "grad_norm": 123.26309271138102, + "learning_rate": 5e-06, + "loss": 1.7184, + "num_input_tokens_seen": 761817392, + "step": 4407 + }, + { + "epoch": 1.6878590578322483, + "loss": 1.6759836673736572, + "loss_ce": 0.17374470829963684, + "loss_iou": 0.6895960569381714, + "loss_num": 0.024658203125, + "loss_xval": 1.5022389888763428, + "num_input_tokens_seen": 761817392, + "step": 4407 + }, + { + "epoch": 1.6882420528533129, + "grad_norm": 99.28764694934321, + "learning_rate": 5e-06, + "loss": 1.3062, + "num_input_tokens_seen": 761989928, + "step": 4408 + }, + { + "epoch": 1.6882420528533129, + "loss": 1.1667100191116333, + "loss_ce": 0.140516996383667, + "loss_iou": 0.47569724917411804, + "loss_num": 0.01495361328125, + "loss_xval": 1.0261931419372559, + "num_input_tokens_seen": 761989928, + "step": 4408 + }, + { + "epoch": 1.6886250478743776, + "grad_norm": 171.47767896132896, + "learning_rate": 5e-06, + "loss": 1.56, + "num_input_tokens_seen": 762162840, + "step": 4409 + }, + { + "epoch": 1.6886250478743776, + "loss": 1.5050292015075684, + "loss_ce": 0.1594279408454895, + "loss_iou": 0.6307169198989868, + "loss_num": 0.016845703125, + "loss_xval": 1.3456013202667236, + "num_input_tokens_seen": 762162840, + "step": 4409 + }, + { + "epoch": 1.6890080428954424, + "grad_norm": 121.89540539163043, + "learning_rate": 5e-06, + "loss": 1.5027, + "num_input_tokens_seen": 762335752, + "step": 4410 + }, + { + "epoch": 1.6890080428954424, + "loss": 1.3650343418121338, + "loss_ce": 0.13624507188796997, + "loss_iou": 0.574462354183197, + "loss_num": 0.0159912109375, + "loss_xval": 1.228789210319519, + "num_input_tokens_seen": 762335752, + "step": 4410 + }, + { + "epoch": 1.6893910379165071, + "grad_norm": 86.30101791986955, + "learning_rate": 5e-06, + "loss": 1.3677, + "num_input_tokens_seen": 762508736, + "step": 4411 + }, + { + "epoch": 1.6893910379165071, + "loss": 1.0739622116088867, + "loss_ce": 0.15682095289230347, + "loss_iou": 0.4264737367630005, + "loss_num": 0.0128173828125, + "loss_xval": 0.9171411991119385, + "num_input_tokens_seen": 762508736, + "step": 4411 + }, + { + "epoch": 1.6897740329375717, + "grad_norm": 130.31307203258814, + "learning_rate": 5e-06, + "loss": 1.3792, + "num_input_tokens_seen": 762681840, + "step": 4412 + }, + { + "epoch": 1.6897740329375717, + "loss": 1.4163744449615479, + "loss_ce": 0.16193100810050964, + "loss_iou": 0.5822387933731079, + "loss_num": 0.0179443359375, + "loss_xval": 1.2544434070587158, + "num_input_tokens_seen": 762681840, + "step": 4412 + }, + { + "epoch": 1.6901570279586364, + "grad_norm": 217.68686266869614, + "learning_rate": 5e-06, + "loss": 1.6925, + "num_input_tokens_seen": 762854712, + "step": 4413 + }, + { + "epoch": 1.6901570279586364, + "loss": 1.7711422443389893, + "loss_ce": 0.11256309598684311, + "loss_iou": 0.7717334032058716, + "loss_num": 0.0230712890625, + "loss_xval": 1.6585791110992432, + "num_input_tokens_seen": 762854712, + "step": 4413 + }, + { + "epoch": 1.6905400229797012, + "grad_norm": 219.31080060334318, + "learning_rate": 5e-06, + "loss": 1.9431, + "num_input_tokens_seen": 763027792, + "step": 4414 + }, + { + "epoch": 1.6905400229797012, + "loss": 1.839816689491272, + "loss_ce": 0.14460596442222595, + "loss_iou": 0.7895609140396118, + "loss_num": 0.023193359375, + "loss_xval": 1.6952106952667236, + "num_input_tokens_seen": 763027792, + "step": 4414 + }, + { + "epoch": 1.690923018000766, + "grad_norm": 111.31137192535812, + "learning_rate": 5e-06, + "loss": 1.3674, + "num_input_tokens_seen": 763197408, + "step": 4415 + }, + { + "epoch": 1.690923018000766, + "loss": 1.3934731483459473, + "loss_ce": 0.17336517572402954, + "loss_iou": 0.5678481459617615, + "loss_num": 0.016845703125, + "loss_xval": 1.220107913017273, + "num_input_tokens_seen": 763197408, + "step": 4415 + }, + { + "epoch": 1.6913060130218307, + "grad_norm": 199.10840495806485, + "learning_rate": 5e-06, + "loss": 1.4781, + "num_input_tokens_seen": 763370200, + "step": 4416 + }, + { + "epoch": 1.6913060130218307, + "loss": 1.648474931716919, + "loss_ce": 0.18365013599395752, + "loss_iou": 0.6617642045021057, + "loss_num": 0.0283203125, + "loss_xval": 1.4648247957229614, + "num_input_tokens_seen": 763370200, + "step": 4416 + }, + { + "epoch": 1.6916890080428955, + "grad_norm": 172.62679094094702, + "learning_rate": 5e-06, + "loss": 1.6154, + "num_input_tokens_seen": 763543024, + "step": 4417 + }, + { + "epoch": 1.6916890080428955, + "loss": 1.767137885093689, + "loss_ce": 0.20010000467300415, + "loss_iou": 0.731486439704895, + "loss_num": 0.020751953125, + "loss_xval": 1.56703782081604, + "num_input_tokens_seen": 763543024, + "step": 4417 + }, + { + "epoch": 1.6920720030639602, + "grad_norm": 155.6231546622594, + "learning_rate": 5e-06, + "loss": 1.5274, + "num_input_tokens_seen": 763715832, + "step": 4418 + }, + { + "epoch": 1.6920720030639602, + "loss": 1.490821361541748, + "loss_ce": 0.16336703300476074, + "loss_iou": 0.6236423254013062, + "loss_num": 0.0159912109375, + "loss_xval": 1.3274543285369873, + "num_input_tokens_seen": 763715832, + "step": 4418 + }, + { + "epoch": 1.692454998085025, + "grad_norm": 136.93419745344565, + "learning_rate": 5e-06, + "loss": 1.5552, + "num_input_tokens_seen": 763888800, + "step": 4419 + }, + { + "epoch": 1.692454998085025, + "loss": 1.453904628753662, + "loss_ce": 0.16904865205287933, + "loss_iou": 0.5940271615982056, + "loss_num": 0.0194091796875, + "loss_xval": 1.2848560810089111, + "num_input_tokens_seen": 763888800, + "step": 4419 + }, + { + "epoch": 1.6928379931060897, + "grad_norm": 119.27385611344276, + "learning_rate": 5e-06, + "loss": 1.4746, + "num_input_tokens_seen": 764061912, + "step": 4420 + }, + { + "epoch": 1.6928379931060897, + "loss": 1.4965870380401611, + "loss_ce": 0.16408222913742065, + "loss_iou": 0.6219103336334229, + "loss_num": 0.0177001953125, + "loss_xval": 1.3325047492980957, + "num_input_tokens_seen": 764061912, + "step": 4420 + }, + { + "epoch": 1.6932209881271545, + "grad_norm": 148.22914127729123, + "learning_rate": 5e-06, + "loss": 1.2519, + "num_input_tokens_seen": 764234864, + "step": 4421 + }, + { + "epoch": 1.6932209881271545, + "loss": 1.1870217323303223, + "loss_ce": 0.1505342274904251, + "loss_iou": 0.48453712463378906, + "loss_num": 0.01348876953125, + "loss_xval": 1.0364875793457031, + "num_input_tokens_seen": 764234864, + "step": 4421 + }, + { + "epoch": 1.693603983148219, + "grad_norm": 217.42243318030833, + "learning_rate": 5e-06, + "loss": 1.4346, + "num_input_tokens_seen": 764407416, + "step": 4422 + }, + { + "epoch": 1.693603983148219, + "loss": 1.5068445205688477, + "loss_ce": 0.1651231348514557, + "loss_iou": 0.6330951452255249, + "loss_num": 0.01513671875, + "loss_xval": 1.3417212963104248, + "num_input_tokens_seen": 764407416, + "step": 4422 + }, + { + "epoch": 1.6939869781692838, + "grad_norm": 144.58956498694056, + "learning_rate": 5e-06, + "loss": 1.5736, + "num_input_tokens_seen": 764580328, + "step": 4423 + }, + { + "epoch": 1.6939869781692838, + "loss": 1.440114974975586, + "loss_ce": 0.17704446613788605, + "loss_iou": 0.5746352672576904, + "loss_num": 0.022705078125, + "loss_xval": 1.2630705833435059, + "num_input_tokens_seen": 764580328, + "step": 4423 + }, + { + "epoch": 1.6943699731903485, + "grad_norm": 144.27529345784765, + "learning_rate": 5e-06, + "loss": 1.1634, + "num_input_tokens_seen": 764753280, + "step": 4424 + }, + { + "epoch": 1.6943699731903485, + "loss": 1.0947041511535645, + "loss_ce": 0.1289558708667755, + "loss_iou": -2.2310650564016837e+32, + "loss_num": 0.0108642578125, + "loss_xval": -1.5687176177824339e+31, + "num_input_tokens_seen": 764753280, + "step": 4424 + }, + { + "epoch": 1.6947529682114133, + "grad_norm": 133.67172424205728, + "learning_rate": 5e-06, + "loss": 1.4623, + "num_input_tokens_seen": 764926280, + "step": 4425 + }, + { + "epoch": 1.6947529682114133, + "loss": 1.499297857284546, + "loss_ce": 0.19930747151374817, + "loss_iou": 0.6100171804428101, + "loss_num": 0.0159912109375, + "loss_xval": 1.2999904155731201, + "num_input_tokens_seen": 764926280, + "step": 4425 + }, + { + "epoch": 1.6951359632324778, + "grad_norm": 107.91597408181849, + "learning_rate": 5e-06, + "loss": 1.1663, + "num_input_tokens_seen": 765098848, + "step": 4426 + }, + { + "epoch": 1.6951359632324778, + "loss": 1.2694299221038818, + "loss_ce": 0.13105368614196777, + "loss_iou": 0.5242815017700195, + "loss_num": 0.0179443359375, + "loss_xval": 1.138376235961914, + "num_input_tokens_seen": 765098848, + "step": 4426 + }, + { + "epoch": 1.6955189582535426, + "grad_norm": 147.9407199283946, + "learning_rate": 5e-06, + "loss": 1.6608, + "num_input_tokens_seen": 765271832, + "step": 4427 + }, + { + "epoch": 1.6955189582535426, + "loss": 1.682665228843689, + "loss_ce": 0.16860201954841614, + "loss_iou": 0.6825686693191528, + "loss_num": 0.02978515625, + "loss_xval": 1.5140631198883057, + "num_input_tokens_seen": 765271832, + "step": 4427 + }, + { + "epoch": 1.6959019532746074, + "grad_norm": 96.45699860235075, + "learning_rate": 5e-06, + "loss": 1.2963, + "num_input_tokens_seen": 765444376, + "step": 4428 + }, + { + "epoch": 1.6959019532746074, + "loss": 1.4512474536895752, + "loss_ce": 0.110992431640625, + "loss_iou": 0.6127086877822876, + "loss_num": 0.02294921875, + "loss_xval": 1.3402550220489502, + "num_input_tokens_seen": 765444376, + "step": 4428 + }, + { + "epoch": 1.6962849482956721, + "grad_norm": 111.69842313932696, + "learning_rate": 5e-06, + "loss": 1.3578, + "num_input_tokens_seen": 765617160, + "step": 4429 + }, + { + "epoch": 1.6962849482956721, + "loss": 1.37142014503479, + "loss_ce": 0.12713995575904846, + "loss_iou": 0.5754330158233643, + "loss_num": 0.0186767578125, + "loss_xval": 1.2442803382873535, + "num_input_tokens_seen": 765617160, + "step": 4429 + }, + { + "epoch": 1.6966679433167369, + "grad_norm": 126.36557345297781, + "learning_rate": 5e-06, + "loss": 1.1393, + "num_input_tokens_seen": 765789968, + "step": 4430 + }, + { + "epoch": 1.6966679433167369, + "loss": 1.105392575263977, + "loss_ce": 0.14927037060260773, + "loss_iou": 0.44925248622894287, + "loss_num": 0.01153564453125, + "loss_xval": 0.9561222195625305, + "num_input_tokens_seen": 765789968, + "step": 4430 + }, + { + "epoch": 1.6970509383378016, + "grad_norm": 205.77526077065198, + "learning_rate": 5e-06, + "loss": 1.8327, + "num_input_tokens_seen": 765962976, + "step": 4431 + }, + { + "epoch": 1.6970509383378016, + "loss": 1.962249517440796, + "loss_ce": 0.14602002501487732, + "loss_iou": 0.8052705526351929, + "loss_num": 0.041015625, + "loss_xval": 1.8162295818328857, + "num_input_tokens_seen": 765962976, + "step": 4431 + }, + { + "epoch": 1.6974339333588664, + "grad_norm": 150.59541879231966, + "learning_rate": 5e-06, + "loss": 1.1428, + "num_input_tokens_seen": 766135560, + "step": 4432 + }, + { + "epoch": 1.6974339333588664, + "loss": 1.0464577674865723, + "loss_ce": 0.1264677792787552, + "loss_iou": -1011550697553920.0, + "loss_num": 0.01708984375, + "loss_xval": -963172185931776.0, + "num_input_tokens_seen": 766135560, + "step": 4432 + }, + { + "epoch": 1.6978169283799311, + "grad_norm": 119.31109674841161, + "learning_rate": 5e-06, + "loss": 1.7125, + "num_input_tokens_seen": 766308248, + "step": 4433 + }, + { + "epoch": 1.6978169283799311, + "loss": 1.832090973854065, + "loss_ce": 0.17394044995307922, + "loss_iou": 0.7552837133407593, + "loss_num": 0.029541015625, + "loss_xval": 1.6581504344940186, + "num_input_tokens_seen": 766308248, + "step": 4433 + }, + { + "epoch": 1.698199923400996, + "grad_norm": 109.41324524999341, + "learning_rate": 5e-06, + "loss": 1.8317, + "num_input_tokens_seen": 766481184, + "step": 4434 + }, + { + "epoch": 1.698199923400996, + "loss": 1.5660760402679443, + "loss_ce": 0.17505106329917908, + "loss_iou": 0.6377580165863037, + "loss_num": 0.0230712890625, + "loss_xval": 1.3832430839538574, + "num_input_tokens_seen": 766481184, + "step": 4434 + }, + { + "epoch": 1.6985829184220607, + "grad_norm": 128.3683062068298, + "learning_rate": 5e-06, + "loss": 1.2891, + "num_input_tokens_seen": 766653712, + "step": 4435 + }, + { + "epoch": 1.6985829184220607, + "loss": 1.3084986209869385, + "loss_ce": 0.15213462710380554, + "loss_iou": 0.5399739742279053, + "loss_num": 0.0152587890625, + "loss_xval": 1.1563639640808105, + "num_input_tokens_seen": 766653712, + "step": 4435 + }, + { + "epoch": 1.6989659134431252, + "grad_norm": 147.53416318551177, + "learning_rate": 5e-06, + "loss": 1.5314, + "num_input_tokens_seen": 766826512, + "step": 4436 + }, + { + "epoch": 1.6989659134431252, + "loss": 1.686354160308838, + "loss_ce": 0.16853588819503784, + "loss_iou": 0.6877726316452026, + "loss_num": 0.0284423828125, + "loss_xval": 1.5178182125091553, + "num_input_tokens_seen": 766826512, + "step": 4436 + }, + { + "epoch": 1.69934890846419, + "grad_norm": 128.1561609852027, + "learning_rate": 5e-06, + "loss": 1.4629, + "num_input_tokens_seen": 766999496, + "step": 4437 + }, + { + "epoch": 1.69934890846419, + "loss": 1.5417002439498901, + "loss_ce": 0.16466131806373596, + "loss_iou": 0.6501741409301758, + "loss_num": 0.01531982421875, + "loss_xval": 1.3770389556884766, + "num_input_tokens_seen": 766999496, + "step": 4437 + }, + { + "epoch": 1.6997319034852547, + "grad_norm": 147.28453253910888, + "learning_rate": 5e-06, + "loss": 1.6051, + "num_input_tokens_seen": 767172384, + "step": 4438 + }, + { + "epoch": 1.6997319034852547, + "loss": 1.5956776142120361, + "loss_ce": 0.1512976437807083, + "loss_iou": 0.6851721405982971, + "loss_num": 0.01483154296875, + "loss_xval": 1.4443799257278442, + "num_input_tokens_seen": 767172384, + "step": 4438 + }, + { + "epoch": 1.7001148985063195, + "grad_norm": 127.73815537430345, + "learning_rate": 5e-06, + "loss": 1.8774, + "num_input_tokens_seen": 767345432, + "step": 4439 + }, + { + "epoch": 1.7001148985063195, + "loss": 1.9447025060653687, + "loss_ce": 0.19345439970493317, + "loss_iou": 0.7954543828964233, + "loss_num": 0.0322265625, + "loss_xval": 1.7512481212615967, + "num_input_tokens_seen": 767345432, + "step": 4439 + }, + { + "epoch": 1.700497893527384, + "grad_norm": 146.03481917365204, + "learning_rate": 5e-06, + "loss": 1.3528, + "num_input_tokens_seen": 767518664, + "step": 4440 + }, + { + "epoch": 1.700497893527384, + "loss": 1.2968599796295166, + "loss_ce": 0.15924659371376038, + "loss_iou": 0.5294238328933716, + "loss_num": 0.0157470703125, + "loss_xval": 1.1376135349273682, + "num_input_tokens_seen": 767518664, + "step": 4440 + }, + { + "epoch": 1.7008808885484488, + "grad_norm": 174.71735126736093, + "learning_rate": 5e-06, + "loss": 1.4372, + "num_input_tokens_seen": 767691856, + "step": 4441 + }, + { + "epoch": 1.7008808885484488, + "loss": 1.5270781517028809, + "loss_ce": 0.17142990231513977, + "loss_iou": 0.627073347568512, + "loss_num": 0.020263671875, + "loss_xval": 1.3556482791900635, + "num_input_tokens_seen": 767691856, + "step": 4441 + }, + { + "epoch": 1.7012638835695135, + "grad_norm": 142.54225699355837, + "learning_rate": 5e-06, + "loss": 1.8282, + "num_input_tokens_seen": 767864664, + "step": 4442 + }, + { + "epoch": 1.7012638835695135, + "loss": 1.8250694274902344, + "loss_ce": 0.1725281924009323, + "loss_iou": 0.7562022805213928, + "loss_num": 0.028076171875, + "loss_xval": 1.6525412797927856, + "num_input_tokens_seen": 767864664, + "step": 4442 + }, + { + "epoch": 1.7016468785905783, + "grad_norm": 190.05889194422352, + "learning_rate": 5e-06, + "loss": 1.4428, + "num_input_tokens_seen": 768034504, + "step": 4443 + }, + { + "epoch": 1.7016468785905783, + "loss": 1.4877607822418213, + "loss_ce": 0.17869636416435242, + "loss_iou": 0.6047275066375732, + "loss_num": 0.0198974609375, + "loss_xval": 1.3090643882751465, + "num_input_tokens_seen": 768034504, + "step": 4443 + }, + { + "epoch": 1.702029873611643, + "grad_norm": 220.0245345181745, + "learning_rate": 5e-06, + "loss": 1.4822, + "num_input_tokens_seen": 768207584, + "step": 4444 + }, + { + "epoch": 1.702029873611643, + "loss": 1.5720257759094238, + "loss_ce": 0.14049610495567322, + "loss_iou": 0.6731317043304443, + "loss_num": 0.01708984375, + "loss_xval": 1.4315295219421387, + "num_input_tokens_seen": 768207584, + "step": 4444 + }, + { + "epoch": 1.7024128686327078, + "grad_norm": 163.44446468669128, + "learning_rate": 5e-06, + "loss": 1.454, + "num_input_tokens_seen": 768380576, + "step": 4445 + }, + { + "epoch": 1.7024128686327078, + "loss": 1.4086887836456299, + "loss_ce": 0.16745227575302124, + "loss_iou": 0.5547307729721069, + "loss_num": 0.0263671875, + "loss_xval": 1.2412365674972534, + "num_input_tokens_seen": 768380576, + "step": 4445 + }, + { + "epoch": 1.7027958636537726, + "grad_norm": 177.0732586462458, + "learning_rate": 5e-06, + "loss": 1.5041, + "num_input_tokens_seen": 768553848, + "step": 4446 + }, + { + "epoch": 1.7027958636537726, + "loss": 1.3350529670715332, + "loss_ce": 0.16157157719135284, + "loss_iou": 0.5390570163726807, + "loss_num": 0.01904296875, + "loss_xval": 1.1734814643859863, + "num_input_tokens_seen": 768553848, + "step": 4446 + }, + { + "epoch": 1.7031788586748373, + "grad_norm": 324.704177597589, + "learning_rate": 5e-06, + "loss": 1.5901, + "num_input_tokens_seen": 768727168, + "step": 4447 + }, + { + "epoch": 1.7031788586748373, + "loss": 1.524158239364624, + "loss_ce": 0.19488538801670074, + "loss_iou": 0.617181658744812, + "loss_num": 0.01904296875, + "loss_xval": 1.329272985458374, + "num_input_tokens_seen": 768727168, + "step": 4447 + }, + { + "epoch": 1.703561853695902, + "grad_norm": 149.51563464904, + "learning_rate": 5e-06, + "loss": 1.3408, + "num_input_tokens_seen": 768900200, + "step": 4448 + }, + { + "epoch": 1.703561853695902, + "loss": 1.549727439880371, + "loss_ce": 0.19345209002494812, + "loss_iou": 0.631743311882019, + "loss_num": 0.0185546875, + "loss_xval": 1.3562753200531006, + "num_input_tokens_seen": 768900200, + "step": 4448 + }, + { + "epoch": 1.7039448487169668, + "grad_norm": 173.92162894221724, + "learning_rate": 5e-06, + "loss": 1.3456, + "num_input_tokens_seen": 769073760, + "step": 4449 + }, + { + "epoch": 1.7039448487169668, + "loss": 1.4524028301239014, + "loss_ce": 0.2201455682516098, + "loss_iou": 0.5847184062004089, + "loss_num": 0.0125732421875, + "loss_xval": 1.2322572469711304, + "num_input_tokens_seen": 769073760, + "step": 4449 + }, + { + "epoch": 1.7043278437380314, + "grad_norm": 111.37129237785055, + "learning_rate": 5e-06, + "loss": 2.125, + "num_input_tokens_seen": 769246672, + "step": 4450 + }, + { + "epoch": 1.7043278437380314, + "loss": 2.1455016136169434, + "loss_ce": 0.19774608314037323, + "loss_iou": 0.9000557661056519, + "loss_num": 0.029541015625, + "loss_xval": 1.9477556943893433, + "num_input_tokens_seen": 769246672, + "step": 4450 + }, + { + "epoch": 1.7047108387590961, + "grad_norm": 119.46190874424306, + "learning_rate": 5e-06, + "loss": 1.5371, + "num_input_tokens_seen": 769419440, + "step": 4451 + }, + { + "epoch": 1.7047108387590961, + "loss": 1.6254570484161377, + "loss_ce": 0.20357799530029297, + "loss_iou": 0.6606770753860474, + "loss_num": 0.0201416015625, + "loss_xval": 1.4218790531158447, + "num_input_tokens_seen": 769419440, + "step": 4451 + }, + { + "epoch": 1.7050938337801609, + "grad_norm": 178.46957738083788, + "learning_rate": 5e-06, + "loss": 1.4418, + "num_input_tokens_seen": 769592536, + "step": 4452 + }, + { + "epoch": 1.7050938337801609, + "loss": 1.4509649276733398, + "loss_ce": 0.20302559435367584, + "loss_iou": 0.5866924524307251, + "loss_num": 0.014892578125, + "loss_xval": 1.2479393482208252, + "num_input_tokens_seen": 769592536, + "step": 4452 + }, + { + "epoch": 1.7054768288012254, + "grad_norm": 122.64579033541503, + "learning_rate": 5e-06, + "loss": 1.6957, + "num_input_tokens_seen": 769765208, + "step": 4453 + }, + { + "epoch": 1.7054768288012254, + "loss": 1.6926445960998535, + "loss_ce": 0.16972097754478455, + "loss_iou": 0.7031121253967285, + "loss_num": 0.0233154296875, + "loss_xval": 1.522923469543457, + "num_input_tokens_seen": 769765208, + "step": 4453 + }, + { + "epoch": 1.7058598238222902, + "grad_norm": 117.43935994232162, + "learning_rate": 5e-06, + "loss": 1.2035, + "num_input_tokens_seen": 769938488, + "step": 4454 + }, + { + "epoch": 1.7058598238222902, + "loss": 1.2287380695343018, + "loss_ce": 0.17198912799358368, + "loss_iou": 0.49802476167678833, + "loss_num": 0.01214599609375, + "loss_xval": 1.0567489862442017, + "num_input_tokens_seen": 769938488, + "step": 4454 + }, + { + "epoch": 1.706242818843355, + "grad_norm": 171.4136323693914, + "learning_rate": 5e-06, + "loss": 1.4278, + "num_input_tokens_seen": 770111528, + "step": 4455 + }, + { + "epoch": 1.706242818843355, + "loss": 1.3430722951889038, + "loss_ce": 0.17970164120197296, + "loss_iou": 0.5561878681182861, + "loss_num": 0.01019287109375, + "loss_xval": 1.1633706092834473, + "num_input_tokens_seen": 770111528, + "step": 4455 + }, + { + "epoch": 1.7066258138644197, + "grad_norm": 112.23669738981084, + "learning_rate": 5e-06, + "loss": 1.3412, + "num_input_tokens_seen": 770284456, + "step": 4456 + }, + { + "epoch": 1.7066258138644197, + "loss": 1.4289932250976562, + "loss_ce": 0.1448153406381607, + "loss_iou": 0.5987387299537659, + "loss_num": 0.017333984375, + "loss_xval": 1.2841778993606567, + "num_input_tokens_seen": 770284456, + "step": 4456 + }, + { + "epoch": 1.7070088088854845, + "grad_norm": 159.1568544580478, + "learning_rate": 5e-06, + "loss": 1.6756, + "num_input_tokens_seen": 770457600, + "step": 4457 + }, + { + "epoch": 1.7070088088854845, + "loss": 1.526301383972168, + "loss_ce": 0.15513348579406738, + "loss_iou": 0.6388309597969055, + "loss_num": 0.0186767578125, + "loss_xval": 1.371167778968811, + "num_input_tokens_seen": 770457600, + "step": 4457 + }, + { + "epoch": 1.7073918039065492, + "grad_norm": 119.78269604509411, + "learning_rate": 5e-06, + "loss": 1.3578, + "num_input_tokens_seen": 770630944, + "step": 4458 + }, + { + "epoch": 1.7073918039065492, + "loss": 1.4750096797943115, + "loss_ce": 0.20000287890434265, + "loss_iou": 0.6068332195281982, + "loss_num": 0.01226806640625, + "loss_xval": 1.2750067710876465, + "num_input_tokens_seen": 770630944, + "step": 4458 + }, + { + "epoch": 1.707774798927614, + "grad_norm": 174.9362652954177, + "learning_rate": 5e-06, + "loss": 2.8323, + "num_input_tokens_seen": 770803792, + "step": 4459 + }, + { + "epoch": 1.707774798927614, + "loss": 3.0283281803131104, + "loss_ce": 0.21793930232524872, + "loss_iou": 1.3216679096221924, + "loss_num": 0.033447265625, + "loss_xval": 2.8103890419006348, + "num_input_tokens_seen": 770803792, + "step": 4459 + }, + { + "epoch": 1.7081577939486787, + "grad_norm": 217.87076352949362, + "learning_rate": 5e-06, + "loss": 1.7368, + "num_input_tokens_seen": 770976568, + "step": 4460 + }, + { + "epoch": 1.7081577939486787, + "loss": 1.71844482421875, + "loss_ce": 0.1270090639591217, + "loss_iou": 0.7199426889419556, + "loss_num": 0.0302734375, + "loss_xval": 1.5914356708526611, + "num_input_tokens_seen": 770976568, + "step": 4460 + }, + { + "epoch": 1.7085407889697435, + "grad_norm": 206.31400409004686, + "learning_rate": 5e-06, + "loss": 1.8282, + "num_input_tokens_seen": 771149464, + "step": 4461 + }, + { + "epoch": 1.7085407889697435, + "loss": 1.7612568140029907, + "loss_ce": 0.13727818429470062, + "loss_iou": 0.774467945098877, + "loss_num": 0.0150146484375, + "loss_xval": 1.623978614807129, + "num_input_tokens_seen": 771149464, + "step": 4461 + }, + { + "epoch": 1.7089237839908082, + "grad_norm": 75.67437294320627, + "learning_rate": 5e-06, + "loss": 3.3843, + "num_input_tokens_seen": 771322536, + "step": 4462 + }, + { + "epoch": 1.7089237839908082, + "loss": 3.3501906394958496, + "loss_ce": 0.1202506422996521, + "loss_iou": 1.4440104961395264, + "loss_num": 0.068359375, + "loss_xval": 3.2066245079040527, + "num_input_tokens_seen": 771322536, + "step": 4462 + }, + { + "epoch": 1.709306779011873, + "grad_norm": 51.130343034001655, + "learning_rate": 5e-06, + "loss": 2.3109, + "num_input_tokens_seen": 771495240, + "step": 4463 + }, + { + "epoch": 1.709306779011873, + "loss": 2.279101848602295, + "loss_ce": 0.1349378526210785, + "loss_iou": 0.9278560280799866, + "loss_num": 0.0576171875, + "loss_xval": 2.1441640853881836, + "num_input_tokens_seen": 771495240, + "step": 4463 + }, + { + "epoch": 1.7096897740329375, + "grad_norm": 207.40094931787996, + "learning_rate": 5e-06, + "loss": 2.0242, + "num_input_tokens_seen": 771668448, + "step": 4464 + }, + { + "epoch": 1.7096897740329375, + "loss": 2.151801586151123, + "loss_ce": 0.12937594950199127, + "loss_iou": 0.9341254830360413, + "loss_num": 0.0308837890625, + "loss_xval": 2.022425889968872, + "num_input_tokens_seen": 771668448, + "step": 4464 + }, + { + "epoch": 1.7100727690540023, + "grad_norm": 31.24866368096372, + "learning_rate": 5e-06, + "loss": 3.8595, + "num_input_tokens_seen": 771841696, + "step": 4465 + }, + { + "epoch": 1.7100727690540023, + "loss": 3.599266767501831, + "loss_ce": 0.1363958716392517, + "loss_iou": 1.5269067287445068, + "loss_num": 0.08154296875, + "loss_xval": 3.4512743949890137, + "num_input_tokens_seen": 771841696, + "step": 4465 + }, + { + "epoch": 1.710455764075067, + "grad_norm": 69.05065205129283, + "learning_rate": 5e-06, + "loss": 3.5328, + "num_input_tokens_seen": 772014960, + "step": 4466 + }, + { + "epoch": 1.710455764075067, + "loss": 3.5418059825897217, + "loss_ce": 0.1020163744688034, + "loss_iou": 1.5677952766418457, + "loss_num": 0.060791015625, + "loss_xval": 3.4397897720336914, + "num_input_tokens_seen": 772014960, + "step": 4466 + }, + { + "epoch": 1.7108387590961316, + "grad_norm": 84.27304567971493, + "learning_rate": 5e-06, + "loss": 2.3251, + "num_input_tokens_seen": 772188136, + "step": 4467 + }, + { + "epoch": 1.7108387590961316, + "loss": 2.3423097133636475, + "loss_ce": 0.03949767351150513, + "loss_iou": 0.9586570262908936, + "loss_num": 0.0771484375, + "loss_xval": 2.302812099456787, + "num_input_tokens_seen": 772188136, + "step": 4467 + }, + { + "epoch": 1.7112217541171963, + "grad_norm": 82.79243727200686, + "learning_rate": 5e-06, + "loss": 1.6614, + "num_input_tokens_seen": 772360792, + "step": 4468 + }, + { + "epoch": 1.7112217541171963, + "loss": 1.734809398651123, + "loss_ce": 0.02447887510061264, + "loss_iou": 0.7800614833831787, + "loss_num": 0.030029296875, + "loss_xval": 1.7103304862976074, + "num_input_tokens_seen": 772360792, + "step": 4468 + }, + { + "epoch": 1.711604749138261, + "grad_norm": 186.4089718887717, + "learning_rate": 5e-06, + "loss": 1.9182, + "num_input_tokens_seen": 772533304, + "step": 4469 + }, + { + "epoch": 1.711604749138261, + "loss": 1.9668108224868774, + "loss_ce": 0.02520131692290306, + "loss_iou": 0.894724428653717, + "loss_num": 0.030517578125, + "loss_xval": 1.9416093826293945, + "num_input_tokens_seen": 772533304, + "step": 4469 + }, + { + "epoch": 1.7119877441593259, + "grad_norm": 110.13222302725462, + "learning_rate": 5e-06, + "loss": 2.0086, + "num_input_tokens_seen": 772706024, + "step": 4470 + }, + { + "epoch": 1.7119877441593259, + "loss": 2.069946527481079, + "loss_ce": 0.0550527423620224, + "loss_iou": 0.896729052066803, + "loss_num": 0.044189453125, + "loss_xval": 2.0148935317993164, + "num_input_tokens_seen": 772706024, + "step": 4470 + }, + { + "epoch": 1.7123707391803906, + "grad_norm": 181.69641199426349, + "learning_rate": 5e-06, + "loss": 2.0872, + "num_input_tokens_seen": 772879144, + "step": 4471 + }, + { + "epoch": 1.7123707391803906, + "loss": 1.9669781923294067, + "loss_ce": 0.03427477926015854, + "loss_iou": 0.8851749300956726, + "loss_num": 0.032470703125, + "loss_xval": 1.9327033758163452, + "num_input_tokens_seen": 772879144, + "step": 4471 + }, + { + "epoch": 1.7127537342014554, + "grad_norm": 114.76042625727442, + "learning_rate": 5e-06, + "loss": 2.89, + "num_input_tokens_seen": 773052304, + "step": 4472 + }, + { + "epoch": 1.7127537342014554, + "loss": 2.91530704498291, + "loss_ce": 0.0744759738445282, + "loss_iou": 1.0402885675430298, + "loss_num": 0.15234375, + "loss_xval": 2.8408310413360596, + "num_input_tokens_seen": 773052304, + "step": 4472 + }, + { + "epoch": 1.7131367292225201, + "grad_norm": 42.576118664516486, + "learning_rate": 5e-06, + "loss": 2.3271, + "num_input_tokens_seen": 773224992, + "step": 4473 + }, + { + "epoch": 1.7131367292225201, + "loss": 2.4316883087158203, + "loss_ce": 0.0571332685649395, + "loss_iou": 1.0326144695281982, + "loss_num": 0.061767578125, + "loss_xval": 2.3745551109313965, + "num_input_tokens_seen": 773224992, + "step": 4473 + }, + { + "epoch": 1.713519724243585, + "grad_norm": 93.32687384805274, + "learning_rate": 5e-06, + "loss": 1.6449, + "num_input_tokens_seen": 773397960, + "step": 4474 + }, + { + "epoch": 1.713519724243585, + "loss": 1.6859182119369507, + "loss_ce": 0.07083331048488617, + "loss_iou": 0.7422043085098267, + "loss_num": 0.026123046875, + "loss_xval": 1.6150848865509033, + "num_input_tokens_seen": 773397960, + "step": 4474 + }, + { + "epoch": 1.7139027192646497, + "grad_norm": 192.48370805098128, + "learning_rate": 5e-06, + "loss": 3.3229, + "num_input_tokens_seen": 773570760, + "step": 4475 + }, + { + "epoch": 1.7139027192646497, + "loss": 3.3088417053222656, + "loss_ce": 0.07787613570690155, + "loss_iou": 1.5147746801376343, + "loss_num": 0.040283203125, + "loss_xval": 3.2309653759002686, + "num_input_tokens_seen": 773570760, + "step": 4475 + }, + { + "epoch": 1.7142857142857144, + "grad_norm": 135.82201524103337, + "learning_rate": 5e-06, + "loss": 2.8948, + "num_input_tokens_seen": 773743496, + "step": 4476 + }, + { + "epoch": 1.7142857142857144, + "loss": 2.903146266937256, + "loss_ce": 0.08123888820409775, + "loss_iou": 1.2310221195220947, + "loss_num": 0.072265625, + "loss_xval": 2.8219075202941895, + "num_input_tokens_seen": 773743496, + "step": 4476 + }, + { + "epoch": 1.7146687093067792, + "grad_norm": 228.59322919300953, + "learning_rate": 5e-06, + "loss": 2.7464, + "num_input_tokens_seen": 773916120, + "step": 4477 + }, + { + "epoch": 1.7146687093067792, + "loss": 2.861143112182617, + "loss_ce": 0.059802476316690445, + "loss_iou": 0.9922231435775757, + "loss_num": 0.1630859375, + "loss_xval": 2.8013405799865723, + "num_input_tokens_seen": 773916120, + "step": 4477 + }, + { + "epoch": 1.7150517043278437, + "grad_norm": 116.40818586712737, + "learning_rate": 5e-06, + "loss": 1.8228, + "num_input_tokens_seen": 774089200, + "step": 4478 + }, + { + "epoch": 1.7150517043278437, + "loss": 1.9288240671157837, + "loss_ce": 0.08956010639667511, + "loss_iou": 0.8184051513671875, + "loss_num": 0.04052734375, + "loss_xval": 1.839263916015625, + "num_input_tokens_seen": 774089200, + "step": 4478 + }, + { + "epoch": 1.7154346993489085, + "grad_norm": 230.81461555684135, + "learning_rate": 5e-06, + "loss": 2.1421, + "num_input_tokens_seen": 774262152, + "step": 4479 + }, + { + "epoch": 1.7154346993489085, + "loss": 2.154937267303467, + "loss_ce": 0.08769381791353226, + "loss_iou": 0.9814976453781128, + "loss_num": 0.0208740234375, + "loss_xval": 2.0672433376312256, + "num_input_tokens_seen": 774262152, + "step": 4479 + }, + { + "epoch": 1.7158176943699732, + "grad_norm": 148.6980282717763, + "learning_rate": 5e-06, + "loss": 2.4154, + "num_input_tokens_seen": 774435080, + "step": 4480 + }, + { + "epoch": 1.7158176943699732, + "loss": 2.1077704429626465, + "loss_ce": 0.0960702896118164, + "loss_iou": 0.908865213394165, + "loss_num": 0.038818359375, + "loss_xval": 2.01170015335083, + "num_input_tokens_seen": 774435080, + "step": 4480 + }, + { + "epoch": 1.7162006893910378, + "grad_norm": 252.26938985562964, + "learning_rate": 5e-06, + "loss": 2.7989, + "num_input_tokens_seen": 774608088, + "step": 4481 + }, + { + "epoch": 1.7162006893910378, + "loss": 3.045273542404175, + "loss_ce": 0.06512296199798584, + "loss_iou": 1.2238399982452393, + "loss_num": 0.1064453125, + "loss_xval": 2.9801506996154785, + "num_input_tokens_seen": 774608088, + "step": 4481 + }, + { + "epoch": 1.7165836844121025, + "grad_norm": 85.5479843529618, + "learning_rate": 5e-06, + "loss": 2.2, + "num_input_tokens_seen": 774781128, + "step": 4482 + }, + { + "epoch": 1.7165836844121025, + "loss": 2.168067693710327, + "loss_ce": 0.13942614197731018, + "loss_iou": 0.8768696784973145, + "loss_num": 0.054931640625, + "loss_xval": 2.028641700744629, + "num_input_tokens_seen": 774781128, + "step": 4482 + }, + { + "epoch": 1.7169666794331673, + "grad_norm": 74.43292598569569, + "learning_rate": 5e-06, + "loss": 1.8072, + "num_input_tokens_seen": 774954192, + "step": 4483 + }, + { + "epoch": 1.7169666794331673, + "loss": 1.8323304653167725, + "loss_ce": 0.11497731506824493, + "loss_iou": 0.796664834022522, + "loss_num": 0.0247802734375, + "loss_xval": 1.717353105545044, + "num_input_tokens_seen": 774954192, + "step": 4483 + }, + { + "epoch": 1.717349674454232, + "grad_norm": 105.396020354874, + "learning_rate": 5e-06, + "loss": 1.5868, + "num_input_tokens_seen": 775127248, + "step": 4484 + }, + { + "epoch": 1.717349674454232, + "loss": 1.6237494945526123, + "loss_ce": 0.0984693318605423, + "loss_iou": 0.7000790238380432, + "loss_num": 0.0250244140625, + "loss_xval": 1.5252801179885864, + "num_input_tokens_seen": 775127248, + "step": 4484 + }, + { + "epoch": 1.7177326694752968, + "grad_norm": 143.44924050936237, + "learning_rate": 5e-06, + "loss": 2.091, + "num_input_tokens_seen": 775300320, + "step": 4485 + }, + { + "epoch": 1.7177326694752968, + "loss": 2.083596706390381, + "loss_ce": 0.09264512360095978, + "loss_iou": 0.9233322143554688, + "loss_num": 0.02880859375, + "loss_xval": 1.9909515380859375, + "num_input_tokens_seen": 775300320, + "step": 4485 + }, + { + "epoch": 1.7181156644963615, + "grad_norm": 98.5734193934433, + "learning_rate": 5e-06, + "loss": 1.6979, + "num_input_tokens_seen": 775473160, + "step": 4486 + }, + { + "epoch": 1.7181156644963615, + "loss": 1.7515714168548584, + "loss_ce": 0.06850959360599518, + "loss_iou": 0.7677699327468872, + "loss_num": 0.029541015625, + "loss_xval": 1.6830618381500244, + "num_input_tokens_seen": 775473160, + "step": 4486 + }, + { + "epoch": 1.7184986595174263, + "grad_norm": 134.16194814590582, + "learning_rate": 5e-06, + "loss": 2.0556, + "num_input_tokens_seen": 775646392, + "step": 4487 + }, + { + "epoch": 1.7184986595174263, + "loss": 2.051379919052124, + "loss_ce": 0.0793566182255745, + "loss_iou": 0.9005929231643677, + "loss_num": 0.0341796875, + "loss_xval": 1.9720232486724854, + "num_input_tokens_seen": 775646392, + "step": 4487 + }, + { + "epoch": 1.718881654538491, + "grad_norm": 101.5943321708956, + "learning_rate": 5e-06, + "loss": 2.0247, + "num_input_tokens_seen": 775818960, + "step": 4488 + }, + { + "epoch": 1.718881654538491, + "loss": 2.083380699157715, + "loss_ce": 0.08968771994113922, + "loss_iou": 0.8956502676010132, + "loss_num": 0.04052734375, + "loss_xval": 1.9936931133270264, + "num_input_tokens_seen": 775818960, + "step": 4488 + }, + { + "epoch": 1.7192646495595558, + "grad_norm": 73.59504718495192, + "learning_rate": 5e-06, + "loss": 1.5223, + "num_input_tokens_seen": 775992016, + "step": 4489 + }, + { + "epoch": 1.7192646495595558, + "loss": 1.4159590005874634, + "loss_ce": 0.08734661340713501, + "loss_iou": 0.5951838493347168, + "loss_num": 0.027587890625, + "loss_xval": 1.3286123275756836, + "num_input_tokens_seen": 775992016, + "step": 4489 + }, + { + "epoch": 1.7196476445806206, + "grad_norm": 106.59675438874885, + "learning_rate": 5e-06, + "loss": 1.6202, + "num_input_tokens_seen": 776164744, + "step": 4490 + }, + { + "epoch": 1.7196476445806206, + "loss": 1.59060537815094, + "loss_ce": 0.09607075154781342, + "loss_iou": 0.6930070519447327, + "loss_num": 0.021728515625, + "loss_xval": 1.4945346117019653, + "num_input_tokens_seen": 776164744, + "step": 4490 + }, + { + "epoch": 1.7200306396016853, + "grad_norm": 139.04872605535303, + "learning_rate": 5e-06, + "loss": 2.0484, + "num_input_tokens_seen": 776337136, + "step": 4491 + }, + { + "epoch": 1.7200306396016853, + "loss": 2.171727180480957, + "loss_ce": 0.10225944966077805, + "loss_iou": 0.9552661180496216, + "loss_num": 0.03173828125, + "loss_xval": 2.069467782974243, + "num_input_tokens_seen": 776337136, + "step": 4491 + }, + { + "epoch": 1.7204136346227499, + "grad_norm": 122.62049336158304, + "learning_rate": 5e-06, + "loss": 1.547, + "num_input_tokens_seen": 776510008, + "step": 4492 + }, + { + "epoch": 1.7204136346227499, + "loss": 1.5319805145263672, + "loss_ce": 0.11845797300338745, + "loss_iou": 0.6547898054122925, + "loss_num": 0.020751953125, + "loss_xval": 1.413522481918335, + "num_input_tokens_seen": 776510008, + "step": 4492 + }, + { + "epoch": 1.7207966296438146, + "grad_norm": 231.88840384635802, + "learning_rate": 5e-06, + "loss": 1.7515, + "num_input_tokens_seen": 776682808, + "step": 4493 + }, + { + "epoch": 1.7207966296438146, + "loss": 1.772214651107788, + "loss_ce": 0.08848626911640167, + "loss_iou": 0.7935243844985962, + "loss_num": 0.019287109375, + "loss_xval": 1.6837284564971924, + "num_input_tokens_seen": 776682808, + "step": 4493 + }, + { + "epoch": 1.7211796246648794, + "grad_norm": 160.42903791757934, + "learning_rate": 5e-06, + "loss": 2.0512, + "num_input_tokens_seen": 776856032, + "step": 4494 + }, + { + "epoch": 1.7211796246648794, + "loss": 2.05763840675354, + "loss_ce": 0.08359895646572113, + "loss_iou": 0.9068195819854736, + "loss_num": 0.0322265625, + "loss_xval": 1.9740395545959473, + "num_input_tokens_seen": 776856032, + "step": 4494 + }, + { + "epoch": 1.721562619685944, + "grad_norm": 261.3765985052225, + "learning_rate": 5e-06, + "loss": 2.5035, + "num_input_tokens_seen": 777028608, + "step": 4495 + }, + { + "epoch": 1.721562619685944, + "loss": 2.637826681137085, + "loss_ce": 0.07435683161020279, + "loss_iou": 1.1283535957336426, + "loss_num": 0.061279296875, + "loss_xval": 2.563469886779785, + "num_input_tokens_seen": 777028608, + "step": 4495 + }, + { + "epoch": 1.7219456147070087, + "grad_norm": 105.80215052792512, + "learning_rate": 5e-06, + "loss": 1.8306, + "num_input_tokens_seen": 777197728, + "step": 4496 + }, + { + "epoch": 1.7219456147070087, + "loss": 1.7702192068099976, + "loss_ce": 0.10497844219207764, + "loss_iou": 0.757333517074585, + "loss_num": 0.0301513671875, + "loss_xval": 1.66524076461792, + "num_input_tokens_seen": 777197728, + "step": 4496 + }, + { + "epoch": 1.7223286097280734, + "grad_norm": 212.14864221149261, + "learning_rate": 5e-06, + "loss": 1.6199, + "num_input_tokens_seen": 777370368, + "step": 4497 + }, + { + "epoch": 1.7223286097280734, + "loss": 1.4185817241668701, + "loss_ce": 0.07815849035978317, + "loss_iou": 0.6156766414642334, + "loss_num": 0.0218505859375, + "loss_xval": 1.3404231071472168, + "num_input_tokens_seen": 777370368, + "step": 4497 + }, + { + "epoch": 1.7227116047491382, + "grad_norm": 96.49319602480695, + "learning_rate": 5e-06, + "loss": 1.8624, + "num_input_tokens_seen": 777543328, + "step": 4498 + }, + { + "epoch": 1.7227116047491382, + "loss": 1.8736438751220703, + "loss_ce": 0.11589843779802322, + "loss_iou": 0.8065766096115112, + "loss_num": 0.0289306640625, + "loss_xval": 1.7577455043792725, + "num_input_tokens_seen": 777543328, + "step": 4498 + }, + { + "epoch": 1.723094599770203, + "grad_norm": 97.74607710610101, + "learning_rate": 5e-06, + "loss": 1.5515, + "num_input_tokens_seen": 777716200, + "step": 4499 + }, + { + "epoch": 1.723094599770203, + "loss": 1.6198670864105225, + "loss_ce": 0.10625797510147095, + "loss_iou": 0.6743460893630981, + "loss_num": 0.032958984375, + "loss_xval": 1.5136091709136963, + "num_input_tokens_seen": 777716200, + "step": 4499 + }, + { + "epoch": 1.7234775947912677, + "grad_norm": 115.47292848843725, + "learning_rate": 5e-06, + "loss": 1.3939, + "num_input_tokens_seen": 777889224, + "step": 4500 + }, + { + "epoch": 1.7234775947912677, + "eval_websight_new_CIoU": 0.8651993870735168, + "eval_websight_new_GIoU": 0.8643690347671509, + "eval_websight_new_IoU": 0.866310328245163, + "eval_websight_new_MAE_all": 0.014750218950212002, + "eval_websight_new_MAE_h": 0.01531203230842948, + "eval_websight_new_MAE_w": 0.01181055186316371, + "eval_websight_new_MAE_x": 0.014414471574127674, + "eval_websight_new_MAE_y": 0.01746381539851427, + "eval_websight_new_NUM_probability": 0.42599648237228394, + "eval_websight_new_inside_bbox": 1.0, + "eval_websight_new_loss": 1.4053547382354736, + "eval_websight_new_loss_ce": 0.15365064144134521, + "eval_websight_new_loss_iou": 0.6049605309963226, + "eval_websight_new_loss_num": 0.012073516845703125, + "eval_websight_new_loss_xval": 1.2702237963676453, + "eval_websight_new_runtime": 56.6952, + "eval_websight_new_samples_per_second": 0.882, + "eval_websight_new_steps_per_second": 0.035, + "num_input_tokens_seen": 777889224, + "step": 4500 + }, + { + "epoch": 1.7234775947912677, + "eval_seeclick_CIoU": 0.6288062632083893, + "eval_seeclick_GIoU": 0.6406533718109131, + "eval_seeclick_IoU": 0.6674403548240662, + "eval_seeclick_MAE_all": 0.06479492411017418, + "eval_seeclick_MAE_h": 0.027880861423909664, + "eval_seeclick_MAE_w": 0.10141636058688164, + "eval_seeclick_MAE_x": 0.09699774533510208, + "eval_seeclick_MAE_y": 0.03288473468273878, + "eval_seeclick_NUM_probability": 0.5766470730304718, + "eval_seeclick_inside_bbox": 0.8541666567325592, + "eval_seeclick_loss": 1.6871815919876099, + "eval_seeclick_loss_ce": 0.13062653690576553, + "eval_seeclick_loss_iou": 0.6280854344367981, + "eval_seeclick_loss_num": 0.04927825927734375, + "eval_seeclick_loss_xval": 1.5024629831314087, + "eval_seeclick_runtime": 88.2076, + "eval_seeclick_samples_per_second": 0.567, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 777889224, + "step": 4500 + }, + { + "epoch": 1.7234775947912677, + "eval_icons_CIoU": 0.7683820724487305, + "eval_icons_GIoU": 0.7657085359096527, + "eval_icons_IoU": 0.7841051816940308, + "eval_icons_MAE_all": 0.03661998547613621, + "eval_icons_MAE_h": 0.026071634143590927, + "eval_icons_MAE_w": 0.04999036341905594, + "eval_icons_MAE_x": 0.048626719042658806, + "eval_icons_MAE_y": 0.021791222505271435, + "eval_icons_NUM_probability": 0.40404000878334045, + "eval_icons_inside_bbox": 0.9565972089767456, + "eval_icons_loss": 2.2493743896484375, + "eval_icons_loss_ce": 0.23850609362125397, + "eval_icons_loss_iou": 0.9390899240970612, + "eval_icons_loss_num": 0.0259246826171875, + "eval_icons_loss_xval": 2.0077268481254578, + "eval_icons_runtime": 87.9887, + "eval_icons_samples_per_second": 0.568, + "eval_icons_steps_per_second": 0.023, + "num_input_tokens_seen": 777889224, + "step": 4500 + } + ], + "logging_steps": 1.0, + "max_steps": 7833, + "num_input_tokens_seen": 777889224, + "num_train_epochs": 3, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4869346964406272.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}