{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09531071292413268, "eval_steps": 250, "global_step": 250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003812428516965307, "grad_norm": 166.66033166263705, "learning_rate": 5e-06, "loss": 5.5181, "num_input_tokens_seen": 171824, "step": 1 }, { "epoch": 0.0003812428516965307, "loss": 5.511815547943115, "loss_ce": 5.203954219818115, "loss_iou": 0.875, "loss_num": 0.30859375, "loss_xval": 0.30859375, "num_input_tokens_seen": 171824, "step": 1 }, { "epoch": 0.0007624857033930614, "grad_norm": 177.43252265373968, "learning_rate": 5e-06, "loss": 5.4175, "num_input_tokens_seen": 340268, "step": 2 }, { "epoch": 0.0007624857033930614, "loss": 5.353693008422852, "loss_ce": 5.080987930297852, "loss_iou": 0.69921875, "loss_num": 0.2734375, "loss_xval": 0.2734375, "num_input_tokens_seen": 340268, "step": 2 }, { "epoch": 0.001143728555089592, "grad_norm": 169.4480809535085, "learning_rate": 5e-06, "loss": 5.1458, "num_input_tokens_seen": 509856, "step": 3 }, { "epoch": 0.001143728555089592, "loss": 5.112088203430176, "loss_ce": 4.874661445617676, "loss_iou": 0.66015625, "loss_num": 0.2373046875, "loss_xval": 0.2373046875, "num_input_tokens_seen": 509856, "step": 3 }, { "epoch": 0.0015249714067861228, "grad_norm": 165.0464349086778, "learning_rate": 5e-06, "loss": 5.2169, "num_input_tokens_seen": 676960, "step": 4 }, { "epoch": 0.0015249714067861228, "loss": 5.128868579864502, "loss_ce": 4.904503345489502, "loss_iou": 0.15234375, "loss_num": 0.224609375, "loss_xval": 0.224609375, "num_input_tokens_seen": 676960, "step": 4 }, { "epoch": 0.0019062142584826535, "grad_norm": 156.68894357346014, "learning_rate": 5e-06, "loss": 5.1368, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.0019062142584826535, "eval_websight_new_CIoU": 0.014646705240011215, "eval_websight_new_GIoU": -0.05545324832201004, "eval_websight_new_IoU": 0.16141273081302643, "eval_websight_new_MAE_all": 0.23055454343557358, "eval_websight_new_MAE_h": 0.2790681719779968, "eval_websight_new_MAE_w": 0.16171859204769135, "eval_websight_new_MAE_x": 0.2205788567662239, "eval_websight_new_MAE_y": 0.2608525678515434, "eval_websight_new_NUM_probability": 5.514878331780437e-08, "eval_websight_new_inside_bbox": 0.3697916716337204, "eval_websight_new_loss": 5.538997173309326, "eval_websight_new_loss_ce": 5.386038541793823, "eval_websight_new_loss_iou": 0.1309814453125, "eval_websight_new_loss_num": 0.14739990234375, "eval_websight_new_loss_xval": 0.14739990234375, "eval_websight_new_runtime": 64.8384, "eval_websight_new_samples_per_second": 0.771, "eval_websight_new_steps_per_second": 0.031, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.0019062142584826535, "eval_seeclick_CIoU": -0.10113272070884705, "eval_seeclick_GIoU": -0.19475465267896652, "eval_seeclick_IoU": 0.081342913210392, "eval_seeclick_MAE_all": 0.3178282380104065, "eval_seeclick_MAE_h": 0.29355502128601074, "eval_seeclick_MAE_w": 0.30315980315208435, "eval_seeclick_MAE_x": 0.34532545506954193, "eval_seeclick_MAE_y": 0.3292728066444397, "eval_seeclick_NUM_probability": 2.8387768935544955e-07, "eval_seeclick_inside_bbox": 0.2361111119389534, "eval_seeclick_loss": 6.38102912902832, "eval_seeclick_loss_ce": 6.161922454833984, "eval_seeclick_loss_iou": 0.22113037109375, "eval_seeclick_loss_num": 0.2393798828125, "eval_seeclick_loss_xval": 0.2393798828125, "eval_seeclick_runtime": 82.7425, "eval_seeclick_samples_per_second": 0.604, "eval_seeclick_steps_per_second": 0.024, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.0019062142584826535, "eval_icons_CIoU": 0.04171837493777275, "eval_icons_GIoU": -0.015274678356945515, "eval_icons_IoU": 0.17068828642368317, "eval_icons_MAE_all": 0.22043060511350632, "eval_icons_MAE_h": 0.21020717918872833, "eval_icons_MAE_w": 0.2284608781337738, "eval_icons_MAE_x": 0.2374560832977295, "eval_icons_MAE_y": 0.20559833943843842, "eval_icons_NUM_probability": 1.474576443172282e-07, "eval_icons_inside_bbox": 0.3940972238779068, "eval_icons_loss": 5.1574883460998535, "eval_icons_loss_ce": 5.055776119232178, "eval_icons_loss_iou": 0.0128173828125, "eval_icons_loss_num": 0.12457275390625, "eval_icons_loss_xval": 0.12457275390625, "eval_icons_runtime": 82.3641, "eval_icons_samples_per_second": 0.607, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.0019062142584826535, "eval_compot_CIoU": 0.07103381492197514, "eval_compot_GIoU": 0.010495346039533615, "eval_compot_IoU": 0.20030608028173447, "eval_compot_MAE_all": 0.17506222426891327, "eval_compot_MAE_h": 0.1381075605750084, "eval_compot_MAE_w": 0.22323701530694962, "eval_compot_MAE_x": 0.22260665148496628, "eval_compot_MAE_y": 0.11629766970872879, "eval_compot_NUM_probability": 3.672893456041493e-08, "eval_compot_inside_bbox": 0.3229166716337204, "eval_compot_loss": 5.476764678955078, "eval_compot_loss_ce": 5.353034734725952, "eval_compot_loss_iou": 0.105712890625, "eval_compot_loss_num": 0.116241455078125, "eval_compot_loss_xval": 0.116241455078125, "eval_compot_runtime": 82.3456, "eval_compot_samples_per_second": 0.607, "eval_compot_steps_per_second": 0.024, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.0019062142584826535, "eval_web_actions_CIoU": -0.09799446724355221, "eval_web_actions_GIoU": -0.22149190306663513, "eval_web_actions_IoU": 0.08019107207655907, "eval_web_actions_MAE_all": 0.28694653511047363, "eval_web_actions_MAE_h": 0.2967325896024704, "eval_web_actions_MAE_w": 0.2789995074272156, "eval_web_actions_MAE_x": 0.2816382795572281, "eval_web_actions_MAE_y": 0.29041585326194763, "eval_web_actions_NUM_probability": 2.1177974929287302e-07, "eval_web_actions_inside_bbox": 0.19166667014360428, "eval_web_actions_loss": 5.91270112991333, "eval_web_actions_loss_ce": 5.719156980514526, "eval_web_actions_loss_iou": 0.08514404296875, "eval_web_actions_loss_num": 0.17974853515625, "eval_web_actions_loss_xval": 0.17974853515625, "eval_web_actions_runtime": 78.0841, "eval_web_actions_samples_per_second": 0.602, "eval_web_actions_steps_per_second": 0.026, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.0019062142584826535, "loss": 5.942988395690918, "loss_ce": 5.761713981628418, "loss_iou": 0.0703125, "loss_num": 0.181640625, "loss_xval": 0.181640625, "num_input_tokens_seen": 847420, "step": 5 }, { "epoch": 0.002287457110179184, "grad_norm": 146.72349551014184, "learning_rate": 5e-06, "loss": 5.0025, "num_input_tokens_seen": 1019588, "step": 6 }, { "epoch": 0.002287457110179184, "loss": 4.935182571411133, "loss_ce": 4.728639602661133, "loss_iou": 0.130859375, "loss_num": 0.20703125, "loss_xval": 0.20703125, "num_input_tokens_seen": 1019588, "step": 6 }, { "epoch": 0.002668699961875715, "grad_norm": 140.54078928260213, "learning_rate": 5e-06, "loss": 4.8787, "num_input_tokens_seen": 1188148, "step": 7 }, { "epoch": 0.002668699961875715, "loss": 4.864795684814453, "loss_ce": 4.719409942626953, "loss_iou": 0.1298828125, "loss_num": 0.1455078125, "loss_xval": 0.1455078125, "num_input_tokens_seen": 1188148, "step": 7 }, { "epoch": 0.0030499428135722455, "grad_norm": 112.55645040581052, "learning_rate": 5e-06, "loss": 4.4461, "num_input_tokens_seen": 1358400, "step": 8 }, { "epoch": 0.0030499428135722455, "loss": 4.246901988983154, "loss_ce": 4.098220348358154, "loss_iou": 0.162109375, "loss_num": 0.1484375, "loss_xval": 0.1484375, "num_input_tokens_seen": 1358400, "step": 8 }, { "epoch": 0.003431185665268776, "grad_norm": 113.36373607042844, "learning_rate": 5e-06, "loss": 4.3505, "num_input_tokens_seen": 1525448, "step": 9 }, { "epoch": 0.003431185665268776, "loss": 4.16458797454834, "loss_ce": 3.9982669353485107, "loss_iou": 0.06884765625, "loss_num": 0.166015625, "loss_xval": 0.166015625, "num_input_tokens_seen": 1525448, "step": 9 }, { "epoch": 0.003812428516965307, "grad_norm": 118.47049136018163, "learning_rate": 5e-06, "loss": 3.8089, "num_input_tokens_seen": 1687988, "step": 10 }, { "epoch": 0.003812428516965307, "loss": 3.628293752670288, "loss_ce": 3.449582815170288, "loss_iou": 0.0419921875, "loss_num": 0.1787109375, "loss_xval": 0.1787109375, "num_input_tokens_seen": 1687988, "step": 10 }, { "epoch": 0.0041936713686618375, "grad_norm": 99.61131893349591, "learning_rate": 5e-06, "loss": 2.996, "num_input_tokens_seen": 1858248, "step": 11 }, { "epoch": 0.0041936713686618375, "loss": 3.101732015609741, "loss_ce": 2.939622640609741, "loss_iou": 0.095703125, "loss_num": 0.162109375, "loss_xval": 0.162109375, "num_input_tokens_seen": 1858248, "step": 11 }, { "epoch": 0.004574914220358368, "grad_norm": 106.98300209226464, "learning_rate": 5e-06, "loss": 2.6687, "num_input_tokens_seen": 2030264, "step": 12 }, { "epoch": 0.004574914220358368, "loss": 2.5463790893554688, "loss_ce": 2.3260421752929688, "loss_iou": 0.05224609375, "loss_num": 0.220703125, "loss_xval": 0.220703125, "num_input_tokens_seen": 2030264, "step": 12 }, { "epoch": 0.004956157072054899, "grad_norm": 76.91542550535567, "learning_rate": 5e-06, "loss": 2.2417, "num_input_tokens_seen": 2200880, "step": 13 }, { "epoch": 0.004956157072054899, "loss": 2.266770601272583, "loss_ce": 2.038377046585083, "loss_iou": 0.34375, "loss_num": 0.228515625, "loss_xval": 0.228515625, "num_input_tokens_seen": 2200880, "step": 13 }, { "epoch": 0.00533739992375143, "grad_norm": 78.72540804350768, "learning_rate": 5e-06, "loss": 1.9466, "num_input_tokens_seen": 2369604, "step": 14 }, { "epoch": 0.00533739992375143, "loss": 2.0582070350646973, "loss_ce": 1.8293249607086182, "loss_iou": 0.06201171875, "loss_num": 0.228515625, "loss_xval": 0.228515625, "num_input_tokens_seen": 2369604, "step": 14 }, { "epoch": 0.0057186427754479605, "grad_norm": 59.8244048261341, "learning_rate": 5e-06, "loss": 1.4382, "num_input_tokens_seen": 2539572, "step": 15 }, { "epoch": 0.0057186427754479605, "loss": 1.469299554824829, "loss_ce": 1.316223382949829, "loss_iou": 0.248046875, "loss_num": 0.1533203125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 2539572, "step": 15 }, { "epoch": 0.006099885627144491, "grad_norm": 41.99255824550064, "learning_rate": 5e-06, "loss": 1.2798, "num_input_tokens_seen": 2709740, "step": 16 }, { "epoch": 0.006099885627144491, "loss": 1.2045753002166748, "loss_ce": 1.0448462963104248, "loss_iou": 0.25390625, "loss_num": 0.16015625, "loss_xval": 0.16015625, "num_input_tokens_seen": 2709740, "step": 16 }, { "epoch": 0.006481128478841022, "grad_norm": 30.717288714523608, "learning_rate": 5e-06, "loss": 1.3016, "num_input_tokens_seen": 2881856, "step": 17 }, { "epoch": 0.006481128478841022, "loss": 1.2061893939971924, "loss_ce": 0.9561895132064819, "loss_iou": 0.0947265625, "loss_num": 0.25, "loss_xval": 0.25, "num_input_tokens_seen": 2881856, "step": 17 }, { "epoch": 0.006862371330537552, "grad_norm": 29.605658753942066, "learning_rate": 5e-06, "loss": 1.1777, "num_input_tokens_seen": 3053876, "step": 18 }, { "epoch": 0.006862371330537552, "loss": 1.2317192554473877, "loss_ce": 1.0397636890411377, "loss_iou": 0.2158203125, "loss_num": 0.1923828125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 3053876, "step": 18 }, { "epoch": 0.0072436141822340835, "grad_norm": 29.86222597046028, "learning_rate": 5e-06, "loss": 1.1369, "num_input_tokens_seen": 3225060, "step": 19 }, { "epoch": 0.0072436141822340835, "loss": 1.1779117584228516, "loss_ce": 0.9886417388916016, "loss_iou": 0.06201171875, "loss_num": 0.189453125, "loss_xval": 0.189453125, "num_input_tokens_seen": 3225060, "step": 19 }, { "epoch": 0.007624857033930614, "grad_norm": 25.831037095100537, "learning_rate": 5e-06, "loss": 1.0894, "num_input_tokens_seen": 3395392, "step": 20 }, { "epoch": 0.007624857033930614, "loss": 1.1610549688339233, "loss_ce": 1.0183547735214233, "loss_iou": 0.177734375, "loss_num": 0.142578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 3395392, "step": 20 }, { "epoch": 0.008006099885627144, "grad_norm": 19.771258523994035, "learning_rate": 5e-06, "loss": 1.0349, "num_input_tokens_seen": 3566916, "step": 21 }, { "epoch": 0.008006099885627144, "loss": 1.0371689796447754, "loss_ce": 0.8748155236244202, "loss_iou": 0.234375, "loss_num": 0.162109375, "loss_xval": 0.162109375, "num_input_tokens_seen": 3566916, "step": 21 }, { "epoch": 0.008387342737323675, "grad_norm": 17.12428105280431, "learning_rate": 5e-06, "loss": 0.9893, "num_input_tokens_seen": 3736368, "step": 22 }, { "epoch": 0.008387342737323675, "loss": 1.0070894956588745, "loss_ce": 0.8448580503463745, "loss_iou": 0.08837890625, "loss_num": 0.162109375, "loss_xval": 0.162109375, "num_input_tokens_seen": 3736368, "step": 22 }, { "epoch": 0.008768585589020206, "grad_norm": 15.113078816318653, "learning_rate": 5e-06, "loss": 0.8795, "num_input_tokens_seen": 3908260, "step": 23 }, { "epoch": 0.008768585589020206, "loss": 0.8438772559165955, "loss_ce": 0.6823782920837402, "loss_iou": 0.0498046875, "loss_num": 0.1611328125, "loss_xval": 0.1611328125, "num_input_tokens_seen": 3908260, "step": 23 }, { "epoch": 0.009149828440716736, "grad_norm": 18.544695956515877, "learning_rate": 5e-06, "loss": 0.9071, "num_input_tokens_seen": 4080448, "step": 24 }, { "epoch": 0.009149828440716736, "loss": 0.8525056838989258, "loss_ce": 0.6808748245239258, "loss_iou": 0.3984375, "loss_num": 0.171875, "loss_xval": 0.171875, "num_input_tokens_seen": 4080448, "step": 24 }, { "epoch": 0.009531071292413268, "grad_norm": 23.276487882198364, "learning_rate": 5e-06, "loss": 0.9212, "num_input_tokens_seen": 4249956, "step": 25 }, { "epoch": 0.009531071292413268, "loss": 0.9321600198745728, "loss_ce": 0.6882635354995728, "loss_iou": 0.018798828125, "loss_num": 0.244140625, "loss_xval": 0.244140625, "num_input_tokens_seen": 4249956, "step": 25 }, { "epoch": 0.009912314144109797, "grad_norm": 15.784302554680545, "learning_rate": 5e-06, "loss": 0.8371, "num_input_tokens_seen": 4418480, "step": 26 }, { "epoch": 0.009912314144109797, "loss": 0.8192111253738403, "loss_ce": 0.6472750902175903, "loss_iou": 0.02587890625, "loss_num": 0.171875, "loss_xval": 0.171875, "num_input_tokens_seen": 4418480, "step": 26 }, { "epoch": 0.010293556995806329, "grad_norm": 12.941867028628593, "learning_rate": 5e-06, "loss": 0.7928, "num_input_tokens_seen": 4588692, "step": 27 }, { "epoch": 0.010293556995806329, "loss": 0.7072745561599731, "loss_ce": 0.5241690874099731, "loss_iou": 0.00732421875, "loss_num": 0.18359375, "loss_xval": 0.18359375, "num_input_tokens_seen": 4588692, "step": 27 }, { "epoch": 0.01067479984750286, "grad_norm": 13.321258109616243, "learning_rate": 5e-06, "loss": 0.8187, "num_input_tokens_seen": 4757996, "step": 28 }, { "epoch": 0.01067479984750286, "loss": 0.8777631521224976, "loss_ce": 0.7008832693099976, "loss_iou": 0.11865234375, "loss_num": 0.1767578125, "loss_xval": 0.1767578125, "num_input_tokens_seen": 4757996, "step": 28 }, { "epoch": 0.01105604269919939, "grad_norm": 11.870311389259395, "learning_rate": 5e-06, "loss": 0.7568, "num_input_tokens_seen": 4926676, "step": 29 }, { "epoch": 0.01105604269919939, "loss": 0.719562292098999, "loss_ce": 0.5721014142036438, "loss_iou": 0.1923828125, "loss_num": 0.1474609375, "loss_xval": 0.1474609375, "num_input_tokens_seen": 4926676, "step": 29 }, { "epoch": 0.011437285550895921, "grad_norm": 14.463066295571423, "learning_rate": 5e-06, "loss": 0.7608, "num_input_tokens_seen": 5097060, "step": 30 }, { "epoch": 0.011437285550895921, "loss": 0.7708301544189453, "loss_ce": 0.6216602325439453, "loss_iou": 0.2392578125, "loss_num": 0.1494140625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 5097060, "step": 30 }, { "epoch": 0.01181852840259245, "grad_norm": 11.75302392316454, "learning_rate": 5e-06, "loss": 0.7324, "num_input_tokens_seen": 5269192, "step": 31 }, { "epoch": 0.01181852840259245, "loss": 0.7086876034736633, "loss_ce": 0.5559776425361633, "loss_iou": 0.0732421875, "loss_num": 0.15234375, "loss_xval": 0.15234375, "num_input_tokens_seen": 5269192, "step": 31 }, { "epoch": 0.012199771254288982, "grad_norm": 14.301637316863893, "learning_rate": 5e-06, "loss": 0.7253, "num_input_tokens_seen": 5441256, "step": 32 }, { "epoch": 0.012199771254288982, "loss": 0.715785026550293, "loss_ce": 0.5498914122581482, "loss_iou": 0.24609375, "loss_num": 0.166015625, "loss_xval": 0.166015625, "num_input_tokens_seen": 5441256, "step": 32 }, { "epoch": 0.012581014105985514, "grad_norm": 13.076635214648364, "learning_rate": 5e-06, "loss": 0.7391, "num_input_tokens_seen": 5611392, "step": 33 }, { "epoch": 0.012581014105985514, "loss": 0.7384083271026611, "loss_ce": 0.5559132099151611, "loss_iou": 0.125, "loss_num": 0.1826171875, "loss_xval": 0.1826171875, "num_input_tokens_seen": 5611392, "step": 33 }, { "epoch": 0.012962256957682043, "grad_norm": 13.045891494955844, "learning_rate": 5e-06, "loss": 0.7512, "num_input_tokens_seen": 5776300, "step": 34 }, { "epoch": 0.012962256957682043, "loss": 0.7740552425384521, "loss_ce": 0.5727002620697021, "loss_iou": 0.12060546875, "loss_num": 0.201171875, "loss_xval": 0.201171875, "num_input_tokens_seen": 5776300, "step": 34 }, { "epoch": 0.013343499809378575, "grad_norm": 12.523133776622043, "learning_rate": 5e-06, "loss": 0.7389, "num_input_tokens_seen": 5948620, "step": 35 }, { "epoch": 0.013343499809378575, "loss": 0.7952262759208679, "loss_ce": 0.6077262759208679, "loss_iou": 0.09912109375, "loss_num": 0.1875, "loss_xval": 0.1875, "num_input_tokens_seen": 5948620, "step": 35 }, { "epoch": 0.013724742661075104, "grad_norm": 15.473776854664337, "learning_rate": 5e-06, "loss": 0.6972, "num_input_tokens_seen": 6121040, "step": 36 }, { "epoch": 0.013724742661075104, "loss": 0.7074524164199829, "loss_ce": 0.5458313822746277, "loss_iou": 0.2177734375, "loss_num": 0.162109375, "loss_xval": 0.162109375, "num_input_tokens_seen": 6121040, "step": 36 }, { "epoch": 0.014105985512771636, "grad_norm": 13.090989368724633, "learning_rate": 5e-06, "loss": 0.7056, "num_input_tokens_seen": 6293600, "step": 37 }, { "epoch": 0.014105985512771636, "loss": 0.7142536640167236, "loss_ce": 0.5716145038604736, "loss_iou": 0.158203125, "loss_num": 0.142578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 6293600, "step": 37 }, { "epoch": 0.014487228364468167, "grad_norm": 11.923105224279563, "learning_rate": 5e-06, "loss": 0.6759, "num_input_tokens_seen": 6465744, "step": 38 }, { "epoch": 0.014487228364468167, "loss": 0.6937756538391113, "loss_ce": 0.49968382716178894, "loss_iou": 0.125, "loss_num": 0.1943359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 6465744, "step": 38 }, { "epoch": 0.014868471216164697, "grad_norm": 11.32292254804611, "learning_rate": 5e-06, "loss": 0.6567, "num_input_tokens_seen": 6637832, "step": 39 }, { "epoch": 0.014868471216164697, "loss": 0.6161133050918579, "loss_ce": 0.4471680521965027, "loss_iou": 0.1103515625, "loss_num": 0.1689453125, "loss_xval": 0.1689453125, "num_input_tokens_seen": 6637832, "step": 39 }, { "epoch": 0.015249714067861228, "grad_norm": 14.554101213212657, "learning_rate": 5e-06, "loss": 0.6785, "num_input_tokens_seen": 6805988, "step": 40 }, { "epoch": 0.015249714067861228, "loss": 0.6730799674987793, "loss_ce": 0.5253138542175293, "loss_iou": 0.17578125, "loss_num": 0.1474609375, "loss_xval": 0.1474609375, "num_input_tokens_seen": 6805988, "step": 40 }, { "epoch": 0.015630956919557758, "grad_norm": 12.069633926063188, "learning_rate": 5e-06, "loss": 0.6535, "num_input_tokens_seen": 6978164, "step": 41 }, { "epoch": 0.015630956919557758, "loss": 0.6302179098129272, "loss_ce": 0.456695020198822, "loss_iou": 0.208984375, "loss_num": 0.173828125, "loss_xval": 0.173828125, "num_input_tokens_seen": 6978164, "step": 41 }, { "epoch": 0.016012199771254287, "grad_norm": 12.41805814562438, "learning_rate": 5e-06, "loss": 0.6603, "num_input_tokens_seen": 7150132, "step": 42 }, { "epoch": 0.016012199771254287, "loss": 0.6476210355758667, "loss_ce": 0.4699476361274719, "loss_iou": 0.0233154296875, "loss_num": 0.177734375, "loss_xval": 0.177734375, "num_input_tokens_seen": 7150132, "step": 42 }, { "epoch": 0.01639344262295082, "grad_norm": 11.434070417991139, "learning_rate": 5e-06, "loss": 0.6363, "num_input_tokens_seen": 7321812, "step": 43 }, { "epoch": 0.01639344262295082, "loss": 0.6272916793823242, "loss_ce": 0.4530363082885742, "loss_iou": 0.19140625, "loss_num": 0.173828125, "loss_xval": 0.173828125, "num_input_tokens_seen": 7321812, "step": 43 }, { "epoch": 0.01677468547464735, "grad_norm": 11.95243015763611, "learning_rate": 5e-06, "loss": 0.6148, "num_input_tokens_seen": 7493996, "step": 44 }, { "epoch": 0.01677468547464735, "loss": 0.5882515907287598, "loss_ce": 0.42339563369750977, "loss_iou": 0.06884765625, "loss_num": 0.1650390625, "loss_xval": 0.1650390625, "num_input_tokens_seen": 7493996, "step": 44 }, { "epoch": 0.01715592832634388, "grad_norm": 11.274627145163922, "learning_rate": 5e-06, "loss": 0.5914, "num_input_tokens_seen": 7660036, "step": 45 }, { "epoch": 0.01715592832634388, "loss": 0.6103206872940063, "loss_ce": 0.43142664432525635, "loss_iou": 0.08203125, "loss_num": 0.1787109375, "loss_xval": 0.1787109375, "num_input_tokens_seen": 7660036, "step": 45 }, { "epoch": 0.017537171178040413, "grad_norm": 11.712186194645579, "learning_rate": 5e-06, "loss": 0.6034, "num_input_tokens_seen": 7830632, "step": 46 }, { "epoch": 0.017537171178040413, "loss": 0.5953316688537598, "loss_ce": 0.43291711807250977, "loss_iou": 0.1982421875, "loss_num": 0.162109375, "loss_xval": 0.162109375, "num_input_tokens_seen": 7830632, "step": 46 }, { "epoch": 0.017918414029736943, "grad_norm": 11.749337957492449, "learning_rate": 5e-06, "loss": 0.6061, "num_input_tokens_seen": 7996552, "step": 47 }, { "epoch": 0.017918414029736943, "loss": 0.6277086734771729, "loss_ce": 0.4752427935600281, "loss_iou": 0.1923828125, "loss_num": 0.15234375, "loss_xval": 0.15234375, "num_input_tokens_seen": 7996552, "step": 47 }, { "epoch": 0.018299656881433472, "grad_norm": 11.60241640002812, "learning_rate": 5e-06, "loss": 0.5649, "num_input_tokens_seen": 8168628, "step": 48 }, { "epoch": 0.018299656881433472, "loss": 0.5392067432403564, "loss_ce": 0.41530537605285645, "loss_iou": 0.10888671875, "loss_num": 0.1240234375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 8168628, "step": 48 }, { "epoch": 0.018680899733130005, "grad_norm": 11.892983212154894, "learning_rate": 5e-06, "loss": 0.5984, "num_input_tokens_seen": 8333940, "step": 49 }, { "epoch": 0.018680899733130005, "loss": 0.6391422748565674, "loss_ce": 0.4573795795440674, "loss_iou": 0.07958984375, "loss_num": 0.181640625, "loss_xval": 0.181640625, "num_input_tokens_seen": 8333940, "step": 49 }, { "epoch": 0.019062142584826535, "grad_norm": 12.370360966405286, "learning_rate": 5e-06, "loss": 0.5927, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019062142584826535, "eval_websight_new_CIoU": 0.12265287712216377, "eval_websight_new_GIoU": 0.07912312902044505, "eval_websight_new_IoU": 0.20381823182106018, "eval_websight_new_MAE_all": 0.1915627419948578, "eval_websight_new_MAE_h": 0.23717200756072998, "eval_websight_new_MAE_w": 0.226213701069355, "eval_websight_new_MAE_x": 0.08390067517757416, "eval_websight_new_MAE_y": 0.2189645618200302, "eval_websight_new_NUM_probability": 0.0060775557067245245, "eval_websight_new_inside_bbox": 0.7118055522441864, "eval_websight_new_loss": 0.6745176911354065, "eval_websight_new_loss_ce": 0.5277671813964844, "eval_websight_new_loss_iou": 0.18060302734375, "eval_websight_new_loss_num": 0.14251708984375, "eval_websight_new_loss_xval": 0.14251708984375, "eval_websight_new_runtime": 66.8504, "eval_websight_new_samples_per_second": 0.748, "eval_websight_new_steps_per_second": 0.03, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019062142584826535, "eval_seeclick_CIoU": -0.052548233419656754, "eval_seeclick_GIoU": -0.15978088974952698, "eval_seeclick_IoU": 0.0995083898305893, "eval_seeclick_MAE_all": 0.27841828763484955, "eval_seeclick_MAE_h": 0.2928212434053421, "eval_seeclick_MAE_w": 0.31220175325870514, "eval_seeclick_MAE_x": 0.227464959025383, "eval_seeclick_MAE_y": 0.28118522465229034, "eval_seeclick_NUM_probability": 0.006080519873648882, "eval_seeclick_inside_bbox": 0.2795138955116272, "eval_seeclick_loss": 0.8024284839630127, "eval_seeclick_loss_ce": 0.5892761051654816, "eval_seeclick_loss_iou": 0.1800537109375, "eval_seeclick_loss_num": 0.2235107421875, "eval_seeclick_loss_xval": 0.2235107421875, "eval_seeclick_runtime": 90.1923, "eval_seeclick_samples_per_second": 0.554, "eval_seeclick_steps_per_second": 0.022, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019062142584826535, "eval_icons_CIoU": 0.1178673729300499, "eval_icons_GIoU": 0.0633512157946825, "eval_icons_IoU": 0.22628046572208405, "eval_icons_MAE_all": 0.1937492936849594, "eval_icons_MAE_h": 0.2025865912437439, "eval_icons_MAE_w": 0.19258208572864532, "eval_icons_MAE_x": 0.17922218143939972, "eval_icons_MAE_y": 0.2006063312292099, "eval_icons_NUM_probability": 0.006224965211004019, "eval_icons_inside_bbox": 0.5711805522441864, "eval_icons_loss": 0.6320141553878784, "eval_icons_loss_ce": 0.515306681394577, "eval_icons_loss_iou": 0.05596923828125, "eval_icons_loss_num": 0.11151123046875, "eval_icons_loss_xval": 0.11151123046875, "eval_icons_runtime": 81.6336, "eval_icons_samples_per_second": 0.612, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019062142584826535, "eval_compot_CIoU": 0.14036905393004417, "eval_compot_GIoU": 0.05752933071926236, "eval_compot_IoU": 0.22682655602693558, "eval_compot_MAE_all": 0.12615687400102615, "eval_compot_MAE_h": 0.08502155169844627, "eval_compot_MAE_w": 0.20546764880418777, "eval_compot_MAE_x": 0.12541664391756058, "eval_compot_MAE_y": 0.0887216366827488, "eval_compot_NUM_probability": 0.005897745722904801, "eval_compot_inside_bbox": 0.6336805522441864, "eval_compot_loss": 0.6307579278945923, "eval_compot_loss_ce": 0.5412943065166473, "eval_compot_loss_iou": 0.0687255859375, "eval_compot_loss_num": 0.089385986328125, "eval_compot_loss_xval": 0.089385986328125, "eval_compot_runtime": 81.6243, "eval_compot_samples_per_second": 0.613, "eval_compot_steps_per_second": 0.025, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019062142584826535, "eval_web_actions_CIoU": -0.07848251238465309, "eval_web_actions_GIoU": -0.16024185717105865, "eval_web_actions_IoU": 0.08452721312642097, "eval_web_actions_MAE_all": 0.26327383518218994, "eval_web_actions_MAE_h": 0.31245075166225433, "eval_web_actions_MAE_w": 0.26912088692188263, "eval_web_actions_MAE_x": 0.165983684360981, "eval_web_actions_MAE_y": 0.30553998053073883, "eval_web_actions_NUM_probability": 0.005677123321220279, "eval_web_actions_inside_bbox": 0.30520834028720856, "eval_web_actions_loss": 0.9075774550437927, "eval_web_actions_loss_ce": 0.732213020324707, "eval_web_actions_loss_iou": 0.10137939453125, "eval_web_actions_loss_num": 0.17706298828125, "eval_web_actions_loss_xval": 0.17706298828125, "eval_web_actions_runtime": 78.0055, "eval_web_actions_samples_per_second": 0.603, "eval_web_actions_steps_per_second": 0.026, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019062142584826535, "loss": 0.9026013612747192, "loss_ce": 0.7248669862747192, "loss_iou": 0.10791015625, "loss_num": 0.177734375, "loss_xval": 0.177734375, "num_input_tokens_seen": 8502756, "step": 50 }, { "epoch": 0.019443385436523065, "grad_norm": 13.734342041508446, "learning_rate": 5e-06, "loss": 0.597, "num_input_tokens_seen": 8673368, "step": 51 }, { "epoch": 0.019443385436523065, "loss": 0.6097111701965332, "loss_ce": 0.4595646858215332, "loss_iou": 0.10693359375, "loss_num": 0.150390625, "loss_xval": 0.150390625, "num_input_tokens_seen": 8673368, "step": 51 }, { "epoch": 0.019824628288219594, "grad_norm": 15.391900523046136, "learning_rate": 5e-06, "loss": 0.5814, "num_input_tokens_seen": 8837248, "step": 52 }, { "epoch": 0.019824628288219594, "loss": 0.6093646287918091, "loss_ce": 0.4040423631668091, "loss_iou": 0.3125, "loss_num": 0.205078125, "loss_xval": 0.205078125, "num_input_tokens_seen": 8837248, "step": 52 }, { "epoch": 0.020205871139916128, "grad_norm": 14.955627476533499, "learning_rate": 5e-06, "loss": 0.5752, "num_input_tokens_seen": 9007996, "step": 53 }, { "epoch": 0.020205871139916128, "loss": 0.5916285514831543, "loss_ce": 0.4221949577331543, "loss_iou": 0.00897216796875, "loss_num": 0.169921875, "loss_xval": 0.169921875, "num_input_tokens_seen": 9007996, "step": 53 }, { "epoch": 0.020587113991612657, "grad_norm": 12.376336564449462, "learning_rate": 5e-06, "loss": 0.5657, "num_input_tokens_seen": 9177940, "step": 54 }, { "epoch": 0.020587113991612657, "loss": 0.5488950610160828, "loss_ce": 0.39649027585983276, "loss_iou": 0.053955078125, "loss_num": 0.15234375, "loss_xval": 0.15234375, "num_input_tokens_seen": 9177940, "step": 54 }, { "epoch": 0.020968356843309187, "grad_norm": 12.090405564173633, "learning_rate": 5e-06, "loss": 0.5691, "num_input_tokens_seen": 9348308, "step": 55 }, { "epoch": 0.020968356843309187, "loss": 0.6079497933387756, "loss_ce": 0.42185360193252563, "loss_iou": 0.13671875, "loss_num": 0.1865234375, "loss_xval": 0.1865234375, "num_input_tokens_seen": 9348308, "step": 55 }, { "epoch": 0.02134959969500572, "grad_norm": 11.902958231800097, "learning_rate": 5e-06, "loss": 0.5616, "num_input_tokens_seen": 9517760, "step": 56 }, { "epoch": 0.02134959969500572, "loss": 0.5244883298873901, "loss_ce": 0.34608256816864014, "loss_iou": 0.150390625, "loss_num": 0.1787109375, "loss_xval": 0.1787109375, "num_input_tokens_seen": 9517760, "step": 56 }, { "epoch": 0.02173084254670225, "grad_norm": 11.80221150505286, "learning_rate": 5e-06, "loss": 0.535, "num_input_tokens_seen": 9687752, "step": 57 }, { "epoch": 0.02173084254670225, "loss": 0.5444226264953613, "loss_ce": 0.39342164993286133, "loss_iou": 0.0693359375, "loss_num": 0.1513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 9687752, "step": 57 }, { "epoch": 0.02211208539839878, "grad_norm": 12.261361128826103, "learning_rate": 5e-06, "loss": 0.5449, "num_input_tokens_seen": 9857820, "step": 58 }, { "epoch": 0.02211208539839878, "loss": 0.5440876483917236, "loss_ce": 0.36427807807922363, "loss_iou": 0.173828125, "loss_num": 0.1796875, "loss_xval": 0.1796875, "num_input_tokens_seen": 9857820, "step": 58 }, { "epoch": 0.02249332825009531, "grad_norm": 12.453126203244764, "learning_rate": 5e-06, "loss": 0.563, "num_input_tokens_seen": 10028280, "step": 59 }, { "epoch": 0.02249332825009531, "loss": 0.5746830105781555, "loss_ce": 0.4281376004219055, "loss_iou": 0.146484375, "loss_num": 0.146484375, "loss_xval": 0.146484375, "num_input_tokens_seen": 10028280, "step": 59 }, { "epoch": 0.022874571101791842, "grad_norm": 12.304581281055258, "learning_rate": 5e-06, "loss": 0.5321, "num_input_tokens_seen": 10193584, "step": 60 }, { "epoch": 0.022874571101791842, "loss": 0.5188683867454529, "loss_ce": 0.35169312357902527, "loss_iou": 0.1640625, "loss_num": 0.1669921875, "loss_xval": 0.1669921875, "num_input_tokens_seen": 10193584, "step": 60 }, { "epoch": 0.023255813953488372, "grad_norm": 13.998044458420281, "learning_rate": 5e-06, "loss": 0.5362, "num_input_tokens_seen": 10362196, "step": 61 }, { "epoch": 0.023255813953488372, "loss": 0.5754693150520325, "loss_ce": 0.41866999864578247, "loss_iou": 0.1455078125, "loss_num": 0.1572265625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 10362196, "step": 61 }, { "epoch": 0.0236370568051849, "grad_norm": 11.92308808122042, "learning_rate": 5e-06, "loss": 0.5326, "num_input_tokens_seen": 10527168, "step": 62 }, { "epoch": 0.0236370568051849, "loss": 0.5353362560272217, "loss_ce": 0.3612639904022217, "loss_iou": 0.21484375, "loss_num": 0.173828125, "loss_xval": 0.173828125, "num_input_tokens_seen": 10527168, "step": 62 }, { "epoch": 0.024018299656881435, "grad_norm": 12.233902047421576, "learning_rate": 5e-06, "loss": 0.5208, "num_input_tokens_seen": 10696296, "step": 63 }, { "epoch": 0.024018299656881435, "loss": 0.5133154392242432, "loss_ce": 0.3806860148906708, "loss_iou": 0.0498046875, "loss_num": 0.1328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 10696296, "step": 63 }, { "epoch": 0.024399542508577964, "grad_norm": 12.137384843380636, "learning_rate": 5e-06, "loss": 0.5347, "num_input_tokens_seen": 10865244, "step": 64 }, { "epoch": 0.024399542508577964, "loss": 0.49595263600349426, "loss_ce": 0.29728323221206665, "loss_iou": 0.1875, "loss_num": 0.1982421875, "loss_xval": 0.1982421875, "num_input_tokens_seen": 10865244, "step": 64 }, { "epoch": 0.024780785360274494, "grad_norm": 12.183441390066275, "learning_rate": 5e-06, "loss": 0.5108, "num_input_tokens_seen": 11037272, "step": 65 }, { "epoch": 0.024780785360274494, "loss": 0.4988226890563965, "loss_ce": 0.3487982749938965, "loss_iou": 0.126953125, "loss_num": 0.150390625, "loss_xval": 0.150390625, "num_input_tokens_seen": 11037272, "step": 65 }, { "epoch": 0.025162028211971027, "grad_norm": 13.03848111499602, "learning_rate": 5e-06, "loss": 0.5194, "num_input_tokens_seen": 11209428, "step": 66 }, { "epoch": 0.025162028211971027, "loss": 0.48021817207336426, "loss_ce": 0.33275723457336426, "loss_iou": 0.07861328125, "loss_num": 0.1474609375, "loss_xval": 0.1474609375, "num_input_tokens_seen": 11209428, "step": 66 }, { "epoch": 0.025543271063667557, "grad_norm": 12.129796554865111, "learning_rate": 5e-06, "loss": 0.4982, "num_input_tokens_seen": 11377784, "step": 67 }, { "epoch": 0.025543271063667557, "loss": 0.5201830863952637, "loss_ce": 0.3668017089366913, "loss_iou": 0.212890625, "loss_num": 0.1533203125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 11377784, "step": 67 }, { "epoch": 0.025924513915364086, "grad_norm": 12.16663337244556, "learning_rate": 5e-06, "loss": 0.4924, "num_input_tokens_seen": 11546724, "step": 68 }, { "epoch": 0.025924513915364086, "loss": 0.44336187839508057, "loss_ce": 0.29925787448883057, "loss_iou": 0.05810546875, "loss_num": 0.14453125, "loss_xval": 0.14453125, "num_input_tokens_seen": 11546724, "step": 68 }, { "epoch": 0.026305756767060616, "grad_norm": 11.758052516976239, "learning_rate": 5e-06, "loss": 0.478, "num_input_tokens_seen": 11715460, "step": 69 }, { "epoch": 0.026305756767060616, "loss": 0.4771363139152527, "loss_ce": 0.3299195170402527, "loss_iou": 0.041748046875, "loss_num": 0.1474609375, "loss_xval": 0.1474609375, "num_input_tokens_seen": 11715460, "step": 69 }, { "epoch": 0.02668699961875715, "grad_norm": 11.694824569228489, "learning_rate": 5e-06, "loss": 0.4735, "num_input_tokens_seen": 11883428, "step": 70 }, { "epoch": 0.02668699961875715, "loss": 0.4738195538520813, "loss_ce": 0.3340490162372589, "loss_iou": 0.1796875, "loss_num": 0.1396484375, "loss_xval": 0.1396484375, "num_input_tokens_seen": 11883428, "step": 70 }, { "epoch": 0.02706824247045368, "grad_norm": 12.792427192285375, "learning_rate": 5e-06, "loss": 0.4822, "num_input_tokens_seen": 12051952, "step": 71 }, { "epoch": 0.02706824247045368, "loss": 0.500407338142395, "loss_ce": 0.364543080329895, "loss_iou": 0.15625, "loss_num": 0.1357421875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 12051952, "step": 71 }, { "epoch": 0.02744948532215021, "grad_norm": 12.494356160653988, "learning_rate": 5e-06, "loss": 0.4905, "num_input_tokens_seen": 12222456, "step": 72 }, { "epoch": 0.02744948532215021, "loss": 0.49499523639678955, "loss_ce": 0.33422863483428955, "loss_iou": 0.048583984375, "loss_num": 0.1611328125, "loss_xval": 0.1611328125, "num_input_tokens_seen": 12222456, "step": 72 }, { "epoch": 0.02783072817384674, "grad_norm": 11.851075223778734, "learning_rate": 5e-06, "loss": 0.4734, "num_input_tokens_seen": 12394628, "step": 73 }, { "epoch": 0.02783072817384674, "loss": 0.4998481273651123, "loss_ce": 0.3393256664276123, "loss_iou": 0.169921875, "loss_num": 0.16015625, "loss_xval": 0.16015625, "num_input_tokens_seen": 12394628, "step": 73 }, { "epoch": 0.02821197102554327, "grad_norm": 12.345182296953553, "learning_rate": 5e-06, "loss": 0.461, "num_input_tokens_seen": 12561616, "step": 74 }, { "epoch": 0.02821197102554327, "loss": 0.4780183732509613, "loss_ce": 0.3509432077407837, "loss_iou": 0.11767578125, "loss_num": 0.126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 12561616, "step": 74 }, { "epoch": 0.0285932138772398, "grad_norm": 12.343535241732928, "learning_rate": 5e-06, "loss": 0.4799, "num_input_tokens_seen": 12729864, "step": 75 }, { "epoch": 0.0285932138772398, "loss": 0.49244657158851624, "loss_ce": 0.34345975518226624, "loss_iou": 0.1767578125, "loss_num": 0.1494140625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 12729864, "step": 75 }, { "epoch": 0.028974456728936334, "grad_norm": 13.53465987183171, "learning_rate": 5e-06, "loss": 0.4822, "num_input_tokens_seen": 12902300, "step": 76 }, { "epoch": 0.028974456728936334, "loss": 0.4951751232147217, "loss_ce": 0.3575408458709717, "loss_iou": 0.09765625, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 12902300, "step": 76 }, { "epoch": 0.029355699580632864, "grad_norm": 17.340928619373877, "learning_rate": 5e-06, "loss": 0.4669, "num_input_tokens_seen": 13074048, "step": 77 }, { "epoch": 0.029355699580632864, "loss": 0.48726654052734375, "loss_ce": 0.32576748728752136, "loss_iou": 0.392578125, "loss_num": 0.1611328125, "loss_xval": 0.1611328125, "num_input_tokens_seen": 13074048, "step": 77 }, { "epoch": 0.029736942432329393, "grad_norm": 23.622274934623174, "learning_rate": 5e-06, "loss": 0.567, "num_input_tokens_seen": 13240004, "step": 78 }, { "epoch": 0.029736942432329393, "loss": 0.5309600830078125, "loss_ce": 0.3266143798828125, "loss_iou": 0.0732421875, "loss_num": 0.2041015625, "loss_xval": 0.2041015625, "num_input_tokens_seen": 13240004, "step": 78 }, { "epoch": 0.030118185284025923, "grad_norm": 18.14847407024033, "learning_rate": 5e-06, "loss": 0.5159, "num_input_tokens_seen": 13410448, "step": 79 }, { "epoch": 0.030118185284025923, "loss": 0.5415377616882324, "loss_ce": 0.32718226313591003, "loss_iou": 0.47265625, "loss_num": 0.21484375, "loss_xval": 0.21484375, "num_input_tokens_seen": 13410448, "step": 79 }, { "epoch": 0.030499428135722456, "grad_norm": 11.985177640237248, "learning_rate": 5e-06, "loss": 0.4619, "num_input_tokens_seen": 13582736, "step": 80 }, { "epoch": 0.030499428135722456, "loss": 0.4745754599571228, "loss_ce": 0.3023342490196228, "loss_iou": 0.1044921875, "loss_num": 0.171875, "loss_xval": 0.171875, "num_input_tokens_seen": 13582736, "step": 80 }, { "epoch": 0.030880670987418986, "grad_norm": 11.856005341040106, "learning_rate": 5e-06, "loss": 0.4572, "num_input_tokens_seen": 13753100, "step": 81 }, { "epoch": 0.030880670987418986, "loss": 0.48636406660079956, "loss_ce": 0.33749932050704956, "loss_iou": 0.052001953125, "loss_num": 0.1484375, "loss_xval": 0.1484375, "num_input_tokens_seen": 13753100, "step": 81 }, { "epoch": 0.031261913839115515, "grad_norm": 11.382685680416452, "learning_rate": 5e-06, "loss": 0.4647, "num_input_tokens_seen": 13923248, "step": 82 }, { "epoch": 0.031261913839115515, "loss": 0.46927064657211304, "loss_ce": 0.25540345907211304, "loss_iou": 0.03125, "loss_num": 0.2138671875, "loss_xval": 0.2138671875, "num_input_tokens_seen": 13923248, "step": 82 }, { "epoch": 0.031643156690812045, "grad_norm": 11.753169915446275, "learning_rate": 5e-06, "loss": 0.4505, "num_input_tokens_seen": 14095636, "step": 83 }, { "epoch": 0.031643156690812045, "loss": 0.4410349130630493, "loss_ce": 0.2765451669692993, "loss_iou": 0.07080078125, "loss_num": 0.1640625, "loss_xval": 0.1640625, "num_input_tokens_seen": 14095636, "step": 83 }, { "epoch": 0.032024399542508575, "grad_norm": 11.990754545496944, "learning_rate": 5e-06, "loss": 0.4445, "num_input_tokens_seen": 14267868, "step": 84 }, { "epoch": 0.032024399542508575, "loss": 0.4556080102920532, "loss_ce": 0.3024708032608032, "loss_iou": 0.08056640625, "loss_num": 0.1533203125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 14267868, "step": 84 }, { "epoch": 0.03240564239420511, "grad_norm": 11.76168733215327, "learning_rate": 5e-06, "loss": 0.4287, "num_input_tokens_seen": 14436716, "step": 85 }, { "epoch": 0.03240564239420511, "loss": 0.41517770290374756, "loss_ce": 0.27424752712249756, "loss_iou": 0.140625, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 14436716, "step": 85 }, { "epoch": 0.03278688524590164, "grad_norm": 11.973750469066818, "learning_rate": 5e-06, "loss": 0.431, "num_input_tokens_seen": 14608776, "step": 86 }, { "epoch": 0.03278688524590164, "loss": 0.4094088077545166, "loss_ce": 0.235702782869339, "loss_iou": 0.04248046875, "loss_num": 0.173828125, "loss_xval": 0.173828125, "num_input_tokens_seen": 14608776, "step": 86 }, { "epoch": 0.03316812809759817, "grad_norm": 11.33393330382593, "learning_rate": 5e-06, "loss": 0.4344, "num_input_tokens_seen": 14774140, "step": 87 }, { "epoch": 0.03316812809759817, "loss": 0.41893303394317627, "loss_ce": 0.23631584644317627, "loss_iou": 0.06494140625, "loss_num": 0.1826171875, "loss_xval": 0.1826171875, "num_input_tokens_seen": 14774140, "step": 87 }, { "epoch": 0.0335493709492947, "grad_norm": 11.037333592059825, "learning_rate": 5e-06, "loss": 0.4189, "num_input_tokens_seen": 14942700, "step": 88 }, { "epoch": 0.0335493709492947, "loss": 0.3910408616065979, "loss_ce": 0.2472420036792755, "loss_iou": 0.1337890625, "loss_num": 0.1435546875, "loss_xval": 0.1435546875, "num_input_tokens_seen": 14942700, "step": 88 }, { "epoch": 0.03393061380099123, "grad_norm": 12.107053347481228, "learning_rate": 5e-06, "loss": 0.4231, "num_input_tokens_seen": 15110488, "step": 89 }, { "epoch": 0.03393061380099123, "loss": 0.42818814516067505, "loss_ce": 0.29909878969192505, "loss_iou": 0.0908203125, "loss_num": 0.12890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 15110488, "step": 89 }, { "epoch": 0.03431185665268776, "grad_norm": 11.42784337247996, "learning_rate": 5e-06, "loss": 0.3987, "num_input_tokens_seen": 15279188, "step": 90 }, { "epoch": 0.03431185665268776, "loss": 0.38819554448127747, "loss_ce": 0.23054175078868866, "loss_iou": 0.1875, "loss_num": 0.1572265625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 15279188, "step": 90 }, { "epoch": 0.03469309950438429, "grad_norm": 12.710478502345447, "learning_rate": 5e-06, "loss": 0.4212, "num_input_tokens_seen": 15451604, "step": 91 }, { "epoch": 0.03469309950438429, "loss": 0.41965770721435547, "loss_ce": 0.28641796112060547, "loss_iou": 0.08447265625, "loss_num": 0.1328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 15451604, "step": 91 }, { "epoch": 0.035074342356080826, "grad_norm": 12.111297962407386, "learning_rate": 5e-06, "loss": 0.4206, "num_input_tokens_seen": 15620592, "step": 92 }, { "epoch": 0.035074342356080826, "loss": 0.43124547600746155, "loss_ce": 0.22311557829380035, "loss_iou": 0.05078125, "loss_num": 0.2080078125, "loss_xval": 0.2080078125, "num_input_tokens_seen": 15620592, "step": 92 }, { "epoch": 0.035455585207777356, "grad_norm": 12.242275289330596, "learning_rate": 5e-06, "loss": 0.4124, "num_input_tokens_seen": 15791204, "step": 93 }, { "epoch": 0.035455585207777356, "loss": 0.38425594568252563, "loss_ce": 0.23789364099502563, "loss_iou": 0.13671875, "loss_num": 0.146484375, "loss_xval": 0.146484375, "num_input_tokens_seen": 15791204, "step": 93 }, { "epoch": 0.035836828059473885, "grad_norm": 11.212776469423819, "learning_rate": 5e-06, "loss": 0.4074, "num_input_tokens_seen": 15960280, "step": 94 }, { "epoch": 0.035836828059473885, "loss": 0.40042412281036377, "loss_ce": 0.22482600808143616, "loss_iou": 0.014892578125, "loss_num": 0.17578125, "loss_xval": 0.17578125, "num_input_tokens_seen": 15960280, "step": 94 }, { "epoch": 0.036218070911170415, "grad_norm": 12.657266487359777, "learning_rate": 5e-06, "loss": 0.4047, "num_input_tokens_seen": 16132564, "step": 95 }, { "epoch": 0.036218070911170415, "loss": 0.40306520462036133, "loss_ce": 0.26109743118286133, "loss_iou": 0.0203857421875, "loss_num": 0.1416015625, "loss_xval": 0.1416015625, "num_input_tokens_seen": 16132564, "step": 95 }, { "epoch": 0.036599313762866945, "grad_norm": 11.6211209359735, "learning_rate": 5e-06, "loss": 0.4029, "num_input_tokens_seen": 16304472, "step": 96 }, { "epoch": 0.036599313762866945, "loss": 0.3593056797981262, "loss_ce": 0.18291407823562622, "loss_iou": 0.06005859375, "loss_num": 0.1767578125, "loss_xval": 0.1767578125, "num_input_tokens_seen": 16304472, "step": 96 }, { "epoch": 0.036980556614563474, "grad_norm": 12.071393902645383, "learning_rate": 5e-06, "loss": 0.4006, "num_input_tokens_seen": 16474764, "step": 97 }, { "epoch": 0.036980556614563474, "loss": 0.37911850214004517, "loss_ce": 0.22012192010879517, "loss_iou": 0.048583984375, "loss_num": 0.1591796875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 16474764, "step": 97 }, { "epoch": 0.03736179946626001, "grad_norm": 12.000598741161477, "learning_rate": 5e-06, "loss": 0.3995, "num_input_tokens_seen": 16646672, "step": 98 }, { "epoch": 0.03736179946626001, "loss": 0.4102625846862793, "loss_ce": 0.22538712620735168, "loss_iou": 0.01025390625, "loss_num": 0.1845703125, "loss_xval": 0.1845703125, "num_input_tokens_seen": 16646672, "step": 98 }, { "epoch": 0.03774304231795654, "grad_norm": 12.707295898303995, "learning_rate": 5e-06, "loss": 0.3889, "num_input_tokens_seen": 16815208, "step": 99 }, { "epoch": 0.03774304231795654, "loss": 0.3928859233856201, "loss_ce": 0.2523829936981201, "loss_iou": 0.06884765625, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 16815208, "step": 99 }, { "epoch": 0.03812428516965307, "grad_norm": 12.427783036031853, "learning_rate": 5e-06, "loss": 0.3838, "num_input_tokens_seen": 16978948, "step": 100 }, { "epoch": 0.03812428516965307, "loss": 0.38862472772598267, "loss_ce": 0.21125656366348267, "loss_iou": 0.1474609375, "loss_num": 0.177734375, "loss_xval": 0.177734375, "num_input_tokens_seen": 16978948, "step": 100 }, { "epoch": 0.0385055280213496, "grad_norm": 12.291136690956028, "learning_rate": 5e-06, "loss": 0.3777, "num_input_tokens_seen": 17147808, "step": 101 }, { "epoch": 0.0385055280213496, "loss": 0.35547709465026855, "loss_ce": 0.21521833539009094, "loss_iou": 0.115234375, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 17147808, "step": 101 }, { "epoch": 0.03888677087304613, "grad_norm": 12.569611940916834, "learning_rate": 5e-06, "loss": 0.3753, "num_input_tokens_seen": 17315064, "step": 102 }, { "epoch": 0.03888677087304613, "loss": 0.3729170560836792, "loss_ce": 0.2106856107711792, "loss_iou": 0.076171875, "loss_num": 0.162109375, "loss_xval": 0.162109375, "num_input_tokens_seen": 17315064, "step": 102 }, { "epoch": 0.03926801372474266, "grad_norm": 16.243870473803266, "learning_rate": 5e-06, "loss": 0.3866, "num_input_tokens_seen": 17482872, "step": 103 }, { "epoch": 0.03926801372474266, "loss": 0.3830409049987793, "loss_ce": 0.2242884337902069, "loss_iou": 0.28515625, "loss_num": 0.1591796875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 17482872, "step": 103 }, { "epoch": 0.03964925657643919, "grad_norm": 13.452612030193265, "learning_rate": 5e-06, "loss": 0.3889, "num_input_tokens_seen": 17651212, "step": 104 }, { "epoch": 0.03964925657643919, "loss": 0.37701019644737244, "loss_ce": 0.20379243791103363, "loss_iou": 0.027587890625, "loss_num": 0.1728515625, "loss_xval": 0.1728515625, "num_input_tokens_seen": 17651212, "step": 104 }, { "epoch": 0.040030499428135725, "grad_norm": 11.54516678372868, "learning_rate": 5e-06, "loss": 0.3777, "num_input_tokens_seen": 17823268, "step": 105 }, { "epoch": 0.040030499428135725, "loss": 0.37387654185295105, "loss_ce": 0.20303913950920105, "loss_iou": 0.0439453125, "loss_num": 0.1708984375, "loss_xval": 0.1708984375, "num_input_tokens_seen": 17823268, "step": 105 }, { "epoch": 0.040411742279832255, "grad_norm": 10.956441122920754, "learning_rate": 5e-06, "loss": 0.3505, "num_input_tokens_seen": 17991568, "step": 106 }, { "epoch": 0.040411742279832255, "loss": 0.33052903413772583, "loss_ce": 0.18843916058540344, "loss_iou": 0.0732421875, "loss_num": 0.142578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 17991568, "step": 106 }, { "epoch": 0.040792985131528785, "grad_norm": 12.608733901165248, "learning_rate": 5e-06, "loss": 0.3781, "num_input_tokens_seen": 18161860, "step": 107 }, { "epoch": 0.040792985131528785, "loss": 0.34461355209350586, "loss_ce": 0.21344900131225586, "loss_iou": 0.10498046875, "loss_num": 0.130859375, "loss_xval": 0.130859375, "num_input_tokens_seen": 18161860, "step": 107 }, { "epoch": 0.041174227983225314, "grad_norm": 12.373322278476905, "learning_rate": 5e-06, "loss": 0.3694, "num_input_tokens_seen": 18330556, "step": 108 }, { "epoch": 0.041174227983225314, "loss": 0.3656204342842102, "loss_ce": 0.2087600827217102, "loss_iou": 0.1298828125, "loss_num": 0.1572265625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 18330556, "step": 108 }, { "epoch": 0.041555470834921844, "grad_norm": 10.869785200963268, "learning_rate": 5e-06, "loss": 0.3578, "num_input_tokens_seen": 18502672, "step": 109 }, { "epoch": 0.041555470834921844, "loss": 0.3445308208465576, "loss_ce": 0.19542193412780762, "loss_iou": 0.09375, "loss_num": 0.1494140625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 18502672, "step": 109 }, { "epoch": 0.041936713686618374, "grad_norm": 12.647777677695915, "learning_rate": 5e-06, "loss": 0.357, "num_input_tokens_seen": 18674676, "step": 110 }, { "epoch": 0.041936713686618374, "loss": 0.3487279713153839, "loss_ce": 0.1894262135028839, "loss_iou": 0.0849609375, "loss_num": 0.1591796875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 18674676, "step": 110 }, { "epoch": 0.0423179565383149, "grad_norm": 12.153649007209351, "learning_rate": 5e-06, "loss": 0.3626, "num_input_tokens_seen": 18841332, "step": 111 }, { "epoch": 0.0423179565383149, "loss": 0.3468208312988281, "loss_ce": 0.19014358520507812, "loss_iou": 0.19140625, "loss_num": 0.15625, "loss_xval": 0.15625, "num_input_tokens_seen": 18841332, "step": 111 }, { "epoch": 0.04269919939001144, "grad_norm": 13.154446666834914, "learning_rate": 5e-06, "loss": 0.3607, "num_input_tokens_seen": 19010808, "step": 112 }, { "epoch": 0.04269919939001144, "loss": 0.3582812547683716, "loss_ce": 0.1991625726222992, "loss_iou": 0.2177734375, "loss_num": 0.1591796875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 19010808, "step": 112 }, { "epoch": 0.04308044224170797, "grad_norm": 12.600474897392363, "learning_rate": 5e-06, "loss": 0.3588, "num_input_tokens_seen": 19183008, "step": 113 }, { "epoch": 0.04308044224170797, "loss": 0.3567988872528076, "loss_ce": 0.21086381375789642, "loss_iou": 0.11962890625, "loss_num": 0.1455078125, "loss_xval": 0.1455078125, "num_input_tokens_seen": 19183008, "step": 113 }, { "epoch": 0.0434616850934045, "grad_norm": 11.521956563077518, "learning_rate": 5e-06, "loss": 0.3354, "num_input_tokens_seen": 19355056, "step": 114 }, { "epoch": 0.0434616850934045, "loss": 0.3423011898994446, "loss_ce": 0.20753559470176697, "loss_iou": 0.0712890625, "loss_num": 0.134765625, "loss_xval": 0.134765625, "num_input_tokens_seen": 19355056, "step": 114 }, { "epoch": 0.04384292794510103, "grad_norm": 11.521584098092609, "learning_rate": 5e-06, "loss": 0.3334, "num_input_tokens_seen": 19525288, "step": 115 }, { "epoch": 0.04384292794510103, "loss": 0.35636138916015625, "loss_ce": 0.20127102732658386, "loss_iou": 0.03271484375, "loss_num": 0.1552734375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 19525288, "step": 115 }, { "epoch": 0.04422417079679756, "grad_norm": 12.326935183856571, "learning_rate": 5e-06, "loss": 0.3336, "num_input_tokens_seen": 19697428, "step": 116 }, { "epoch": 0.04422417079679756, "loss": 0.30840563774108887, "loss_ce": 0.18126940727233887, "loss_iou": 0.1904296875, "loss_num": 0.126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 19697428, "step": 116 }, { "epoch": 0.04460541364849409, "grad_norm": 14.141221536016685, "learning_rate": 5e-06, "loss": 0.3245, "num_input_tokens_seen": 19867612, "step": 117 }, { "epoch": 0.04460541364849409, "loss": 0.3331416845321655, "loss_ce": 0.19361534714698792, "loss_iou": 0.07177734375, "loss_num": 0.1396484375, "loss_xval": 0.1396484375, "num_input_tokens_seen": 19867612, "step": 117 }, { "epoch": 0.04498665650019062, "grad_norm": 87.14020102629748, "learning_rate": 5e-06, "loss": 0.3998, "num_input_tokens_seen": 20038036, "step": 118 }, { "epoch": 0.04498665650019062, "loss": 0.3945692777633667, "loss_ce": 0.2542494535446167, "loss_iou": 0.12060546875, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 20038036, "step": 118 }, { "epoch": 0.045367899351887155, "grad_norm": 15.830506521221166, "learning_rate": 5e-06, "loss": 0.4132, "num_input_tokens_seen": 20208368, "step": 119 }, { "epoch": 0.045367899351887155, "loss": 0.4296550452709198, "loss_ce": 0.2777385115623474, "loss_iou": 0.1943359375, "loss_num": 0.15234375, "loss_xval": 0.15234375, "num_input_tokens_seen": 20208368, "step": 119 }, { "epoch": 0.045749142203583684, "grad_norm": 14.724468931472991, "learning_rate": 5e-06, "loss": 0.3901, "num_input_tokens_seen": 20375724, "step": 120 }, { "epoch": 0.045749142203583684, "loss": 0.40854495763778687, "loss_ce": 0.25070804357528687, "loss_iou": 0.099609375, "loss_num": 0.158203125, "loss_xval": 0.158203125, "num_input_tokens_seen": 20375724, "step": 120 }, { "epoch": 0.046130385055280214, "grad_norm": 12.297283170117506, "learning_rate": 5e-06, "loss": 0.3595, "num_input_tokens_seen": 20545956, "step": 121 }, { "epoch": 0.046130385055280214, "loss": 0.38752883672714233, "loss_ce": 0.20295852422714233, "loss_iou": 0.07763671875, "loss_num": 0.1845703125, "loss_xval": 0.1845703125, "num_input_tokens_seen": 20545956, "step": 121 }, { "epoch": 0.046511627906976744, "grad_norm": 11.98722544983197, "learning_rate": 5e-06, "loss": 0.3451, "num_input_tokens_seen": 20711180, "step": 122 }, { "epoch": 0.046511627906976744, "loss": 0.34829050302505493, "loss_ce": 0.17073921859264374, "loss_iou": 0.0205078125, "loss_num": 0.177734375, "loss_xval": 0.177734375, "num_input_tokens_seen": 20711180, "step": 122 }, { "epoch": 0.04689287075867327, "grad_norm": 11.236273190561704, "learning_rate": 5e-06, "loss": 0.3303, "num_input_tokens_seen": 20883020, "step": 123 }, { "epoch": 0.04689287075867327, "loss": 0.32952818274497986, "loss_ce": 0.16564878821372986, "loss_iou": 0.052490234375, "loss_num": 0.1640625, "loss_xval": 0.1640625, "num_input_tokens_seen": 20883020, "step": 123 }, { "epoch": 0.0472741136103698, "grad_norm": 11.47342867332456, "learning_rate": 5e-06, "loss": 0.3273, "num_input_tokens_seen": 21053360, "step": 124 }, { "epoch": 0.0472741136103698, "loss": 0.3332858085632324, "loss_ce": 0.1640963852405548, "loss_iou": 0.05517578125, "loss_num": 0.1689453125, "loss_xval": 0.1689453125, "num_input_tokens_seen": 21053360, "step": 124 }, { "epoch": 0.04765535646206634, "grad_norm": 10.67542691608568, "learning_rate": 5e-06, "loss": 0.3022, "num_input_tokens_seen": 21221792, "step": 125 }, { "epoch": 0.04765535646206634, "loss": 0.29388928413391113, "loss_ce": 0.17273451387882233, "loss_iou": 0.138671875, "loss_num": 0.12109375, "loss_xval": 0.12109375, "num_input_tokens_seen": 21221792, "step": 125 }, { "epoch": 0.04803659931376287, "grad_norm": 10.793472255887544, "learning_rate": 5e-06, "loss": 0.298, "num_input_tokens_seen": 21388780, "step": 126 }, { "epoch": 0.04803659931376287, "loss": 0.31272128224372864, "loss_ce": 0.17954257130622864, "loss_iou": 0.02587890625, "loss_num": 0.1328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 21388780, "step": 126 }, { "epoch": 0.0484178421654594, "grad_norm": 10.457819380972436, "learning_rate": 5e-06, "loss": 0.2871, "num_input_tokens_seen": 21559156, "step": 127 }, { "epoch": 0.0484178421654594, "loss": 0.2922600209712982, "loss_ce": 0.1415642350912094, "loss_iou": 0.115234375, "loss_num": 0.150390625, "loss_xval": 0.150390625, "num_input_tokens_seen": 21559156, "step": 127 }, { "epoch": 0.04879908501715593, "grad_norm": 12.075104791130935, "learning_rate": 5e-06, "loss": 0.3002, "num_input_tokens_seen": 21728684, "step": 128 }, { "epoch": 0.04879908501715593, "loss": 0.2932286858558655, "loss_ce": 0.14759881794452667, "loss_iou": 0.02734375, "loss_num": 0.1455078125, "loss_xval": 0.1455078125, "num_input_tokens_seen": 21728684, "step": 128 }, { "epoch": 0.04918032786885246, "grad_norm": 17.596022200669275, "learning_rate": 5e-06, "loss": 0.3052, "num_input_tokens_seen": 21898896, "step": 129 }, { "epoch": 0.04918032786885246, "loss": 0.2854222059249878, "loss_ce": 0.15090075135231018, "loss_iou": 0.11572265625, "loss_num": 0.134765625, "loss_xval": 0.134765625, "num_input_tokens_seen": 21898896, "step": 129 }, { "epoch": 0.04956157072054899, "grad_norm": 22.591538039851265, "learning_rate": 5e-06, "loss": 0.3237, "num_input_tokens_seen": 22071000, "step": 130 }, { "epoch": 0.04956157072054899, "loss": 0.34077489376068115, "loss_ce": 0.16413915157318115, "loss_iou": 0.2119140625, "loss_num": 0.1767578125, "loss_xval": 0.1767578125, "num_input_tokens_seen": 22071000, "step": 130 }, { "epoch": 0.04994281357224552, "grad_norm": 23.969048922051158, "learning_rate": 5e-06, "loss": 0.3373, "num_input_tokens_seen": 22241740, "step": 131 }, { "epoch": 0.04994281357224552, "loss": 0.3232199549674988, "loss_ce": 0.17606420814990997, "loss_iou": 0.00830078125, "loss_num": 0.1474609375, "loss_xval": 0.1474609375, "num_input_tokens_seen": 22241740, "step": 131 }, { "epoch": 0.050324056423942054, "grad_norm": 26.453649680091864, "learning_rate": 5e-06, "loss": 0.3504, "num_input_tokens_seen": 22411932, "step": 132 }, { "epoch": 0.050324056423942054, "loss": 0.35389894247055054, "loss_ce": 0.18177980184555054, "loss_iou": 0.09716796875, "loss_num": 0.171875, "loss_xval": 0.171875, "num_input_tokens_seen": 22411932, "step": 132 }, { "epoch": 0.050705299275638584, "grad_norm": 29.770376889810056, "learning_rate": 5e-06, "loss": 0.321, "num_input_tokens_seen": 22580712, "step": 133 }, { "epoch": 0.050705299275638584, "loss": 0.3106634318828583, "loss_ce": 0.16460628807544708, "loss_iou": 0.2099609375, "loss_num": 0.146484375, "loss_xval": 0.146484375, "num_input_tokens_seen": 22580712, "step": 133 }, { "epoch": 0.05108654212733511, "grad_norm": 40.261099546087195, "learning_rate": 5e-06, "loss": 0.4734, "num_input_tokens_seen": 22751228, "step": 134 }, { "epoch": 0.05108654212733511, "loss": 0.4574471116065979, "loss_ce": 0.3043099045753479, "loss_iou": 0.208984375, "loss_num": 0.1533203125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 22751228, "step": 134 }, { "epoch": 0.05146778497903164, "grad_norm": 51.93099158970609, "learning_rate": 5e-06, "loss": 0.3771, "num_input_tokens_seen": 22920056, "step": 135 }, { "epoch": 0.05146778497903164, "loss": 0.38384222984313965, "loss_ce": 0.24974800646305084, "loss_iou": 0.03466796875, "loss_num": 0.1337890625, "loss_xval": 0.1337890625, "num_input_tokens_seen": 22920056, "step": 135 }, { "epoch": 0.05184902783072817, "grad_norm": 44.023197993806264, "learning_rate": 5e-06, "loss": 0.5048, "num_input_tokens_seen": 23090280, "step": 136 }, { "epoch": 0.05184902783072817, "loss": 0.45330941677093506, "loss_ce": 0.31573617458343506, "loss_iou": 0.01129150390625, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 23090280, "step": 136 }, { "epoch": 0.0522302706824247, "grad_norm": 17.668600356916173, "learning_rate": 5e-06, "loss": 0.3013, "num_input_tokens_seen": 23258092, "step": 137 }, { "epoch": 0.0522302706824247, "loss": 0.2805832028388977, "loss_ce": 0.1533859372138977, "loss_iou": 0.271484375, "loss_num": 0.126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 23258092, "step": 137 }, { "epoch": 0.05261151353412123, "grad_norm": 11.853121000899684, "learning_rate": 5e-06, "loss": 0.2956, "num_input_tokens_seen": 23428304, "step": 138 }, { "epoch": 0.05261151353412123, "loss": 0.2811272442340851, "loss_ce": 0.12933281064033508, "loss_iou": 0.015869140625, "loss_num": 0.1513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 23428304, "step": 138 }, { "epoch": 0.05299275638581777, "grad_norm": 10.782142098964552, "learning_rate": 5e-06, "loss": 0.2952, "num_input_tokens_seen": 23597868, "step": 139 }, { "epoch": 0.05299275638581777, "loss": 0.27758559584617615, "loss_ce": 0.13134536147117615, "loss_iou": 0.205078125, "loss_num": 0.146484375, "loss_xval": 0.146484375, "num_input_tokens_seen": 23597868, "step": 139 }, { "epoch": 0.0533739992375143, "grad_norm": 10.56623325337934, "learning_rate": 5e-06, "loss": 0.2759, "num_input_tokens_seen": 23769828, "step": 140 }, { "epoch": 0.0533739992375143, "loss": 0.2588083744049072, "loss_ce": 0.12971901893615723, "loss_iou": 0.142578125, "loss_num": 0.12890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 23769828, "step": 140 }, { "epoch": 0.05375524208921083, "grad_norm": 9.295556192178626, "learning_rate": 5e-06, "loss": 0.2827, "num_input_tokens_seen": 23941928, "step": 141 }, { "epoch": 0.05375524208921083, "loss": 0.2925659716129303, "loss_ce": 0.12453620135784149, "loss_iou": 0.07177734375, "loss_num": 0.16796875, "loss_xval": 0.16796875, "num_input_tokens_seen": 23941928, "step": 141 }, { "epoch": 0.05413648494090736, "grad_norm": 9.941605277841138, "learning_rate": 5e-06, "loss": 0.2708, "num_input_tokens_seen": 24108640, "step": 142 }, { "epoch": 0.05413648494090736, "loss": 0.30066150426864624, "loss_ce": 0.12451402842998505, "loss_iou": 0.30859375, "loss_num": 0.17578125, "loss_xval": 0.17578125, "num_input_tokens_seen": 24108640, "step": 142 }, { "epoch": 0.05451772779260389, "grad_norm": 10.112022703753036, "learning_rate": 5e-06, "loss": 0.2652, "num_input_tokens_seen": 24280820, "step": 143 }, { "epoch": 0.05451772779260389, "loss": 0.2717851400375366, "loss_ce": 0.13170944154262543, "loss_iou": 0.0283203125, "loss_num": 0.1396484375, "loss_xval": 0.1396484375, "num_input_tokens_seen": 24280820, "step": 143 }, { "epoch": 0.05489897064430042, "grad_norm": 9.423737008725107, "learning_rate": 5e-06, "loss": 0.2729, "num_input_tokens_seen": 24443752, "step": 144 }, { "epoch": 0.05489897064430042, "loss": 0.2589079439640045, "loss_ce": 0.11364426463842392, "loss_iou": 0.12158203125, "loss_num": 0.1455078125, "loss_xval": 0.1455078125, "num_input_tokens_seen": 24443752, "step": 144 }, { "epoch": 0.055280213495996953, "grad_norm": 10.806035234827066, "learning_rate": 5e-06, "loss": 0.2676, "num_input_tokens_seen": 24614996, "step": 145 }, { "epoch": 0.055280213495996953, "loss": 0.2665177583694458, "loss_ce": 0.1084977239370346, "loss_iou": 0.0361328125, "loss_num": 0.158203125, "loss_xval": 0.158203125, "num_input_tokens_seen": 24614996, "step": 145 }, { "epoch": 0.05566145634769348, "grad_norm": 11.25713824124703, "learning_rate": 5e-06, "loss": 0.2855, "num_input_tokens_seen": 24784460, "step": 146 }, { "epoch": 0.05566145634769348, "loss": 0.26414167881011963, "loss_ce": 0.12302839756011963, "loss_iou": 0.029052734375, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 24784460, "step": 146 }, { "epoch": 0.05604269919939001, "grad_norm": 10.106443684805265, "learning_rate": 5e-06, "loss": 0.2656, "num_input_tokens_seen": 24954704, "step": 147 }, { "epoch": 0.05604269919939001, "loss": 0.25407490134239197, "loss_ce": 0.10862812399864197, "loss_iou": 0.0269775390625, "loss_num": 0.1455078125, "loss_xval": 0.1455078125, "num_input_tokens_seen": 24954704, "step": 147 }, { "epoch": 0.05642394205108654, "grad_norm": 9.275986795258602, "learning_rate": 5e-06, "loss": 0.2428, "num_input_tokens_seen": 25123164, "step": 148 }, { "epoch": 0.05642394205108654, "loss": 0.2414214015007019, "loss_ce": 0.1008574366569519, "loss_iou": 0.16796875, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 25123164, "step": 148 }, { "epoch": 0.05680518490278307, "grad_norm": 12.235038905105682, "learning_rate": 5e-06, "loss": 0.2484, "num_input_tokens_seen": 25289736, "step": 149 }, { "epoch": 0.05680518490278307, "loss": 0.24917542934417725, "loss_ce": 0.09304748475551605, "loss_iou": 0.07958984375, "loss_num": 0.15625, "loss_xval": 0.15625, "num_input_tokens_seen": 25289736, "step": 149 }, { "epoch": 0.0571864277544796, "grad_norm": 10.171954399456862, "learning_rate": 5e-06, "loss": 0.2653, "num_input_tokens_seen": 25460180, "step": 150 }, { "epoch": 0.0571864277544796, "loss": 0.2596849203109741, "loss_ce": 0.10398421436548233, "loss_iou": 0.0517578125, "loss_num": 0.1552734375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 25460180, "step": 150 }, { "epoch": 0.05756767060617613, "grad_norm": 9.681432337059835, "learning_rate": 5e-06, "loss": 0.2515, "num_input_tokens_seen": 25630484, "step": 151 }, { "epoch": 0.05756767060617613, "loss": 0.25730687379837036, "loss_ce": 0.11491183936595917, "loss_iou": 0.2333984375, "loss_num": 0.142578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 25630484, "step": 151 }, { "epoch": 0.05794891345787267, "grad_norm": 9.005698061356364, "learning_rate": 5e-06, "loss": 0.2406, "num_input_tokens_seen": 25800676, "step": 152 }, { "epoch": 0.05794891345787267, "loss": 0.2408166080713272, "loss_ce": 0.10714960098266602, "loss_iou": 0.052978515625, "loss_num": 0.1337890625, "loss_xval": 0.1337890625, "num_input_tokens_seen": 25800676, "step": 152 }, { "epoch": 0.0583301563095692, "grad_norm": 9.23109905221705, "learning_rate": 5e-06, "loss": 0.2478, "num_input_tokens_seen": 25970308, "step": 153 }, { "epoch": 0.0583301563095692, "loss": 0.2394830286502838, "loss_ce": 0.09977356344461441, "loss_iou": 0.08984375, "loss_num": 0.1396484375, "loss_xval": 0.1396484375, "num_input_tokens_seen": 25970308, "step": 153 }, { "epoch": 0.05871139916126573, "grad_norm": 9.694346052607242, "learning_rate": 5e-06, "loss": 0.2383, "num_input_tokens_seen": 26139336, "step": 154 }, { "epoch": 0.05871139916126573, "loss": 0.24506092071533203, "loss_ce": 0.08954335749149323, "loss_iou": 0.1435546875, "loss_num": 0.1552734375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 26139336, "step": 154 }, { "epoch": 0.05909264201296226, "grad_norm": 10.26482253569495, "learning_rate": 5e-06, "loss": 0.242, "num_input_tokens_seen": 26309800, "step": 155 }, { "epoch": 0.05909264201296226, "loss": 0.2486727237701416, "loss_ce": 0.09480307996273041, "loss_iou": 0.05615234375, "loss_num": 0.154296875, "loss_xval": 0.154296875, "num_input_tokens_seen": 26309800, "step": 155 }, { "epoch": 0.05947388486465879, "grad_norm": 10.728319795398349, "learning_rate": 5e-06, "loss": 0.2523, "num_input_tokens_seen": 26477652, "step": 156 }, { "epoch": 0.05947388486465879, "loss": 0.26340925693511963, "loss_ce": 0.10184921324253082, "loss_iou": 0.138671875, "loss_num": 0.1611328125, "loss_xval": 0.1611328125, "num_input_tokens_seen": 26477652, "step": 156 }, { "epoch": 0.059855127716355316, "grad_norm": 8.708486379954715, "learning_rate": 5e-06, "loss": 0.2442, "num_input_tokens_seen": 26647156, "step": 157 }, { "epoch": 0.059855127716355316, "loss": 0.2594885230064392, "loss_ce": 0.0865149199962616, "loss_iou": 0.0908203125, "loss_num": 0.1728515625, "loss_xval": 0.1728515625, "num_input_tokens_seen": 26647156, "step": 157 }, { "epoch": 0.060236370568051846, "grad_norm": 8.318082248992015, "learning_rate": 5e-06, "loss": 0.2296, "num_input_tokens_seen": 26817628, "step": 158 }, { "epoch": 0.060236370568051846, "loss": 0.23485592007637024, "loss_ce": 0.08330562710762024, "loss_iou": 0.154296875, "loss_num": 0.1513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 26817628, "step": 158 }, { "epoch": 0.06061761341974838, "grad_norm": 8.479454996279628, "learning_rate": 5e-06, "loss": 0.2326, "num_input_tokens_seen": 26989672, "step": 159 }, { "epoch": 0.06061761341974838, "loss": 0.2239830046892166, "loss_ce": 0.07115097343921661, "loss_iou": 0.024658203125, "loss_num": 0.15234375, "loss_xval": 0.15234375, "num_input_tokens_seen": 26989672, "step": 159 }, { "epoch": 0.06099885627144491, "grad_norm": 9.256918264151901, "learning_rate": 5e-06, "loss": 0.2312, "num_input_tokens_seen": 27159948, "step": 160 }, { "epoch": 0.06099885627144491, "loss": 0.23189058899879456, "loss_ce": 0.08729829639196396, "loss_iou": 0.046875, "loss_num": 0.14453125, "loss_xval": 0.14453125, "num_input_tokens_seen": 27159948, "step": 160 }, { "epoch": 0.06138009912314144, "grad_norm": 12.48003698093438, "learning_rate": 5e-06, "loss": 0.2475, "num_input_tokens_seen": 27331912, "step": 161 }, { "epoch": 0.06138009912314144, "loss": 0.24232828617095947, "loss_ce": 0.08272135257720947, "loss_iou": 0.017578125, "loss_num": 0.1591796875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 27331912, "step": 161 }, { "epoch": 0.06176134197483797, "grad_norm": 11.268891211576127, "learning_rate": 5e-06, "loss": 0.2454, "num_input_tokens_seen": 27500572, "step": 162 }, { "epoch": 0.06176134197483797, "loss": 0.2539252042770386, "loss_ce": 0.08461367338895798, "loss_iou": 0.271484375, "loss_num": 0.1689453125, "loss_xval": 0.1689453125, "num_input_tokens_seen": 27500572, "step": 162 }, { "epoch": 0.0621425848265345, "grad_norm": 8.295163566141365, "learning_rate": 5e-06, "loss": 0.2189, "num_input_tokens_seen": 27670916, "step": 163 }, { "epoch": 0.0621425848265345, "loss": 0.21146176755428314, "loss_ce": 0.08090756833553314, "loss_iou": 0.12060546875, "loss_num": 0.130859375, "loss_xval": 0.130859375, "num_input_tokens_seen": 27670916, "step": 163 }, { "epoch": 0.06252382767823103, "grad_norm": 7.772818852021809, "learning_rate": 5e-06, "loss": 0.2362, "num_input_tokens_seen": 27839668, "step": 164 }, { "epoch": 0.06252382767823103, "loss": 0.24545946717262268, "loss_ce": 0.08115281909704208, "loss_iou": 0.1484375, "loss_num": 0.1640625, "loss_xval": 0.1640625, "num_input_tokens_seen": 27839668, "step": 164 }, { "epoch": 0.06290507052992757, "grad_norm": 7.7406600766597995, "learning_rate": 5e-06, "loss": 0.2296, "num_input_tokens_seen": 28011736, "step": 165 }, { "epoch": 0.06290507052992757, "loss": 0.24421420693397522, "loss_ce": 0.08125033974647522, "loss_iou": 0.036376953125, "loss_num": 0.1630859375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 28011736, "step": 165 }, { "epoch": 0.06328631338162409, "grad_norm": 7.558234750827758, "learning_rate": 5e-06, "loss": 0.2191, "num_input_tokens_seen": 28181428, "step": 166 }, { "epoch": 0.06328631338162409, "loss": 0.2300872951745987, "loss_ce": 0.06968691200017929, "loss_iou": 0.267578125, "loss_num": 0.16015625, "loss_xval": 0.16015625, "num_input_tokens_seen": 28181428, "step": 166 }, { "epoch": 0.06366755623332063, "grad_norm": 7.904016136847001, "learning_rate": 5e-06, "loss": 0.2202, "num_input_tokens_seen": 28351744, "step": 167 }, { "epoch": 0.06366755623332063, "loss": 0.2199670970439911, "loss_ce": 0.08190558105707169, "loss_iou": 0.1025390625, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 28351744, "step": 167 }, { "epoch": 0.06404879908501715, "grad_norm": 8.048339423245295, "learning_rate": 5e-06, "loss": 0.2037, "num_input_tokens_seen": 28523912, "step": 168 }, { "epoch": 0.06404879908501715, "loss": 0.1940288096666336, "loss_ce": 0.0752543956041336, "loss_iou": 0.1474609375, "loss_num": 0.11865234375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 28523912, "step": 168 }, { "epoch": 0.06443004193671369, "grad_norm": 11.225230091380547, "learning_rate": 5e-06, "loss": 0.2132, "num_input_tokens_seen": 28691576, "step": 169 }, { "epoch": 0.06443004193671369, "loss": 0.2098519206047058, "loss_ce": 0.067212775349617, "loss_iou": 0.142578125, "loss_num": 0.142578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 28691576, "step": 169 }, { "epoch": 0.06481128478841022, "grad_norm": 24.483990446755985, "learning_rate": 5e-06, "loss": 0.3308, "num_input_tokens_seen": 28859312, "step": 170 }, { "epoch": 0.06481128478841022, "loss": 0.33362236618995667, "loss_ce": 0.06909599900245667, "loss_iou": 0.37109375, "loss_num": 0.263671875, "loss_xval": 0.263671875, "num_input_tokens_seen": 28859312, "step": 170 }, { "epoch": 0.06519252764010675, "grad_norm": 11.280338076240879, "learning_rate": 5e-06, "loss": 0.2534, "num_input_tokens_seen": 29027856, "step": 171 }, { "epoch": 0.06519252764010675, "loss": 0.2349179983139038, "loss_ce": 0.071221724152565, "loss_iou": 0.056396484375, "loss_num": 0.1640625, "loss_xval": 0.1640625, "num_input_tokens_seen": 29027856, "step": 171 }, { "epoch": 0.06557377049180328, "grad_norm": 19.174485435722143, "learning_rate": 5e-06, "loss": 0.2755, "num_input_tokens_seen": 29196644, "step": 172 }, { "epoch": 0.06557377049180328, "loss": 0.30110809206962585, "loss_ce": 0.10616179555654526, "loss_iou": 0.3984375, "loss_num": 0.1953125, "loss_xval": 0.1953125, "num_input_tokens_seen": 29196644, "step": 172 }, { "epoch": 0.0659550133434998, "grad_norm": 11.53696010791549, "learning_rate": 5e-06, "loss": 0.2506, "num_input_tokens_seen": 29361972, "step": 173 }, { "epoch": 0.0659550133434998, "loss": 0.24097901582717896, "loss_ce": 0.06373292952775955, "loss_iou": 0.0771484375, "loss_num": 0.177734375, "loss_xval": 0.177734375, "num_input_tokens_seen": 29361972, "step": 173 }, { "epoch": 0.06633625619519634, "grad_norm": 7.365584889438314, "learning_rate": 5e-06, "loss": 0.2221, "num_input_tokens_seen": 29530720, "step": 174 }, { "epoch": 0.06633625619519634, "loss": 0.20714986324310303, "loss_ce": 0.07134665548801422, "loss_iou": 0.0242919921875, "loss_num": 0.1357421875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 29530720, "step": 174 }, { "epoch": 0.06671749904689286, "grad_norm": 7.619362858636232, "learning_rate": 5e-06, "loss": 0.2124, "num_input_tokens_seen": 29699604, "step": 175 }, { "epoch": 0.06671749904689286, "loss": 0.2332572489976883, "loss_ce": 0.0637015849351883, "loss_iou": 0.0732421875, "loss_num": 0.169921875, "loss_xval": 0.169921875, "num_input_tokens_seen": 29699604, "step": 175 }, { "epoch": 0.0670987418985894, "grad_norm": 7.073823802448804, "learning_rate": 5e-06, "loss": 0.2051, "num_input_tokens_seen": 29867992, "step": 176 }, { "epoch": 0.0670987418985894, "loss": 0.18709206581115723, "loss_ce": 0.06709694862365723, "loss_iou": 0.09716796875, "loss_num": 0.1201171875, "loss_xval": 0.1201171875, "num_input_tokens_seen": 29867992, "step": 176 }, { "epoch": 0.06747998475028594, "grad_norm": 8.039181008207422, "learning_rate": 5e-06, "loss": 0.2211, "num_input_tokens_seen": 30035884, "step": 177 }, { "epoch": 0.06747998475028594, "loss": 0.2507503628730774, "loss_ce": 0.11000329256057739, "loss_iou": 0.04296875, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 30035884, "step": 177 }, { "epoch": 0.06786122760198246, "grad_norm": 6.614190886473386, "learning_rate": 5e-06, "loss": 0.2033, "num_input_tokens_seen": 30206128, "step": 178 }, { "epoch": 0.06786122760198246, "loss": 0.19981291890144348, "loss_ce": 0.05955413728952408, "loss_iou": 0.09228515625, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 30206128, "step": 178 }, { "epoch": 0.068242470453679, "grad_norm": 6.636430428575403, "learning_rate": 5e-06, "loss": 0.1949, "num_input_tokens_seen": 30378096, "step": 179 }, { "epoch": 0.068242470453679, "loss": 0.1966387778520584, "loss_ce": 0.06101866066455841, "loss_iou": 0.1318359375, "loss_num": 0.1357421875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 30378096, "step": 179 }, { "epoch": 0.06862371330537552, "grad_norm": 6.867381193636216, "learning_rate": 5e-06, "loss": 0.1896, "num_input_tokens_seen": 30550272, "step": 180 }, { "epoch": 0.06862371330537552, "loss": 0.2098548263311386, "loss_ce": 0.06001351401209831, "loss_iou": 0.16796875, "loss_num": 0.1494140625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 30550272, "step": 180 }, { "epoch": 0.06900495615707206, "grad_norm": 7.25383585501624, "learning_rate": 5e-06, "loss": 0.1873, "num_input_tokens_seen": 30718864, "step": 181 }, { "epoch": 0.06900495615707206, "loss": 0.17539140582084656, "loss_ce": 0.049781057983636856, "loss_iou": 0.1005859375, "loss_num": 0.1259765625, "loss_xval": 0.1259765625, "num_input_tokens_seen": 30718864, "step": 181 }, { "epoch": 0.06938619900876858, "grad_norm": 9.593416424164415, "learning_rate": 5e-06, "loss": 0.2219, "num_input_tokens_seen": 30888492, "step": 182 }, { "epoch": 0.06938619900876858, "loss": 0.21108978986740112, "loss_ce": 0.06234711408615112, "loss_iou": 0.205078125, "loss_num": 0.1484375, "loss_xval": 0.1484375, "num_input_tokens_seen": 30888492, "step": 182 }, { "epoch": 0.06976744186046512, "grad_norm": 11.387146816371892, "learning_rate": 5e-06, "loss": 0.2342, "num_input_tokens_seen": 31059204, "step": 183 }, { "epoch": 0.06976744186046512, "loss": 0.2156086564064026, "loss_ce": 0.05594068765640259, "loss_iou": 0.11669921875, "loss_num": 0.16015625, "loss_xval": 0.16015625, "num_input_tokens_seen": 31059204, "step": 183 }, { "epoch": 0.07014868471216165, "grad_norm": 7.155039422400645, "learning_rate": 5e-06, "loss": 0.2119, "num_input_tokens_seen": 31231448, "step": 184 }, { "epoch": 0.07014868471216165, "loss": 0.23432870209217072, "loss_ce": 0.06617684662342072, "loss_iou": 0.033203125, "loss_num": 0.16796875, "loss_xval": 0.16796875, "num_input_tokens_seen": 31231448, "step": 184 }, { "epoch": 0.07052992756385817, "grad_norm": 6.902516548706931, "learning_rate": 5e-06, "loss": 0.2087, "num_input_tokens_seen": 31401680, "step": 185 }, { "epoch": 0.07052992756385817, "loss": 0.22064757347106934, "loss_ce": 0.06769345700740814, "loss_iou": 0.27734375, "loss_num": 0.1533203125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 31401680, "step": 185 }, { "epoch": 0.07091117041555471, "grad_norm": 6.069188803983981, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 31572216, "step": 186 }, { "epoch": 0.07091117041555471, "loss": 0.2209046185016632, "loss_ce": 0.0724671259522438, "loss_iou": 0.07763671875, "loss_num": 0.1484375, "loss_xval": 0.1484375, "num_input_tokens_seen": 31572216, "step": 186 }, { "epoch": 0.07129241326725123, "grad_norm": 6.435423493978937, "learning_rate": 5e-06, "loss": 0.1857, "num_input_tokens_seen": 31744228, "step": 187 }, { "epoch": 0.07129241326725123, "loss": 0.19196385145187378, "loss_ce": 0.053902335464954376, "loss_iou": 0.158203125, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 31744228, "step": 187 }, { "epoch": 0.07167365611894777, "grad_norm": 6.229232274862448, "learning_rate": 5e-06, "loss": 0.1916, "num_input_tokens_seen": 31914484, "step": 188 }, { "epoch": 0.07167365611894777, "loss": 0.22564879059791565, "loss_ce": 0.08831968903541565, "loss_iou": 0.1376953125, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 31914484, "step": 188 }, { "epoch": 0.07205489897064431, "grad_norm": 5.9170687779491855, "learning_rate": 5e-06, "loss": 0.1979, "num_input_tokens_seen": 32086484, "step": 189 }, { "epoch": 0.07205489897064431, "loss": 0.19572624564170837, "loss_ce": 0.043382514268159866, "loss_iou": 0.1630859375, "loss_num": 0.15234375, "loss_xval": 0.15234375, "num_input_tokens_seen": 32086484, "step": 189 }, { "epoch": 0.07243614182234083, "grad_norm": 7.09938319359785, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 32256820, "step": 190 }, { "epoch": 0.07243614182234083, "loss": 0.16142967343330383, "loss_ce": 0.04827050492167473, "loss_iou": 0.0208740234375, "loss_num": 0.11328125, "loss_xval": 0.11328125, "num_input_tokens_seen": 32256820, "step": 190 }, { "epoch": 0.07281738467403737, "grad_norm": 5.710676708424859, "learning_rate": 5e-06, "loss": 0.1807, "num_input_tokens_seen": 32426060, "step": 191 }, { "epoch": 0.07281738467403737, "loss": 0.18200108408927917, "loss_ce": 0.04430576413869858, "loss_iou": 0.103515625, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 32426060, "step": 191 }, { "epoch": 0.07319862752573389, "grad_norm": 6.58794684908767, "learning_rate": 5e-06, "loss": 0.1942, "num_input_tokens_seen": 32594448, "step": 192 }, { "epoch": 0.07319862752573389, "loss": 0.1748276799917221, "loss_ce": 0.04299173876643181, "loss_iou": 0.125, "loss_num": 0.1318359375, "loss_xval": 0.1318359375, "num_input_tokens_seen": 32594448, "step": 192 }, { "epoch": 0.07357987037743043, "grad_norm": 8.787740714089832, "learning_rate": 5e-06, "loss": 0.1881, "num_input_tokens_seen": 32766532, "step": 193 }, { "epoch": 0.07357987037743043, "loss": 0.1966419517993927, "loss_ce": 0.0433826819062233, "loss_iou": 0.103515625, "loss_num": 0.1533203125, "loss_xval": 0.1533203125, "num_input_tokens_seen": 32766532, "step": 193 }, { "epoch": 0.07396111322912695, "grad_norm": 6.874779554170579, "learning_rate": 5e-06, "loss": 0.1934, "num_input_tokens_seen": 32936888, "step": 194 }, { "epoch": 0.07396111322912695, "loss": 0.1619054675102234, "loss_ce": 0.047403521835803986, "loss_iou": 0.212890625, "loss_num": 0.1142578125, "loss_xval": 0.1142578125, "num_input_tokens_seen": 32936888, "step": 194 }, { "epoch": 0.07434235608082349, "grad_norm": 6.446485562335773, "learning_rate": 5e-06, "loss": 0.1883, "num_input_tokens_seen": 33108180, "step": 195 }, { "epoch": 0.07434235608082349, "loss": 0.18052715063095093, "loss_ce": 0.07121318578720093, "loss_iou": 0.041748046875, "loss_num": 0.109375, "loss_xval": 0.109375, "num_input_tokens_seen": 33108180, "step": 195 }, { "epoch": 0.07472359893252002, "grad_norm": 6.581289116802433, "learning_rate": 5e-06, "loss": 0.1709, "num_input_tokens_seen": 33280144, "step": 196 }, { "epoch": 0.07472359893252002, "loss": 0.17547522485256195, "loss_ce": 0.049193479120731354, "loss_iou": 0.138671875, "loss_num": 0.1259765625, "loss_xval": 0.1259765625, "num_input_tokens_seen": 33280144, "step": 196 }, { "epoch": 0.07510484178421654, "grad_norm": 5.5655095665329695, "learning_rate": 5e-06, "loss": 0.179, "num_input_tokens_seen": 33450488, "step": 197 }, { "epoch": 0.07510484178421654, "loss": 0.182396799325943, "loss_ce": 0.037804510444402695, "loss_iou": 0.078125, "loss_num": 0.14453125, "loss_xval": 0.14453125, "num_input_tokens_seen": 33450488, "step": 197 }, { "epoch": 0.07548608463591308, "grad_norm": 5.529419425652068, "learning_rate": 5e-06, "loss": 0.1723, "num_input_tokens_seen": 33618828, "step": 198 }, { "epoch": 0.07548608463591308, "loss": 0.1725044995546341, "loss_ce": 0.051105573773384094, "loss_iou": 0.05908203125, "loss_num": 0.12158203125, "loss_xval": 0.12158203125, "num_input_tokens_seen": 33618828, "step": 198 }, { "epoch": 0.0758673274876096, "grad_norm": 5.257627463715373, "learning_rate": 5e-06, "loss": 0.1788, "num_input_tokens_seen": 33788292, "step": 199 }, { "epoch": 0.0758673274876096, "loss": 0.15512898564338684, "loss_ce": 0.035194914788007736, "loss_iou": 0.06298828125, "loss_num": 0.1201171875, "loss_xval": 0.1201171875, "num_input_tokens_seen": 33788292, "step": 199 }, { "epoch": 0.07624857033930614, "grad_norm": 7.308182402927082, "learning_rate": 5e-06, "loss": 0.1741, "num_input_tokens_seen": 33957508, "step": 200 }, { "epoch": 0.07624857033930614, "loss": 0.17762351036071777, "loss_ce": 0.03748679906129837, "loss_iou": 0.1611328125, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 33957508, "step": 200 }, { "epoch": 0.07662981319100266, "grad_norm": 11.727204455068861, "learning_rate": 5e-06, "loss": 0.2073, "num_input_tokens_seen": 34126596, "step": 201 }, { "epoch": 0.07662981319100266, "loss": 0.19732967019081116, "loss_ce": 0.04156793653964996, "loss_iou": 0.19140625, "loss_num": 0.15625, "loss_xval": 0.15625, "num_input_tokens_seen": 34126596, "step": 201 }, { "epoch": 0.0770110560426992, "grad_norm": 7.6470813001116555, "learning_rate": 5e-06, "loss": 0.1831, "num_input_tokens_seen": 34296920, "step": 202 }, { "epoch": 0.0770110560426992, "loss": 0.1773093044757843, "loss_ce": 0.036806363612413406, "loss_iou": 0.3359375, "loss_num": 0.140625, "loss_xval": 0.140625, "num_input_tokens_seen": 34296920, "step": 202 }, { "epoch": 0.07739229889439574, "grad_norm": 5.57340451910343, "learning_rate": 5e-06, "loss": 0.1917, "num_input_tokens_seen": 34462376, "step": 203 }, { "epoch": 0.07739229889439574, "loss": 0.2552993893623352, "loss_ce": 0.0850113034248352, "loss_iou": 0.2236328125, "loss_num": 0.169921875, "loss_xval": 0.169921875, "num_input_tokens_seen": 34462376, "step": 203 }, { "epoch": 0.07777354174609226, "grad_norm": 5.14332067227009, "learning_rate": 5e-06, "loss": 0.1731, "num_input_tokens_seen": 34632808, "step": 204 }, { "epoch": 0.07777354174609226, "loss": 0.17063459753990173, "loss_ce": 0.03275619447231293, "loss_iou": 0.1669921875, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 34632808, "step": 204 }, { "epoch": 0.0781547845977888, "grad_norm": 4.785402363672343, "learning_rate": 5e-06, "loss": 0.1782, "num_input_tokens_seen": 34805040, "step": 205 }, { "epoch": 0.0781547845977888, "loss": 0.1723075956106186, "loss_ce": 0.03308640792965889, "loss_iou": 0.2490234375, "loss_num": 0.1396484375, "loss_xval": 0.1396484375, "num_input_tokens_seen": 34805040, "step": 205 }, { "epoch": 0.07853602744948532, "grad_norm": 5.037982883600604, "learning_rate": 5e-06, "loss": 0.1678, "num_input_tokens_seen": 34972936, "step": 206 }, { "epoch": 0.07853602744948532, "loss": 0.17780755460262299, "loss_ce": 0.038708437234163284, "loss_iou": 0.2265625, "loss_num": 0.138671875, "loss_xval": 0.138671875, "num_input_tokens_seen": 34972936, "step": 206 }, { "epoch": 0.07891727030118185, "grad_norm": 4.393851666761968, "learning_rate": 5e-06, "loss": 0.1562, "num_input_tokens_seen": 35141640, "step": 207 }, { "epoch": 0.07891727030118185, "loss": 0.1302179992198944, "loss_ce": 0.032927948981523514, "loss_iou": 0.10546875, "loss_num": 0.09716796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 35141640, "step": 207 }, { "epoch": 0.07929851315287838, "grad_norm": 4.788946279040103, "learning_rate": 5e-06, "loss": 0.1596, "num_input_tokens_seen": 35313780, "step": 208 }, { "epoch": 0.07929851315287838, "loss": 0.1403164565563202, "loss_ce": 0.035824261605739594, "loss_iou": 0.0576171875, "loss_num": 0.1044921875, "loss_xval": 0.1044921875, "num_input_tokens_seen": 35313780, "step": 208 }, { "epoch": 0.07967975600457491, "grad_norm": 6.031729420886291, "learning_rate": 5e-06, "loss": 0.1487, "num_input_tokens_seen": 35476224, "step": 209 }, { "epoch": 0.07967975600457491, "loss": 0.15272024273872375, "loss_ce": 0.029612332582473755, "loss_iou": 0.205078125, "loss_num": 0.123046875, "loss_xval": 0.123046875, "num_input_tokens_seen": 35476224, "step": 209 }, { "epoch": 0.08006099885627145, "grad_norm": 8.180171259285881, "learning_rate": 5e-06, "loss": 0.1771, "num_input_tokens_seen": 35645576, "step": 210 }, { "epoch": 0.08006099885627145, "loss": 0.15656079351902008, "loss_ce": 0.03070629946887493, "loss_iou": 0.123046875, "loss_num": 0.1259765625, "loss_xval": 0.1259765625, "num_input_tokens_seen": 35645576, "step": 210 }, { "epoch": 0.08044224170796797, "grad_norm": 7.452042280101069, "learning_rate": 5e-06, "loss": 0.1913, "num_input_tokens_seen": 35815944, "step": 211 }, { "epoch": 0.08044224170796797, "loss": 0.1716148853302002, "loss_ce": 0.038008928298950195, "loss_iou": 0.10498046875, "loss_num": 0.1337890625, "loss_xval": 0.1337890625, "num_input_tokens_seen": 35815944, "step": 211 }, { "epoch": 0.08082348455966451, "grad_norm": 5.048442437645143, "learning_rate": 5e-06, "loss": 0.172, "num_input_tokens_seen": 35983292, "step": 212 }, { "epoch": 0.08082348455966451, "loss": 0.17964288592338562, "loss_ce": 0.03273127228021622, "loss_iou": 0.044921875, "loss_num": 0.146484375, "loss_xval": 0.146484375, "num_input_tokens_seen": 35983292, "step": 212 }, { "epoch": 0.08120472741136103, "grad_norm": 4.523995490752935, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 36150308, "step": 213 }, { "epoch": 0.08120472741136103, "loss": 0.13153302669525146, "loss_ce": 0.026125309988856316, "loss_iou": 0.058837890625, "loss_num": 0.10546875, "loss_xval": 0.10546875, "num_input_tokens_seen": 36150308, "step": 213 }, { "epoch": 0.08158597026305757, "grad_norm": 4.93523703261995, "learning_rate": 5e-06, "loss": 0.152, "num_input_tokens_seen": 36318844, "step": 214 }, { "epoch": 0.08158597026305757, "loss": 0.17730222642421722, "loss_ce": 0.03966795653104782, "loss_iou": 0.08837890625, "loss_num": 0.1376953125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 36318844, "step": 214 }, { "epoch": 0.08196721311475409, "grad_norm": 5.449610633251273, "learning_rate": 5e-06, "loss": 0.1633, "num_input_tokens_seen": 36486788, "step": 215 }, { "epoch": 0.08196721311475409, "loss": 0.17240044474601746, "loss_ce": 0.02805231139063835, "loss_iou": 0.1162109375, "loss_num": 0.14453125, "loss_xval": 0.14453125, "num_input_tokens_seen": 36486788, "step": 215 }, { "epoch": 0.08234845596645063, "grad_norm": 5.677928031865306, "learning_rate": 5e-06, "loss": 0.1628, "num_input_tokens_seen": 36655424, "step": 216 }, { "epoch": 0.08234845596645063, "loss": 0.17816764116287231, "loss_ce": 0.026495283469557762, "loss_iou": 0.251953125, "loss_num": 0.1513671875, "loss_xval": 0.1513671875, "num_input_tokens_seen": 36655424, "step": 216 }, { "epoch": 0.08272969881814717, "grad_norm": 5.057230107952905, "learning_rate": 5e-06, "loss": 0.1666, "num_input_tokens_seen": 36822400, "step": 217 }, { "epoch": 0.08272969881814717, "loss": 0.16810831427574158, "loss_ce": 0.02681192383170128, "loss_iou": 0.0654296875, "loss_num": 0.1416015625, "loss_xval": 0.1416015625, "num_input_tokens_seen": 36822400, "step": 217 }, { "epoch": 0.08311094166984369, "grad_norm": 4.693685798905787, "learning_rate": 5e-06, "loss": 0.1533, "num_input_tokens_seen": 36987660, "step": 218 }, { "epoch": 0.08311094166984369, "loss": 0.17427818477153778, "loss_ce": 0.03041832335293293, "loss_iou": 0.2041015625, "loss_num": 0.1435546875, "loss_xval": 0.1435546875, "num_input_tokens_seen": 36987660, "step": 218 }, { "epoch": 0.08349218452154022, "grad_norm": 5.369656059536361, "learning_rate": 5e-06, "loss": 0.1547, "num_input_tokens_seen": 37157224, "step": 219 }, { "epoch": 0.08349218452154022, "loss": 0.1782151609659195, "loss_ce": 0.035820137709379196, "loss_iou": 0.09765625, "loss_num": 0.142578125, "loss_xval": 0.142578125, "num_input_tokens_seen": 37157224, "step": 219 }, { "epoch": 0.08387342737323675, "grad_norm": 6.865296132529728, "learning_rate": 5e-06, "loss": 0.1492, "num_input_tokens_seen": 37327616, "step": 220 }, { "epoch": 0.08387342737323675, "loss": 0.14614805579185486, "loss_ce": 0.029631949961185455, "loss_iou": 0.25, "loss_num": 0.11669921875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 37327616, "step": 220 }, { "epoch": 0.08425467022493328, "grad_norm": 8.867559833380975, "learning_rate": 5e-06, "loss": 0.1788, "num_input_tokens_seen": 37499668, "step": 221 }, { "epoch": 0.08425467022493328, "loss": 0.1794005036354065, "loss_ce": 0.030230596661567688, "loss_iou": 0.057373046875, "loss_num": 0.1494140625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 37499668, "step": 221 }, { "epoch": 0.0846359130766298, "grad_norm": 6.786482744735267, "learning_rate": 5e-06, "loss": 0.1873, "num_input_tokens_seen": 37671868, "step": 222 }, { "epoch": 0.0846359130766298, "loss": 0.1732577085494995, "loss_ce": 0.02451504021883011, "loss_iou": 0.0107421875, "loss_num": 0.1484375, "loss_xval": 0.1484375, "num_input_tokens_seen": 37671868, "step": 222 }, { "epoch": 0.08501715592832634, "grad_norm": 3.7922150320746355, "learning_rate": 5e-06, "loss": 0.161, "num_input_tokens_seen": 37844060, "step": 223 }, { "epoch": 0.08501715592832634, "loss": 0.15339231491088867, "loss_ce": 0.025523660704493523, "loss_iou": 0.11865234375, "loss_num": 0.1279296875, "loss_xval": 0.1279296875, "num_input_tokens_seen": 37844060, "step": 223 }, { "epoch": 0.08539839878002288, "grad_norm": 3.8325377982480164, "learning_rate": 5e-06, "loss": 0.1522, "num_input_tokens_seen": 38016136, "step": 224 }, { "epoch": 0.08539839878002288, "loss": 0.15799179673194885, "loss_ce": 0.026033777743577957, "loss_iou": 0.125, "loss_num": 0.1318359375, "loss_xval": 0.1318359375, "num_input_tokens_seen": 38016136, "step": 224 }, { "epoch": 0.0857796416317194, "grad_norm": 3.7826073928821424, "learning_rate": 5e-06, "loss": 0.1455, "num_input_tokens_seen": 38186392, "step": 225 }, { "epoch": 0.0857796416317194, "loss": 0.12594732642173767, "loss_ce": 0.02249274216592312, "loss_iou": NaN, "loss_num": 0.103515625, "loss_xval": 0.103515625, "num_input_tokens_seen": 38186392, "step": 225 }, { "epoch": 0.08616088448341594, "grad_norm": 4.773532856351368, "learning_rate": 5e-06, "loss": 0.1563, "num_input_tokens_seen": 38357024, "step": 226 }, { "epoch": 0.08616088448341594, "loss": 0.16969117522239685, "loss_ce": 0.06281863152980804, "loss_iou": 0.2158203125, "loss_num": 0.10693359375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 38357024, "step": 226 }, { "epoch": 0.08654212733511246, "grad_norm": 5.039765731931007, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 38529420, "step": 227 }, { "epoch": 0.08654212733511246, "loss": 0.14553721249103546, "loss_ce": 0.02633555233478546, "loss_iou": 0.11181640625, "loss_num": 0.119140625, "loss_xval": 0.119140625, "num_input_tokens_seen": 38529420, "step": 227 }, { "epoch": 0.086923370186809, "grad_norm": 4.455939335870368, "learning_rate": 5e-06, "loss": 0.1521, "num_input_tokens_seen": 38694780, "step": 228 }, { "epoch": 0.086923370186809, "loss": 0.1465291976928711, "loss_ce": 0.02574063278734684, "loss_iou": 0.1611328125, "loss_num": 0.12060546875, "loss_xval": 0.12060546875, "num_input_tokens_seen": 38694780, "step": 228 }, { "epoch": 0.08730461303850552, "grad_norm": 4.54430108149329, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 38861664, "step": 229 }, { "epoch": 0.08730461303850552, "loss": 0.1365373432636261, "loss_ce": 0.021302981302142143, "loss_iou": 0.041748046875, "loss_num": 0.115234375, "loss_xval": 0.115234375, "num_input_tokens_seen": 38861664, "step": 229 }, { "epoch": 0.08768585589020206, "grad_norm": 5.912647310779185, "learning_rate": 5e-06, "loss": 0.1484, "num_input_tokens_seen": 39032148, "step": 230 }, { "epoch": 0.08768585589020206, "loss": 0.1516009271144867, "loss_ce": 0.024891935288906097, "loss_iou": 0.197265625, "loss_num": 0.126953125, "loss_xval": 0.126953125, "num_input_tokens_seen": 39032148, "step": 230 }, { "epoch": 0.0880670987418986, "grad_norm": 7.303331811765866, "learning_rate": 5e-06, "loss": 0.1583, "num_input_tokens_seen": 39204260, "step": 231 }, { "epoch": 0.0880670987418986, "loss": 0.14178459346294403, "loss_ce": 0.02050773799419403, "loss_iou": 0.11279296875, "loss_num": 0.12109375, "loss_xval": 0.12109375, "num_input_tokens_seen": 39204260, "step": 231 }, { "epoch": 0.08844834159359512, "grad_norm": 8.680326100039322, "learning_rate": 5e-06, "loss": 0.1736, "num_input_tokens_seen": 39374744, "step": 232 }, { "epoch": 0.08844834159359512, "loss": 0.17430295050144196, "loss_ce": 0.018236054107546806, "loss_iou": 0.28515625, "loss_num": 0.15625, "loss_xval": 0.15625, "num_input_tokens_seen": 39374744, "step": 232 }, { "epoch": 0.08882958444529165, "grad_norm": 4.55783117181828, "learning_rate": 5e-06, "loss": 0.1472, "num_input_tokens_seen": 39546800, "step": 233 }, { "epoch": 0.08882958444529165, "loss": 0.1543968766927719, "loss_ce": 0.024514062330126762, "loss_iou": 0.1572265625, "loss_num": 0.1298828125, "loss_xval": 0.1298828125, "num_input_tokens_seen": 39546800, "step": 233 }, { "epoch": 0.08921082729698818, "grad_norm": 4.339806160257698, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 39717400, "step": 234 }, { "epoch": 0.08921082729698818, "loss": 0.13223184645175934, "loss_ce": 0.02438272535800934, "loss_iou": 0.1455078125, "loss_num": 0.10791015625, "loss_xval": 0.10791015625, "num_input_tokens_seen": 39717400, "step": 234 }, { "epoch": 0.08959207014868471, "grad_norm": 4.11875762493815, "learning_rate": 5e-06, "loss": 0.1351, "num_input_tokens_seen": 39889540, "step": 235 }, { "epoch": 0.08959207014868471, "loss": 0.1536053717136383, "loss_ce": 0.028544342145323753, "loss_iou": 0.306640625, "loss_num": 0.125, "loss_xval": 0.125, "num_input_tokens_seen": 39889540, "step": 235 }, { "epoch": 0.08997331300038124, "grad_norm": 5.984822976025027, "learning_rate": 5e-06, "loss": 0.1403, "num_input_tokens_seen": 40059768, "step": 236 }, { "epoch": 0.08997331300038124, "loss": 0.1520293951034546, "loss_ce": 0.02336728200316429, "loss_iou": 0.3359375, "loss_num": 0.12890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 40059768, "step": 236 }, { "epoch": 0.09035455585207777, "grad_norm": 4.255352081307566, "learning_rate": 5e-06, "loss": 0.1344, "num_input_tokens_seen": 40228428, "step": 237 }, { "epoch": 0.09035455585207777, "loss": 0.15023866295814514, "loss_ce": 0.022247936576604843, "loss_iou": 0.1708984375, "loss_num": 0.1279296875, "loss_xval": 0.1279296875, "num_input_tokens_seen": 40228428, "step": 237 }, { "epoch": 0.09073579870377431, "grad_norm": 5.5659532084663565, "learning_rate": 5e-06, "loss": 0.1479, "num_input_tokens_seen": 40397980, "step": 238 }, { "epoch": 0.09073579870377431, "loss": 0.13192544877529144, "loss_ce": 0.01992594078183174, "loss_iou": 0.27734375, "loss_num": 0.11181640625, "loss_xval": 0.11181640625, "num_input_tokens_seen": 40397980, "step": 238 }, { "epoch": 0.09111704155547083, "grad_norm": 5.8852256482709615, "learning_rate": 5e-06, "loss": 0.1309, "num_input_tokens_seen": 40570488, "step": 239 }, { "epoch": 0.09111704155547083, "loss": 0.11962257325649261, "loss_ce": 0.017754895612597466, "loss_iou": 0.0810546875, "loss_num": 0.10205078125, "loss_xval": 0.10205078125, "num_input_tokens_seen": 40570488, "step": 239 }, { "epoch": 0.09149828440716737, "grad_norm": 5.419783608703198, "learning_rate": 5e-06, "loss": 0.1448, "num_input_tokens_seen": 40737024, "step": 240 }, { "epoch": 0.09149828440716737, "loss": 0.13016757369041443, "loss_ce": 0.029154382646083832, "loss_iou": 0.029296875, "loss_num": 0.10107421875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 40737024, "step": 240 }, { "epoch": 0.09187952725886389, "grad_norm": 4.535791208409517, "learning_rate": 5e-06, "loss": 0.1339, "num_input_tokens_seen": 40909248, "step": 241 }, { "epoch": 0.09187952725886389, "loss": 0.1285448521375656, "loss_ce": 0.023961104452610016, "loss_iou": 0.220703125, "loss_num": 0.1044921875, "loss_xval": 0.1044921875, "num_input_tokens_seen": 40909248, "step": 241 }, { "epoch": 0.09226077011056043, "grad_norm": 4.724704535695608, "learning_rate": 5e-06, "loss": 0.1225, "num_input_tokens_seen": 41081556, "step": 242 }, { "epoch": 0.09226077011056043, "loss": 0.12991374731063843, "loss_ce": 0.017975281924009323, "loss_iou": 0.236328125, "loss_num": 0.11181640625, "loss_xval": 0.11181640625, "num_input_tokens_seen": 41081556, "step": 242 }, { "epoch": 0.09264201296225696, "grad_norm": 4.031142861359274, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 41251944, "step": 243 }, { "epoch": 0.09264201296225696, "loss": 0.13619326055049896, "loss_ce": 0.023766502737998962, "loss_iou": 0.19921875, "loss_num": 0.1123046875, "loss_xval": 0.1123046875, "num_input_tokens_seen": 41251944, "step": 243 }, { "epoch": 0.09302325581395349, "grad_norm": 6.087561315245691, "learning_rate": 5e-06, "loss": 0.1404, "num_input_tokens_seen": 41420360, "step": 244 }, { "epoch": 0.09302325581395349, "loss": 0.1481267511844635, "loss_ce": 0.029047157615423203, "loss_iou": 0.2197265625, "loss_num": 0.119140625, "loss_xval": 0.119140625, "num_input_tokens_seen": 41420360, "step": 244 }, { "epoch": 0.09340449866565002, "grad_norm": 7.17961860113985, "learning_rate": 5e-06, "loss": 0.1359, "num_input_tokens_seen": 41584420, "step": 245 }, { "epoch": 0.09340449866565002, "loss": 0.12763813138008118, "loss_ce": 0.01771380752325058, "loss_iou": 0.251953125, "loss_num": 0.10986328125, "loss_xval": 0.10986328125, "num_input_tokens_seen": 41584420, "step": 245 }, { "epoch": 0.09378574151734655, "grad_norm": 6.115132953164955, "learning_rate": 5e-06, "loss": 0.1423, "num_input_tokens_seen": 41754632, "step": 246 }, { "epoch": 0.09378574151734655, "loss": 0.1468658596277237, "loss_ce": 0.023574844002723694, "loss_iou": 0.2353515625, "loss_num": 0.123046875, "loss_xval": 0.123046875, "num_input_tokens_seen": 41754632, "step": 246 }, { "epoch": 0.09416698436904308, "grad_norm": 4.518161429919516, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 41925212, "step": 247 }, { "epoch": 0.09416698436904308, "loss": 0.15290366113185883, "loss_ce": 0.020762551575899124, "loss_iou": 0.14453125, "loss_num": 0.1318359375, "loss_xval": 0.1318359375, "num_input_tokens_seen": 41925212, "step": 247 }, { "epoch": 0.0945482272207396, "grad_norm": 5.586149224771644, "learning_rate": 5e-06, "loss": 0.1305, "num_input_tokens_seen": 42094112, "step": 248 }, { "epoch": 0.0945482272207396, "loss": 0.14219020307064056, "loss_ce": 0.04535793513059616, "loss_iou": 0.052734375, "loss_num": 0.0966796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 42094112, "step": 248 }, { "epoch": 0.09492947007243614, "grad_norm": 5.9267599562533775, "learning_rate": 5e-06, "loss": 0.1433, "num_input_tokens_seen": 42264276, "step": 249 }, { "epoch": 0.09492947007243614, "loss": 0.12405319511890411, "loss_ce": 0.016631316393613815, "loss_iou": 0.115234375, "loss_num": 0.107421875, "loss_xval": 0.107421875, "num_input_tokens_seen": 42264276, "step": 249 }, { "epoch": 0.09531071292413268, "grad_norm": 5.056901931708464, "learning_rate": 5e-06, "loss": 0.118, "num_input_tokens_seen": 42436288, "step": 250 }, { "epoch": 0.09531071292413268, "eval_websight_new_CIoU": 0.43392522633075714, "eval_websight_new_GIoU": 0.41100695729255676, "eval_websight_new_IoU": 0.46178892254829407, "eval_websight_new_MAE_all": 0.10471450164914131, "eval_websight_new_MAE_h": 0.1256338357925415, "eval_websight_new_MAE_w": 0.12052245810627937, "eval_websight_new_MAE_x": 0.061581023037433624, "eval_websight_new_MAE_y": 0.11112068220973015, "eval_websight_new_NUM_probability": 0.8488701581954956, "eval_websight_new_inside_bbox": 0.6961805522441864, "eval_websight_new_loss": 0.10748042911291122, "eval_websight_new_loss_ce": 0.016938342712819576, "eval_websight_new_loss_iou": 0.46575927734375, "eval_websight_new_loss_num": 0.087310791015625, "eval_websight_new_loss_xval": 0.087310791015625, "eval_websight_new_runtime": 61.5137, "eval_websight_new_samples_per_second": 0.813, "eval_websight_new_steps_per_second": 0.033, "num_input_tokens_seen": 42436288, "step": 250 }, { "epoch": 0.09531071292413268, "eval_seeclick_CIoU": 0.1753879114985466, "eval_seeclick_GIoU": 0.10619913786649704, "eval_seeclick_IoU": 0.25801894813776016, "eval_seeclick_MAE_all": 0.1939466893672943, "eval_seeclick_MAE_h": 0.17077196389436722, "eval_seeclick_MAE_w": 0.21835819631814957, "eval_seeclick_MAE_x": 0.21049726754426956, "eval_seeclick_MAE_y": 0.1761593446135521, "eval_seeclick_NUM_probability": 0.8450920283794403, "eval_seeclick_inside_bbox": 0.4722222238779068, "eval_seeclick_loss": 0.18936492502689362, "eval_seeclick_loss_ce": 0.030919981189072132, "eval_seeclick_loss_iou": 0.37200927734375, "eval_seeclick_loss_num": 0.161529541015625, "eval_seeclick_loss_xval": 0.161529541015625, "eval_seeclick_runtime": 85.1383, "eval_seeclick_samples_per_second": 0.587, "eval_seeclick_steps_per_second": 0.023, "num_input_tokens_seen": 42436288, "step": 250 }, { "epoch": 0.09531071292413268, "eval_icons_CIoU": 0.28013716638088226, "eval_icons_GIoU": 0.26097799837589264, "eval_icons_IoU": 0.34551550447940826, "eval_icons_MAE_all": 0.13232684880495071, "eval_icons_MAE_h": 0.14004291594028473, "eval_icons_MAE_w": 0.12625902891159058, "eval_icons_MAE_x": 0.11041285842657089, "eval_icons_MAE_y": 0.15259258449077606, "eval_icons_NUM_probability": 0.8528884649276733, "eval_icons_inside_bbox": 0.5277777910232544, "eval_icons_loss": 0.10058583319187164, "eval_icons_loss_ce": 0.01819693110883236, "eval_icons_loss_iou": 0.073486328125, "eval_icons_loss_num": 0.079833984375, "eval_icons_loss_xval": 0.079833984375, "eval_icons_runtime": 85.5343, "eval_icons_samples_per_second": 0.585, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 42436288, "step": 250 }, { "epoch": 0.09531071292413268, "eval_compot_CIoU": 0.29037410020828247, "eval_compot_GIoU": 0.26138684898614883, "eval_compot_IoU": 0.32414330542087555, "eval_compot_MAE_all": 0.09265598654747009, "eval_compot_MAE_h": 0.07443492859601974, "eval_compot_MAE_w": 0.13811790198087692, "eval_compot_MAE_x": 0.07125015556812286, "eval_compot_MAE_y": 0.08682098612189293, "eval_compot_NUM_probability": 0.8410935997962952, "eval_compot_inside_bbox": 0.4913194477558136, "eval_compot_loss": 0.09226094186306, "eval_compot_loss_ce": 0.018759255297482014, "eval_compot_loss_iou": 0.27276611328125, "eval_compot_loss_num": 0.0702972412109375, "eval_compot_loss_xval": 0.0702972412109375, "eval_compot_runtime": 81.6599, "eval_compot_samples_per_second": 0.612, "eval_compot_steps_per_second": 0.024, "num_input_tokens_seen": 42436288, "step": 250 }, { "epoch": 0.09531071292413268, "eval_web_actions_CIoU": 0.2671080306172371, "eval_web_actions_GIoU": 0.17311245203018188, "eval_web_actions_IoU": 0.3291372060775757, "eval_web_actions_MAE_all": 0.16092178970575333, "eval_web_actions_MAE_h": 0.14207028597593307, "eval_web_actions_MAE_w": 0.21995393186807632, "eval_web_actions_MAE_x": 0.14041096717119217, "eval_web_actions_MAE_y": 0.14125195145606995, "eval_web_actions_NUM_probability": 0.8412942886352539, "eval_web_actions_inside_bbox": 0.6614583432674408, "eval_web_actions_loss": 0.2487429976463318, "eval_web_actions_loss_ce": 0.09953882917761803, "eval_web_actions_loss_iou": 0.287841796875, "eval_web_actions_loss_num": 0.143218994140625, "eval_web_actions_loss_xval": 0.143218994140625, "eval_web_actions_runtime": 81.9846, "eval_web_actions_samples_per_second": 0.573, "eval_web_actions_steps_per_second": 0.024, "num_input_tokens_seen": 42436288, "step": 250 } ], "logging_steps": 1.0, "max_steps": 7869, "num_input_tokens_seen": 42436288, "num_train_epochs": 3, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 304855459102720.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }