minicpm_xval_merged-250 / trainer_state.json
Upload folder using huggingface_hub · commit ae57cc2 (verified)
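The JSON below is the Hugging Face Trainer state saved with this checkpoint: `log_history` interleaves per-step training entries (`loss`, `grad_norm`, `learning_rate`), per-step component losses (`loss_ce`, `loss_iou`, `loss_num`, `loss_xval`), and periodic evaluation records prefixed per dataset (`eval_websight_new_*`, `eval_seeclick_*`, `eval_icons_*`, `eval_compot_*`, `eval_web_actions_*`). A minimal sketch of how one might read this file and separate those entry types — the local filename is an assumption; the key names are taken from the data itself:

```python
import json

# Minimal sketch: load this trainer_state.json (assumed to be in the current
# directory) and split log_history into its three kinds of entries.
with open("trainer_state.json") as f:
    state = json.load(f)

train_steps = []    # optimizer-step entries (carry grad_norm / learning_rate)
detail_losses = []  # entries carrying loss_ce / loss_iou / loss_num / loss_xval
eval_records = []   # entries whose keys start with "eval_"

for entry in state["log_history"]:
    if any(k.startswith("eval_") for k in entry):
        eval_records.append(entry)
    elif "grad_norm" in entry:
        train_steps.append(entry)
    elif "loss_ce" in entry:
        detail_losses.append(entry)

# Training-loss curve as (step, loss) pairs
loss_curve = [(e["step"], e["loss"]) for e in train_steps]
print(f"{len(train_steps)} training steps logged; first points: {loss_curve[:3]}")

# Example: IoU on the seeclick eval split at each evaluation point
seeclick_iou = [(e["step"], e["eval_seeclick_IoU"])
                for e in eval_records if "eval_seeclick_IoU" in e]
print(f"seeclick IoU over time: {seeclick_iou}")
```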
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.09531071292413268,
"eval_steps": 250,
"global_step": 250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003812428516965307,
"grad_norm": 166.66033166263705,
"learning_rate": 5e-06,
"loss": 5.5181,
"num_input_tokens_seen": 171824,
"step": 1
},
{
"epoch": 0.0003812428516965307,
"loss": 5.511815547943115,
"loss_ce": 5.203954219818115,
"loss_iou": 0.875,
"loss_num": 0.30859375,
"loss_xval": 0.30859375,
"num_input_tokens_seen": 171824,
"step": 1
},
{
"epoch": 0.0007624857033930614,
"grad_norm": 177.43252265373968,
"learning_rate": 5e-06,
"loss": 5.4175,
"num_input_tokens_seen": 340268,
"step": 2
},
{
"epoch": 0.0007624857033930614,
"loss": 5.353693008422852,
"loss_ce": 5.080987930297852,
"loss_iou": 0.69921875,
"loss_num": 0.2734375,
"loss_xval": 0.2734375,
"num_input_tokens_seen": 340268,
"step": 2
},
{
"epoch": 0.001143728555089592,
"grad_norm": 169.4480809535085,
"learning_rate": 5e-06,
"loss": 5.1458,
"num_input_tokens_seen": 509856,
"step": 3
},
{
"epoch": 0.001143728555089592,
"loss": 5.112088203430176,
"loss_ce": 4.874661445617676,
"loss_iou": 0.66015625,
"loss_num": 0.2373046875,
"loss_xval": 0.2373046875,
"num_input_tokens_seen": 509856,
"step": 3
},
{
"epoch": 0.0015249714067861228,
"grad_norm": 165.0464349086778,
"learning_rate": 5e-06,
"loss": 5.2169,
"num_input_tokens_seen": 676960,
"step": 4
},
{
"epoch": 0.0015249714067861228,
"loss": 5.128868579864502,
"loss_ce": 4.904503345489502,
"loss_iou": 0.15234375,
"loss_num": 0.224609375,
"loss_xval": 0.224609375,
"num_input_tokens_seen": 676960,
"step": 4
},
{
"epoch": 0.0019062142584826535,
"grad_norm": 156.68894357346014,
"learning_rate": 5e-06,
"loss": 5.1368,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.0019062142584826535,
"eval_websight_new_CIoU": 0.014646705240011215,
"eval_websight_new_GIoU": -0.05545324832201004,
"eval_websight_new_IoU": 0.16141273081302643,
"eval_websight_new_MAE_all": 0.23055454343557358,
"eval_websight_new_MAE_h": 0.2790681719779968,
"eval_websight_new_MAE_w": 0.16171859204769135,
"eval_websight_new_MAE_x": 0.2205788567662239,
"eval_websight_new_MAE_y": 0.2608525678515434,
"eval_websight_new_NUM_probability": 5.514878331780437e-08,
"eval_websight_new_inside_bbox": 0.3697916716337204,
"eval_websight_new_loss": 5.538997173309326,
"eval_websight_new_loss_ce": 5.386038541793823,
"eval_websight_new_loss_iou": 0.1309814453125,
"eval_websight_new_loss_num": 0.14739990234375,
"eval_websight_new_loss_xval": 0.14739990234375,
"eval_websight_new_runtime": 64.8384,
"eval_websight_new_samples_per_second": 0.771,
"eval_websight_new_steps_per_second": 0.031,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.0019062142584826535,
"eval_seeclick_CIoU": -0.10113272070884705,
"eval_seeclick_GIoU": -0.19475465267896652,
"eval_seeclick_IoU": 0.081342913210392,
"eval_seeclick_MAE_all": 0.3178282380104065,
"eval_seeclick_MAE_h": 0.29355502128601074,
"eval_seeclick_MAE_w": 0.30315980315208435,
"eval_seeclick_MAE_x": 0.34532545506954193,
"eval_seeclick_MAE_y": 0.3292728066444397,
"eval_seeclick_NUM_probability": 2.8387768935544955e-07,
"eval_seeclick_inside_bbox": 0.2361111119389534,
"eval_seeclick_loss": 6.38102912902832,
"eval_seeclick_loss_ce": 6.161922454833984,
"eval_seeclick_loss_iou": 0.22113037109375,
"eval_seeclick_loss_num": 0.2393798828125,
"eval_seeclick_loss_xval": 0.2393798828125,
"eval_seeclick_runtime": 82.7425,
"eval_seeclick_samples_per_second": 0.604,
"eval_seeclick_steps_per_second": 0.024,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.0019062142584826535,
"eval_icons_CIoU": 0.04171837493777275,
"eval_icons_GIoU": -0.015274678356945515,
"eval_icons_IoU": 0.17068828642368317,
"eval_icons_MAE_all": 0.22043060511350632,
"eval_icons_MAE_h": 0.21020717918872833,
"eval_icons_MAE_w": 0.2284608781337738,
"eval_icons_MAE_x": 0.2374560832977295,
"eval_icons_MAE_y": 0.20559833943843842,
"eval_icons_NUM_probability": 1.474576443172282e-07,
"eval_icons_inside_bbox": 0.3940972238779068,
"eval_icons_loss": 5.1574883460998535,
"eval_icons_loss_ce": 5.055776119232178,
"eval_icons_loss_iou": 0.0128173828125,
"eval_icons_loss_num": 0.12457275390625,
"eval_icons_loss_xval": 0.12457275390625,
"eval_icons_runtime": 82.3641,
"eval_icons_samples_per_second": 0.607,
"eval_icons_steps_per_second": 0.024,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.0019062142584826535,
"eval_compot_CIoU": 0.07103381492197514,
"eval_compot_GIoU": 0.010495346039533615,
"eval_compot_IoU": 0.20030608028173447,
"eval_compot_MAE_all": 0.17506222426891327,
"eval_compot_MAE_h": 0.1381075605750084,
"eval_compot_MAE_w": 0.22323701530694962,
"eval_compot_MAE_x": 0.22260665148496628,
"eval_compot_MAE_y": 0.11629766970872879,
"eval_compot_NUM_probability": 3.672893456041493e-08,
"eval_compot_inside_bbox": 0.3229166716337204,
"eval_compot_loss": 5.476764678955078,
"eval_compot_loss_ce": 5.353034734725952,
"eval_compot_loss_iou": 0.105712890625,
"eval_compot_loss_num": 0.116241455078125,
"eval_compot_loss_xval": 0.116241455078125,
"eval_compot_runtime": 82.3456,
"eval_compot_samples_per_second": 0.607,
"eval_compot_steps_per_second": 0.024,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.0019062142584826535,
"eval_web_actions_CIoU": -0.09799446724355221,
"eval_web_actions_GIoU": -0.22149190306663513,
"eval_web_actions_IoU": 0.08019107207655907,
"eval_web_actions_MAE_all": 0.28694653511047363,
"eval_web_actions_MAE_h": 0.2967325896024704,
"eval_web_actions_MAE_w": 0.2789995074272156,
"eval_web_actions_MAE_x": 0.2816382795572281,
"eval_web_actions_MAE_y": 0.29041585326194763,
"eval_web_actions_NUM_probability": 2.1177974929287302e-07,
"eval_web_actions_inside_bbox": 0.19166667014360428,
"eval_web_actions_loss": 5.91270112991333,
"eval_web_actions_loss_ce": 5.719156980514526,
"eval_web_actions_loss_iou": 0.08514404296875,
"eval_web_actions_loss_num": 0.17974853515625,
"eval_web_actions_loss_xval": 0.17974853515625,
"eval_web_actions_runtime": 78.0841,
"eval_web_actions_samples_per_second": 0.602,
"eval_web_actions_steps_per_second": 0.026,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.0019062142584826535,
"loss": 5.942988395690918,
"loss_ce": 5.761713981628418,
"loss_iou": 0.0703125,
"loss_num": 0.181640625,
"loss_xval": 0.181640625,
"num_input_tokens_seen": 847420,
"step": 5
},
{
"epoch": 0.002287457110179184,
"grad_norm": 146.72349551014184,
"learning_rate": 5e-06,
"loss": 5.0025,
"num_input_tokens_seen": 1019588,
"step": 6
},
{
"epoch": 0.002287457110179184,
"loss": 4.935182571411133,
"loss_ce": 4.728639602661133,
"loss_iou": 0.130859375,
"loss_num": 0.20703125,
"loss_xval": 0.20703125,
"num_input_tokens_seen": 1019588,
"step": 6
},
{
"epoch": 0.002668699961875715,
"grad_norm": 140.54078928260213,
"learning_rate": 5e-06,
"loss": 4.8787,
"num_input_tokens_seen": 1188148,
"step": 7
},
{
"epoch": 0.002668699961875715,
"loss": 4.864795684814453,
"loss_ce": 4.719409942626953,
"loss_iou": 0.1298828125,
"loss_num": 0.1455078125,
"loss_xval": 0.1455078125,
"num_input_tokens_seen": 1188148,
"step": 7
},
{
"epoch": 0.0030499428135722455,
"grad_norm": 112.55645040581052,
"learning_rate": 5e-06,
"loss": 4.4461,
"num_input_tokens_seen": 1358400,
"step": 8
},
{
"epoch": 0.0030499428135722455,
"loss": 4.246901988983154,
"loss_ce": 4.098220348358154,
"loss_iou": 0.162109375,
"loss_num": 0.1484375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 1358400,
"step": 8
},
{
"epoch": 0.003431185665268776,
"grad_norm": 113.36373607042844,
"learning_rate": 5e-06,
"loss": 4.3505,
"num_input_tokens_seen": 1525448,
"step": 9
},
{
"epoch": 0.003431185665268776,
"loss": 4.16458797454834,
"loss_ce": 3.9982669353485107,
"loss_iou": 0.06884765625,
"loss_num": 0.166015625,
"loss_xval": 0.166015625,
"num_input_tokens_seen": 1525448,
"step": 9
},
{
"epoch": 0.003812428516965307,
"grad_norm": 118.47049136018163,
"learning_rate": 5e-06,
"loss": 3.8089,
"num_input_tokens_seen": 1687988,
"step": 10
},
{
"epoch": 0.003812428516965307,
"loss": 3.628293752670288,
"loss_ce": 3.449582815170288,
"loss_iou": 0.0419921875,
"loss_num": 0.1787109375,
"loss_xval": 0.1787109375,
"num_input_tokens_seen": 1687988,
"step": 10
},
{
"epoch": 0.0041936713686618375,
"grad_norm": 99.61131893349591,
"learning_rate": 5e-06,
"loss": 2.996,
"num_input_tokens_seen": 1858248,
"step": 11
},
{
"epoch": 0.0041936713686618375,
"loss": 3.101732015609741,
"loss_ce": 2.939622640609741,
"loss_iou": 0.095703125,
"loss_num": 0.162109375,
"loss_xval": 0.162109375,
"num_input_tokens_seen": 1858248,
"step": 11
},
{
"epoch": 0.004574914220358368,
"grad_norm": 106.98300209226464,
"learning_rate": 5e-06,
"loss": 2.6687,
"num_input_tokens_seen": 2030264,
"step": 12
},
{
"epoch": 0.004574914220358368,
"loss": 2.5463790893554688,
"loss_ce": 2.3260421752929688,
"loss_iou": 0.05224609375,
"loss_num": 0.220703125,
"loss_xval": 0.220703125,
"num_input_tokens_seen": 2030264,
"step": 12
},
{
"epoch": 0.004956157072054899,
"grad_norm": 76.91542550535567,
"learning_rate": 5e-06,
"loss": 2.2417,
"num_input_tokens_seen": 2200880,
"step": 13
},
{
"epoch": 0.004956157072054899,
"loss": 2.266770601272583,
"loss_ce": 2.038377046585083,
"loss_iou": 0.34375,
"loss_num": 0.228515625,
"loss_xval": 0.228515625,
"num_input_tokens_seen": 2200880,
"step": 13
},
{
"epoch": 0.00533739992375143,
"grad_norm": 78.72540804350768,
"learning_rate": 5e-06,
"loss": 1.9466,
"num_input_tokens_seen": 2369604,
"step": 14
},
{
"epoch": 0.00533739992375143,
"loss": 2.0582070350646973,
"loss_ce": 1.8293249607086182,
"loss_iou": 0.06201171875,
"loss_num": 0.228515625,
"loss_xval": 0.228515625,
"num_input_tokens_seen": 2369604,
"step": 14
},
{
"epoch": 0.0057186427754479605,
"grad_norm": 59.8244048261341,
"learning_rate": 5e-06,
"loss": 1.4382,
"num_input_tokens_seen": 2539572,
"step": 15
},
{
"epoch": 0.0057186427754479605,
"loss": 1.469299554824829,
"loss_ce": 1.316223382949829,
"loss_iou": 0.248046875,
"loss_num": 0.1533203125,
"loss_xval": 0.1533203125,
"num_input_tokens_seen": 2539572,
"step": 15
},
{
"epoch": 0.006099885627144491,
"grad_norm": 41.99255824550064,
"learning_rate": 5e-06,
"loss": 1.2798,
"num_input_tokens_seen": 2709740,
"step": 16
},
{
"epoch": 0.006099885627144491,
"loss": 1.2045753002166748,
"loss_ce": 1.0448462963104248,
"loss_iou": 0.25390625,
"loss_num": 0.16015625,
"loss_xval": 0.16015625,
"num_input_tokens_seen": 2709740,
"step": 16
},
{
"epoch": 0.006481128478841022,
"grad_norm": 30.717288714523608,
"learning_rate": 5e-06,
"loss": 1.3016,
"num_input_tokens_seen": 2881856,
"step": 17
},
{
"epoch": 0.006481128478841022,
"loss": 1.2061893939971924,
"loss_ce": 0.9561895132064819,
"loss_iou": 0.0947265625,
"loss_num": 0.25,
"loss_xval": 0.25,
"num_input_tokens_seen": 2881856,
"step": 17
},
{
"epoch": 0.006862371330537552,
"grad_norm": 29.605658753942066,
"learning_rate": 5e-06,
"loss": 1.1777,
"num_input_tokens_seen": 3053876,
"step": 18
},
{
"epoch": 0.006862371330537552,
"loss": 1.2317192554473877,
"loss_ce": 1.0397636890411377,
"loss_iou": 0.2158203125,
"loss_num": 0.1923828125,
"loss_xval": 0.1923828125,
"num_input_tokens_seen": 3053876,
"step": 18
},
{
"epoch": 0.0072436141822340835,
"grad_norm": 29.86222597046028,
"learning_rate": 5e-06,
"loss": 1.1369,
"num_input_tokens_seen": 3225060,
"step": 19
},
{
"epoch": 0.0072436141822340835,
"loss": 1.1779117584228516,
"loss_ce": 0.9886417388916016,
"loss_iou": 0.06201171875,
"loss_num": 0.189453125,
"loss_xval": 0.189453125,
"num_input_tokens_seen": 3225060,
"step": 19
},
{
"epoch": 0.007624857033930614,
"grad_norm": 25.831037095100537,
"learning_rate": 5e-06,
"loss": 1.0894,
"num_input_tokens_seen": 3395392,
"step": 20
},
{
"epoch": 0.007624857033930614,
"loss": 1.1610549688339233,
"loss_ce": 1.0183547735214233,
"loss_iou": 0.177734375,
"loss_num": 0.142578125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 3395392,
"step": 20
},
{
"epoch": 0.008006099885627144,
"grad_norm": 19.771258523994035,
"learning_rate": 5e-06,
"loss": 1.0349,
"num_input_tokens_seen": 3566916,
"step": 21
},
{
"epoch": 0.008006099885627144,
"loss": 1.0371689796447754,
"loss_ce": 0.8748155236244202,
"loss_iou": 0.234375,
"loss_num": 0.162109375,
"loss_xval": 0.162109375,
"num_input_tokens_seen": 3566916,
"step": 21
},
{
"epoch": 0.008387342737323675,
"grad_norm": 17.12428105280431,
"learning_rate": 5e-06,
"loss": 0.9893,
"num_input_tokens_seen": 3736368,
"step": 22
},
{
"epoch": 0.008387342737323675,
"loss": 1.0070894956588745,
"loss_ce": 0.8448580503463745,
"loss_iou": 0.08837890625,
"loss_num": 0.162109375,
"loss_xval": 0.162109375,
"num_input_tokens_seen": 3736368,
"step": 22
},
{
"epoch": 0.008768585589020206,
"grad_norm": 15.113078816318653,
"learning_rate": 5e-06,
"loss": 0.8795,
"num_input_tokens_seen": 3908260,
"step": 23
},
{
"epoch": 0.008768585589020206,
"loss": 0.8438772559165955,
"loss_ce": 0.6823782920837402,
"loss_iou": 0.0498046875,
"loss_num": 0.1611328125,
"loss_xval": 0.1611328125,
"num_input_tokens_seen": 3908260,
"step": 23
},
{
"epoch": 0.009149828440716736,
"grad_norm": 18.544695956515877,
"learning_rate": 5e-06,
"loss": 0.9071,
"num_input_tokens_seen": 4080448,
"step": 24
},
{
"epoch": 0.009149828440716736,
"loss": 0.8525056838989258,
"loss_ce": 0.6808748245239258,
"loss_iou": 0.3984375,
"loss_num": 0.171875,
"loss_xval": 0.171875,
"num_input_tokens_seen": 4080448,
"step": 24
},
{
"epoch": 0.009531071292413268,
"grad_norm": 23.276487882198364,
"learning_rate": 5e-06,
"loss": 0.9212,
"num_input_tokens_seen": 4249956,
"step": 25
},
{
"epoch": 0.009531071292413268,
"loss": 0.9321600198745728,
"loss_ce": 0.6882635354995728,
"loss_iou": 0.018798828125,
"loss_num": 0.244140625,
"loss_xval": 0.244140625,
"num_input_tokens_seen": 4249956,
"step": 25
},
{
"epoch": 0.009912314144109797,
"grad_norm": 15.784302554680545,
"learning_rate": 5e-06,
"loss": 0.8371,
"num_input_tokens_seen": 4418480,
"step": 26
},
{
"epoch": 0.009912314144109797,
"loss": 0.8192111253738403,
"loss_ce": 0.6472750902175903,
"loss_iou": 0.02587890625,
"loss_num": 0.171875,
"loss_xval": 0.171875,
"num_input_tokens_seen": 4418480,
"step": 26
},
{
"epoch": 0.010293556995806329,
"grad_norm": 12.941867028628593,
"learning_rate": 5e-06,
"loss": 0.7928,
"num_input_tokens_seen": 4588692,
"step": 27
},
{
"epoch": 0.010293556995806329,
"loss": 0.7072745561599731,
"loss_ce": 0.5241690874099731,
"loss_iou": 0.00732421875,
"loss_num": 0.18359375,
"loss_xval": 0.18359375,
"num_input_tokens_seen": 4588692,
"step": 27
},
{
"epoch": 0.01067479984750286,
"grad_norm": 13.321258109616243,
"learning_rate": 5e-06,
"loss": 0.8187,
"num_input_tokens_seen": 4757996,
"step": 28
},
{
"epoch": 0.01067479984750286,
"loss": 0.8777631521224976,
"loss_ce": 0.7008832693099976,
"loss_iou": 0.11865234375,
"loss_num": 0.1767578125,
"loss_xval": 0.1767578125,
"num_input_tokens_seen": 4757996,
"step": 28
},
{
"epoch": 0.01105604269919939,
"grad_norm": 11.870311389259395,
"learning_rate": 5e-06,
"loss": 0.7568,
"num_input_tokens_seen": 4926676,
"step": 29
},
{
"epoch": 0.01105604269919939,
"loss": 0.719562292098999,
"loss_ce": 0.5721014142036438,
"loss_iou": 0.1923828125,
"loss_num": 0.1474609375,
"loss_xval": 0.1474609375,
"num_input_tokens_seen": 4926676,
"step": 29
},
{
"epoch": 0.011437285550895921,
"grad_norm": 14.463066295571423,
"learning_rate": 5e-06,
"loss": 0.7608,
"num_input_tokens_seen": 5097060,
"step": 30
},
{
"epoch": 0.011437285550895921,
"loss": 0.7708301544189453,
"loss_ce": 0.6216602325439453,
"loss_iou": 0.2392578125,
"loss_num": 0.1494140625,
"loss_xval": 0.1494140625,
"num_input_tokens_seen": 5097060,
"step": 30
},
{
"epoch": 0.01181852840259245,
"grad_norm": 11.75302392316454,
"learning_rate": 5e-06,
"loss": 0.7324,
"num_input_tokens_seen": 5269192,
"step": 31
},
{
"epoch": 0.01181852840259245,
"loss": 0.7086876034736633,
"loss_ce": 0.5559776425361633,
"loss_iou": 0.0732421875,
"loss_num": 0.15234375,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 5269192,
"step": 31
},
{
"epoch": 0.012199771254288982,
"grad_norm": 14.301637316863893,
"learning_rate": 5e-06,
"loss": 0.7253,
"num_input_tokens_seen": 5441256,
"step": 32
},
{
"epoch": 0.012199771254288982,
"loss": 0.715785026550293,
"loss_ce": 0.5498914122581482,
"loss_iou": 0.24609375,
"loss_num": 0.166015625,
"loss_xval": 0.166015625,
"num_input_tokens_seen": 5441256,
"step": 32
},
{
"epoch": 0.012581014105985514,
"grad_norm": 13.076635214648364,
"learning_rate": 5e-06,
"loss": 0.7391,
"num_input_tokens_seen": 5611392,
"step": 33
},
{
"epoch": 0.012581014105985514,
"loss": 0.7384083271026611,
"loss_ce": 0.5559132099151611,
"loss_iou": 0.125,
"loss_num": 0.1826171875,
"loss_xval": 0.1826171875,
"num_input_tokens_seen": 5611392,
"step": 33
},
{
"epoch": 0.012962256957682043,
"grad_norm": 13.045891494955844,
"learning_rate": 5e-06,
"loss": 0.7512,
"num_input_tokens_seen": 5776300,
"step": 34
},
{
"epoch": 0.012962256957682043,
"loss": 0.7740552425384521,
"loss_ce": 0.5727002620697021,
"loss_iou": 0.12060546875,
"loss_num": 0.201171875,
"loss_xval": 0.201171875,
"num_input_tokens_seen": 5776300,
"step": 34
},
{
"epoch": 0.013343499809378575,
"grad_norm": 12.523133776622043,
"learning_rate": 5e-06,
"loss": 0.7389,
"num_input_tokens_seen": 5948620,
"step": 35
},
{
"epoch": 0.013343499809378575,
"loss": 0.7952262759208679,
"loss_ce": 0.6077262759208679,
"loss_iou": 0.09912109375,
"loss_num": 0.1875,
"loss_xval": 0.1875,
"num_input_tokens_seen": 5948620,
"step": 35
},
{
"epoch": 0.013724742661075104,
"grad_norm": 15.473776854664337,
"learning_rate": 5e-06,
"loss": 0.6972,
"num_input_tokens_seen": 6121040,
"step": 36
},
{
"epoch": 0.013724742661075104,
"loss": 0.7074524164199829,
"loss_ce": 0.5458313822746277,
"loss_iou": 0.2177734375,
"loss_num": 0.162109375,
"loss_xval": 0.162109375,
"num_input_tokens_seen": 6121040,
"step": 36
},
{
"epoch": 0.014105985512771636,
"grad_norm": 13.090989368724633,
"learning_rate": 5e-06,
"loss": 0.7056,
"num_input_tokens_seen": 6293600,
"step": 37
},
{
"epoch": 0.014105985512771636,
"loss": 0.7142536640167236,
"loss_ce": 0.5716145038604736,
"loss_iou": 0.158203125,
"loss_num": 0.142578125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 6293600,
"step": 37
},
{
"epoch": 0.014487228364468167,
"grad_norm": 11.923105224279563,
"learning_rate": 5e-06,
"loss": 0.6759,
"num_input_tokens_seen": 6465744,
"step": 38
},
{
"epoch": 0.014487228364468167,
"loss": 0.6937756538391113,
"loss_ce": 0.49968382716178894,
"loss_iou": 0.125,
"loss_num": 0.1943359375,
"loss_xval": 0.1943359375,
"num_input_tokens_seen": 6465744,
"step": 38
},
{
"epoch": 0.014868471216164697,
"grad_norm": 11.32292254804611,
"learning_rate": 5e-06,
"loss": 0.6567,
"num_input_tokens_seen": 6637832,
"step": 39
},
{
"epoch": 0.014868471216164697,
"loss": 0.6161133050918579,
"loss_ce": 0.4471680521965027,
"loss_iou": 0.1103515625,
"loss_num": 0.1689453125,
"loss_xval": 0.1689453125,
"num_input_tokens_seen": 6637832,
"step": 39
},
{
"epoch": 0.015249714067861228,
"grad_norm": 14.554101213212657,
"learning_rate": 5e-06,
"loss": 0.6785,
"num_input_tokens_seen": 6805988,
"step": 40
},
{
"epoch": 0.015249714067861228,
"loss": 0.6730799674987793,
"loss_ce": 0.5253138542175293,
"loss_iou": 0.17578125,
"loss_num": 0.1474609375,
"loss_xval": 0.1474609375,
"num_input_tokens_seen": 6805988,
"step": 40
},
{
"epoch": 0.015630956919557758,
"grad_norm": 12.069633926063188,
"learning_rate": 5e-06,
"loss": 0.6535,
"num_input_tokens_seen": 6978164,
"step": 41
},
{
"epoch": 0.015630956919557758,
"loss": 0.6302179098129272,
"loss_ce": 0.456695020198822,
"loss_iou": 0.208984375,
"loss_num": 0.173828125,
"loss_xval": 0.173828125,
"num_input_tokens_seen": 6978164,
"step": 41
},
{
"epoch": 0.016012199771254287,
"grad_norm": 12.41805814562438,
"learning_rate": 5e-06,
"loss": 0.6603,
"num_input_tokens_seen": 7150132,
"step": 42
},
{
"epoch": 0.016012199771254287,
"loss": 0.6476210355758667,
"loss_ce": 0.4699476361274719,
"loss_iou": 0.0233154296875,
"loss_num": 0.177734375,
"loss_xval": 0.177734375,
"num_input_tokens_seen": 7150132,
"step": 42
},
{
"epoch": 0.01639344262295082,
"grad_norm": 11.434070417991139,
"learning_rate": 5e-06,
"loss": 0.6363,
"num_input_tokens_seen": 7321812,
"step": 43
},
{
"epoch": 0.01639344262295082,
"loss": 0.6272916793823242,
"loss_ce": 0.4530363082885742,
"loss_iou": 0.19140625,
"loss_num": 0.173828125,
"loss_xval": 0.173828125,
"num_input_tokens_seen": 7321812,
"step": 43
},
{
"epoch": 0.01677468547464735,
"grad_norm": 11.95243015763611,
"learning_rate": 5e-06,
"loss": 0.6148,
"num_input_tokens_seen": 7493996,
"step": 44
},
{
"epoch": 0.01677468547464735,
"loss": 0.5882515907287598,
"loss_ce": 0.42339563369750977,
"loss_iou": 0.06884765625,
"loss_num": 0.1650390625,
"loss_xval": 0.1650390625,
"num_input_tokens_seen": 7493996,
"step": 44
},
{
"epoch": 0.01715592832634388,
"grad_norm": 11.274627145163922,
"learning_rate": 5e-06,
"loss": 0.5914,
"num_input_tokens_seen": 7660036,
"step": 45
},
{
"epoch": 0.01715592832634388,
"loss": 0.6103206872940063,
"loss_ce": 0.43142664432525635,
"loss_iou": 0.08203125,
"loss_num": 0.1787109375,
"loss_xval": 0.1787109375,
"num_input_tokens_seen": 7660036,
"step": 45
},
{
"epoch": 0.017537171178040413,
"grad_norm": 11.712186194645579,
"learning_rate": 5e-06,
"loss": 0.6034,
"num_input_tokens_seen": 7830632,
"step": 46
},
{
"epoch": 0.017537171178040413,
"loss": 0.5953316688537598,
"loss_ce": 0.43291711807250977,
"loss_iou": 0.1982421875,
"loss_num": 0.162109375,
"loss_xval": 0.162109375,
"num_input_tokens_seen": 7830632,
"step": 46
},
{
"epoch": 0.017918414029736943,
"grad_norm": 11.749337957492449,
"learning_rate": 5e-06,
"loss": 0.6061,
"num_input_tokens_seen": 7996552,
"step": 47
},
{
"epoch": 0.017918414029736943,
"loss": 0.6277086734771729,
"loss_ce": 0.4752427935600281,
"loss_iou": 0.1923828125,
"loss_num": 0.15234375,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 7996552,
"step": 47
},
{
"epoch": 0.018299656881433472,
"grad_norm": 11.60241640002812,
"learning_rate": 5e-06,
"loss": 0.5649,
"num_input_tokens_seen": 8168628,
"step": 48
},
{
"epoch": 0.018299656881433472,
"loss": 0.5392067432403564,
"loss_ce": 0.41530537605285645,
"loss_iou": 0.10888671875,
"loss_num": 0.1240234375,
"loss_xval": 0.1240234375,
"num_input_tokens_seen": 8168628,
"step": 48
},
{
"epoch": 0.018680899733130005,
"grad_norm": 11.892983212154894,
"learning_rate": 5e-06,
"loss": 0.5984,
"num_input_tokens_seen": 8333940,
"step": 49
},
{
"epoch": 0.018680899733130005,
"loss": 0.6391422748565674,
"loss_ce": 0.4573795795440674,
"loss_iou": 0.07958984375,
"loss_num": 0.181640625,
"loss_xval": 0.181640625,
"num_input_tokens_seen": 8333940,
"step": 49
},
{
"epoch": 0.019062142584826535,
"grad_norm": 12.370360966405286,
"learning_rate": 5e-06,
"loss": 0.5927,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019062142584826535,
"eval_websight_new_CIoU": 0.12265287712216377,
"eval_websight_new_GIoU": 0.07912312902044505,
"eval_websight_new_IoU": 0.20381823182106018,
"eval_websight_new_MAE_all": 0.1915627419948578,
"eval_websight_new_MAE_h": 0.23717200756072998,
"eval_websight_new_MAE_w": 0.226213701069355,
"eval_websight_new_MAE_x": 0.08390067517757416,
"eval_websight_new_MAE_y": 0.2189645618200302,
"eval_websight_new_NUM_probability": 0.0060775557067245245,
"eval_websight_new_inside_bbox": 0.7118055522441864,
"eval_websight_new_loss": 0.6745176911354065,
"eval_websight_new_loss_ce": 0.5277671813964844,
"eval_websight_new_loss_iou": 0.18060302734375,
"eval_websight_new_loss_num": 0.14251708984375,
"eval_websight_new_loss_xval": 0.14251708984375,
"eval_websight_new_runtime": 66.8504,
"eval_websight_new_samples_per_second": 0.748,
"eval_websight_new_steps_per_second": 0.03,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019062142584826535,
"eval_seeclick_CIoU": -0.052548233419656754,
"eval_seeclick_GIoU": -0.15978088974952698,
"eval_seeclick_IoU": 0.0995083898305893,
"eval_seeclick_MAE_all": 0.27841828763484955,
"eval_seeclick_MAE_h": 0.2928212434053421,
"eval_seeclick_MAE_w": 0.31220175325870514,
"eval_seeclick_MAE_x": 0.227464959025383,
"eval_seeclick_MAE_y": 0.28118522465229034,
"eval_seeclick_NUM_probability": 0.006080519873648882,
"eval_seeclick_inside_bbox": 0.2795138955116272,
"eval_seeclick_loss": 0.8024284839630127,
"eval_seeclick_loss_ce": 0.5892761051654816,
"eval_seeclick_loss_iou": 0.1800537109375,
"eval_seeclick_loss_num": 0.2235107421875,
"eval_seeclick_loss_xval": 0.2235107421875,
"eval_seeclick_runtime": 90.1923,
"eval_seeclick_samples_per_second": 0.554,
"eval_seeclick_steps_per_second": 0.022,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019062142584826535,
"eval_icons_CIoU": 0.1178673729300499,
"eval_icons_GIoU": 0.0633512157946825,
"eval_icons_IoU": 0.22628046572208405,
"eval_icons_MAE_all": 0.1937492936849594,
"eval_icons_MAE_h": 0.2025865912437439,
"eval_icons_MAE_w": 0.19258208572864532,
"eval_icons_MAE_x": 0.17922218143939972,
"eval_icons_MAE_y": 0.2006063312292099,
"eval_icons_NUM_probability": 0.006224965211004019,
"eval_icons_inside_bbox": 0.5711805522441864,
"eval_icons_loss": 0.6320141553878784,
"eval_icons_loss_ce": 0.515306681394577,
"eval_icons_loss_iou": 0.05596923828125,
"eval_icons_loss_num": 0.11151123046875,
"eval_icons_loss_xval": 0.11151123046875,
"eval_icons_runtime": 81.6336,
"eval_icons_samples_per_second": 0.612,
"eval_icons_steps_per_second": 0.024,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019062142584826535,
"eval_compot_CIoU": 0.14036905393004417,
"eval_compot_GIoU": 0.05752933071926236,
"eval_compot_IoU": 0.22682655602693558,
"eval_compot_MAE_all": 0.12615687400102615,
"eval_compot_MAE_h": 0.08502155169844627,
"eval_compot_MAE_w": 0.20546764880418777,
"eval_compot_MAE_x": 0.12541664391756058,
"eval_compot_MAE_y": 0.0887216366827488,
"eval_compot_NUM_probability": 0.005897745722904801,
"eval_compot_inside_bbox": 0.6336805522441864,
"eval_compot_loss": 0.6307579278945923,
"eval_compot_loss_ce": 0.5412943065166473,
"eval_compot_loss_iou": 0.0687255859375,
"eval_compot_loss_num": 0.089385986328125,
"eval_compot_loss_xval": 0.089385986328125,
"eval_compot_runtime": 81.6243,
"eval_compot_samples_per_second": 0.613,
"eval_compot_steps_per_second": 0.025,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019062142584826535,
"eval_web_actions_CIoU": -0.07848251238465309,
"eval_web_actions_GIoU": -0.16024185717105865,
"eval_web_actions_IoU": 0.08452721312642097,
"eval_web_actions_MAE_all": 0.26327383518218994,
"eval_web_actions_MAE_h": 0.31245075166225433,
"eval_web_actions_MAE_w": 0.26912088692188263,
"eval_web_actions_MAE_x": 0.165983684360981,
"eval_web_actions_MAE_y": 0.30553998053073883,
"eval_web_actions_NUM_probability": 0.005677123321220279,
"eval_web_actions_inside_bbox": 0.30520834028720856,
"eval_web_actions_loss": 0.9075774550437927,
"eval_web_actions_loss_ce": 0.732213020324707,
"eval_web_actions_loss_iou": 0.10137939453125,
"eval_web_actions_loss_num": 0.17706298828125,
"eval_web_actions_loss_xval": 0.17706298828125,
"eval_web_actions_runtime": 78.0055,
"eval_web_actions_samples_per_second": 0.603,
"eval_web_actions_steps_per_second": 0.026,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019062142584826535,
"loss": 0.9026013612747192,
"loss_ce": 0.7248669862747192,
"loss_iou": 0.10791015625,
"loss_num": 0.177734375,
"loss_xval": 0.177734375,
"num_input_tokens_seen": 8502756,
"step": 50
},
{
"epoch": 0.019443385436523065,
"grad_norm": 13.734342041508446,
"learning_rate": 5e-06,
"loss": 0.597,
"num_input_tokens_seen": 8673368,
"step": 51
},
{
"epoch": 0.019443385436523065,
"loss": 0.6097111701965332,
"loss_ce": 0.4595646858215332,
"loss_iou": 0.10693359375,
"loss_num": 0.150390625,
"loss_xval": 0.150390625,
"num_input_tokens_seen": 8673368,
"step": 51
},
{
"epoch": 0.019824628288219594,
"grad_norm": 15.391900523046136,
"learning_rate": 5e-06,
"loss": 0.5814,
"num_input_tokens_seen": 8837248,
"step": 52
},
{
"epoch": 0.019824628288219594,
"loss": 0.6093646287918091,
"loss_ce": 0.4040423631668091,
"loss_iou": 0.3125,
"loss_num": 0.205078125,
"loss_xval": 0.205078125,
"num_input_tokens_seen": 8837248,
"step": 52
},
{
"epoch": 0.020205871139916128,
"grad_norm": 14.955627476533499,
"learning_rate": 5e-06,
"loss": 0.5752,
"num_input_tokens_seen": 9007996,
"step": 53
},
{
"epoch": 0.020205871139916128,
"loss": 0.5916285514831543,
"loss_ce": 0.4221949577331543,
"loss_iou": 0.00897216796875,
"loss_num": 0.169921875,
"loss_xval": 0.169921875,
"num_input_tokens_seen": 9007996,
"step": 53
},
{
"epoch": 0.020587113991612657,
"grad_norm": 12.376336564449462,
"learning_rate": 5e-06,
"loss": 0.5657,
"num_input_tokens_seen": 9177940,
"step": 54
},
{
"epoch": 0.020587113991612657,
"loss": 0.5488950610160828,
"loss_ce": 0.39649027585983276,
"loss_iou": 0.053955078125,
"loss_num": 0.15234375,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 9177940,
"step": 54
},
{
"epoch": 0.020968356843309187,
"grad_norm": 12.090405564173633,
"learning_rate": 5e-06,
"loss": 0.5691,
"num_input_tokens_seen": 9348308,
"step": 55
},
{
"epoch": 0.020968356843309187,
"loss": 0.6079497933387756,
"loss_ce": 0.42185360193252563,
"loss_iou": 0.13671875,
"loss_num": 0.1865234375,
"loss_xval": 0.1865234375,
"num_input_tokens_seen": 9348308,
"step": 55
},
{
"epoch": 0.02134959969500572,
"grad_norm": 11.902958231800097,
"learning_rate": 5e-06,
"loss": 0.5616,
"num_input_tokens_seen": 9517760,
"step": 56
},
{
"epoch": 0.02134959969500572,
"loss": 0.5244883298873901,
"loss_ce": 0.34608256816864014,
"loss_iou": 0.150390625,
"loss_num": 0.1787109375,
"loss_xval": 0.1787109375,
"num_input_tokens_seen": 9517760,
"step": 56
},
{
"epoch": 0.02173084254670225,
"grad_norm": 11.80221150505286,
"learning_rate": 5e-06,
"loss": 0.535,
"num_input_tokens_seen": 9687752,
"step": 57
},
{
"epoch": 0.02173084254670225,
"loss": 0.5444226264953613,
"loss_ce": 0.39342164993286133,
"loss_iou": 0.0693359375,
"loss_num": 0.1513671875,
"loss_xval": 0.1513671875,
"num_input_tokens_seen": 9687752,
"step": 57
},
{
"epoch": 0.02211208539839878,
"grad_norm": 12.261361128826103,
"learning_rate": 5e-06,
"loss": 0.5449,
"num_input_tokens_seen": 9857820,
"step": 58
},
{
"epoch": 0.02211208539839878,
"loss": 0.5440876483917236,
"loss_ce": 0.36427807807922363,
"loss_iou": 0.173828125,
"loss_num": 0.1796875,
"loss_xval": 0.1796875,
"num_input_tokens_seen": 9857820,
"step": 58
},
{
"epoch": 0.02249332825009531,
"grad_norm": 12.453126203244764,
"learning_rate": 5e-06,
"loss": 0.563,
"num_input_tokens_seen": 10028280,
"step": 59
},
{
"epoch": 0.02249332825009531,
"loss": 0.5746830105781555,
"loss_ce": 0.4281376004219055,
"loss_iou": 0.146484375,
"loss_num": 0.146484375,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 10028280,
"step": 59
},
{
"epoch": 0.022874571101791842,
"grad_norm": 12.304581281055258,
"learning_rate": 5e-06,
"loss": 0.5321,
"num_input_tokens_seen": 10193584,
"step": 60
},
{
"epoch": 0.022874571101791842,
"loss": 0.5188683867454529,
"loss_ce": 0.35169312357902527,
"loss_iou": 0.1640625,
"loss_num": 0.1669921875,
"loss_xval": 0.1669921875,
"num_input_tokens_seen": 10193584,
"step": 60
},
{
"epoch": 0.023255813953488372,
"grad_norm": 13.998044458420281,
"learning_rate": 5e-06,
"loss": 0.5362,
"num_input_tokens_seen": 10362196,
"step": 61
},
{
"epoch": 0.023255813953488372,
"loss": 0.5754693150520325,
"loss_ce": 0.41866999864578247,
"loss_iou": 0.1455078125,
"loss_num": 0.1572265625,
"loss_xval": 0.1572265625,
"num_input_tokens_seen": 10362196,
"step": 61
},
{
"epoch": 0.0236370568051849,
"grad_norm": 11.92308808122042,
"learning_rate": 5e-06,
"loss": 0.5326,
"num_input_tokens_seen": 10527168,
"step": 62
},
{
"epoch": 0.0236370568051849,
"loss": 0.5353362560272217,
"loss_ce": 0.3612639904022217,
"loss_iou": 0.21484375,
"loss_num": 0.173828125,
"loss_xval": 0.173828125,
"num_input_tokens_seen": 10527168,
"step": 62
},
{
"epoch": 0.024018299656881435,
"grad_norm": 12.233902047421576,
"learning_rate": 5e-06,
"loss": 0.5208,
"num_input_tokens_seen": 10696296,
"step": 63
},
{
"epoch": 0.024018299656881435,
"loss": 0.5133154392242432,
"loss_ce": 0.3806860148906708,
"loss_iou": 0.0498046875,
"loss_num": 0.1328125,
"loss_xval": 0.1328125,
"num_input_tokens_seen": 10696296,
"step": 63
},
{
"epoch": 0.024399542508577964,
"grad_norm": 12.137384843380636,
"learning_rate": 5e-06,
"loss": 0.5347,
"num_input_tokens_seen": 10865244,
"step": 64
},
{
"epoch": 0.024399542508577964,
"loss": 0.49595263600349426,
"loss_ce": 0.29728323221206665,
"loss_iou": 0.1875,
"loss_num": 0.1982421875,
"loss_xval": 0.1982421875,
"num_input_tokens_seen": 10865244,
"step": 64
},
{
"epoch": 0.024780785360274494,
"grad_norm": 12.183441390066275,
"learning_rate": 5e-06,
"loss": 0.5108,
"num_input_tokens_seen": 11037272,
"step": 65
},
{
"epoch": 0.024780785360274494,
"loss": 0.4988226890563965,
"loss_ce": 0.3487982749938965,
"loss_iou": 0.126953125,
"loss_num": 0.150390625,
"loss_xval": 0.150390625,
"num_input_tokens_seen": 11037272,
"step": 65
},
{
"epoch": 0.025162028211971027,
"grad_norm": 13.03848111499602,
"learning_rate": 5e-06,
"loss": 0.5194,
"num_input_tokens_seen": 11209428,
"step": 66
},
{
"epoch": 0.025162028211971027,
"loss": 0.48021817207336426,
"loss_ce": 0.33275723457336426,
"loss_iou": 0.07861328125,
"loss_num": 0.1474609375,
"loss_xval": 0.1474609375,
"num_input_tokens_seen": 11209428,
"step": 66
},
{
"epoch": 0.025543271063667557,
"grad_norm": 12.129796554865111,
"learning_rate": 5e-06,
"loss": 0.4982,
"num_input_tokens_seen": 11377784,
"step": 67
},
{
"epoch": 0.025543271063667557,
"loss": 0.5201830863952637,
"loss_ce": 0.3668017089366913,
"loss_iou": 0.212890625,
"loss_num": 0.1533203125,
"loss_xval": 0.1533203125,
"num_input_tokens_seen": 11377784,
"step": 67
},
{
"epoch": 0.025924513915364086,
"grad_norm": 12.16663337244556,
"learning_rate": 5e-06,
"loss": 0.4924,
"num_input_tokens_seen": 11546724,
"step": 68
},
{
"epoch": 0.025924513915364086,
"loss": 0.44336187839508057,
"loss_ce": 0.29925787448883057,
"loss_iou": 0.05810546875,
"loss_num": 0.14453125,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 11546724,
"step": 68
},
{
"epoch": 0.026305756767060616,
"grad_norm": 11.758052516976239,
"learning_rate": 5e-06,
"loss": 0.478,
"num_input_tokens_seen": 11715460,
"step": 69
},
{
"epoch": 0.026305756767060616,
"loss": 0.4771363139152527,
"loss_ce": 0.3299195170402527,
"loss_iou": 0.041748046875,
"loss_num": 0.1474609375,
"loss_xval": 0.1474609375,
"num_input_tokens_seen": 11715460,
"step": 69
},
{
"epoch": 0.02668699961875715,
"grad_norm": 11.694824569228489,
"learning_rate": 5e-06,
"loss": 0.4735,
"num_input_tokens_seen": 11883428,
"step": 70
},
{
"epoch": 0.02668699961875715,
"loss": 0.4738195538520813,
"loss_ce": 0.3340490162372589,
"loss_iou": 0.1796875,
"loss_num": 0.1396484375,
"loss_xval": 0.1396484375,
"num_input_tokens_seen": 11883428,
"step": 70
},
{
"epoch": 0.02706824247045368,
"grad_norm": 12.792427192285375,
"learning_rate": 5e-06,
"loss": 0.4822,
"num_input_tokens_seen": 12051952,
"step": 71
},
{
"epoch": 0.02706824247045368,
"loss": 0.500407338142395,
"loss_ce": 0.364543080329895,
"loss_iou": 0.15625,
"loss_num": 0.1357421875,
"loss_xval": 0.1357421875,
"num_input_tokens_seen": 12051952,
"step": 71
},
{
"epoch": 0.02744948532215021,
"grad_norm": 12.494356160653988,
"learning_rate": 5e-06,
"loss": 0.4905,
"num_input_tokens_seen": 12222456,
"step": 72
},
{
"epoch": 0.02744948532215021,
"loss": 0.49499523639678955,
"loss_ce": 0.33422863483428955,
"loss_iou": 0.048583984375,
"loss_num": 0.1611328125,
"loss_xval": 0.1611328125,
"num_input_tokens_seen": 12222456,
"step": 72
},
{
"epoch": 0.02783072817384674,
"grad_norm": 11.851075223778734,
"learning_rate": 5e-06,
"loss": 0.4734,
"num_input_tokens_seen": 12394628,
"step": 73
},
{
"epoch": 0.02783072817384674,
"loss": 0.4998481273651123,
"loss_ce": 0.3393256664276123,
"loss_iou": 0.169921875,
"loss_num": 0.16015625,
"loss_xval": 0.16015625,
"num_input_tokens_seen": 12394628,
"step": 73
},
{
"epoch": 0.02821197102554327,
"grad_norm": 12.345182296953553,
"learning_rate": 5e-06,
"loss": 0.461,
"num_input_tokens_seen": 12561616,
"step": 74
},
{
"epoch": 0.02821197102554327,
"loss": 0.4780183732509613,
"loss_ce": 0.3509432077407837,
"loss_iou": 0.11767578125,
"loss_num": 0.126953125,
"loss_xval": 0.126953125,
"num_input_tokens_seen": 12561616,
"step": 74
},
{
"epoch": 0.0285932138772398,
"grad_norm": 12.343535241732928,
"learning_rate": 5e-06,
"loss": 0.4799,
"num_input_tokens_seen": 12729864,
"step": 75
},
{
"epoch": 0.0285932138772398,
"loss": 0.49244657158851624,
"loss_ce": 0.34345975518226624,
"loss_iou": 0.1767578125,
"loss_num": 0.1494140625,
"loss_xval": 0.1494140625,
"num_input_tokens_seen": 12729864,
"step": 75
},
{
"epoch": 0.028974456728936334,
"grad_norm": 13.53465987183171,
"learning_rate": 5e-06,
"loss": 0.4822,
"num_input_tokens_seen": 12902300,
"step": 76
},
{
"epoch": 0.028974456728936334,
"loss": 0.4951751232147217,
"loss_ce": 0.3575408458709717,
"loss_iou": 0.09765625,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 12902300,
"step": 76
},
{
"epoch": 0.029355699580632864,
"grad_norm": 17.340928619373877,
"learning_rate": 5e-06,
"loss": 0.4669,
"num_input_tokens_seen": 13074048,
"step": 77
},
{
"epoch": 0.029355699580632864,
"loss": 0.48726654052734375,
"loss_ce": 0.32576748728752136,
"loss_iou": 0.392578125,
"loss_num": 0.1611328125,
"loss_xval": 0.1611328125,
"num_input_tokens_seen": 13074048,
"step": 77
},
{
"epoch": 0.029736942432329393,
"grad_norm": 23.622274934623174,
"learning_rate": 5e-06,
"loss": 0.567,
"num_input_tokens_seen": 13240004,
"step": 78
},
{
"epoch": 0.029736942432329393,
"loss": 0.5309600830078125,
"loss_ce": 0.3266143798828125,
"loss_iou": 0.0732421875,
"loss_num": 0.2041015625,
"loss_xval": 0.2041015625,
"num_input_tokens_seen": 13240004,
"step": 78
},
{
"epoch": 0.030118185284025923,
"grad_norm": 18.14847407024033,
"learning_rate": 5e-06,
"loss": 0.5159,
"num_input_tokens_seen": 13410448,
"step": 79
},
{
"epoch": 0.030118185284025923,
"loss": 0.5415377616882324,
"loss_ce": 0.32718226313591003,
"loss_iou": 0.47265625,
"loss_num": 0.21484375,
"loss_xval": 0.21484375,
"num_input_tokens_seen": 13410448,
"step": 79
},
{
"epoch": 0.030499428135722456,
"grad_norm": 11.985177640237248,
"learning_rate": 5e-06,
"loss": 0.4619,
"num_input_tokens_seen": 13582736,
"step": 80
},
{
"epoch": 0.030499428135722456,
"loss": 0.4745754599571228,
"loss_ce": 0.3023342490196228,
"loss_iou": 0.1044921875,
"loss_num": 0.171875,
"loss_xval": 0.171875,
"num_input_tokens_seen": 13582736,
"step": 80
},
{
"epoch": 0.030880670987418986,
"grad_norm": 11.856005341040106,
"learning_rate": 5e-06,
"loss": 0.4572,
"num_input_tokens_seen": 13753100,
"step": 81
},
{
"epoch": 0.030880670987418986,
"loss": 0.48636406660079956,
"loss_ce": 0.33749932050704956,
"loss_iou": 0.052001953125,
"loss_num": 0.1484375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 13753100,
"step": 81
},
{
"epoch": 0.031261913839115515,
"grad_norm": 11.382685680416452,
"learning_rate": 5e-06,
"loss": 0.4647,
"num_input_tokens_seen": 13923248,
"step": 82
},
{
"epoch": 0.031261913839115515,
"loss": 0.46927064657211304,
"loss_ce": 0.25540345907211304,
"loss_iou": 0.03125,
"loss_num": 0.2138671875,
"loss_xval": 0.2138671875,
"num_input_tokens_seen": 13923248,
"step": 82
},
{
"epoch": 0.031643156690812045,
"grad_norm": 11.753169915446275,
"learning_rate": 5e-06,
"loss": 0.4505,
"num_input_tokens_seen": 14095636,
"step": 83
},
{
"epoch": 0.031643156690812045,
"loss": 0.4410349130630493,
"loss_ce": 0.2765451669692993,
"loss_iou": 0.07080078125,
"loss_num": 0.1640625,
"loss_xval": 0.1640625,
"num_input_tokens_seen": 14095636,
"step": 83
},
{
"epoch": 0.032024399542508575,
"grad_norm": 11.990754545496944,
"learning_rate": 5e-06,
"loss": 0.4445,
"num_input_tokens_seen": 14267868,
"step": 84
},
{
"epoch": 0.032024399542508575,
"loss": 0.4556080102920532,
"loss_ce": 0.3024708032608032,
"loss_iou": 0.08056640625,
"loss_num": 0.1533203125,
"loss_xval": 0.1533203125,
"num_input_tokens_seen": 14267868,
"step": 84
},
{
"epoch": 0.03240564239420511,
"grad_norm": 11.76168733215327,
"learning_rate": 5e-06,
"loss": 0.4287,
"num_input_tokens_seen": 14436716,
"step": 85
},
{
"epoch": 0.03240564239420511,
"loss": 0.41517770290374756,
"loss_ce": 0.27424752712249756,
"loss_iou": 0.140625,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 14436716,
"step": 85
},
{
"epoch": 0.03278688524590164,
"grad_norm": 11.973750469066818,
"learning_rate": 5e-06,
"loss": 0.431,
"num_input_tokens_seen": 14608776,
"step": 86
},
{
"epoch": 0.03278688524590164,
"loss": 0.4094088077545166,
"loss_ce": 0.235702782869339,
"loss_iou": 0.04248046875,
"loss_num": 0.173828125,
"loss_xval": 0.173828125,
"num_input_tokens_seen": 14608776,
"step": 86
},
{
"epoch": 0.03316812809759817,
"grad_norm": 11.33393330382593,
"learning_rate": 5e-06,
"loss": 0.4344,
"num_input_tokens_seen": 14774140,
"step": 87
},
{
"epoch": 0.03316812809759817,
"loss": 0.41893303394317627,
"loss_ce": 0.23631584644317627,
"loss_iou": 0.06494140625,
"loss_num": 0.1826171875,
"loss_xval": 0.1826171875,
"num_input_tokens_seen": 14774140,
"step": 87
},
{
"epoch": 0.0335493709492947,
"grad_norm": 11.037333592059825,
"learning_rate": 5e-06,
"loss": 0.4189,
"num_input_tokens_seen": 14942700,
"step": 88
},
{
"epoch": 0.0335493709492947,
"loss": 0.3910408616065979,
"loss_ce": 0.2472420036792755,
"loss_iou": 0.1337890625,
"loss_num": 0.1435546875,
"loss_xval": 0.1435546875,
"num_input_tokens_seen": 14942700,
"step": 88
},
{
"epoch": 0.03393061380099123,
"grad_norm": 12.107053347481228,
"learning_rate": 5e-06,
"loss": 0.4231,
"num_input_tokens_seen": 15110488,
"step": 89
},
{
"epoch": 0.03393061380099123,
"loss": 0.42818814516067505,
"loss_ce": 0.29909878969192505,
"loss_iou": 0.0908203125,
"loss_num": 0.12890625,
"loss_xval": 0.12890625,
"num_input_tokens_seen": 15110488,
"step": 89
},
{
"epoch": 0.03431185665268776,
"grad_norm": 11.42784337247996,
"learning_rate": 5e-06,
"loss": 0.3987,
"num_input_tokens_seen": 15279188,
"step": 90
},
{
"epoch": 0.03431185665268776,
"loss": 0.38819554448127747,
"loss_ce": 0.23054175078868866,
"loss_iou": 0.1875,
"loss_num": 0.1572265625,
"loss_xval": 0.1572265625,
"num_input_tokens_seen": 15279188,
"step": 90
},
{
"epoch": 0.03469309950438429,
"grad_norm": 12.710478502345447,
"learning_rate": 5e-06,
"loss": 0.4212,
"num_input_tokens_seen": 15451604,
"step": 91
},
{
"epoch": 0.03469309950438429,
"loss": 0.41965770721435547,
"loss_ce": 0.28641796112060547,
"loss_iou": 0.08447265625,
"loss_num": 0.1328125,
"loss_xval": 0.1328125,
"num_input_tokens_seen": 15451604,
"step": 91
},
{
"epoch": 0.035074342356080826,
"grad_norm": 12.111297962407386,
"learning_rate": 5e-06,
"loss": 0.4206,
"num_input_tokens_seen": 15620592,
"step": 92
},
{
"epoch": 0.035074342356080826,
"loss": 0.43124547600746155,
"loss_ce": 0.22311557829380035,
"loss_iou": 0.05078125,
"loss_num": 0.2080078125,
"loss_xval": 0.2080078125,
"num_input_tokens_seen": 15620592,
"step": 92
},
{
"epoch": 0.035455585207777356,
"grad_norm": 12.242275289330596,
"learning_rate": 5e-06,
"loss": 0.4124,
"num_input_tokens_seen": 15791204,
"step": 93
},
{
"epoch": 0.035455585207777356,
"loss": 0.38425594568252563,
"loss_ce": 0.23789364099502563,
"loss_iou": 0.13671875,
"loss_num": 0.146484375,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 15791204,
"step": 93
},
{
"epoch": 0.035836828059473885,
"grad_norm": 11.212776469423819,
"learning_rate": 5e-06,
"loss": 0.4074,
"num_input_tokens_seen": 15960280,
"step": 94
},
{
"epoch": 0.035836828059473885,
"loss": 0.40042412281036377,
"loss_ce": 0.22482600808143616,
"loss_iou": 0.014892578125,
"loss_num": 0.17578125,
"loss_xval": 0.17578125,
"num_input_tokens_seen": 15960280,
"step": 94
},
{
"epoch": 0.036218070911170415,
"grad_norm": 12.657266487359777,
"learning_rate": 5e-06,
"loss": 0.4047,
"num_input_tokens_seen": 16132564,
"step": 95
},
{
"epoch": 0.036218070911170415,
"loss": 0.40306520462036133,
"loss_ce": 0.26109743118286133,
"loss_iou": 0.0203857421875,
"loss_num": 0.1416015625,
"loss_xval": 0.1416015625,
"num_input_tokens_seen": 16132564,
"step": 95
},
{
"epoch": 0.036599313762866945,
"grad_norm": 11.6211209359735,
"learning_rate": 5e-06,
"loss": 0.4029,
"num_input_tokens_seen": 16304472,
"step": 96
},
{
"epoch": 0.036599313762866945,
"loss": 0.3593056797981262,
"loss_ce": 0.18291407823562622,
"loss_iou": 0.06005859375,
"loss_num": 0.1767578125,
"loss_xval": 0.1767578125,
"num_input_tokens_seen": 16304472,
"step": 96
},
{
"epoch": 0.036980556614563474,
"grad_norm": 12.071393902645383,
"learning_rate": 5e-06,
"loss": 0.4006,
"num_input_tokens_seen": 16474764,
"step": 97
},
{
"epoch": 0.036980556614563474,
"loss": 0.37911850214004517,
"loss_ce": 0.22012192010879517,
"loss_iou": 0.048583984375,
"loss_num": 0.1591796875,
"loss_xval": 0.1591796875,
"num_input_tokens_seen": 16474764,
"step": 97
},
{
"epoch": 0.03736179946626001,
"grad_norm": 12.000598741161477,
"learning_rate": 5e-06,
"loss": 0.3995,
"num_input_tokens_seen": 16646672,
"step": 98
},
{
"epoch": 0.03736179946626001,
"loss": 0.4102625846862793,
"loss_ce": 0.22538712620735168,
"loss_iou": 0.01025390625,
"loss_num": 0.1845703125,
"loss_xval": 0.1845703125,
"num_input_tokens_seen": 16646672,
"step": 98
},
{
"epoch": 0.03774304231795654,
"grad_norm": 12.707295898303995,
"learning_rate": 5e-06,
"loss": 0.3889,
"num_input_tokens_seen": 16815208,
"step": 99
},
{
"epoch": 0.03774304231795654,
"loss": 0.3928859233856201,
"loss_ce": 0.2523829936981201,
"loss_iou": 0.06884765625,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 16815208,
"step": 99
},
{
"epoch": 0.03812428516965307,
"grad_norm": 12.427783036031853,
"learning_rate": 5e-06,
"loss": 0.3838,
"num_input_tokens_seen": 16978948,
"step": 100
},
{
"epoch": 0.03812428516965307,
"loss": 0.38862472772598267,
"loss_ce": 0.21125656366348267,
"loss_iou": 0.1474609375,
"loss_num": 0.177734375,
"loss_xval": 0.177734375,
"num_input_tokens_seen": 16978948,
"step": 100
},
{
"epoch": 0.0385055280213496,
"grad_norm": 12.291136690956028,
"learning_rate": 5e-06,
"loss": 0.3777,
"num_input_tokens_seen": 17147808,
"step": 101
},
{
"epoch": 0.0385055280213496,
"loss": 0.35547709465026855,
"loss_ce": 0.21521833539009094,
"loss_iou": 0.115234375,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 17147808,
"step": 101
},
{
"epoch": 0.03888677087304613,
"grad_norm": 12.569611940916834,
"learning_rate": 5e-06,
"loss": 0.3753,
"num_input_tokens_seen": 17315064,
"step": 102
},
{
"epoch": 0.03888677087304613,
"loss": 0.3729170560836792,
"loss_ce": 0.2106856107711792,
"loss_iou": 0.076171875,
"loss_num": 0.162109375,
"loss_xval": 0.162109375,
"num_input_tokens_seen": 17315064,
"step": 102
},
{
"epoch": 0.03926801372474266,
"grad_norm": 16.243870473803266,
"learning_rate": 5e-06,
"loss": 0.3866,
"num_input_tokens_seen": 17482872,
"step": 103
},
{
"epoch": 0.03926801372474266,
"loss": 0.3830409049987793,
"loss_ce": 0.2242884337902069,
"loss_iou": 0.28515625,
"loss_num": 0.1591796875,
"loss_xval": 0.1591796875,
"num_input_tokens_seen": 17482872,
"step": 103
},
{
"epoch": 0.03964925657643919,
"grad_norm": 13.452612030193265,
"learning_rate": 5e-06,
"loss": 0.3889,
"num_input_tokens_seen": 17651212,
"step": 104
},
{
"epoch": 0.03964925657643919,
"loss": 0.37701019644737244,
"loss_ce": 0.20379243791103363,
"loss_iou": 0.027587890625,
"loss_num": 0.1728515625,
"loss_xval": 0.1728515625,
"num_input_tokens_seen": 17651212,
"step": 104
},
{
"epoch": 0.040030499428135725,
"grad_norm": 11.54516678372868,
"learning_rate": 5e-06,
"loss": 0.3777,
"num_input_tokens_seen": 17823268,
"step": 105
},
{
"epoch": 0.040030499428135725,
"loss": 0.37387654185295105,
"loss_ce": 0.20303913950920105,
"loss_iou": 0.0439453125,
"loss_num": 0.1708984375,
"loss_xval": 0.1708984375,
"num_input_tokens_seen": 17823268,
"step": 105
},
{
"epoch": 0.040411742279832255,
"grad_norm": 10.956441122920754,
"learning_rate": 5e-06,
"loss": 0.3505,
"num_input_tokens_seen": 17991568,
"step": 106
},
{
"epoch": 0.040411742279832255,
"loss": 0.33052903413772583,
"loss_ce": 0.18843916058540344,
"loss_iou": 0.0732421875,
"loss_num": 0.142578125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 17991568,
"step": 106
},
{
"epoch": 0.040792985131528785,
"grad_norm": 12.608733901165248,
"learning_rate": 5e-06,
"loss": 0.3781,
"num_input_tokens_seen": 18161860,
"step": 107
},
{
"epoch": 0.040792985131528785,
"loss": 0.34461355209350586,
"loss_ce": 0.21344900131225586,
"loss_iou": 0.10498046875,
"loss_num": 0.130859375,
"loss_xval": 0.130859375,
"num_input_tokens_seen": 18161860,
"step": 107
},
{
"epoch": 0.041174227983225314,
"grad_norm": 12.373322278476905,
"learning_rate": 5e-06,
"loss": 0.3694,
"num_input_tokens_seen": 18330556,
"step": 108
},
{
"epoch": 0.041174227983225314,
"loss": 0.3656204342842102,
"loss_ce": 0.2087600827217102,
"loss_iou": 0.1298828125,
"loss_num": 0.1572265625,
"loss_xval": 0.1572265625,
"num_input_tokens_seen": 18330556,
"step": 108
},
{
"epoch": 0.041555470834921844,
"grad_norm": 10.869785200963268,
"learning_rate": 5e-06,
"loss": 0.3578,
"num_input_tokens_seen": 18502672,
"step": 109
},
{
"epoch": 0.041555470834921844,
"loss": 0.3445308208465576,
"loss_ce": 0.19542193412780762,
"loss_iou": 0.09375,
"loss_num": 0.1494140625,
"loss_xval": 0.1494140625,
"num_input_tokens_seen": 18502672,
"step": 109
},
{
"epoch": 0.041936713686618374,
"grad_norm": 12.647777677695915,
"learning_rate": 5e-06,
"loss": 0.357,
"num_input_tokens_seen": 18674676,
"step": 110
},
{
"epoch": 0.041936713686618374,
"loss": 0.3487279713153839,
"loss_ce": 0.1894262135028839,
"loss_iou": 0.0849609375,
"loss_num": 0.1591796875,
"loss_xval": 0.1591796875,
"num_input_tokens_seen": 18674676,
"step": 110
},
{
"epoch": 0.0423179565383149,
"grad_norm": 12.153649007209351,
"learning_rate": 5e-06,
"loss": 0.3626,
"num_input_tokens_seen": 18841332,
"step": 111
},
{
"epoch": 0.0423179565383149,
"loss": 0.3468208312988281,
"loss_ce": 0.19014358520507812,
"loss_iou": 0.19140625,
"loss_num": 0.15625,
"loss_xval": 0.15625,
"num_input_tokens_seen": 18841332,
"step": 111
},
{
"epoch": 0.04269919939001144,
"grad_norm": 13.154446666834914,
"learning_rate": 5e-06,
"loss": 0.3607,
"num_input_tokens_seen": 19010808,
"step": 112
},
{
"epoch": 0.04269919939001144,
"loss": 0.3582812547683716,
"loss_ce": 0.1991625726222992,
"loss_iou": 0.2177734375,
"loss_num": 0.1591796875,
"loss_xval": 0.1591796875,
"num_input_tokens_seen": 19010808,
"step": 112
},
{
"epoch": 0.04308044224170797,
"grad_norm": 12.600474897392363,
"learning_rate": 5e-06,
"loss": 0.3588,
"num_input_tokens_seen": 19183008,
"step": 113
},
{
"epoch": 0.04308044224170797,
"loss": 0.3567988872528076,
"loss_ce": 0.21086381375789642,
"loss_iou": 0.11962890625,
"loss_num": 0.1455078125,
"loss_xval": 0.1455078125,
"num_input_tokens_seen": 19183008,
"step": 113
},
{
"epoch": 0.0434616850934045,
"grad_norm": 11.521956563077518,
"learning_rate": 5e-06,
"loss": 0.3354,
"num_input_tokens_seen": 19355056,
"step": 114
},
{
"epoch": 0.0434616850934045,
"loss": 0.3423011898994446,
"loss_ce": 0.20753559470176697,
"loss_iou": 0.0712890625,
"loss_num": 0.134765625,
"loss_xval": 0.134765625,
"num_input_tokens_seen": 19355056,
"step": 114
},
{
"epoch": 0.04384292794510103,
"grad_norm": 11.521584098092609,
"learning_rate": 5e-06,
"loss": 0.3334,
"num_input_tokens_seen": 19525288,
"step": 115
},
{
"epoch": 0.04384292794510103,
"loss": 0.35636138916015625,
"loss_ce": 0.20127102732658386,
"loss_iou": 0.03271484375,
"loss_num": 0.1552734375,
"loss_xval": 0.1552734375,
"num_input_tokens_seen": 19525288,
"step": 115
},
{
"epoch": 0.04422417079679756,
"grad_norm": 12.326935183856571,
"learning_rate": 5e-06,
"loss": 0.3336,
"num_input_tokens_seen": 19697428,
"step": 116
},
{
"epoch": 0.04422417079679756,
"loss": 0.30840563774108887,
"loss_ce": 0.18126940727233887,
"loss_iou": 0.1904296875,
"loss_num": 0.126953125,
"loss_xval": 0.126953125,
"num_input_tokens_seen": 19697428,
"step": 116
},
{
"epoch": 0.04460541364849409,
"grad_norm": 14.141221536016685,
"learning_rate": 5e-06,
"loss": 0.3245,
"num_input_tokens_seen": 19867612,
"step": 117
},
{
"epoch": 0.04460541364849409,
"loss": 0.3331416845321655,
"loss_ce": 0.19361534714698792,
"loss_iou": 0.07177734375,
"loss_num": 0.1396484375,
"loss_xval": 0.1396484375,
"num_input_tokens_seen": 19867612,
"step": 117
},
{
"epoch": 0.04498665650019062,
"grad_norm": 87.14020102629748,
"learning_rate": 5e-06,
"loss": 0.3998,
"num_input_tokens_seen": 20038036,
"step": 118
},
{
"epoch": 0.04498665650019062,
"loss": 0.3945692777633667,
"loss_ce": 0.2542494535446167,
"loss_iou": 0.12060546875,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 20038036,
"step": 118
},
{
"epoch": 0.045367899351887155,
"grad_norm": 15.830506521221166,
"learning_rate": 5e-06,
"loss": 0.4132,
"num_input_tokens_seen": 20208368,
"step": 119
},
{
"epoch": 0.045367899351887155,
"loss": 0.4296550452709198,
"loss_ce": 0.2777385115623474,
"loss_iou": 0.1943359375,
"loss_num": 0.15234375,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 20208368,
"step": 119
},
{
"epoch": 0.045749142203583684,
"grad_norm": 14.724468931472991,
"learning_rate": 5e-06,
"loss": 0.3901,
"num_input_tokens_seen": 20375724,
"step": 120
},
{
"epoch": 0.045749142203583684,
"loss": 0.40854495763778687,
"loss_ce": 0.25070804357528687,
"loss_iou": 0.099609375,
"loss_num": 0.158203125,
"loss_xval": 0.158203125,
"num_input_tokens_seen": 20375724,
"step": 120
},
{
"epoch": 0.046130385055280214,
"grad_norm": 12.297283170117506,
"learning_rate": 5e-06,
"loss": 0.3595,
"num_input_tokens_seen": 20545956,
"step": 121
},
{
"epoch": 0.046130385055280214,
"loss": 0.38752883672714233,
"loss_ce": 0.20295852422714233,
"loss_iou": 0.07763671875,
"loss_num": 0.1845703125,
"loss_xval": 0.1845703125,
"num_input_tokens_seen": 20545956,
"step": 121
},
{
"epoch": 0.046511627906976744,
"grad_norm": 11.98722544983197,
"learning_rate": 5e-06,
"loss": 0.3451,
"num_input_tokens_seen": 20711180,
"step": 122
},
{
"epoch": 0.046511627906976744,
"loss": 0.34829050302505493,
"loss_ce": 0.17073921859264374,
"loss_iou": 0.0205078125,
"loss_num": 0.177734375,
"loss_xval": 0.177734375,
"num_input_tokens_seen": 20711180,
"step": 122
},
{
"epoch": 0.04689287075867327,
"grad_norm": 11.236273190561704,
"learning_rate": 5e-06,
"loss": 0.3303,
"num_input_tokens_seen": 20883020,
"step": 123
},
{
"epoch": 0.04689287075867327,
"loss": 0.32952818274497986,
"loss_ce": 0.16564878821372986,
"loss_iou": 0.052490234375,
"loss_num": 0.1640625,
"loss_xval": 0.1640625,
"num_input_tokens_seen": 20883020,
"step": 123
},
{
"epoch": 0.0472741136103698,
"grad_norm": 11.47342867332456,
"learning_rate": 5e-06,
"loss": 0.3273,
"num_input_tokens_seen": 21053360,
"step": 124
},
{
"epoch": 0.0472741136103698,
"loss": 0.3332858085632324,
"loss_ce": 0.1640963852405548,
"loss_iou": 0.05517578125,
"loss_num": 0.1689453125,
"loss_xval": 0.1689453125,
"num_input_tokens_seen": 21053360,
"step": 124
},
{
"epoch": 0.04765535646206634,
"grad_norm": 10.67542691608568,
"learning_rate": 5e-06,
"loss": 0.3022,
"num_input_tokens_seen": 21221792,
"step": 125
},
{
"epoch": 0.04765535646206634,
"loss": 0.29388928413391113,
"loss_ce": 0.17273451387882233,
"loss_iou": 0.138671875,
"loss_num": 0.12109375,
"loss_xval": 0.12109375,
"num_input_tokens_seen": 21221792,
"step": 125
},
{
"epoch": 0.04803659931376287,
"grad_norm": 10.793472255887544,
"learning_rate": 5e-06,
"loss": 0.298,
"num_input_tokens_seen": 21388780,
"step": 126
},
{
"epoch": 0.04803659931376287,
"loss": 0.31272128224372864,
"loss_ce": 0.17954257130622864,
"loss_iou": 0.02587890625,
"loss_num": 0.1328125,
"loss_xval": 0.1328125,
"num_input_tokens_seen": 21388780,
"step": 126
},
{
"epoch": 0.0484178421654594,
"grad_norm": 10.457819380972436,
"learning_rate": 5e-06,
"loss": 0.2871,
"num_input_tokens_seen": 21559156,
"step": 127
},
{
"epoch": 0.0484178421654594,
"loss": 0.2922600209712982,
"loss_ce": 0.1415642350912094,
"loss_iou": 0.115234375,
"loss_num": 0.150390625,
"loss_xval": 0.150390625,
"num_input_tokens_seen": 21559156,
"step": 127
},
{
"epoch": 0.04879908501715593,
"grad_norm": 12.075104791130935,
"learning_rate": 5e-06,
"loss": 0.3002,
"num_input_tokens_seen": 21728684,
"step": 128
},
{
"epoch": 0.04879908501715593,
"loss": 0.2932286858558655,
"loss_ce": 0.14759881794452667,
"loss_iou": 0.02734375,
"loss_num": 0.1455078125,
"loss_xval": 0.1455078125,
"num_input_tokens_seen": 21728684,
"step": 128
},
{
"epoch": 0.04918032786885246,
"grad_norm": 17.596022200669275,
"learning_rate": 5e-06,
"loss": 0.3052,
"num_input_tokens_seen": 21898896,
"step": 129
},
{
"epoch": 0.04918032786885246,
"loss": 0.2854222059249878,
"loss_ce": 0.15090075135231018,
"loss_iou": 0.11572265625,
"loss_num": 0.134765625,
"loss_xval": 0.134765625,
"num_input_tokens_seen": 21898896,
"step": 129
},
{
"epoch": 0.04956157072054899,
"grad_norm": 22.591538039851265,
"learning_rate": 5e-06,
"loss": 0.3237,
"num_input_tokens_seen": 22071000,
"step": 130
},
{
"epoch": 0.04956157072054899,
"loss": 0.34077489376068115,
"loss_ce": 0.16413915157318115,
"loss_iou": 0.2119140625,
"loss_num": 0.1767578125,
"loss_xval": 0.1767578125,
"num_input_tokens_seen": 22071000,
"step": 130
},
{
"epoch": 0.04994281357224552,
"grad_norm": 23.969048922051158,
"learning_rate": 5e-06,
"loss": 0.3373,
"num_input_tokens_seen": 22241740,
"step": 131
},
{
"epoch": 0.04994281357224552,
"loss": 0.3232199549674988,
"loss_ce": 0.17606420814990997,
"loss_iou": 0.00830078125,
"loss_num": 0.1474609375,
"loss_xval": 0.1474609375,
"num_input_tokens_seen": 22241740,
"step": 131
},
{
"epoch": 0.050324056423942054,
"grad_norm": 26.453649680091864,
"learning_rate": 5e-06,
"loss": 0.3504,
"num_input_tokens_seen": 22411932,
"step": 132
},
{
"epoch": 0.050324056423942054,
"loss": 0.35389894247055054,
"loss_ce": 0.18177980184555054,
"loss_iou": 0.09716796875,
"loss_num": 0.171875,
"loss_xval": 0.171875,
"num_input_tokens_seen": 22411932,
"step": 132
},
{
"epoch": 0.050705299275638584,
"grad_norm": 29.770376889810056,
"learning_rate": 5e-06,
"loss": 0.321,
"num_input_tokens_seen": 22580712,
"step": 133
},
{
"epoch": 0.050705299275638584,
"loss": 0.3106634318828583,
"loss_ce": 0.16460628807544708,
"loss_iou": 0.2099609375,
"loss_num": 0.146484375,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 22580712,
"step": 133
},
{
"epoch": 0.05108654212733511,
"grad_norm": 40.261099546087195,
"learning_rate": 5e-06,
"loss": 0.4734,
"num_input_tokens_seen": 22751228,
"step": 134
},
{
"epoch": 0.05108654212733511,
"loss": 0.4574471116065979,
"loss_ce": 0.3043099045753479,
"loss_iou": 0.208984375,
"loss_num": 0.1533203125,
"loss_xval": 0.1533203125,
"num_input_tokens_seen": 22751228,
"step": 134
},
{
"epoch": 0.05146778497903164,
"grad_norm": 51.93099158970609,
"learning_rate": 5e-06,
"loss": 0.3771,
"num_input_tokens_seen": 22920056,
"step": 135
},
{
"epoch": 0.05146778497903164,
"loss": 0.38384222984313965,
"loss_ce": 0.24974800646305084,
"loss_iou": 0.03466796875,
"loss_num": 0.1337890625,
"loss_xval": 0.1337890625,
"num_input_tokens_seen": 22920056,
"step": 135
},
{
"epoch": 0.05184902783072817,
"grad_norm": 44.023197993806264,
"learning_rate": 5e-06,
"loss": 0.5048,
"num_input_tokens_seen": 23090280,
"step": 136
},
{
"epoch": 0.05184902783072817,
"loss": 0.45330941677093506,
"loss_ce": 0.31573617458343506,
"loss_iou": 0.01129150390625,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 23090280,
"step": 136
},
{
"epoch": 0.0522302706824247,
"grad_norm": 17.668600356916173,
"learning_rate": 5e-06,
"loss": 0.3013,
"num_input_tokens_seen": 23258092,
"step": 137
},
{
"epoch": 0.0522302706824247,
"loss": 0.2805832028388977,
"loss_ce": 0.1533859372138977,
"loss_iou": 0.271484375,
"loss_num": 0.126953125,
"loss_xval": 0.126953125,
"num_input_tokens_seen": 23258092,
"step": 137
},
{
"epoch": 0.05261151353412123,
"grad_norm": 11.853121000899684,
"learning_rate": 5e-06,
"loss": 0.2956,
"num_input_tokens_seen": 23428304,
"step": 138
},
{
"epoch": 0.05261151353412123,
"loss": 0.2811272442340851,
"loss_ce": 0.12933281064033508,
"loss_iou": 0.015869140625,
"loss_num": 0.1513671875,
"loss_xval": 0.1513671875,
"num_input_tokens_seen": 23428304,
"step": 138
},
{
"epoch": 0.05299275638581777,
"grad_norm": 10.782142098964552,
"learning_rate": 5e-06,
"loss": 0.2952,
"num_input_tokens_seen": 23597868,
"step": 139
},
{
"epoch": 0.05299275638581777,
"loss": 0.27758559584617615,
"loss_ce": 0.13134536147117615,
"loss_iou": 0.205078125,
"loss_num": 0.146484375,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 23597868,
"step": 139
},
{
"epoch": 0.0533739992375143,
"grad_norm": 10.56623325337934,
"learning_rate": 5e-06,
"loss": 0.2759,
"num_input_tokens_seen": 23769828,
"step": 140
},
{
"epoch": 0.0533739992375143,
"loss": 0.2588083744049072,
"loss_ce": 0.12971901893615723,
"loss_iou": 0.142578125,
"loss_num": 0.12890625,
"loss_xval": 0.12890625,
"num_input_tokens_seen": 23769828,
"step": 140
},
{
"epoch": 0.05375524208921083,
"grad_norm": 9.295556192178626,
"learning_rate": 5e-06,
"loss": 0.2827,
"num_input_tokens_seen": 23941928,
"step": 141
},
{
"epoch": 0.05375524208921083,
"loss": 0.2925659716129303,
"loss_ce": 0.12453620135784149,
"loss_iou": 0.07177734375,
"loss_num": 0.16796875,
"loss_xval": 0.16796875,
"num_input_tokens_seen": 23941928,
"step": 141
},
{
"epoch": 0.05413648494090736,
"grad_norm": 9.941605277841138,
"learning_rate": 5e-06,
"loss": 0.2708,
"num_input_tokens_seen": 24108640,
"step": 142
},
{
"epoch": 0.05413648494090736,
"loss": 0.30066150426864624,
"loss_ce": 0.12451402842998505,
"loss_iou": 0.30859375,
"loss_num": 0.17578125,
"loss_xval": 0.17578125,
"num_input_tokens_seen": 24108640,
"step": 142
},
{
"epoch": 0.05451772779260389,
"grad_norm": 10.112022703753036,
"learning_rate": 5e-06,
"loss": 0.2652,
"num_input_tokens_seen": 24280820,
"step": 143
},
{
"epoch": 0.05451772779260389,
"loss": 0.2717851400375366,
"loss_ce": 0.13170944154262543,
"loss_iou": 0.0283203125,
"loss_num": 0.1396484375,
"loss_xval": 0.1396484375,
"num_input_tokens_seen": 24280820,
"step": 143
},
{
"epoch": 0.05489897064430042,
"grad_norm": 9.423737008725107,
"learning_rate": 5e-06,
"loss": 0.2729,
"num_input_tokens_seen": 24443752,
"step": 144
},
{
"epoch": 0.05489897064430042,
"loss": 0.2589079439640045,
"loss_ce": 0.11364426463842392,
"loss_iou": 0.12158203125,
"loss_num": 0.1455078125,
"loss_xval": 0.1455078125,
"num_input_tokens_seen": 24443752,
"step": 144
},
{
"epoch": 0.055280213495996953,
"grad_norm": 10.806035234827066,
"learning_rate": 5e-06,
"loss": 0.2676,
"num_input_tokens_seen": 24614996,
"step": 145
},
{
"epoch": 0.055280213495996953,
"loss": 0.2665177583694458,
"loss_ce": 0.1084977239370346,
"loss_iou": 0.0361328125,
"loss_num": 0.158203125,
"loss_xval": 0.158203125,
"num_input_tokens_seen": 24614996,
"step": 145
},
{
"epoch": 0.05566145634769348,
"grad_norm": 11.25713824124703,
"learning_rate": 5e-06,
"loss": 0.2855,
"num_input_tokens_seen": 24784460,
"step": 146
},
{
"epoch": 0.05566145634769348,
"loss": 0.26414167881011963,
"loss_ce": 0.12302839756011963,
"loss_iou": 0.029052734375,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 24784460,
"step": 146
},
{
"epoch": 0.05604269919939001,
"grad_norm": 10.106443684805265,
"learning_rate": 5e-06,
"loss": 0.2656,
"num_input_tokens_seen": 24954704,
"step": 147
},
{
"epoch": 0.05604269919939001,
"loss": 0.25407490134239197,
"loss_ce": 0.10862812399864197,
"loss_iou": 0.0269775390625,
"loss_num": 0.1455078125,
"loss_xval": 0.1455078125,
"num_input_tokens_seen": 24954704,
"step": 147
},
{
"epoch": 0.05642394205108654,
"grad_norm": 9.275986795258602,
"learning_rate": 5e-06,
"loss": 0.2428,
"num_input_tokens_seen": 25123164,
"step": 148
},
{
"epoch": 0.05642394205108654,
"loss": 0.2414214015007019,
"loss_ce": 0.1008574366569519,
"loss_iou": 0.16796875,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 25123164,
"step": 148
},
{
"epoch": 0.05680518490278307,
"grad_norm": 12.235038905105682,
"learning_rate": 5e-06,
"loss": 0.2484,
"num_input_tokens_seen": 25289736,
"step": 149
},
{
"epoch": 0.05680518490278307,
"loss": 0.24917542934417725,
"loss_ce": 0.09304748475551605,
"loss_iou": 0.07958984375,
"loss_num": 0.15625,
"loss_xval": 0.15625,
"num_input_tokens_seen": 25289736,
"step": 149
},
{
"epoch": 0.0571864277544796,
"grad_norm": 10.171954399456862,
"learning_rate": 5e-06,
"loss": 0.2653,
"num_input_tokens_seen": 25460180,
"step": 150
},
{
"epoch": 0.0571864277544796,
"loss": 0.2596849203109741,
"loss_ce": 0.10398421436548233,
"loss_iou": 0.0517578125,
"loss_num": 0.1552734375,
"loss_xval": 0.1552734375,
"num_input_tokens_seen": 25460180,
"step": 150
},
{
"epoch": 0.05756767060617613,
"grad_norm": 9.681432337059835,
"learning_rate": 5e-06,
"loss": 0.2515,
"num_input_tokens_seen": 25630484,
"step": 151
},
{
"epoch": 0.05756767060617613,
"loss": 0.25730687379837036,
"loss_ce": 0.11491183936595917,
"loss_iou": 0.2333984375,
"loss_num": 0.142578125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 25630484,
"step": 151
},
{
"epoch": 0.05794891345787267,
"grad_norm": 9.005698061356364,
"learning_rate": 5e-06,
"loss": 0.2406,
"num_input_tokens_seen": 25800676,
"step": 152
},
{
"epoch": 0.05794891345787267,
"loss": 0.2408166080713272,
"loss_ce": 0.10714960098266602,
"loss_iou": 0.052978515625,
"loss_num": 0.1337890625,
"loss_xval": 0.1337890625,
"num_input_tokens_seen": 25800676,
"step": 152
},
{
"epoch": 0.0583301563095692,
"grad_norm": 9.23109905221705,
"learning_rate": 5e-06,
"loss": 0.2478,
"num_input_tokens_seen": 25970308,
"step": 153
},
{
"epoch": 0.0583301563095692,
"loss": 0.2394830286502838,
"loss_ce": 0.09977356344461441,
"loss_iou": 0.08984375,
"loss_num": 0.1396484375,
"loss_xval": 0.1396484375,
"num_input_tokens_seen": 25970308,
"step": 153
},
{
"epoch": 0.05871139916126573,
"grad_norm": 9.694346052607242,
"learning_rate": 5e-06,
"loss": 0.2383,
"num_input_tokens_seen": 26139336,
"step": 154
},
{
"epoch": 0.05871139916126573,
"loss": 0.24506092071533203,
"loss_ce": 0.08954335749149323,
"loss_iou": 0.1435546875,
"loss_num": 0.1552734375,
"loss_xval": 0.1552734375,
"num_input_tokens_seen": 26139336,
"step": 154
},
{
"epoch": 0.05909264201296226,
"grad_norm": 10.26482253569495,
"learning_rate": 5e-06,
"loss": 0.242,
"num_input_tokens_seen": 26309800,
"step": 155
},
{
"epoch": 0.05909264201296226,
"loss": 0.2486727237701416,
"loss_ce": 0.09480307996273041,
"loss_iou": 0.05615234375,
"loss_num": 0.154296875,
"loss_xval": 0.154296875,
"num_input_tokens_seen": 26309800,
"step": 155
},
{
"epoch": 0.05947388486465879,
"grad_norm": 10.728319795398349,
"learning_rate": 5e-06,
"loss": 0.2523,
"num_input_tokens_seen": 26477652,
"step": 156
},
{
"epoch": 0.05947388486465879,
"loss": 0.26340925693511963,
"loss_ce": 0.10184921324253082,
"loss_iou": 0.138671875,
"loss_num": 0.1611328125,
"loss_xval": 0.1611328125,
"num_input_tokens_seen": 26477652,
"step": 156
},
{
"epoch": 0.059855127716355316,
"grad_norm": 8.708486379954715,
"learning_rate": 5e-06,
"loss": 0.2442,
"num_input_tokens_seen": 26647156,
"step": 157
},
{
"epoch": 0.059855127716355316,
"loss": 0.2594885230064392,
"loss_ce": 0.0865149199962616,
"loss_iou": 0.0908203125,
"loss_num": 0.1728515625,
"loss_xval": 0.1728515625,
"num_input_tokens_seen": 26647156,
"step": 157
},
{
"epoch": 0.060236370568051846,
"grad_norm": 8.318082248992015,
"learning_rate": 5e-06,
"loss": 0.2296,
"num_input_tokens_seen": 26817628,
"step": 158
},
{
"epoch": 0.060236370568051846,
"loss": 0.23485592007637024,
"loss_ce": 0.08330562710762024,
"loss_iou": 0.154296875,
"loss_num": 0.1513671875,
"loss_xval": 0.1513671875,
"num_input_tokens_seen": 26817628,
"step": 158
},
{
"epoch": 0.06061761341974838,
"grad_norm": 8.479454996279628,
"learning_rate": 5e-06,
"loss": 0.2326,
"num_input_tokens_seen": 26989672,
"step": 159
},
{
"epoch": 0.06061761341974838,
"loss": 0.2239830046892166,
"loss_ce": 0.07115097343921661,
"loss_iou": 0.024658203125,
"loss_num": 0.15234375,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 26989672,
"step": 159
},
{
"epoch": 0.06099885627144491,
"grad_norm": 9.256918264151901,
"learning_rate": 5e-06,
"loss": 0.2312,
"num_input_tokens_seen": 27159948,
"step": 160
},
{
"epoch": 0.06099885627144491,
"loss": 0.23189058899879456,
"loss_ce": 0.08729829639196396,
"loss_iou": 0.046875,
"loss_num": 0.14453125,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 27159948,
"step": 160
},
{
"epoch": 0.06138009912314144,
"grad_norm": 12.48003698093438,
"learning_rate": 5e-06,
"loss": 0.2475,
"num_input_tokens_seen": 27331912,
"step": 161
},
{
"epoch": 0.06138009912314144,
"loss": 0.24232828617095947,
"loss_ce": 0.08272135257720947,
"loss_iou": 0.017578125,
"loss_num": 0.1591796875,
"loss_xval": 0.1591796875,
"num_input_tokens_seen": 27331912,
"step": 161
},
{
"epoch": 0.06176134197483797,
"grad_norm": 11.268891211576127,
"learning_rate": 5e-06,
"loss": 0.2454,
"num_input_tokens_seen": 27500572,
"step": 162
},
{
"epoch": 0.06176134197483797,
"loss": 0.2539252042770386,
"loss_ce": 0.08461367338895798,
"loss_iou": 0.271484375,
"loss_num": 0.1689453125,
"loss_xval": 0.1689453125,
"num_input_tokens_seen": 27500572,
"step": 162
},
{
"epoch": 0.0621425848265345,
"grad_norm": 8.295163566141365,
"learning_rate": 5e-06,
"loss": 0.2189,
"num_input_tokens_seen": 27670916,
"step": 163
},
{
"epoch": 0.0621425848265345,
"loss": 0.21146176755428314,
"loss_ce": 0.08090756833553314,
"loss_iou": 0.12060546875,
"loss_num": 0.130859375,
"loss_xval": 0.130859375,
"num_input_tokens_seen": 27670916,
"step": 163
},
{
"epoch": 0.06252382767823103,
"grad_norm": 7.772818852021809,
"learning_rate": 5e-06,
"loss": 0.2362,
"num_input_tokens_seen": 27839668,
"step": 164
},
{
"epoch": 0.06252382767823103,
"loss": 0.24545946717262268,
"loss_ce": 0.08115281909704208,
"loss_iou": 0.1484375,
"loss_num": 0.1640625,
"loss_xval": 0.1640625,
"num_input_tokens_seen": 27839668,
"step": 164
},
{
"epoch": 0.06290507052992757,
"grad_norm": 7.7406600766597995,
"learning_rate": 5e-06,
"loss": 0.2296,
"num_input_tokens_seen": 28011736,
"step": 165
},
{
"epoch": 0.06290507052992757,
"loss": 0.24421420693397522,
"loss_ce": 0.08125033974647522,
"loss_iou": 0.036376953125,
"loss_num": 0.1630859375,
"loss_xval": 0.1630859375,
"num_input_tokens_seen": 28011736,
"step": 165
},
{
"epoch": 0.06328631338162409,
"grad_norm": 7.558234750827758,
"learning_rate": 5e-06,
"loss": 0.2191,
"num_input_tokens_seen": 28181428,
"step": 166
},
{
"epoch": 0.06328631338162409,
"loss": 0.2300872951745987,
"loss_ce": 0.06968691200017929,
"loss_iou": 0.267578125,
"loss_num": 0.16015625,
"loss_xval": 0.16015625,
"num_input_tokens_seen": 28181428,
"step": 166
},
{
"epoch": 0.06366755623332063,
"grad_norm": 7.904016136847001,
"learning_rate": 5e-06,
"loss": 0.2202,
"num_input_tokens_seen": 28351744,
"step": 167
},
{
"epoch": 0.06366755623332063,
"loss": 0.2199670970439911,
"loss_ce": 0.08190558105707169,
"loss_iou": 0.1025390625,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 28351744,
"step": 167
},
{
"epoch": 0.06404879908501715,
"grad_norm": 8.048339423245295,
"learning_rate": 5e-06,
"loss": 0.2037,
"num_input_tokens_seen": 28523912,
"step": 168
},
{
"epoch": 0.06404879908501715,
"loss": 0.1940288096666336,
"loss_ce": 0.0752543956041336,
"loss_iou": 0.1474609375,
"loss_num": 0.11865234375,
"loss_xval": 0.11865234375,
"num_input_tokens_seen": 28523912,
"step": 168
},
{
"epoch": 0.06443004193671369,
"grad_norm": 11.225230091380547,
"learning_rate": 5e-06,
"loss": 0.2132,
"num_input_tokens_seen": 28691576,
"step": 169
},
{
"epoch": 0.06443004193671369,
"loss": 0.2098519206047058,
"loss_ce": 0.067212775349617,
"loss_iou": 0.142578125,
"loss_num": 0.142578125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 28691576,
"step": 169
},
{
"epoch": 0.06481128478841022,
"grad_norm": 24.483990446755985,
"learning_rate": 5e-06,
"loss": 0.3308,
"num_input_tokens_seen": 28859312,
"step": 170
},
{
"epoch": 0.06481128478841022,
"loss": 0.33362236618995667,
"loss_ce": 0.06909599900245667,
"loss_iou": 0.37109375,
"loss_num": 0.263671875,
"loss_xval": 0.263671875,
"num_input_tokens_seen": 28859312,
"step": 170
},
{
"epoch": 0.06519252764010675,
"grad_norm": 11.280338076240879,
"learning_rate": 5e-06,
"loss": 0.2534,
"num_input_tokens_seen": 29027856,
"step": 171
},
{
"epoch": 0.06519252764010675,
"loss": 0.2349179983139038,
"loss_ce": 0.071221724152565,
"loss_iou": 0.056396484375,
"loss_num": 0.1640625,
"loss_xval": 0.1640625,
"num_input_tokens_seen": 29027856,
"step": 171
},
{
"epoch": 0.06557377049180328,
"grad_norm": 19.174485435722143,
"learning_rate": 5e-06,
"loss": 0.2755,
"num_input_tokens_seen": 29196644,
"step": 172
},
{
"epoch": 0.06557377049180328,
"loss": 0.30110809206962585,
"loss_ce": 0.10616179555654526,
"loss_iou": 0.3984375,
"loss_num": 0.1953125,
"loss_xval": 0.1953125,
"num_input_tokens_seen": 29196644,
"step": 172
},
{
"epoch": 0.0659550133434998,
"grad_norm": 11.53696010791549,
"learning_rate": 5e-06,
"loss": 0.2506,
"num_input_tokens_seen": 29361972,
"step": 173
},
{
"epoch": 0.0659550133434998,
"loss": 0.24097901582717896,
"loss_ce": 0.06373292952775955,
"loss_iou": 0.0771484375,
"loss_num": 0.177734375,
"loss_xval": 0.177734375,
"num_input_tokens_seen": 29361972,
"step": 173
},
{
"epoch": 0.06633625619519634,
"grad_norm": 7.365584889438314,
"learning_rate": 5e-06,
"loss": 0.2221,
"num_input_tokens_seen": 29530720,
"step": 174
},
{
"epoch": 0.06633625619519634,
"loss": 0.20714986324310303,
"loss_ce": 0.07134665548801422,
"loss_iou": 0.0242919921875,
"loss_num": 0.1357421875,
"loss_xval": 0.1357421875,
"num_input_tokens_seen": 29530720,
"step": 174
},
{
"epoch": 0.06671749904689286,
"grad_norm": 7.619362858636232,
"learning_rate": 5e-06,
"loss": 0.2124,
"num_input_tokens_seen": 29699604,
"step": 175
},
{
"epoch": 0.06671749904689286,
"loss": 0.2332572489976883,
"loss_ce": 0.0637015849351883,
"loss_iou": 0.0732421875,
"loss_num": 0.169921875,
"loss_xval": 0.169921875,
"num_input_tokens_seen": 29699604,
"step": 175
},
{
"epoch": 0.0670987418985894,
"grad_norm": 7.073823802448804,
"learning_rate": 5e-06,
"loss": 0.2051,
"num_input_tokens_seen": 29867992,
"step": 176
},
{
"epoch": 0.0670987418985894,
"loss": 0.18709206581115723,
"loss_ce": 0.06709694862365723,
"loss_iou": 0.09716796875,
"loss_num": 0.1201171875,
"loss_xval": 0.1201171875,
"num_input_tokens_seen": 29867992,
"step": 176
},
{
"epoch": 0.06747998475028594,
"grad_norm": 8.039181008207422,
"learning_rate": 5e-06,
"loss": 0.2211,
"num_input_tokens_seen": 30035884,
"step": 177
},
{
"epoch": 0.06747998475028594,
"loss": 0.2507503628730774,
"loss_ce": 0.11000329256057739,
"loss_iou": 0.04296875,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 30035884,
"step": 177
},
{
"epoch": 0.06786122760198246,
"grad_norm": 6.614190886473386,
"learning_rate": 5e-06,
"loss": 0.2033,
"num_input_tokens_seen": 30206128,
"step": 178
},
{
"epoch": 0.06786122760198246,
"loss": 0.19981291890144348,
"loss_ce": 0.05955413728952408,
"loss_iou": 0.09228515625,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 30206128,
"step": 178
},
{
"epoch": 0.068242470453679,
"grad_norm": 6.636430428575403,
"learning_rate": 5e-06,
"loss": 0.1949,
"num_input_tokens_seen": 30378096,
"step": 179
},
{
"epoch": 0.068242470453679,
"loss": 0.1966387778520584,
"loss_ce": 0.06101866066455841,
"loss_iou": 0.1318359375,
"loss_num": 0.1357421875,
"loss_xval": 0.1357421875,
"num_input_tokens_seen": 30378096,
"step": 179
},
{
"epoch": 0.06862371330537552,
"grad_norm": 6.867381193636216,
"learning_rate": 5e-06,
"loss": 0.1896,
"num_input_tokens_seen": 30550272,
"step": 180
},
{
"epoch": 0.06862371330537552,
"loss": 0.2098548263311386,
"loss_ce": 0.06001351401209831,
"loss_iou": 0.16796875,
"loss_num": 0.1494140625,
"loss_xval": 0.1494140625,
"num_input_tokens_seen": 30550272,
"step": 180
},
{
"epoch": 0.06900495615707206,
"grad_norm": 7.25383585501624,
"learning_rate": 5e-06,
"loss": 0.1873,
"num_input_tokens_seen": 30718864,
"step": 181
},
{
"epoch": 0.06900495615707206,
"loss": 0.17539140582084656,
"loss_ce": 0.049781057983636856,
"loss_iou": 0.1005859375,
"loss_num": 0.1259765625,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 30718864,
"step": 181
},
{
"epoch": 0.06938619900876858,
"grad_norm": 9.593416424164415,
"learning_rate": 5e-06,
"loss": 0.2219,
"num_input_tokens_seen": 30888492,
"step": 182
},
{
"epoch": 0.06938619900876858,
"loss": 0.21108978986740112,
"loss_ce": 0.06234711408615112,
"loss_iou": 0.205078125,
"loss_num": 0.1484375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 30888492,
"step": 182
},
{
"epoch": 0.06976744186046512,
"grad_norm": 11.387146816371892,
"learning_rate": 5e-06,
"loss": 0.2342,
"num_input_tokens_seen": 31059204,
"step": 183
},
{
"epoch": 0.06976744186046512,
"loss": 0.2156086564064026,
"loss_ce": 0.05594068765640259,
"loss_iou": 0.11669921875,
"loss_num": 0.16015625,
"loss_xval": 0.16015625,
"num_input_tokens_seen": 31059204,
"step": 183
},
{
"epoch": 0.07014868471216165,
"grad_norm": 7.155039422400645,
"learning_rate": 5e-06,
"loss": 0.2119,
"num_input_tokens_seen": 31231448,
"step": 184
},
{
"epoch": 0.07014868471216165,
"loss": 0.23432870209217072,
"loss_ce": 0.06617684662342072,
"loss_iou": 0.033203125,
"loss_num": 0.16796875,
"loss_xval": 0.16796875,
"num_input_tokens_seen": 31231448,
"step": 184
},
{
"epoch": 0.07052992756385817,
"grad_norm": 6.902516548706931,
"learning_rate": 5e-06,
"loss": 0.2087,
"num_input_tokens_seen": 31401680,
"step": 185
},
{
"epoch": 0.07052992756385817,
"loss": 0.22064757347106934,
"loss_ce": 0.06769345700740814,
"loss_iou": 0.27734375,
"loss_num": 0.1533203125,
"loss_xval": 0.1533203125,
"num_input_tokens_seen": 31401680,
"step": 185
},
{
"epoch": 0.07091117041555471,
"grad_norm": 6.069188803983981,
"learning_rate": 5e-06,
"loss": 0.1966,
"num_input_tokens_seen": 31572216,
"step": 186
},
{
"epoch": 0.07091117041555471,
"loss": 0.2209046185016632,
"loss_ce": 0.0724671259522438,
"loss_iou": 0.07763671875,
"loss_num": 0.1484375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 31572216,
"step": 186
},
{
"epoch": 0.07129241326725123,
"grad_norm": 6.435423493978937,
"learning_rate": 5e-06,
"loss": 0.1857,
"num_input_tokens_seen": 31744228,
"step": 187
},
{
"epoch": 0.07129241326725123,
"loss": 0.19196385145187378,
"loss_ce": 0.053902335464954376,
"loss_iou": 0.158203125,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 31744228,
"step": 187
},
{
"epoch": 0.07167365611894777,
"grad_norm": 6.229232274862448,
"learning_rate": 5e-06,
"loss": 0.1916,
"num_input_tokens_seen": 31914484,
"step": 188
},
{
"epoch": 0.07167365611894777,
"loss": 0.22564879059791565,
"loss_ce": 0.08831968903541565,
"loss_iou": 0.1376953125,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 31914484,
"step": 188
},
{
"epoch": 0.07205489897064431,
"grad_norm": 5.9170687779491855,
"learning_rate": 5e-06,
"loss": 0.1979,
"num_input_tokens_seen": 32086484,
"step": 189
},
{
"epoch": 0.07205489897064431,
"loss": 0.19572624564170837,
"loss_ce": 0.043382514268159866,
"loss_iou": 0.1630859375,
"loss_num": 0.15234375,
"loss_xval": 0.15234375,
"num_input_tokens_seen": 32086484,
"step": 189
},
{
"epoch": 0.07243614182234083,
"grad_norm": 7.09938319359785,
"learning_rate": 5e-06,
"loss": 0.1782,
"num_input_tokens_seen": 32256820,
"step": 190
},
{
"epoch": 0.07243614182234083,
"loss": 0.16142967343330383,
"loss_ce": 0.04827050492167473,
"loss_iou": 0.0208740234375,
"loss_num": 0.11328125,
"loss_xval": 0.11328125,
"num_input_tokens_seen": 32256820,
"step": 190
},
{
"epoch": 0.07281738467403737,
"grad_norm": 5.710676708424859,
"learning_rate": 5e-06,
"loss": 0.1807,
"num_input_tokens_seen": 32426060,
"step": 191
},
{
"epoch": 0.07281738467403737,
"loss": 0.18200108408927917,
"loss_ce": 0.04430576413869858,
"loss_iou": 0.103515625,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 32426060,
"step": 191
},
{
"epoch": 0.07319862752573389,
"grad_norm": 6.58794684908767,
"learning_rate": 5e-06,
"loss": 0.1942,
"num_input_tokens_seen": 32594448,
"step": 192
},
{
"epoch": 0.07319862752573389,
"loss": 0.1748276799917221,
"loss_ce": 0.04299173876643181,
"loss_iou": 0.125,
"loss_num": 0.1318359375,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 32594448,
"step": 192
},
{
"epoch": 0.07357987037743043,
"grad_norm": 8.787740714089832,
"learning_rate": 5e-06,
"loss": 0.1881,
"num_input_tokens_seen": 32766532,
"step": 193
},
{
"epoch": 0.07357987037743043,
"loss": 0.1966419517993927,
"loss_ce": 0.0433826819062233,
"loss_iou": 0.103515625,
"loss_num": 0.1533203125,
"loss_xval": 0.1533203125,
"num_input_tokens_seen": 32766532,
"step": 193
},
{
"epoch": 0.07396111322912695,
"grad_norm": 6.874779554170579,
"learning_rate": 5e-06,
"loss": 0.1934,
"num_input_tokens_seen": 32936888,
"step": 194
},
{
"epoch": 0.07396111322912695,
"loss": 0.1619054675102234,
"loss_ce": 0.047403521835803986,
"loss_iou": 0.212890625,
"loss_num": 0.1142578125,
"loss_xval": 0.1142578125,
"num_input_tokens_seen": 32936888,
"step": 194
},
{
"epoch": 0.07434235608082349,
"grad_norm": 6.446485562335773,
"learning_rate": 5e-06,
"loss": 0.1883,
"num_input_tokens_seen": 33108180,
"step": 195
},
{
"epoch": 0.07434235608082349,
"loss": 0.18052715063095093,
"loss_ce": 0.07121318578720093,
"loss_iou": 0.041748046875,
"loss_num": 0.109375,
"loss_xval": 0.109375,
"num_input_tokens_seen": 33108180,
"step": 195
},
{
"epoch": 0.07472359893252002,
"grad_norm": 6.581289116802433,
"learning_rate": 5e-06,
"loss": 0.1709,
"num_input_tokens_seen": 33280144,
"step": 196
},
{
"epoch": 0.07472359893252002,
"loss": 0.17547522485256195,
"loss_ce": 0.049193479120731354,
"loss_iou": 0.138671875,
"loss_num": 0.1259765625,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 33280144,
"step": 196
},
{
"epoch": 0.07510484178421654,
"grad_norm": 5.5655095665329695,
"learning_rate": 5e-06,
"loss": 0.179,
"num_input_tokens_seen": 33450488,
"step": 197
},
{
"epoch": 0.07510484178421654,
"loss": 0.182396799325943,
"loss_ce": 0.037804510444402695,
"loss_iou": 0.078125,
"loss_num": 0.14453125,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 33450488,
"step": 197
},
{
"epoch": 0.07548608463591308,
"grad_norm": 5.529419425652068,
"learning_rate": 5e-06,
"loss": 0.1723,
"num_input_tokens_seen": 33618828,
"step": 198
},
{
"epoch": 0.07548608463591308,
"loss": 0.1725044995546341,
"loss_ce": 0.051105573773384094,
"loss_iou": 0.05908203125,
"loss_num": 0.12158203125,
"loss_xval": 0.12158203125,
"num_input_tokens_seen": 33618828,
"step": 198
},
{
"epoch": 0.0758673274876096,
"grad_norm": 5.257627463715373,
"learning_rate": 5e-06,
"loss": 0.1788,
"num_input_tokens_seen": 33788292,
"step": 199
},
{
"epoch": 0.0758673274876096,
"loss": 0.15512898564338684,
"loss_ce": 0.035194914788007736,
"loss_iou": 0.06298828125,
"loss_num": 0.1201171875,
"loss_xval": 0.1201171875,
"num_input_tokens_seen": 33788292,
"step": 199
},
{
"epoch": 0.07624857033930614,
"grad_norm": 7.308182402927082,
"learning_rate": 5e-06,
"loss": 0.1741,
"num_input_tokens_seen": 33957508,
"step": 200
},
{
"epoch": 0.07624857033930614,
"loss": 0.17762351036071777,
"loss_ce": 0.03748679906129837,
"loss_iou": 0.1611328125,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 33957508,
"step": 200
},
{
"epoch": 0.07662981319100266,
"grad_norm": 11.727204455068861,
"learning_rate": 5e-06,
"loss": 0.2073,
"num_input_tokens_seen": 34126596,
"step": 201
},
{
"epoch": 0.07662981319100266,
"loss": 0.19732967019081116,
"loss_ce": 0.04156793653964996,
"loss_iou": 0.19140625,
"loss_num": 0.15625,
"loss_xval": 0.15625,
"num_input_tokens_seen": 34126596,
"step": 201
},
{
"epoch": 0.0770110560426992,
"grad_norm": 7.6470813001116555,
"learning_rate": 5e-06,
"loss": 0.1831,
"num_input_tokens_seen": 34296920,
"step": 202
},
{
"epoch": 0.0770110560426992,
"loss": 0.1773093044757843,
"loss_ce": 0.036806363612413406,
"loss_iou": 0.3359375,
"loss_num": 0.140625,
"loss_xval": 0.140625,
"num_input_tokens_seen": 34296920,
"step": 202
},
{
"epoch": 0.07739229889439574,
"grad_norm": 5.57340451910343,
"learning_rate": 5e-06,
"loss": 0.1917,
"num_input_tokens_seen": 34462376,
"step": 203
},
{
"epoch": 0.07739229889439574,
"loss": 0.2552993893623352,
"loss_ce": 0.0850113034248352,
"loss_iou": 0.2236328125,
"loss_num": 0.169921875,
"loss_xval": 0.169921875,
"num_input_tokens_seen": 34462376,
"step": 203
},
{
"epoch": 0.07777354174609226,
"grad_norm": 5.14332067227009,
"learning_rate": 5e-06,
"loss": 0.1731,
"num_input_tokens_seen": 34632808,
"step": 204
},
{
"epoch": 0.07777354174609226,
"loss": 0.17063459753990173,
"loss_ce": 0.03275619447231293,
"loss_iou": 0.1669921875,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 34632808,
"step": 204
},
{
"epoch": 0.0781547845977888,
"grad_norm": 4.785402363672343,
"learning_rate": 5e-06,
"loss": 0.1782,
"num_input_tokens_seen": 34805040,
"step": 205
},
{
"epoch": 0.0781547845977888,
"loss": 0.1723075956106186,
"loss_ce": 0.03308640792965889,
"loss_iou": 0.2490234375,
"loss_num": 0.1396484375,
"loss_xval": 0.1396484375,
"num_input_tokens_seen": 34805040,
"step": 205
},
{
"epoch": 0.07853602744948532,
"grad_norm": 5.037982883600604,
"learning_rate": 5e-06,
"loss": 0.1678,
"num_input_tokens_seen": 34972936,
"step": 206
},
{
"epoch": 0.07853602744948532,
"loss": 0.17780755460262299,
"loss_ce": 0.038708437234163284,
"loss_iou": 0.2265625,
"loss_num": 0.138671875,
"loss_xval": 0.138671875,
"num_input_tokens_seen": 34972936,
"step": 206
},
{
"epoch": 0.07891727030118185,
"grad_norm": 4.393851666761968,
"learning_rate": 5e-06,
"loss": 0.1562,
"num_input_tokens_seen": 35141640,
"step": 207
},
{
"epoch": 0.07891727030118185,
"loss": 0.1302179992198944,
"loss_ce": 0.032927948981523514,
"loss_iou": 0.10546875,
"loss_num": 0.09716796875,
"loss_xval": 0.09716796875,
"num_input_tokens_seen": 35141640,
"step": 207
},
{
"epoch": 0.07929851315287838,
"grad_norm": 4.788946279040103,
"learning_rate": 5e-06,
"loss": 0.1596,
"num_input_tokens_seen": 35313780,
"step": 208
},
{
"epoch": 0.07929851315287838,
"loss": 0.1403164565563202,
"loss_ce": 0.035824261605739594,
"loss_iou": 0.0576171875,
"loss_num": 0.1044921875,
"loss_xval": 0.1044921875,
"num_input_tokens_seen": 35313780,
"step": 208
},
{
"epoch": 0.07967975600457491,
"grad_norm": 6.031729420886291,
"learning_rate": 5e-06,
"loss": 0.1487,
"num_input_tokens_seen": 35476224,
"step": 209
},
{
"epoch": 0.07967975600457491,
"loss": 0.15272024273872375,
"loss_ce": 0.029612332582473755,
"loss_iou": 0.205078125,
"loss_num": 0.123046875,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 35476224,
"step": 209
},
{
"epoch": 0.08006099885627145,
"grad_norm": 8.180171259285881,
"learning_rate": 5e-06,
"loss": 0.1771,
"num_input_tokens_seen": 35645576,
"step": 210
},
{
"epoch": 0.08006099885627145,
"loss": 0.15656079351902008,
"loss_ce": 0.03070629946887493,
"loss_iou": 0.123046875,
"loss_num": 0.1259765625,
"loss_xval": 0.1259765625,
"num_input_tokens_seen": 35645576,
"step": 210
},
{
"epoch": 0.08044224170796797,
"grad_norm": 7.452042280101069,
"learning_rate": 5e-06,
"loss": 0.1913,
"num_input_tokens_seen": 35815944,
"step": 211
},
{
"epoch": 0.08044224170796797,
"loss": 0.1716148853302002,
"loss_ce": 0.038008928298950195,
"loss_iou": 0.10498046875,
"loss_num": 0.1337890625,
"loss_xval": 0.1337890625,
"num_input_tokens_seen": 35815944,
"step": 211
},
{
"epoch": 0.08082348455966451,
"grad_norm": 5.048442437645143,
"learning_rate": 5e-06,
"loss": 0.172,
"num_input_tokens_seen": 35983292,
"step": 212
},
{
"epoch": 0.08082348455966451,
"loss": 0.17964288592338562,
"loss_ce": 0.03273127228021622,
"loss_iou": 0.044921875,
"loss_num": 0.146484375,
"loss_xval": 0.146484375,
"num_input_tokens_seen": 35983292,
"step": 212
},
{
"epoch": 0.08120472741136103,
"grad_norm": 4.523995490752935,
"learning_rate": 5e-06,
"loss": 0.1546,
"num_input_tokens_seen": 36150308,
"step": 213
},
{
"epoch": 0.08120472741136103,
"loss": 0.13153302669525146,
"loss_ce": 0.026125309988856316,
"loss_iou": 0.058837890625,
"loss_num": 0.10546875,
"loss_xval": 0.10546875,
"num_input_tokens_seen": 36150308,
"step": 213
},
{
"epoch": 0.08158597026305757,
"grad_norm": 4.93523703261995,
"learning_rate": 5e-06,
"loss": 0.152,
"num_input_tokens_seen": 36318844,
"step": 214
},
{
"epoch": 0.08158597026305757,
"loss": 0.17730222642421722,
"loss_ce": 0.03966795653104782,
"loss_iou": 0.08837890625,
"loss_num": 0.1376953125,
"loss_xval": 0.1376953125,
"num_input_tokens_seen": 36318844,
"step": 214
},
{
"epoch": 0.08196721311475409,
"grad_norm": 5.449610633251273,
"learning_rate": 5e-06,
"loss": 0.1633,
"num_input_tokens_seen": 36486788,
"step": 215
},
{
"epoch": 0.08196721311475409,
"loss": 0.17240044474601746,
"loss_ce": 0.02805231139063835,
"loss_iou": 0.1162109375,
"loss_num": 0.14453125,
"loss_xval": 0.14453125,
"num_input_tokens_seen": 36486788,
"step": 215
},
{
"epoch": 0.08234845596645063,
"grad_norm": 5.677928031865306,
"learning_rate": 5e-06,
"loss": 0.1628,
"num_input_tokens_seen": 36655424,
"step": 216
},
{
"epoch": 0.08234845596645063,
"loss": 0.17816764116287231,
"loss_ce": 0.026495283469557762,
"loss_iou": 0.251953125,
"loss_num": 0.1513671875,
"loss_xval": 0.1513671875,
"num_input_tokens_seen": 36655424,
"step": 216
},
{
"epoch": 0.08272969881814717,
"grad_norm": 5.057230107952905,
"learning_rate": 5e-06,
"loss": 0.1666,
"num_input_tokens_seen": 36822400,
"step": 217
},
{
"epoch": 0.08272969881814717,
"loss": 0.16810831427574158,
"loss_ce": 0.02681192383170128,
"loss_iou": 0.0654296875,
"loss_num": 0.1416015625,
"loss_xval": 0.1416015625,
"num_input_tokens_seen": 36822400,
"step": 217
},
{
"epoch": 0.08311094166984369,
"grad_norm": 4.693685798905787,
"learning_rate": 5e-06,
"loss": 0.1533,
"num_input_tokens_seen": 36987660,
"step": 218
},
{
"epoch": 0.08311094166984369,
"loss": 0.17427818477153778,
"loss_ce": 0.03041832335293293,
"loss_iou": 0.2041015625,
"loss_num": 0.1435546875,
"loss_xval": 0.1435546875,
"num_input_tokens_seen": 36987660,
"step": 218
},
{
"epoch": 0.08349218452154022,
"grad_norm": 5.369656059536361,
"learning_rate": 5e-06,
"loss": 0.1547,
"num_input_tokens_seen": 37157224,
"step": 219
},
{
"epoch": 0.08349218452154022,
"loss": 0.1782151609659195,
"loss_ce": 0.035820137709379196,
"loss_iou": 0.09765625,
"loss_num": 0.142578125,
"loss_xval": 0.142578125,
"num_input_tokens_seen": 37157224,
"step": 219
},
{
"epoch": 0.08387342737323675,
"grad_norm": 6.865296132529728,
"learning_rate": 5e-06,
"loss": 0.1492,
"num_input_tokens_seen": 37327616,
"step": 220
},
{
"epoch": 0.08387342737323675,
"loss": 0.14614805579185486,
"loss_ce": 0.029631949961185455,
"loss_iou": 0.25,
"loss_num": 0.11669921875,
"loss_xval": 0.11669921875,
"num_input_tokens_seen": 37327616,
"step": 220
},
{
"epoch": 0.08425467022493328,
"grad_norm": 8.867559833380975,
"learning_rate": 5e-06,
"loss": 0.1788,
"num_input_tokens_seen": 37499668,
"step": 221
},
{
"epoch": 0.08425467022493328,
"loss": 0.1794005036354065,
"loss_ce": 0.030230596661567688,
"loss_iou": 0.057373046875,
"loss_num": 0.1494140625,
"loss_xval": 0.1494140625,
"num_input_tokens_seen": 37499668,
"step": 221
},
{
"epoch": 0.0846359130766298,
"grad_norm": 6.786482744735267,
"learning_rate": 5e-06,
"loss": 0.1873,
"num_input_tokens_seen": 37671868,
"step": 222
},
{
"epoch": 0.0846359130766298,
"loss": 0.1732577085494995,
"loss_ce": 0.02451504021883011,
"loss_iou": 0.0107421875,
"loss_num": 0.1484375,
"loss_xval": 0.1484375,
"num_input_tokens_seen": 37671868,
"step": 222
},
{
"epoch": 0.08501715592832634,
"grad_norm": 3.7922150320746355,
"learning_rate": 5e-06,
"loss": 0.161,
"num_input_tokens_seen": 37844060,
"step": 223
},
{
"epoch": 0.08501715592832634,
"loss": 0.15339231491088867,
"loss_ce": 0.025523660704493523,
"loss_iou": 0.11865234375,
"loss_num": 0.1279296875,
"loss_xval": 0.1279296875,
"num_input_tokens_seen": 37844060,
"step": 223
},
{
"epoch": 0.08539839878002288,
"grad_norm": 3.8325377982480164,
"learning_rate": 5e-06,
"loss": 0.1522,
"num_input_tokens_seen": 38016136,
"step": 224
},
{
"epoch": 0.08539839878002288,
"loss": 0.15799179673194885,
"loss_ce": 0.026033777743577957,
"loss_iou": 0.125,
"loss_num": 0.1318359375,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 38016136,
"step": 224
},
{
"epoch": 0.0857796416317194,
"grad_norm": 3.7826073928821424,
"learning_rate": 5e-06,
"loss": 0.1455,
"num_input_tokens_seen": 38186392,
"step": 225
},
{
"epoch": 0.0857796416317194,
"loss": 0.12594732642173767,
"loss_ce": 0.02249274216592312,
"loss_iou": NaN,
"loss_num": 0.103515625,
"loss_xval": 0.103515625,
"num_input_tokens_seen": 38186392,
"step": 225
},
{
"epoch": 0.08616088448341594,
"grad_norm": 4.773532856351368,
"learning_rate": 5e-06,
"loss": 0.1563,
"num_input_tokens_seen": 38357024,
"step": 226
},
{
"epoch": 0.08616088448341594,
"loss": 0.16969117522239685,
"loss_ce": 0.06281863152980804,
"loss_iou": 0.2158203125,
"loss_num": 0.10693359375,
"loss_xval": 0.10693359375,
"num_input_tokens_seen": 38357024,
"step": 226
},
{
"epoch": 0.08654212733511246,
"grad_norm": 5.039765731931007,
"learning_rate": 5e-06,
"loss": 0.1523,
"num_input_tokens_seen": 38529420,
"step": 227
},
{
"epoch": 0.08654212733511246,
"loss": 0.14553721249103546,
"loss_ce": 0.02633555233478546,
"loss_iou": 0.11181640625,
"loss_num": 0.119140625,
"loss_xval": 0.119140625,
"num_input_tokens_seen": 38529420,
"step": 227
},
{
"epoch": 0.086923370186809,
"grad_norm": 4.455939335870368,
"learning_rate": 5e-06,
"loss": 0.1521,
"num_input_tokens_seen": 38694780,
"step": 228
},
{
"epoch": 0.086923370186809,
"loss": 0.1465291976928711,
"loss_ce": 0.02574063278734684,
"loss_iou": 0.1611328125,
"loss_num": 0.12060546875,
"loss_xval": 0.12060546875,
"num_input_tokens_seen": 38694780,
"step": 228
},
{
"epoch": 0.08730461303850552,
"grad_norm": 4.54430108149329,
"learning_rate": 5e-06,
"loss": 0.1282,
"num_input_tokens_seen": 38861664,
"step": 229
},
{
"epoch": 0.08730461303850552,
"loss": 0.1365373432636261,
"loss_ce": 0.021302981302142143,
"loss_iou": 0.041748046875,
"loss_num": 0.115234375,
"loss_xval": 0.115234375,
"num_input_tokens_seen": 38861664,
"step": 229
},
{
"epoch": 0.08768585589020206,
"grad_norm": 5.912647310779185,
"learning_rate": 5e-06,
"loss": 0.1484,
"num_input_tokens_seen": 39032148,
"step": 230
},
{
"epoch": 0.08768585589020206,
"loss": 0.1516009271144867,
"loss_ce": 0.024891935288906097,
"loss_iou": 0.197265625,
"loss_num": 0.126953125,
"loss_xval": 0.126953125,
"num_input_tokens_seen": 39032148,
"step": 230
},
{
"epoch": 0.0880670987418986,
"grad_norm": 7.303331811765866,
"learning_rate": 5e-06,
"loss": 0.1583,
"num_input_tokens_seen": 39204260,
"step": 231
},
{
"epoch": 0.0880670987418986,
"loss": 0.14178459346294403,
"loss_ce": 0.02050773799419403,
"loss_iou": 0.11279296875,
"loss_num": 0.12109375,
"loss_xval": 0.12109375,
"num_input_tokens_seen": 39204260,
"step": 231
},
{
"epoch": 0.08844834159359512,
"grad_norm": 8.680326100039322,
"learning_rate": 5e-06,
"loss": 0.1736,
"num_input_tokens_seen": 39374744,
"step": 232
},
{
"epoch": 0.08844834159359512,
"loss": 0.17430295050144196,
"loss_ce": 0.018236054107546806,
"loss_iou": 0.28515625,
"loss_num": 0.15625,
"loss_xval": 0.15625,
"num_input_tokens_seen": 39374744,
"step": 232
},
{
"epoch": 0.08882958444529165,
"grad_norm": 4.55783117181828,
"learning_rate": 5e-06,
"loss": 0.1472,
"num_input_tokens_seen": 39546800,
"step": 233
},
{
"epoch": 0.08882958444529165,
"loss": 0.1543968766927719,
"loss_ce": 0.024514062330126762,
"loss_iou": 0.1572265625,
"loss_num": 0.1298828125,
"loss_xval": 0.1298828125,
"num_input_tokens_seen": 39546800,
"step": 233
},
{
"epoch": 0.08921082729698818,
"grad_norm": 4.339806160257698,
"learning_rate": 5e-06,
"loss": 0.142,
"num_input_tokens_seen": 39717400,
"step": 234
},
{
"epoch": 0.08921082729698818,
"loss": 0.13223184645175934,
"loss_ce": 0.02438272535800934,
"loss_iou": 0.1455078125,
"loss_num": 0.10791015625,
"loss_xval": 0.10791015625,
"num_input_tokens_seen": 39717400,
"step": 234
},
{
"epoch": 0.08959207014868471,
"grad_norm": 4.11875762493815,
"learning_rate": 5e-06,
"loss": 0.1351,
"num_input_tokens_seen": 39889540,
"step": 235
},
{
"epoch": 0.08959207014868471,
"loss": 0.1536053717136383,
"loss_ce": 0.028544342145323753,
"loss_iou": 0.306640625,
"loss_num": 0.125,
"loss_xval": 0.125,
"num_input_tokens_seen": 39889540,
"step": 235
},
{
"epoch": 0.08997331300038124,
"grad_norm": 5.984822976025027,
"learning_rate": 5e-06,
"loss": 0.1403,
"num_input_tokens_seen": 40059768,
"step": 236
},
{
"epoch": 0.08997331300038124,
"loss": 0.1520293951034546,
"loss_ce": 0.02336728200316429,
"loss_iou": 0.3359375,
"loss_num": 0.12890625,
"loss_xval": 0.12890625,
"num_input_tokens_seen": 40059768,
"step": 236
},
{
"epoch": 0.09035455585207777,
"grad_norm": 4.255352081307566,
"learning_rate": 5e-06,
"loss": 0.1344,
"num_input_tokens_seen": 40228428,
"step": 237
},
{
"epoch": 0.09035455585207777,
"loss": 0.15023866295814514,
"loss_ce": 0.022247936576604843,
"loss_iou": 0.1708984375,
"loss_num": 0.1279296875,
"loss_xval": 0.1279296875,
"num_input_tokens_seen": 40228428,
"step": 237
},
{
"epoch": 0.09073579870377431,
"grad_norm": 5.5659532084663565,
"learning_rate": 5e-06,
"loss": 0.1479,
"num_input_tokens_seen": 40397980,
"step": 238
},
{
"epoch": 0.09073579870377431,
"loss": 0.13192544877529144,
"loss_ce": 0.01992594078183174,
"loss_iou": 0.27734375,
"loss_num": 0.11181640625,
"loss_xval": 0.11181640625,
"num_input_tokens_seen": 40397980,
"step": 238
},
{
"epoch": 0.09111704155547083,
"grad_norm": 5.8852256482709615,
"learning_rate": 5e-06,
"loss": 0.1309,
"num_input_tokens_seen": 40570488,
"step": 239
},
{
"epoch": 0.09111704155547083,
"loss": 0.11962257325649261,
"loss_ce": 0.017754895612597466,
"loss_iou": 0.0810546875,
"loss_num": 0.10205078125,
"loss_xval": 0.10205078125,
"num_input_tokens_seen": 40570488,
"step": 239
},
{
"epoch": 0.09149828440716737,
"grad_norm": 5.419783608703198,
"learning_rate": 5e-06,
"loss": 0.1448,
"num_input_tokens_seen": 40737024,
"step": 240
},
{
"epoch": 0.09149828440716737,
"loss": 0.13016757369041443,
"loss_ce": 0.029154382646083832,
"loss_iou": 0.029296875,
"loss_num": 0.10107421875,
"loss_xval": 0.10107421875,
"num_input_tokens_seen": 40737024,
"step": 240
},
{
"epoch": 0.09187952725886389,
"grad_norm": 4.535791208409517,
"learning_rate": 5e-06,
"loss": 0.1339,
"num_input_tokens_seen": 40909248,
"step": 241
},
{
"epoch": 0.09187952725886389,
"loss": 0.1285448521375656,
"loss_ce": 0.023961104452610016,
"loss_iou": 0.220703125,
"loss_num": 0.1044921875,
"loss_xval": 0.1044921875,
"num_input_tokens_seen": 40909248,
"step": 241
},
{
"epoch": 0.09226077011056043,
"grad_norm": 4.724704535695608,
"learning_rate": 5e-06,
"loss": 0.1225,
"num_input_tokens_seen": 41081556,
"step": 242
},
{
"epoch": 0.09226077011056043,
"loss": 0.12991374731063843,
"loss_ce": 0.017975281924009323,
"loss_iou": 0.236328125,
"loss_num": 0.11181640625,
"loss_xval": 0.11181640625,
"num_input_tokens_seen": 41081556,
"step": 242
},
{
"epoch": 0.09264201296225696,
"grad_norm": 4.031142861359274,
"learning_rate": 5e-06,
"loss": 0.1299,
"num_input_tokens_seen": 41251944,
"step": 243
},
{
"epoch": 0.09264201296225696,
"loss": 0.13619326055049896,
"loss_ce": 0.023766502737998962,
"loss_iou": 0.19921875,
"loss_num": 0.1123046875,
"loss_xval": 0.1123046875,
"num_input_tokens_seen": 41251944,
"step": 243
},
{
"epoch": 0.09302325581395349,
"grad_norm": 6.087561315245691,
"learning_rate": 5e-06,
"loss": 0.1404,
"num_input_tokens_seen": 41420360,
"step": 244
},
{
"epoch": 0.09302325581395349,
"loss": 0.1481267511844635,
"loss_ce": 0.029047157615423203,
"loss_iou": 0.2197265625,
"loss_num": 0.119140625,
"loss_xval": 0.119140625,
"num_input_tokens_seen": 41420360,
"step": 244
},
{
"epoch": 0.09340449866565002,
"grad_norm": 7.17961860113985,
"learning_rate": 5e-06,
"loss": 0.1359,
"num_input_tokens_seen": 41584420,
"step": 245
},
{
"epoch": 0.09340449866565002,
"loss": 0.12763813138008118,
"loss_ce": 0.01771380752325058,
"loss_iou": 0.251953125,
"loss_num": 0.10986328125,
"loss_xval": 0.10986328125,
"num_input_tokens_seen": 41584420,
"step": 245
},
{
"epoch": 0.09378574151734655,
"grad_norm": 6.115132953164955,
"learning_rate": 5e-06,
"loss": 0.1423,
"num_input_tokens_seen": 41754632,
"step": 246
},
{
"epoch": 0.09378574151734655,
"loss": 0.1468658596277237,
"loss_ce": 0.023574844002723694,
"loss_iou": 0.2353515625,
"loss_num": 0.123046875,
"loss_xval": 0.123046875,
"num_input_tokens_seen": 41754632,
"step": 246
},
{
"epoch": 0.09416698436904308,
"grad_norm": 4.518161429919516,
"learning_rate": 5e-06,
"loss": 0.1408,
"num_input_tokens_seen": 41925212,
"step": 247
},
{
"epoch": 0.09416698436904308,
"loss": 0.15290366113185883,
"loss_ce": 0.020762551575899124,
"loss_iou": 0.14453125,
"loss_num": 0.1318359375,
"loss_xval": 0.1318359375,
"num_input_tokens_seen": 41925212,
"step": 247
},
{
"epoch": 0.0945482272207396,
"grad_norm": 5.586149224771644,
"learning_rate": 5e-06,
"loss": 0.1305,
"num_input_tokens_seen": 42094112,
"step": 248
},
{
"epoch": 0.0945482272207396,
"loss": 0.14219020307064056,
"loss_ce": 0.04535793513059616,
"loss_iou": 0.052734375,
"loss_num": 0.0966796875,
"loss_xval": 0.0966796875,
"num_input_tokens_seen": 42094112,
"step": 248
},
{
"epoch": 0.09492947007243614,
"grad_norm": 5.9267599562533775,
"learning_rate": 5e-06,
"loss": 0.1433,
"num_input_tokens_seen": 42264276,
"step": 249
},
{
"epoch": 0.09492947007243614,
"loss": 0.12405319511890411,
"loss_ce": 0.016631316393613815,
"loss_iou": 0.115234375,
"loss_num": 0.107421875,
"loss_xval": 0.107421875,
"num_input_tokens_seen": 42264276,
"step": 249
},
{
"epoch": 0.09531071292413268,
"grad_norm": 5.056901931708464,
"learning_rate": 5e-06,
"loss": 0.118,
"num_input_tokens_seen": 42436288,
"step": 250
},
{
"epoch": 0.09531071292413268,
"eval_websight_new_CIoU": 0.43392522633075714,
"eval_websight_new_GIoU": 0.41100695729255676,
"eval_websight_new_IoU": 0.46178892254829407,
"eval_websight_new_MAE_all": 0.10471450164914131,
"eval_websight_new_MAE_h": 0.1256338357925415,
"eval_websight_new_MAE_w": 0.12052245810627937,
"eval_websight_new_MAE_x": 0.061581023037433624,
"eval_websight_new_MAE_y": 0.11112068220973015,
"eval_websight_new_NUM_probability": 0.8488701581954956,
"eval_websight_new_inside_bbox": 0.6961805522441864,
"eval_websight_new_loss": 0.10748042911291122,
"eval_websight_new_loss_ce": 0.016938342712819576,
"eval_websight_new_loss_iou": 0.46575927734375,
"eval_websight_new_loss_num": 0.087310791015625,
"eval_websight_new_loss_xval": 0.087310791015625,
"eval_websight_new_runtime": 61.5137,
"eval_websight_new_samples_per_second": 0.813,
"eval_websight_new_steps_per_second": 0.033,
"num_input_tokens_seen": 42436288,
"step": 250
},
{
"epoch": 0.09531071292413268,
"eval_seeclick_CIoU": 0.1753879114985466,
"eval_seeclick_GIoU": 0.10619913786649704,
"eval_seeclick_IoU": 0.25801894813776016,
"eval_seeclick_MAE_all": 0.1939466893672943,
"eval_seeclick_MAE_h": 0.17077196389436722,
"eval_seeclick_MAE_w": 0.21835819631814957,
"eval_seeclick_MAE_x": 0.21049726754426956,
"eval_seeclick_MAE_y": 0.1761593446135521,
"eval_seeclick_NUM_probability": 0.8450920283794403,
"eval_seeclick_inside_bbox": 0.4722222238779068,
"eval_seeclick_loss": 0.18936492502689362,
"eval_seeclick_loss_ce": 0.030919981189072132,
"eval_seeclick_loss_iou": 0.37200927734375,
"eval_seeclick_loss_num": 0.161529541015625,
"eval_seeclick_loss_xval": 0.161529541015625,
"eval_seeclick_runtime": 85.1383,
"eval_seeclick_samples_per_second": 0.587,
"eval_seeclick_steps_per_second": 0.023,
"num_input_tokens_seen": 42436288,
"step": 250
},
{
"epoch": 0.09531071292413268,
"eval_icons_CIoU": 0.28013716638088226,
"eval_icons_GIoU": 0.26097799837589264,
"eval_icons_IoU": 0.34551550447940826,
"eval_icons_MAE_all": 0.13232684880495071,
"eval_icons_MAE_h": 0.14004291594028473,
"eval_icons_MAE_w": 0.12625902891159058,
"eval_icons_MAE_x": 0.11041285842657089,
"eval_icons_MAE_y": 0.15259258449077606,
"eval_icons_NUM_probability": 0.8528884649276733,
"eval_icons_inside_bbox": 0.5277777910232544,
"eval_icons_loss": 0.10058583319187164,
"eval_icons_loss_ce": 0.01819693110883236,
"eval_icons_loss_iou": 0.073486328125,
"eval_icons_loss_num": 0.079833984375,
"eval_icons_loss_xval": 0.079833984375,
"eval_icons_runtime": 85.5343,
"eval_icons_samples_per_second": 0.585,
"eval_icons_steps_per_second": 0.023,
"num_input_tokens_seen": 42436288,
"step": 250
},
{
"epoch": 0.09531071292413268,
"eval_compot_CIoU": 0.29037410020828247,
"eval_compot_GIoU": 0.26138684898614883,
"eval_compot_IoU": 0.32414330542087555,
"eval_compot_MAE_all": 0.09265598654747009,
"eval_compot_MAE_h": 0.07443492859601974,
"eval_compot_MAE_w": 0.13811790198087692,
"eval_compot_MAE_x": 0.07125015556812286,
"eval_compot_MAE_y": 0.08682098612189293,
"eval_compot_NUM_probability": 0.8410935997962952,
"eval_compot_inside_bbox": 0.4913194477558136,
"eval_compot_loss": 0.09226094186306,
"eval_compot_loss_ce": 0.018759255297482014,
"eval_compot_loss_iou": 0.27276611328125,
"eval_compot_loss_num": 0.0702972412109375,
"eval_compot_loss_xval": 0.0702972412109375,
"eval_compot_runtime": 81.6599,
"eval_compot_samples_per_second": 0.612,
"eval_compot_steps_per_second": 0.024,
"num_input_tokens_seen": 42436288,
"step": 250
},
{
"epoch": 0.09531071292413268,
"eval_web_actions_CIoU": 0.2671080306172371,
"eval_web_actions_GIoU": 0.17311245203018188,
"eval_web_actions_IoU": 0.3291372060775757,
"eval_web_actions_MAE_all": 0.16092178970575333,
"eval_web_actions_MAE_h": 0.14207028597593307,
"eval_web_actions_MAE_w": 0.21995393186807632,
"eval_web_actions_MAE_x": 0.14041096717119217,
"eval_web_actions_MAE_y": 0.14125195145606995,
"eval_web_actions_NUM_probability": 0.8412942886352539,
"eval_web_actions_inside_bbox": 0.6614583432674408,
"eval_web_actions_loss": 0.2487429976463318,
"eval_web_actions_loss_ce": 0.09953882917761803,
"eval_web_actions_loss_iou": 0.287841796875,
"eval_web_actions_loss_num": 0.143218994140625,
"eval_web_actions_loss_xval": 0.143218994140625,
"eval_web_actions_runtime": 81.9846,
"eval_web_actions_samples_per_second": 0.573,
"eval_web_actions_steps_per_second": 0.024,
"num_input_tokens_seen": 42436288,
"step": 250
}
],
"logging_steps": 1.0,
"max_steps": 7869,
"num_input_tokens_seen": 42436288,
"num_train_epochs": 3,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 304855459102720.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}