{
"best_metric": 0.5832681059837341,
"best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_detect_scale4/lora/sft/checkpoint-550",
"epoch": 0.1545197012619109,
"eval_steps": 50,
"global_step": 600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012876641771825909,
"grad_norm": 13.245840411597928,
"learning_rate": 2.9411764705882355e-06,
"loss": 2.8889,
"num_input_tokens_seen": 52840,
"step": 5
},
{
"epoch": 0.0025753283543651817,
"grad_norm": 12.237619501215374,
"learning_rate": 5.882352941176471e-06,
"loss": 2.8165,
"num_input_tokens_seen": 105528,
"step": 10
},
{
"epoch": 0.0038629925315477724,
"grad_norm": 16.29688816410412,
"learning_rate": 8.823529411764707e-06,
"loss": 2.8363,
"num_input_tokens_seen": 158768,
"step": 15
},
{
"epoch": 0.0051506567087303634,
"grad_norm": 11.576419511120797,
"learning_rate": 1.1764705882352942e-05,
"loss": 2.6853,
"num_input_tokens_seen": 210816,
"step": 20
},
{
"epoch": 0.006438320885912954,
"grad_norm": 6.9672256792859,
"learning_rate": 1.4705882352941177e-05,
"loss": 2.2992,
"num_input_tokens_seen": 262936,
"step": 25
},
{
"epoch": 0.007725985063095545,
"grad_norm": 3.1837818528204305,
"learning_rate": 1.7647058823529414e-05,
"loss": 1.8923,
"num_input_tokens_seen": 315264,
"step": 30
},
{
"epoch": 0.009013649240278136,
"grad_norm": 2.835950303969337,
"learning_rate": 2.058823529411765e-05,
"loss": 1.6984,
"num_input_tokens_seen": 367840,
"step": 35
},
{
"epoch": 0.010301313417460727,
"grad_norm": 2.223740001042382,
"learning_rate": 2.3529411764705884e-05,
"loss": 1.6434,
"num_input_tokens_seen": 420112,
"step": 40
},
{
"epoch": 0.011588977594643318,
"grad_norm": 1.9880935044313244,
"learning_rate": 2.647058823529412e-05,
"loss": 1.4659,
"num_input_tokens_seen": 472728,
"step": 45
},
{
"epoch": 0.012876641771825908,
"grad_norm": 1.7151131700495934,
"learning_rate": 2.9411764705882354e-05,
"loss": 1.3506,
"num_input_tokens_seen": 524648,
"step": 50
},
{
"epoch": 0.012876641771825908,
"eval_loss": 1.1727452278137207,
"eval_runtime": 66.3207,
"eval_samples_per_second": 1.809,
"eval_steps_per_second": 0.452,
"num_input_tokens_seen": 524648,
"step": 50
},
{
"epoch": 0.014164305949008499,
"grad_norm": 1.47475981537851,
"learning_rate": 3.235294117647059e-05,
"loss": 1.1455,
"num_input_tokens_seen": 576472,
"step": 55
},
{
"epoch": 0.01545197012619109,
"grad_norm": 1.7476693647440722,
"learning_rate": 3.529411764705883e-05,
"loss": 0.9971,
"num_input_tokens_seen": 628056,
"step": 60
},
{
"epoch": 0.01673963430337368,
"grad_norm": 1.3384365493212875,
"learning_rate": 3.8235294117647055e-05,
"loss": 0.9073,
"num_input_tokens_seen": 680448,
"step": 65
},
{
"epoch": 0.018027298480556272,
"grad_norm": 0.9014358219807773,
"learning_rate": 4.11764705882353e-05,
"loss": 0.8386,
"num_input_tokens_seen": 733664,
"step": 70
},
{
"epoch": 0.01931496265773886,
"grad_norm": 0.8007820009902022,
"learning_rate": 4.411764705882353e-05,
"loss": 0.7827,
"num_input_tokens_seen": 786096,
"step": 75
},
{
"epoch": 0.020602626834921454,
"grad_norm": 0.6701003454307716,
"learning_rate": 4.705882352941177e-05,
"loss": 0.7814,
"num_input_tokens_seen": 838192,
"step": 80
},
{
"epoch": 0.021890291012104043,
"grad_norm": 0.8973165751658843,
"learning_rate": 5e-05,
"loss": 0.7297,
"num_input_tokens_seen": 890112,
"step": 85
},
{
"epoch": 0.023177955189286635,
"grad_norm": 0.9060968630490469,
"learning_rate": 5.294117647058824e-05,
"loss": 0.7894,
"num_input_tokens_seen": 943472,
"step": 90
},
{
"epoch": 0.024465619366469224,
"grad_norm": 0.9520214202472889,
"learning_rate": 5.588235294117647e-05,
"loss": 0.7758,
"num_input_tokens_seen": 996872,
"step": 95
},
{
"epoch": 0.025753283543651816,
"grad_norm": 0.8226006535044261,
"learning_rate": 5.882352941176471e-05,
"loss": 0.7577,
"num_input_tokens_seen": 1049816,
"step": 100
},
{
"epoch": 0.025753283543651816,
"eval_loss": 0.7517351508140564,
"eval_runtime": 38.7829,
"eval_samples_per_second": 3.094,
"eval_steps_per_second": 0.774,
"num_input_tokens_seen": 1049816,
"step": 100
},
{
"epoch": 0.027040947720834405,
"grad_norm": 0.7251208491150668,
"learning_rate": 6.176470588235295e-05,
"loss": 0.7579,
"num_input_tokens_seen": 1102584,
"step": 105
},
{
"epoch": 0.028328611898016998,
"grad_norm": 0.8217419839297042,
"learning_rate": 6.470588235294118e-05,
"loss": 0.7659,
"num_input_tokens_seen": 1155512,
"step": 110
},
{
"epoch": 0.029616276075199587,
"grad_norm": 0.6768053879888967,
"learning_rate": 6.764705882352942e-05,
"loss": 0.7469,
"num_input_tokens_seen": 1207976,
"step": 115
},
{
"epoch": 0.03090394025238218,
"grad_norm": 1.9562630849642013,
"learning_rate": 7.058823529411765e-05,
"loss": 0.7353,
"num_input_tokens_seen": 1259776,
"step": 120
},
{
"epoch": 0.03219160442956477,
"grad_norm": 0.6439041597153087,
"learning_rate": 7.352941176470589e-05,
"loss": 0.7537,
"num_input_tokens_seen": 1312760,
"step": 125
},
{
"epoch": 0.03347926860674736,
"grad_norm": 0.6124318582166212,
"learning_rate": 7.647058823529411e-05,
"loss": 0.7669,
"num_input_tokens_seen": 1365616,
"step": 130
},
{
"epoch": 0.03476693278392995,
"grad_norm": 0.7593534002488418,
"learning_rate": 7.941176470588235e-05,
"loss": 0.722,
"num_input_tokens_seen": 1417544,
"step": 135
},
{
"epoch": 0.036054596961112545,
"grad_norm": 0.7827834651032061,
"learning_rate": 8.23529411764706e-05,
"loss": 0.7502,
"num_input_tokens_seen": 1469856,
"step": 140
},
{
"epoch": 0.037342261138295134,
"grad_norm": 0.5444126155596626,
"learning_rate": 8.529411764705883e-05,
"loss": 0.7174,
"num_input_tokens_seen": 1521496,
"step": 145
},
{
"epoch": 0.03862992531547772,
"grad_norm": 0.40878703812837747,
"learning_rate": 8.823529411764706e-05,
"loss": 0.7018,
"num_input_tokens_seen": 1573376,
"step": 150
},
{
"epoch": 0.03862992531547772,
"eval_loss": 0.7309949994087219,
"eval_runtime": 38.2005,
"eval_samples_per_second": 3.141,
"eval_steps_per_second": 0.785,
"num_input_tokens_seen": 1573376,
"step": 150
},
{
"epoch": 0.03991758949266031,
"grad_norm": 0.5536144453733772,
"learning_rate": 9.11764705882353e-05,
"loss": 0.738,
"num_input_tokens_seen": 1626136,
"step": 155
},
{
"epoch": 0.04120525366984291,
"grad_norm": 0.5151715191704441,
"learning_rate": 9.411764705882353e-05,
"loss": 0.7579,
"num_input_tokens_seen": 1678760,
"step": 160
},
{
"epoch": 0.042492917847025496,
"grad_norm": 0.5209077394596254,
"learning_rate": 9.705882352941177e-05,
"loss": 0.7502,
"num_input_tokens_seen": 1731240,
"step": 165
},
{
"epoch": 0.043780582024208085,
"grad_norm": 0.721213601237688,
"learning_rate": 0.0001,
"loss": 0.7448,
"num_input_tokens_seen": 1783816,
"step": 170
},
{
"epoch": 0.045068246201390674,
"grad_norm": 0.48666007914879555,
"learning_rate": 9.999940874631277e-05,
"loss": 0.6648,
"num_input_tokens_seen": 1834592,
"step": 175
},
{
"epoch": 0.04635591037857327,
"grad_norm": 0.5136600613696797,
"learning_rate": 9.999763499923432e-05,
"loss": 0.7759,
"num_input_tokens_seen": 1888176,
"step": 180
},
{
"epoch": 0.04764357455575586,
"grad_norm": 0.6706281530046975,
"learning_rate": 9.999467880071402e-05,
"loss": 0.7167,
"num_input_tokens_seen": 1940280,
"step": 185
},
{
"epoch": 0.04893123873293845,
"grad_norm": 0.5159139445497618,
"learning_rate": 9.999054022066641e-05,
"loss": 0.7483,
"num_input_tokens_seen": 1993096,
"step": 190
},
{
"epoch": 0.050218902910121044,
"grad_norm": 0.40251006129746847,
"learning_rate": 9.998521935696953e-05,
"loss": 0.7464,
"num_input_tokens_seen": 2045648,
"step": 195
},
{
"epoch": 0.05150656708730363,
"grad_norm": 0.4811730853311867,
"learning_rate": 9.997871633546257e-05,
"loss": 0.7594,
"num_input_tokens_seen": 2099008,
"step": 200
},
{
"epoch": 0.05150656708730363,
"eval_loss": 0.7274295687675476,
"eval_runtime": 38.079,
"eval_samples_per_second": 3.151,
"eval_steps_per_second": 0.788,
"num_input_tokens_seen": 2099008,
"step": 200
},
{
"epoch": 0.05279423126448622,
"grad_norm": 0.591934959695668,
"learning_rate": 9.997103130994296e-05,
"loss": 0.706,
"num_input_tokens_seen": 2151680,
"step": 205
},
{
"epoch": 0.05408189544166881,
"grad_norm": 0.48253717444489286,
"learning_rate": 9.996216446216267e-05,
"loss": 0.7186,
"num_input_tokens_seen": 2203784,
"step": 210
},
{
"epoch": 0.055369559618851406,
"grad_norm": 0.5274315079401322,
"learning_rate": 9.995211600182397e-05,
"loss": 0.7009,
"num_input_tokens_seen": 2255632,
"step": 215
},
{
"epoch": 0.056657223796033995,
"grad_norm": 0.32879215224292613,
"learning_rate": 9.994088616657444e-05,
"loss": 0.6801,
"num_input_tokens_seen": 2308096,
"step": 220
},
{
"epoch": 0.057944887973216584,
"grad_norm": 0.37171195071448215,
"learning_rate": 9.992847522200133e-05,
"loss": 0.7569,
"num_input_tokens_seen": 2361168,
"step": 225
},
{
"epoch": 0.05923255215039917,
"grad_norm": 0.4120941016934064,
"learning_rate": 9.99148834616253e-05,
"loss": 0.7402,
"num_input_tokens_seen": 2413896,
"step": 230
},
{
"epoch": 0.06052021632758177,
"grad_norm": 0.5998680948310651,
"learning_rate": 9.990011120689351e-05,
"loss": 0.7191,
"num_input_tokens_seen": 2466136,
"step": 235
},
{
"epoch": 0.06180788050476436,
"grad_norm": 0.538488141249078,
"learning_rate": 9.988415880717194e-05,
"loss": 0.7274,
"num_input_tokens_seen": 2518848,
"step": 240
},
{
"epoch": 0.06309554468194695,
"grad_norm": 0.4393093124760277,
"learning_rate": 9.986702663973722e-05,
"loss": 0.7704,
"num_input_tokens_seen": 2572384,
"step": 245
},
{
"epoch": 0.06438320885912954,
"grad_norm": 0.6116643616510118,
"learning_rate": 9.98487151097676e-05,
"loss": 0.7346,
"num_input_tokens_seen": 2625352,
"step": 250
},
{
"epoch": 0.06438320885912954,
"eval_loss": 0.7181503176689148,
"eval_runtime": 38.0986,
"eval_samples_per_second": 3.15,
"eval_steps_per_second": 0.787,
"num_input_tokens_seen": 2625352,
"step": 250
},
{
"epoch": 0.06567087303631212,
"grad_norm": 0.41200227731339506,
"learning_rate": 9.98292246503335e-05,
"loss": 0.7408,
"num_input_tokens_seen": 2678216,
"step": 255
},
{
"epoch": 0.06695853721349472,
"grad_norm": 0.44521059732114987,
"learning_rate": 9.980855572238714e-05,
"loss": 0.7044,
"num_input_tokens_seen": 2730664,
"step": 260
},
{
"epoch": 0.06824620139067732,
"grad_norm": 0.571896859428363,
"learning_rate": 9.978670881475172e-05,
"loss": 0.7334,
"num_input_tokens_seen": 2783584,
"step": 265
},
{
"epoch": 0.0695338655678599,
"grad_norm": 0.3907697039722125,
"learning_rate": 9.976368444410985e-05,
"loss": 0.7075,
"num_input_tokens_seen": 2836152,
"step": 270
},
{
"epoch": 0.0708215297450425,
"grad_norm": 0.4507806825752261,
"learning_rate": 9.973948315499126e-05,
"loss": 0.7039,
"num_input_tokens_seen": 2887808,
"step": 275
},
{
"epoch": 0.07210919392222509,
"grad_norm": 0.41330504132984697,
"learning_rate": 9.971410551976002e-05,
"loss": 0.6953,
"num_input_tokens_seen": 2939656,
"step": 280
},
{
"epoch": 0.07339685809940767,
"grad_norm": 0.4625671909482009,
"learning_rate": 9.968755213860094e-05,
"loss": 0.7022,
"num_input_tokens_seen": 2991632,
"step": 285
},
{
"epoch": 0.07468452227659027,
"grad_norm": 0.6553627840267285,
"learning_rate": 9.96598236395054e-05,
"loss": 0.6796,
"num_input_tokens_seen": 3043616,
"step": 290
},
{
"epoch": 0.07597218645377285,
"grad_norm": 0.5157886895754477,
"learning_rate": 9.96309206782565e-05,
"loss": 0.7346,
"num_input_tokens_seen": 3096920,
"step": 295
},
{
"epoch": 0.07725985063095545,
"grad_norm": 0.5672965149433489,
"learning_rate": 9.960084393841355e-05,
"loss": 0.6815,
"num_input_tokens_seen": 3149032,
"step": 300
},
{
"epoch": 0.07725985063095545,
"eval_loss": 0.7073924541473389,
"eval_runtime": 38.1842,
"eval_samples_per_second": 3.143,
"eval_steps_per_second": 0.786,
"num_input_tokens_seen": 3149032,
"step": 300
},
{
"epoch": 0.07854751480813804,
"grad_norm": 0.4479276285203507,
"learning_rate": 9.956959413129585e-05,
"loss": 0.7208,
"num_input_tokens_seen": 3201560,
"step": 305
},
{
"epoch": 0.07983517898532062,
"grad_norm": 0.368457437106614,
"learning_rate": 9.953717199596598e-05,
"loss": 0.7144,
"num_input_tokens_seen": 3254632,
"step": 310
},
{
"epoch": 0.08112284316250322,
"grad_norm": 0.5531413254856732,
"learning_rate": 9.95035782992122e-05,
"loss": 0.6861,
"num_input_tokens_seen": 3306432,
"step": 315
},
{
"epoch": 0.08241050733968582,
"grad_norm": 0.41513991799613037,
"learning_rate": 9.94688138355304e-05,
"loss": 0.6836,
"num_input_tokens_seen": 3358392,
"step": 320
},
{
"epoch": 0.0836981715168684,
"grad_norm": 0.47052274706452957,
"learning_rate": 9.943287942710527e-05,
"loss": 0.7353,
"num_input_tokens_seen": 3411424,
"step": 325
},
{
"epoch": 0.08498583569405099,
"grad_norm": 0.6322586593511644,
"learning_rate": 9.939577592379088e-05,
"loss": 0.6774,
"num_input_tokens_seen": 3462992,
"step": 330
},
{
"epoch": 0.08627349987123359,
"grad_norm": 0.4129597798905344,
"learning_rate": 9.935750420309055e-05,
"loss": 0.7331,
"num_input_tokens_seen": 3516136,
"step": 335
},
{
"epoch": 0.08756116404841617,
"grad_norm": 0.4031509882699161,
"learning_rate": 9.931806517013612e-05,
"loss": 0.6939,
"num_input_tokens_seen": 3568360,
"step": 340
},
{
"epoch": 0.08884882822559877,
"grad_norm": 0.4444358747076587,
"learning_rate": 9.927745975766654e-05,
"loss": 0.7158,
"num_input_tokens_seen": 3620696,
"step": 345
},
{
"epoch": 0.09013649240278135,
"grad_norm": 0.5290547365449167,
"learning_rate": 9.923568892600578e-05,
"loss": 0.6932,
"num_input_tokens_seen": 3673152,
"step": 350
},
{
"epoch": 0.09013649240278135,
"eval_loss": 0.7044599056243896,
"eval_runtime": 38.2709,
"eval_samples_per_second": 3.136,
"eval_steps_per_second": 0.784,
"num_input_tokens_seen": 3673152,
"step": 350
},
{
"epoch": 0.09142415657996394,
"grad_norm": 0.47530311368359207,
"learning_rate": 9.91927536630402e-05,
"loss": 0.6778,
"num_input_tokens_seen": 3725296,
"step": 355
},
{
"epoch": 0.09271182075714654,
"grad_norm": 0.38913022785688944,
"learning_rate": 9.91486549841951e-05,
"loss": 0.6857,
"num_input_tokens_seen": 3777552,
"step": 360
},
{
"epoch": 0.09399948493432912,
"grad_norm": 0.4834773141333328,
"learning_rate": 9.91033939324107e-05,
"loss": 0.7184,
"num_input_tokens_seen": 3830200,
"step": 365
},
{
"epoch": 0.09528714911151172,
"grad_norm": 0.5862045807150876,
"learning_rate": 9.905697157811761e-05,
"loss": 0.7196,
"num_input_tokens_seen": 3883200,
"step": 370
},
{
"epoch": 0.09657481328869431,
"grad_norm": 0.4576971522205563,
"learning_rate": 9.900938901921131e-05,
"loss": 0.6914,
"num_input_tokens_seen": 3935576,
"step": 375
},
{
"epoch": 0.0978624774658769,
"grad_norm": 0.49551517524520683,
"learning_rate": 9.896064738102635e-05,
"loss": 0.6681,
"num_input_tokens_seen": 3987624,
"step": 380
},
{
"epoch": 0.09915014164305949,
"grad_norm": 0.8198390819787913,
"learning_rate": 9.891074781630966e-05,
"loss": 0.6723,
"num_input_tokens_seen": 4039680,
"step": 385
},
{
"epoch": 0.10043780582024209,
"grad_norm": 0.7034626469978683,
"learning_rate": 9.885969150519331e-05,
"loss": 0.6498,
"num_input_tokens_seen": 4091216,
"step": 390
},
{
"epoch": 0.10172546999742467,
"grad_norm": 0.8838075623197742,
"learning_rate": 9.88074796551666e-05,
"loss": 0.7311,
"num_input_tokens_seen": 4144264,
"step": 395
},
{
"epoch": 0.10301313417460727,
"grad_norm": 0.7342758386202114,
"learning_rate": 9.875411350104744e-05,
"loss": 0.7089,
"num_input_tokens_seen": 4197072,
"step": 400
},
{
"epoch": 0.10301313417460727,
"eval_loss": 0.6847750544548035,
"eval_runtime": 37.9238,
"eval_samples_per_second": 3.164,
"eval_steps_per_second": 0.791,
"num_input_tokens_seen": 4197072,
"step": 400
},
{
"epoch": 0.10430079835178985,
"grad_norm": 0.8113533605928532,
"learning_rate": 9.86995943049533e-05,
"loss": 0.7021,
"num_input_tokens_seen": 4249656,
"step": 405
},
{
"epoch": 0.10558846252897244,
"grad_norm": 1.1772677082041305,
"learning_rate": 9.864392335627117e-05,
"loss": 0.6943,
"num_input_tokens_seen": 4302944,
"step": 410
},
{
"epoch": 0.10687612670615504,
"grad_norm": 1.6493280510697776,
"learning_rate": 9.858710197162721e-05,
"loss": 0.7146,
"num_input_tokens_seen": 4355480,
"step": 415
},
{
"epoch": 0.10816379088333762,
"grad_norm": 3.0159798803441715,
"learning_rate": 9.852913149485556e-05,
"loss": 0.6312,
"num_input_tokens_seen": 4407688,
"step": 420
},
{
"epoch": 0.10945145506052022,
"grad_norm": 1.7981196843056153,
"learning_rate": 9.847001329696653e-05,
"loss": 0.6877,
"num_input_tokens_seen": 4459736,
"step": 425
},
{
"epoch": 0.11073911923770281,
"grad_norm": 1.5783278376799834,
"learning_rate": 9.840974877611422e-05,
"loss": 0.6975,
"num_input_tokens_seen": 4512928,
"step": 430
},
{
"epoch": 0.1120267834148854,
"grad_norm": 3.306646516615779,
"learning_rate": 9.834833935756344e-05,
"loss": 0.651,
"num_input_tokens_seen": 4565840,
"step": 435
},
{
"epoch": 0.11331444759206799,
"grad_norm": 2.3184973874904005,
"learning_rate": 9.828578649365601e-05,
"loss": 0.685,
"num_input_tokens_seen": 4618168,
"step": 440
},
{
"epoch": 0.11460211176925057,
"grad_norm": 1.602690016495642,
"learning_rate": 9.822209166377635e-05,
"loss": 0.6258,
"num_input_tokens_seen": 4669784,
"step": 445
},
{
"epoch": 0.11588977594643317,
"grad_norm": 2.6770797227308196,
"learning_rate": 9.815725637431662e-05,
"loss": 0.6732,
"num_input_tokens_seen": 4722528,
"step": 450
},
{
"epoch": 0.11588977594643317,
"eval_loss": 0.6526497006416321,
"eval_runtime": 39.085,
"eval_samples_per_second": 3.07,
"eval_steps_per_second": 0.768,
"num_input_tokens_seen": 4722528,
"step": 450
},
{
"epoch": 0.11717744012361576,
"grad_norm": 2.1823349329218074,
"learning_rate": 9.809128215864097e-05,
"loss": 0.6544,
"num_input_tokens_seen": 4774400,
"step": 455
},
{
"epoch": 0.11846510430079835,
"grad_norm": 1.434521593914191,
"learning_rate": 9.802417057704931e-05,
"loss": 0.652,
"num_input_tokens_seen": 4826704,
"step": 460
},
{
"epoch": 0.11975276847798094,
"grad_norm": 2.399754385687283,
"learning_rate": 9.795592321674045e-05,
"loss": 0.6582,
"num_input_tokens_seen": 4880072,
"step": 465
},
{
"epoch": 0.12104043265516354,
"grad_norm": 3.9235176077985536,
"learning_rate": 9.788654169177453e-05,
"loss": 0.6506,
"num_input_tokens_seen": 4931968,
"step": 470
},
{
"epoch": 0.12232809683234612,
"grad_norm": 3.659330745777227,
"learning_rate": 9.781602764303487e-05,
"loss": 0.6551,
"num_input_tokens_seen": 4983656,
"step": 475
},
{
"epoch": 0.12361576100952872,
"grad_norm": 1.9670601503398757,
"learning_rate": 9.774438273818911e-05,
"loss": 0.6978,
"num_input_tokens_seen": 5036528,
"step": 480
},
{
"epoch": 0.12490342518671131,
"grad_norm": 1.308580869419328,
"learning_rate": 9.767160867164979e-05,
"loss": 0.6407,
"num_input_tokens_seen": 5088768,
"step": 485
},
{
"epoch": 0.1261910893638939,
"grad_norm": 1.7349486072682865,
"learning_rate": 9.759770716453436e-05,
"loss": 0.6641,
"num_input_tokens_seen": 5142080,
"step": 490
},
{
"epoch": 0.1274787535410765,
"grad_norm": 2.993327939872198,
"learning_rate": 9.752267996462434e-05,
"loss": 0.6588,
"num_input_tokens_seen": 5194432,
"step": 495
},
{
"epoch": 0.12876641771825909,
"grad_norm": 2.6430988002320976,
"learning_rate": 9.744652884632406e-05,
"loss": 0.6304,
"num_input_tokens_seen": 5246640,
"step": 500
},
{
"epoch": 0.12876641771825909,
"eval_loss": 0.6272165775299072,
"eval_runtime": 39.4177,
"eval_samples_per_second": 3.044,
"eval_steps_per_second": 0.761,
"num_input_tokens_seen": 5246640,
"step": 500
},
{
"epoch": 0.13005408189544168,
"grad_norm": 2.6047672112920286,
"learning_rate": 9.736925561061871e-05,
"loss": 0.5741,
"num_input_tokens_seen": 5299024,
"step": 505
},
{
"epoch": 0.13134174607262425,
"grad_norm": 2.4706517190834063,
"learning_rate": 9.729086208503174e-05,
"loss": 0.6535,
"num_input_tokens_seen": 5352664,
"step": 510
},
{
"epoch": 0.13262941024980685,
"grad_norm": 2.031672226684599,
"learning_rate": 9.721135012358156e-05,
"loss": 0.6081,
"num_input_tokens_seen": 5406008,
"step": 515
},
{
"epoch": 0.13391707442698944,
"grad_norm": 2.773997809426142,
"learning_rate": 9.713072160673777e-05,
"loss": 0.6792,
"num_input_tokens_seen": 5459368,
"step": 520
},
{
"epoch": 0.13520473860417204,
"grad_norm": 5.083057729524855,
"learning_rate": 9.704897844137673e-05,
"loss": 0.6821,
"num_input_tokens_seen": 5512960,
"step": 525
},
{
"epoch": 0.13649240278135463,
"grad_norm": 3.0440654843385584,
"learning_rate": 9.696612256073633e-05,
"loss": 0.5835,
"num_input_tokens_seen": 5565368,
"step": 530
},
{
"epoch": 0.1377800669585372,
"grad_norm": 3.7400231170971323,
"learning_rate": 9.688215592437039e-05,
"loss": 0.6129,
"num_input_tokens_seen": 5618008,
"step": 535
},
{
"epoch": 0.1390677311357198,
"grad_norm": 6.340287952379529,
"learning_rate": 9.679708051810221e-05,
"loss": 0.5765,
"num_input_tokens_seen": 5670072,
"step": 540
},
{
"epoch": 0.1403553953129024,
"grad_norm": 3.6351560550229207,
"learning_rate": 9.67108983539777e-05,
"loss": 0.6325,
"num_input_tokens_seen": 5722936,
"step": 545
},
{
"epoch": 0.141643059490085,
"grad_norm": 3.8363425916745117,
"learning_rate": 9.662361147021779e-05,
"loss": 0.5596,
"num_input_tokens_seen": 5774880,
"step": 550
},
{
"epoch": 0.141643059490085,
"eval_loss": 0.5832681059837341,
"eval_runtime": 38.2495,
"eval_samples_per_second": 3.137,
"eval_steps_per_second": 0.784,
"num_input_tokens_seen": 5774880,
"step": 550
},
{
"epoch": 0.14293072366726758,
"grad_norm": 3.911447203674744,
"learning_rate": 9.653522193117013e-05,
"loss": 0.5073,
"num_input_tokens_seen": 5826608,
"step": 555
},
{
"epoch": 0.14421838784445018,
"grad_norm": 3.3501835856945763,
"learning_rate": 9.644573182726035e-05,
"loss": 0.5652,
"num_input_tokens_seen": 5879776,
"step": 560
},
{
"epoch": 0.14550605202163275,
"grad_norm": 8.75758822201328,
"learning_rate": 9.63551432749426e-05,
"loss": 0.5727,
"num_input_tokens_seen": 5932888,
"step": 565
},
{
"epoch": 0.14679371619881534,
"grad_norm": 4.351029258458384,
"learning_rate": 9.626345841664953e-05,
"loss": 0.6251,
"num_input_tokens_seen": 5984648,
"step": 570
},
{
"epoch": 0.14808138037599794,
"grad_norm": 7.617020699535255,
"learning_rate": 9.617067942074153e-05,
"loss": 0.6508,
"num_input_tokens_seen": 6037000,
"step": 575
},
{
"epoch": 0.14936904455318054,
"grad_norm": 7.293430172750479,
"learning_rate": 9.607680848145558e-05,
"loss": 0.6686,
"num_input_tokens_seen": 6090512,
"step": 580
},
{
"epoch": 0.15065670873036313,
"grad_norm": 3.3635276124166653,
"learning_rate": 9.598184781885318e-05,
"loss": 0.5793,
"num_input_tokens_seen": 6143320,
"step": 585
},
{
"epoch": 0.1519443729075457,
"grad_norm": 2.7589160396339407,
"learning_rate": 9.588579967876806e-05,
"loss": 0.5954,
"num_input_tokens_seen": 6195720,
"step": 590
},
{
"epoch": 0.1532320370847283,
"grad_norm": 1.582169884399532,
"learning_rate": 9.578866633275288e-05,
"loss": 0.5644,
"num_input_tokens_seen": 6247592,
"step": 595
},
{
"epoch": 0.1545197012619109,
"grad_norm": 3.891844940061855,
"learning_rate": 9.569045007802559e-05,
"loss": 0.5794,
"num_input_tokens_seen": 6299656,
"step": 600
},
{
"epoch": 0.1545197012619109,
"eval_loss": 0.6039358973503113,
"eval_runtime": 38.3138,
"eval_samples_per_second": 3.132,
"eval_steps_per_second": 0.783,
"num_input_tokens_seen": 6299656,
"step": 600
}
],
"logging_steps": 5,
"max_steps": 3400,
"num_input_tokens_seen": 6299656,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 415517873799168.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}