|
{
|
|
"best_metric": 0.16011084616184235,
|
|
"best_model_checkpoint": "d:\\\\whisper-medium-pt-cv16-fleurs2-lr\\checkpoint-5000",
|
|
"epoch": 11.671335200746965,
|
|
"eval_steps": 5000,
|
|
"global_step": 25000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.011671335200746966,
|
|
"grad_norm": 18.578170776367188,
|
|
"learning_rate": 2.875e-08,
|
|
"loss": 0.7382,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.02334267040149393,
|
|
"grad_norm": 37.2661247253418,
|
|
"learning_rate": 5.8750000000000007e-08,
|
|
"loss": 1.2823,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.0350140056022409,
|
|
"grad_norm": 14.59357738494873,
|
|
"learning_rate": 9e-08,
|
|
"loss": 0.7512,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.04668534080298786,
|
|
"grad_norm": 37.37008285522461,
|
|
"learning_rate": 1.2125e-07,
|
|
"loss": 1.2251,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.05835667600373483,
|
|
"grad_norm": 14.07325553894043,
|
|
"learning_rate": 1.5250000000000002e-07,
|
|
"loss": 0.6841,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.0700280112044818,
|
|
"grad_norm": 29.391014099121094,
|
|
"learning_rate": 1.8375000000000001e-07,
|
|
"loss": 1.1131,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.08169934640522876,
|
|
"grad_norm": 12.7340087890625,
|
|
"learning_rate": 2.15e-07,
|
|
"loss": 0.5693,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.09337068160597572,
|
|
"grad_norm": 26.895967483520508,
|
|
"learning_rate": 2.4624999999999997e-07,
|
|
"loss": 0.7876,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.10504201680672269,
|
|
"grad_norm": 8.66826057434082,
|
|
"learning_rate": 2.7750000000000004e-07,
|
|
"loss": 0.2992,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.11671335200746966,
|
|
"grad_norm": 25.45290756225586,
|
|
"learning_rate": 3.0875e-07,
|
|
"loss": 0.4386,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.1283846872082166,
|
|
"grad_norm": 10.706913948059082,
|
|
"learning_rate": 3.4e-07,
|
|
"loss": 0.214,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.1400560224089636,
|
|
"grad_norm": 19.541522979736328,
|
|
"learning_rate": 3.7125000000000005e-07,
|
|
"loss": 0.345,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.15172735760971054,
|
|
"grad_norm": 6.879134654998779,
|
|
"learning_rate": 4.025e-07,
|
|
"loss": 0.2445,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.16339869281045752,
|
|
"grad_norm": 23.4116268157959,
|
|
"learning_rate": 4.3375000000000003e-07,
|
|
"loss": 0.3121,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.17507002801120447,
|
|
"grad_norm": 7.257847785949707,
|
|
"learning_rate": 4.65e-07,
|
|
"loss": 0.2013,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.18674136321195145,
|
|
"grad_norm": 11.849136352539062,
|
|
"learning_rate": 4.9625e-07,
|
|
"loss": 0.2626,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.1984126984126984,
|
|
"grad_norm": 7.970053195953369,
|
|
"learning_rate": 5.275e-07,
|
|
"loss": 0.2131,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.21008403361344538,
|
|
"grad_norm": 19.375301361083984,
|
|
"learning_rate": 5.587499999999999e-07,
|
|
"loss": 0.2652,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.22175536881419233,
|
|
"grad_norm": 10.925929069519043,
|
|
"learning_rate": 5.9e-07,
|
|
"loss": 0.2046,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.2334267040149393,
|
|
"grad_norm": 20.24220085144043,
|
|
"learning_rate": 6.212500000000001e-07,
|
|
"loss": 0.2571,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.24509803921568626,
|
|
"grad_norm": 8.06639575958252,
|
|
"learning_rate": 6.525000000000001e-07,
|
|
"loss": 0.2157,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.2567693744164332,
|
|
"grad_norm": 16.64718246459961,
|
|
"learning_rate": 6.8375e-07,
|
|
"loss": 0.2745,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.2684407096171802,
|
|
"grad_norm": 8.912382125854492,
|
|
"learning_rate": 7.15e-07,
|
|
"loss": 0.1999,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.2801120448179272,
|
|
"grad_norm": 16.385353088378906,
|
|
"learning_rate": 7.462500000000001e-07,
|
|
"loss": 0.2234,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.29178338001867415,
|
|
"grad_norm": 4.625432968139648,
|
|
"learning_rate": 7.775e-07,
|
|
"loss": 0.1847,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.3034547152194211,
|
|
"grad_norm": 30.452251434326172,
|
|
"learning_rate": 8.0875e-07,
|
|
"loss": 0.2338,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.31512605042016806,
|
|
"grad_norm": 9.238980293273926,
|
|
"learning_rate": 8.4e-07,
|
|
"loss": 0.1883,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.32679738562091504,
|
|
"grad_norm": 19.22330093383789,
|
|
"learning_rate": 8.7125e-07,
|
|
"loss": 0.2387,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.338468720821662,
|
|
"grad_norm": 3.236504316329956,
|
|
"learning_rate": 9.025e-07,
|
|
"loss": 0.162,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.35014005602240894,
|
|
"grad_norm": 18.75830078125,
|
|
"learning_rate": 9.337500000000001e-07,
|
|
"loss": 0.2217,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.3618113912231559,
|
|
"grad_norm": 4.1586198806762695,
|
|
"learning_rate": 9.65e-07,
|
|
"loss": 0.1739,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.3734827264239029,
|
|
"grad_norm": 16.235074996948242,
|
|
"learning_rate": 9.9625e-07,
|
|
"loss": 0.227,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.3851540616246499,
|
|
"grad_norm": 7.601698398590088,
|
|
"learning_rate": 1.0275e-06,
|
|
"loss": 0.1764,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.3968253968253968,
|
|
"grad_norm": 17.789981842041016,
|
|
"learning_rate": 1.05875e-06,
|
|
"loss": 0.2138,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.4084967320261438,
|
|
"grad_norm": 5.891432285308838,
|
|
"learning_rate": 1.0900000000000002e-06,
|
|
"loss": 0.1753,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.42016806722689076,
|
|
"grad_norm": 21.724348068237305,
|
|
"learning_rate": 1.12125e-06,
|
|
"loss": 0.206,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.43183940242763774,
|
|
"grad_norm": 5.313950538635254,
|
|
"learning_rate": 1.1525000000000002e-06,
|
|
"loss": 0.167,
|
|
"step": 925
|
|
},
|
|
{
|
|
"epoch": 0.44351073762838467,
|
|
"grad_norm": 11.306236267089844,
|
|
"learning_rate": 1.18375e-06,
|
|
"loss": 0.1974,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.45518207282913165,
|
|
"grad_norm": 8.582784652709961,
|
|
"learning_rate": 1.215e-06,
|
|
"loss": 0.192,
|
|
"step": 975
|
|
},
|
|
{
|
|
"epoch": 0.4668534080298786,
|
|
"grad_norm": 14.138503074645996,
|
|
"learning_rate": 1.24625e-06,
|
|
"loss": 0.1969,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.4785247432306256,
|
|
"grad_norm": 5.119744777679443,
|
|
"learning_rate": 1.2775e-06,
|
|
"loss": 0.1781,
|
|
"step": 1025
|
|
},
|
|
{
|
|
"epoch": 0.49019607843137253,
|
|
"grad_norm": 16.845916748046875,
|
|
"learning_rate": 1.3087500000000002e-06,
|
|
"loss": 0.1714,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.5018674136321195,
|
|
"grad_norm": 8.487195014953613,
|
|
"learning_rate": 1.34e-06,
|
|
"loss": 0.1784,
|
|
"step": 1075
|
|
},
|
|
{
|
|
"epoch": 0.5135387488328664,
|
|
"grad_norm": 10.874380111694336,
|
|
"learning_rate": 1.3712500000000002e-06,
|
|
"loss": 0.2085,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.5252100840336135,
|
|
"grad_norm": 5.2850661277771,
|
|
"learning_rate": 1.4025e-06,
|
|
"loss": 0.1731,
|
|
"step": 1125
|
|
},
|
|
{
|
|
"epoch": 0.5368814192343604,
|
|
"grad_norm": 12.621524810791016,
|
|
"learning_rate": 1.43375e-06,
|
|
"loss": 0.2066,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.5485527544351074,
|
|
"grad_norm": 6.700886249542236,
|
|
"learning_rate": 1.465e-06,
|
|
"loss": 0.1599,
|
|
"step": 1175
|
|
},
|
|
{
|
|
"epoch": 0.5602240896358543,
|
|
"grad_norm": 13.19000244140625,
|
|
"learning_rate": 1.49625e-06,
|
|
"loss": 0.1982,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.5718954248366013,
|
|
"grad_norm": 5.436820030212402,
|
|
"learning_rate": 1.5275000000000002e-06,
|
|
"loss": 0.1671,
|
|
"step": 1225
|
|
},
|
|
{
|
|
"epoch": 0.5835667600373483,
|
|
"grad_norm": 14.979127883911133,
|
|
"learning_rate": 1.5587500000000001e-06,
|
|
"loss": 0.192,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.5952380952380952,
|
|
"grad_norm": 4.490325450897217,
|
|
"learning_rate": 1.5900000000000002e-06,
|
|
"loss": 0.1731,
|
|
"step": 1275
|
|
},
|
|
{
|
|
"epoch": 0.6069094304388422,
|
|
"grad_norm": 17.244354248046875,
|
|
"learning_rate": 1.6212500000000001e-06,
|
|
"loss": 0.1763,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.6185807656395892,
|
|
"grad_norm": 8.09378433227539,
|
|
"learning_rate": 1.6525000000000003e-06,
|
|
"loss": 0.1511,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 0.6302521008403361,
|
|
"grad_norm": 13.42496395111084,
|
|
"learning_rate": 1.68375e-06,
|
|
"loss": 0.2064,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.6419234360410832,
|
|
"grad_norm": 7.0393385887146,
|
|
"learning_rate": 1.7149999999999999e-06,
|
|
"loss": 0.1703,
|
|
"step": 1375
|
|
},
|
|
{
|
|
"epoch": 0.6535947712418301,
|
|
"grad_norm": 11.562192916870117,
|
|
"learning_rate": 1.74625e-06,
|
|
"loss": 0.1743,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.665266106442577,
|
|
"grad_norm": 8.296894073486328,
|
|
"learning_rate": 1.7775e-06,
|
|
"loss": 0.1762,
|
|
"step": 1425
|
|
},
|
|
{
|
|
"epoch": 0.676937441643324,
|
|
"grad_norm": 15.146247863769531,
|
|
"learning_rate": 1.80875e-06,
|
|
"loss": 0.1877,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.688608776844071,
|
|
"grad_norm": 6.557362079620361,
|
|
"learning_rate": 1.84e-06,
|
|
"loss": 0.1948,
|
|
"step": 1475
|
|
},
|
|
{
|
|
"epoch": 0.7002801120448179,
|
|
"grad_norm": 9.744128227233887,
|
|
"learning_rate": 1.87125e-06,
|
|
"loss": 0.188,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.7119514472455649,
|
|
"grad_norm": 6.373684883117676,
|
|
"learning_rate": 1.9025000000000002e-06,
|
|
"loss": 0.1795,
|
|
"step": 1525
|
|
},
|
|
{
|
|
"epoch": 0.7236227824463118,
|
|
"grad_norm": 12.318848609924316,
|
|
"learning_rate": 1.9337500000000003e-06,
|
|
"loss": 0.16,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.7352941176470589,
|
|
"grad_norm": 8.32919979095459,
|
|
"learning_rate": 1.9650000000000002e-06,
|
|
"loss": 0.1559,
|
|
"step": 1575
|
|
},
|
|
{
|
|
"epoch": 0.7469654528478058,
|
|
"grad_norm": 14.127927780151367,
|
|
"learning_rate": 1.99625e-06,
|
|
"loss": 0.18,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.7586367880485527,
|
|
"grad_norm": 4.867166519165039,
|
|
"learning_rate": 2.0275e-06,
|
|
"loss": 0.1502,
|
|
"step": 1625
|
|
},
|
|
{
|
|
"epoch": 0.7703081232492998,
|
|
"grad_norm": 10.735671043395996,
|
|
"learning_rate": 2.0587500000000004e-06,
|
|
"loss": 0.1798,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.7819794584500467,
|
|
"grad_norm": 7.60561990737915,
|
|
"learning_rate": 2.09e-06,
|
|
"loss": 0.1532,
|
|
"step": 1675
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"grad_norm": 14.279719352722168,
|
|
"learning_rate": 2.12125e-06,
|
|
"loss": 0.1761,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.8053221288515406,
|
|
"grad_norm": 6.583901882171631,
|
|
"learning_rate": 2.1525e-06,
|
|
"loss": 0.162,
|
|
"step": 1725
|
|
},
|
|
{
|
|
"epoch": 0.8169934640522876,
|
|
"grad_norm": 12.237863540649414,
|
|
"learning_rate": 2.18375e-06,
|
|
"loss": 0.1644,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.8286647992530346,
|
|
"grad_norm": 5.648594379425049,
|
|
"learning_rate": 2.215e-06,
|
|
"loss": 0.142,
|
|
"step": 1775
|
|
},
|
|
{
|
|
"epoch": 0.8403361344537815,
|
|
"grad_norm": 15.333111763000488,
|
|
"learning_rate": 2.24625e-06,
|
|
"loss": 0.1932,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.8520074696545284,
|
|
"grad_norm": 7.390342712402344,
|
|
"learning_rate": 2.2775000000000002e-06,
|
|
"loss": 0.1663,
|
|
"step": 1825
|
|
},
|
|
{
|
|
"epoch": 0.8636788048552755,
|
|
"grad_norm": 15.47307300567627,
|
|
"learning_rate": 2.30875e-06,
|
|
"loss": 0.1864,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.8753501400560224,
|
|
"grad_norm": 3.5743496417999268,
|
|
"learning_rate": 2.34e-06,
|
|
"loss": 0.1625,
|
|
"step": 1875
|
|
},
|
|
{
|
|
"epoch": 0.8870214752567693,
|
|
"grad_norm": 12.931510925292969,
|
|
"learning_rate": 2.3712500000000004e-06,
|
|
"loss": 0.1674,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.8986928104575164,
|
|
"grad_norm": 10.445046424865723,
|
|
"learning_rate": 2.4025000000000003e-06,
|
|
"loss": 0.1551,
|
|
"step": 1925
|
|
},
|
|
{
|
|
"epoch": 0.9103641456582633,
|
|
"grad_norm": 15.884492874145508,
|
|
"learning_rate": 2.43375e-06,
|
|
"loss": 0.1797,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.9220354808590103,
|
|
"grad_norm": 3.6354966163635254,
|
|
"learning_rate": 2.465e-06,
|
|
"loss": 0.1726,
|
|
"step": 1975
|
|
},
|
|
{
|
|
"epoch": 0.9337068160597572,
|
|
"grad_norm": 10.952392578125,
|
|
"learning_rate": 2.49625e-06,
|
|
"loss": 0.1579,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.9453781512605042,
|
|
"grad_norm": 5.575680255889893,
|
|
"learning_rate": 2.5275e-06,
|
|
"loss": 0.1607,
|
|
"step": 2025
|
|
},
|
|
{
|
|
"epoch": 0.9570494864612512,
|
|
"grad_norm": 9.334450721740723,
|
|
"learning_rate": 2.55875e-06,
|
|
"loss": 0.1819,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.9687208216619981,
|
|
"grad_norm": 5.0021443367004395,
|
|
"learning_rate": 2.59e-06,
|
|
"loss": 0.1721,
|
|
"step": 2075
|
|
},
|
|
{
|
|
"epoch": 0.9803921568627451,
|
|
"grad_norm": 11.442158699035645,
|
|
"learning_rate": 2.62125e-06,
|
|
"loss": 0.1696,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 0.9920634920634921,
|
|
"grad_norm": 7.345489978790283,
|
|
"learning_rate": 2.6525e-06,
|
|
"loss": 0.1547,
|
|
"step": 2125
|
|
},
|
|
{
|
|
"epoch": 1.003734827264239,
|
|
"grad_norm": 3.4940426349639893,
|
|
"learning_rate": 2.6837500000000004e-06,
|
|
"loss": 0.1642,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 1.015406162464986,
|
|
"grad_norm": 5.598668098449707,
|
|
"learning_rate": 2.7150000000000003e-06,
|
|
"loss": 0.0863,
|
|
"step": 2175
|
|
},
|
|
{
|
|
"epoch": 1.0270774976657329,
|
|
"grad_norm": 3.780421495437622,
|
|
"learning_rate": 2.74625e-06,
|
|
"loss": 0.136,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 1.03874883286648,
|
|
"grad_norm": 3.801037311553955,
|
|
"learning_rate": 2.7775e-06,
|
|
"loss": 0.0962,
|
|
"step": 2225
|
|
},
|
|
{
|
|
"epoch": 1.050420168067227,
|
|
"grad_norm": 4.065105438232422,
|
|
"learning_rate": 2.8087500000000004e-06,
|
|
"loss": 0.1418,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 1.0620915032679739,
|
|
"grad_norm": 4.889001369476318,
|
|
"learning_rate": 2.8400000000000003e-06,
|
|
"loss": 0.1161,
|
|
"step": 2275
|
|
},
|
|
{
|
|
"epoch": 1.0737628384687208,
|
|
"grad_norm": 6.3269944190979,
|
|
"learning_rate": 2.87125e-06,
|
|
"loss": 0.1363,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 1.0854341736694677,
|
|
"grad_norm": 6.666966438293457,
|
|
"learning_rate": 2.9025e-06,
|
|
"loss": 0.1177,
|
|
"step": 2325
|
|
},
|
|
{
|
|
"epoch": 1.0971055088702149,
|
|
"grad_norm": 3.8940012454986572,
|
|
"learning_rate": 2.93375e-06,
|
|
"loss": 0.1297,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 1.1087768440709618,
|
|
"grad_norm": 5.189432144165039,
|
|
"learning_rate": 2.965e-06,
|
|
"loss": 0.1149,
|
|
"step": 2375
|
|
},
|
|
{
|
|
"epoch": 1.1204481792717087,
|
|
"grad_norm": 4.866479873657227,
|
|
"learning_rate": 2.99625e-06,
|
|
"loss": 0.1341,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 1.1321195144724556,
|
|
"grad_norm": 7.036620140075684,
|
|
"learning_rate": 3.0275000000000002e-06,
|
|
"loss": 0.102,
|
|
"step": 2425
|
|
},
|
|
{
|
|
"epoch": 1.1437908496732025,
|
|
"grad_norm": 4.124939441680908,
|
|
"learning_rate": 3.05875e-06,
|
|
"loss": 0.1483,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 1.1554621848739495,
|
|
"grad_norm": 6.803956508636475,
|
|
"learning_rate": 3.09e-06,
|
|
"loss": 0.095,
|
|
"step": 2475
|
|
},
|
|
{
|
|
"epoch": 1.1671335200746966,
|
|
"grad_norm": 4.783279895782471,
|
|
"learning_rate": 3.1212500000000004e-06,
|
|
"loss": 0.1458,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 1.1788048552754435,
|
|
"grad_norm": 5.567852020263672,
|
|
"learning_rate": 3.1525e-06,
|
|
"loss": 0.104,
|
|
"step": 2525
|
|
},
|
|
{
|
|
"epoch": 1.1904761904761905,
|
|
"grad_norm": 4.637757301330566,
|
|
"learning_rate": 3.18375e-06,
|
|
"loss": 0.1335,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 1.2021475256769374,
|
|
"grad_norm": 7.655948162078857,
|
|
"learning_rate": 3.215e-06,
|
|
"loss": 0.1229,
|
|
"step": 2575
|
|
},
|
|
{
|
|
"epoch": 1.2138188608776843,
|
|
"grad_norm": 4.771119117736816,
|
|
"learning_rate": 3.24625e-06,
|
|
"loss": 0.1543,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 1.2254901960784315,
|
|
"grad_norm": 5.286261558532715,
|
|
"learning_rate": 3.2775e-06,
|
|
"loss": 0.1093,
|
|
"step": 2625
|
|
},
|
|
{
|
|
"epoch": 1.2371615312791784,
|
|
"grad_norm": 4.742598533630371,
|
|
"learning_rate": 3.30875e-06,
|
|
"loss": 0.1587,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 1.2488328664799253,
|
|
"grad_norm": 7.248344898223877,
|
|
"learning_rate": 3.34e-06,
|
|
"loss": 0.0946,
|
|
"step": 2675
|
|
},
|
|
{
|
|
"epoch": 1.2605042016806722,
|
|
"grad_norm": 4.244619846343994,
|
|
"learning_rate": 3.37125e-06,
|
|
"loss": 0.1408,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 1.2721755368814192,
|
|
"grad_norm": 4.698862075805664,
|
|
"learning_rate": 3.4025e-06,
|
|
"loss": 0.1115,
|
|
"step": 2725
|
|
},
|
|
{
|
|
"epoch": 1.283846872082166,
|
|
"grad_norm": 5.453229904174805,
|
|
"learning_rate": 3.4337500000000004e-06,
|
|
"loss": 0.1377,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 1.2955182072829132,
|
|
"grad_norm": 5.513113498687744,
|
|
"learning_rate": 3.4650000000000003e-06,
|
|
"loss": 0.1052,
|
|
"step": 2775
|
|
},
|
|
{
|
|
"epoch": 1.3071895424836601,
|
|
"grad_norm": 4.546627521514893,
|
|
"learning_rate": 3.49625e-06,
|
|
"loss": 0.1326,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 1.318860877684407,
|
|
"grad_norm": 7.161789894104004,
|
|
"learning_rate": 3.5275e-06,
|
|
"loss": 0.106,
|
|
"step": 2825
|
|
},
|
|
{
|
|
"epoch": 1.330532212885154,
|
|
"grad_norm": 4.164399147033691,
|
|
"learning_rate": 3.5587500000000004e-06,
|
|
"loss": 0.1358,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 1.3422035480859011,
|
|
"grad_norm": 5.428344249725342,
|
|
"learning_rate": 3.5900000000000004e-06,
|
|
"loss": 0.0949,
|
|
"step": 2875
|
|
},
|
|
{
|
|
"epoch": 1.353874883286648,
|
|
"grad_norm": 6.0663275718688965,
|
|
"learning_rate": 3.6212500000000003e-06,
|
|
"loss": 0.1361,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 1.365546218487395,
|
|
"grad_norm": 6.05164909362793,
|
|
"learning_rate": 3.6525e-06,
|
|
"loss": 0.1098,
|
|
"step": 2925
|
|
},
|
|
{
|
|
"epoch": 1.377217553688142,
|
|
"grad_norm": 5.027311325073242,
|
|
"learning_rate": 3.6837500000000005e-06,
|
|
"loss": 0.1299,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 1.3888888888888888,
|
|
"grad_norm": 7.7624006271362305,
|
|
"learning_rate": 3.7150000000000004e-06,
|
|
"loss": 0.1109,
|
|
"step": 2975
|
|
},
|
|
{
|
|
"epoch": 1.4005602240896358,
|
|
"grad_norm": 5.019223213195801,
|
|
"learning_rate": 3.7462500000000003e-06,
|
|
"loss": 0.1648,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 1.4122315592903827,
|
|
"grad_norm": 6.924857139587402,
|
|
"learning_rate": 3.7775000000000007e-06,
|
|
"loss": 0.1078,
|
|
"step": 3025
|
|
},
|
|
{
|
|
"epoch": 1.4239028944911298,
|
|
"grad_norm": 6.098647594451904,
|
|
"learning_rate": 3.8087500000000006e-06,
|
|
"loss": 0.1511,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 1.4355742296918768,
|
|
"grad_norm": 8.425399780273438,
|
|
"learning_rate": 3.84e-06,
|
|
"loss": 0.1191,
|
|
"step": 3075
|
|
},
|
|
{
|
|
"epoch": 1.4472455648926237,
|
|
"grad_norm": 5.609083652496338,
|
|
"learning_rate": 3.8712499999999996e-06,
|
|
"loss": 0.142,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 1.4589169000933706,
|
|
"grad_norm": 6.1147050857543945,
|
|
"learning_rate": 3.9025e-06,
|
|
"loss": 0.0996,
|
|
"step": 3125
|
|
},
|
|
{
|
|
"epoch": 1.4705882352941178,
|
|
"grad_norm": 2.957465648651123,
|
|
"learning_rate": 3.93375e-06,
|
|
"loss": 0.1439,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 1.4822595704948647,
|
|
"grad_norm": 3.472870111465454,
|
|
"learning_rate": 3.965e-06,
|
|
"loss": 0.0993,
|
|
"step": 3175
|
|
},
|
|
{
|
|
"epoch": 1.4939309056956116,
|
|
"grad_norm": 5.71575927734375,
|
|
"learning_rate": 3.99625e-06,
|
|
"loss": 0.1438,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 1.5056022408963585,
|
|
"grad_norm": 3.8211705684661865,
|
|
"learning_rate": 4.0275e-06,
|
|
"loss": 0.0941,
|
|
"step": 3225
|
|
},
|
|
{
|
|
"epoch": 1.5172735760971054,
|
|
"grad_norm": 5.43381929397583,
|
|
"learning_rate": 4.05875e-06,
|
|
"loss": 0.1565,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 1.5289449112978524,
|
|
"grad_norm": 5.146786212921143,
|
|
"learning_rate": 4.09e-06,
|
|
"loss": 0.1125,
|
|
"step": 3275
|
|
},
|
|
{
|
|
"epoch": 1.5406162464985993,
|
|
"grad_norm": 4.331883430480957,
|
|
"learning_rate": 4.12125e-06,
|
|
"loss": 0.1393,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 1.5522875816993464,
|
|
"grad_norm": 7.666718482971191,
|
|
"learning_rate": 4.1525000000000005e-06,
|
|
"loss": 0.116,
|
|
"step": 3325
|
|
},
|
|
{
|
|
"epoch": 1.5639589169000934,
|
|
"grad_norm": 3.27604079246521,
|
|
"learning_rate": 4.18375e-06,
|
|
"loss": 0.1487,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 1.5756302521008403,
|
|
"grad_norm": 8.548047065734863,
|
|
"learning_rate": 4.215e-06,
|
|
"loss": 0.114,
|
|
"step": 3375
|
|
},
|
|
{
|
|
"epoch": 1.5873015873015874,
|
|
"grad_norm": 3.315171003341675,
|
|
"learning_rate": 4.24625e-06,
|
|
"loss": 0.1372,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 1.5989729225023344,
|
|
"grad_norm": 6.508883953094482,
|
|
"learning_rate": 4.2775e-06,
|
|
"loss": 0.1054,
|
|
"step": 3425
|
|
},
|
|
{
|
|
"epoch": 1.6106442577030813,
|
|
"grad_norm": 4.305449485778809,
|
|
"learning_rate": 4.30875e-06,
|
|
"loss": 0.1449,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 1.6223155929038282,
|
|
"grad_norm": 8.238191604614258,
|
|
"learning_rate": 4.34e-06,
|
|
"loss": 0.1052,
|
|
"step": 3475
|
|
},
|
|
{
|
|
"epoch": 1.6339869281045751,
|
|
"grad_norm": 3.1781492233276367,
|
|
"learning_rate": 4.371250000000001e-06,
|
|
"loss": 0.1462,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 1.645658263305322,
|
|
"grad_norm": 5.8778557777404785,
|
|
"learning_rate": 4.402500000000001e-06,
|
|
"loss": 0.1213,
|
|
"step": 3525
|
|
},
|
|
{
|
|
"epoch": 1.657329598506069,
|
|
"grad_norm": 3.6947333812713623,
|
|
"learning_rate": 4.4337500000000005e-06,
|
|
"loss": 0.1626,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 1.669000933706816,
|
|
"grad_norm": 7.086148738861084,
|
|
"learning_rate": 4.4650000000000004e-06,
|
|
"loss": 0.1254,
|
|
"step": 3575
|
|
},
|
|
{
|
|
"epoch": 1.680672268907563,
|
|
"grad_norm": 4.603717803955078,
|
|
"learning_rate": 4.49625e-06,
|
|
"loss": 0.1403,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 1.69234360410831,
|
|
"grad_norm": 4.92815637588501,
|
|
"learning_rate": 4.5275e-06,
|
|
"loss": 0.1032,
|
|
"step": 3625
|
|
},
|
|
{
|
|
"epoch": 1.7040149393090571,
|
|
"grad_norm": 3.701477289199829,
|
|
"learning_rate": 4.55875e-06,
|
|
"loss": 0.1349,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 1.715686274509804,
|
|
"grad_norm": 13.451651573181152,
|
|
"learning_rate": 4.590000000000001e-06,
|
|
"loss": 0.1238,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 1.727357609710551,
|
|
"grad_norm": 5.369861125946045,
|
|
"learning_rate": 4.62125e-06,
|
|
"loss": 0.1596,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 1.739028944911298,
|
|
"grad_norm": 6.7364702224731445,
|
|
"learning_rate": 4.6525e-06,
|
|
"loss": 0.1288,
|
|
"step": 3725
|
|
},
|
|
{
|
|
"epoch": 1.7507002801120448,
|
|
"grad_norm": 5.925997734069824,
|
|
"learning_rate": 4.68375e-06,
|
|
"loss": 0.1462,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 1.7623716153127917,
|
|
"grad_norm": 4.7654829025268555,
|
|
"learning_rate": 4.715e-06,
|
|
"loss": 0.1146,
|
|
"step": 3775
|
|
},
|
|
{
|
|
"epoch": 1.7740429505135387,
|
|
"grad_norm": 3.232302188873291,
|
|
"learning_rate": 4.74625e-06,
|
|
"loss": 0.1341,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 1.7857142857142856,
|
|
"grad_norm": 6.663305282592773,
|
|
"learning_rate": 4.7775e-06,
|
|
"loss": 0.1047,
|
|
"step": 3825
|
|
},
|
|
{
|
|
"epoch": 1.7973856209150327,
|
|
"grad_norm": 4.3404340744018555,
|
|
"learning_rate": 4.80875e-06,
|
|
"loss": 0.1425,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 1.8090569561157797,
|
|
"grad_norm": 7.439436912536621,
|
|
"learning_rate": 4.84e-06,
|
|
"loss": 0.1056,
|
|
"step": 3875
|
|
},
|
|
{
|
|
"epoch": 1.8207282913165266,
|
|
"grad_norm": 4.493560314178467,
|
|
"learning_rate": 4.87125e-06,
|
|
"loss": 0.152,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 1.8323996265172737,
|
|
"grad_norm": 8.294795036315918,
|
|
"learning_rate": 4.9025e-06,
|
|
"loss": 0.1048,
|
|
"step": 3925
|
|
},
|
|
{
|
|
"epoch": 1.8440709617180207,
|
|
"grad_norm": 4.7361884117126465,
|
|
"learning_rate": 4.93375e-06,
|
|
"loss": 0.1424,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 1.8557422969187676,
|
|
"grad_norm": 6.927464485168457,
|
|
"learning_rate": 4.965e-06,
|
|
"loss": 0.0982,
|
|
"step": 3975
|
|
},
|
|
{
|
|
"epoch": 1.8674136321195145,
|
|
"grad_norm": 6.300534248352051,
|
|
"learning_rate": 4.996250000000001e-06,
|
|
"loss": 0.1273,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 1.8790849673202614,
|
|
"grad_norm": 4.410505294799805,
|
|
"learning_rate": 5.0275000000000006e-06,
|
|
"loss": 0.1223,
|
|
"step": 4025
|
|
},
|
|
{
|
|
"epoch": 1.8907563025210083,
|
|
"grad_norm": 7.067946434020996,
|
|
"learning_rate": 5.0587500000000005e-06,
|
|
"loss": 0.1447,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 1.9024276377217553,
|
|
"grad_norm": 5.033799171447754,
|
|
"learning_rate": 5.09e-06,
|
|
"loss": 0.1066,
|
|
"step": 4075
|
|
},
|
|
{
|
|
"epoch": 1.9140989729225022,
|
|
"grad_norm": 4.994957447052002,
|
|
"learning_rate": 5.12e-06,
|
|
"loss": 0.14,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 1.9257703081232493,
|
|
"grad_norm": 7.51298189163208,
|
|
"learning_rate": 5.151250000000001e-06,
|
|
"loss": 0.1065,
|
|
"step": 4125
|
|
},
|
|
{
|
|
"epoch": 1.9374416433239963,
|
|
"grad_norm": 4.488656997680664,
|
|
"learning_rate": 5.182500000000001e-06,
|
|
"loss": 0.1495,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 1.9491129785247432,
|
|
"grad_norm": 6.328264236450195,
|
|
"learning_rate": 5.213750000000001e-06,
|
|
"loss": 0.1047,
|
|
"step": 4175
|
|
},
|
|
{
|
|
"epoch": 1.9607843137254903,
|
|
"grad_norm": 3.9831016063690186,
|
|
"learning_rate": 5.245e-06,
|
|
"loss": 0.1483,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 1.9724556489262373,
|
|
"grad_norm": 9.204850196838379,
|
|
"learning_rate": 5.27625e-06,
|
|
"loss": 0.1076,
|
|
"step": 4225
|
|
},
|
|
{
|
|
"epoch": 1.9841269841269842,
|
|
"grad_norm": 4.560220241546631,
|
|
"learning_rate": 5.3075e-06,
|
|
"loss": 0.1319,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 1.995798319327731,
|
|
"grad_norm": 6.272380828857422,
|
|
"learning_rate": 5.33875e-06,
|
|
"loss": 0.1127,
|
|
"step": 4275
|
|
},
|
|
{
|
|
"epoch": 2.007469654528478,
|
|
"grad_norm": 2.819856882095337,
|
|
"learning_rate": 5.37e-06,
|
|
"loss": 0.0898,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 2.019140989729225,
|
|
"grad_norm": 3.3237640857696533,
|
|
"learning_rate": 5.40125e-06,
|
|
"loss": 0.0556,
|
|
"step": 4325
|
|
},
|
|
{
|
|
"epoch": 2.030812324929972,
|
|
"grad_norm": 2.4771931171417236,
|
|
"learning_rate": 5.4325e-06,
|
|
"loss": 0.0807,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 2.042483660130719,
|
|
"grad_norm": 3.640773296356201,
|
|
"learning_rate": 5.46375e-06,
|
|
"loss": 0.0528,
|
|
"step": 4375
|
|
},
|
|
{
|
|
"epoch": 2.0541549953314657,
|
|
"grad_norm": 4.329100131988525,
|
|
"learning_rate": 5.495e-06,
|
|
"loss": 0.0852,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 2.065826330532213,
|
|
"grad_norm": 4.2357916831970215,
|
|
"learning_rate": 5.52625e-06,
|
|
"loss": 0.0473,
|
|
"step": 4425
|
|
},
|
|
{
|
|
"epoch": 2.07749766573296,
|
|
"grad_norm": 4.033267974853516,
|
|
"learning_rate": 5.557500000000001e-06,
|
|
"loss": 0.0802,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 2.089169000933707,
|
|
"grad_norm": 6.817841529846191,
|
|
"learning_rate": 5.5887500000000005e-06,
|
|
"loss": 0.0602,
|
|
"step": 4475
|
|
},
|
|
{
|
|
"epoch": 2.100840336134454,
|
|
"grad_norm": 4.568445682525635,
|
|
"learning_rate": 5.62e-06,
|
|
"loss": 0.083,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 2.112511671335201,
|
|
"grad_norm": 8.219367980957031,
|
|
"learning_rate": 5.65125e-06,
|
|
"loss": 0.066,
|
|
"step": 4525
|
|
},
|
|
{
|
|
"epoch": 2.1241830065359477,
|
|
"grad_norm": 1.9845637083053589,
|
|
"learning_rate": 5.6825e-06,
|
|
"loss": 0.0878,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 2.1358543417366946,
|
|
"grad_norm": 7.169174671173096,
|
|
"learning_rate": 5.71375e-06,
|
|
"loss": 0.0651,
|
|
"step": 4575
|
|
},
|
|
{
|
|
"epoch": 2.1475256769374416,
|
|
"grad_norm": 3.6445248126983643,
|
|
"learning_rate": 5.745e-06,
|
|
"loss": 0.1006,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 2.1591970121381885,
|
|
"grad_norm": 4.955069541931152,
|
|
"learning_rate": 5.776250000000001e-06,
|
|
"loss": 0.0572,
|
|
"step": 4625
|
|
},
|
|
{
|
|
"epoch": 2.1708683473389354,
|
|
"grad_norm": 3.909029245376587,
|
|
"learning_rate": 5.807500000000001e-06,
|
|
"loss": 0.0841,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 2.1825396825396823,
|
|
"grad_norm": 2.1477255821228027,
|
|
"learning_rate": 5.838750000000001e-06,
|
|
"loss": 0.0717,
|
|
"step": 4675
|
|
},
|
|
{
|
|
"epoch": 2.1942110177404297,
|
|
"grad_norm": 5.743031978607178,
|
|
"learning_rate": 5.8700000000000005e-06,
|
|
"loss": 0.0808,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 2.2058823529411766,
|
|
"grad_norm": 4.362875461578369,
|
|
"learning_rate": 5.9012500000000005e-06,
|
|
"loss": 0.0609,
|
|
"step": 4725
|
|
},
|
|
{
|
|
"epoch": 2.2175536881419236,
|
|
"grad_norm": 3.8198697566986084,
|
|
"learning_rate": 5.9325e-06,
|
|
"loss": 0.0892,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 2.2292250233426705,
|
|
"grad_norm": 3.9962849617004395,
|
|
"learning_rate": 5.96375e-06,
|
|
"loss": 0.0656,
|
|
"step": 4775
|
|
},
|
|
{
|
|
"epoch": 2.2408963585434174,
|
|
"grad_norm": 2.613006830215454,
|
|
"learning_rate": 5.995e-06,
|
|
"loss": 0.0812,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 2.2525676937441643,
|
|
"grad_norm": 5.6209540367126465,
|
|
"learning_rate": 6.02625e-06,
|
|
"loss": 0.0814,
|
|
"step": 4825
|
|
},
|
|
{
|
|
"epoch": 2.2642390289449112,
|
|
"grad_norm": 4.349456787109375,
|
|
"learning_rate": 6.0575e-06,
|
|
"loss": 0.084,
|
|
"step": 4850
|
|
},
|
|
{
|
|
"epoch": 2.275910364145658,
|
|
"grad_norm": 6.475245952606201,
|
|
"learning_rate": 6.08875e-06,
|
|
"loss": 0.0798,
|
|
"step": 4875
|
|
},
|
|
{
|
|
"epoch": 2.287581699346405,
|
|
"grad_norm": 2.503551959991455,
|
|
"learning_rate": 6.12e-06,
|
|
"loss": 0.0836,
|
|
"step": 4900
|
|
},
|
|
{
|
|
"epoch": 2.299253034547152,
|
|
"grad_norm": 7.365092754364014,
|
|
"learning_rate": 6.15125e-06,
|
|
"loss": 0.07,
|
|
"step": 4925
|
|
},
|
|
{
|
|
"epoch": 2.310924369747899,
|
|
"grad_norm": 4.494143486022949,
|
|
"learning_rate": 6.1825e-06,
|
|
"loss": 0.1019,
|
|
"step": 4950
|
|
},
|
|
{
|
|
"epoch": 2.3225957049486463,
|
|
"grad_norm": 9.088369369506836,
|
|
"learning_rate": 6.2137500000000004e-06,
|
|
"loss": 0.0741,
|
|
"step": 4975
|
|
},
|
|
{
|
|
"epoch": 2.3342670401493932,
|
|
"grad_norm": 3.5182809829711914,
|
|
"learning_rate": 6.245e-06,
|
|
"loss": 0.0856,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 2.3342670401493932,
|
|
"eval_loss": 0.16011084616184235,
|
|
"eval_runtime": 6234.0593,
|
|
"eval_samples_per_second": 1.51,
|
|
"eval_steps_per_second": 0.189,
|
|
"eval_wer": 0.103035685451316,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 2.34593837535014,
|
|
"grad_norm": 3.4111175537109375,
|
|
"learning_rate": 6.2434375e-06,
|
|
"loss": 0.0694,
|
|
"step": 5025
|
|
},
|
|
{
|
|
"epoch": 2.357609710550887,
|
|
"grad_norm": 5.693511486053467,
|
|
"learning_rate": 6.235625e-06,
|
|
"loss": 0.0952,
|
|
"step": 5050
|
|
},
|
|
{
|
|
"epoch": 2.369281045751634,
|
|
"grad_norm": 7.079166412353516,
|
|
"learning_rate": 6.2278125e-06,
|
|
"loss": 0.0605,
|
|
"step": 5075
|
|
},
|
|
{
|
|
"epoch": 2.380952380952381,
|
|
"grad_norm": 2.9197869300842285,
|
|
"learning_rate": 6.22e-06,
|
|
"loss": 0.0941,
|
|
"step": 5100
|
|
},
|
|
{
|
|
"epoch": 2.392623716153128,
|
|
"grad_norm": 6.79939603805542,
|
|
"learning_rate": 6.2121875e-06,
|
|
"loss": 0.0725,
|
|
"step": 5125
|
|
},
|
|
{
|
|
"epoch": 2.404295051353875,
|
|
"grad_norm": 6.609640121459961,
|
|
"learning_rate": 6.204375e-06,
|
|
"loss": 0.0976,
|
|
"step": 5150
|
|
},
|
|
{
|
|
"epoch": 2.4159663865546217,
|
|
"grad_norm": 7.591739654541016,
|
|
"learning_rate": 6.196562500000001e-06,
|
|
"loss": 0.0697,
|
|
"step": 5175
|
|
},
|
|
{
|
|
"epoch": 2.4276377217553686,
|
|
"grad_norm": 7.580626964569092,
|
|
"learning_rate": 6.18875e-06,
|
|
"loss": 0.0921,
|
|
"step": 5200
|
|
},
|
|
{
|
|
"epoch": 2.439309056956116,
|
|
"grad_norm": 7.333129405975342,
|
|
"learning_rate": 6.1809375000000005e-06,
|
|
"loss": 0.0727,
|
|
"step": 5225
|
|
},
|
|
{
|
|
"epoch": 2.450980392156863,
|
|
"grad_norm": 6.632033348083496,
|
|
"learning_rate": 6.173125e-06,
|
|
"loss": 0.0891,
|
|
"step": 5250
|
|
},
|
|
{
|
|
"epoch": 2.46265172735761,
|
|
"grad_norm": 5.526509761810303,
|
|
"learning_rate": 6.165312500000001e-06,
|
|
"loss": 0.077,
|
|
"step": 5275
|
|
},
|
|
{
|
|
"epoch": 2.4743230625583568,
|
|
"grad_norm": 6.0735602378845215,
|
|
"learning_rate": 6.1575e-06,
|
|
"loss": 0.101,
|
|
"step": 5300
|
|
},
|
|
{
|
|
"epoch": 2.4859943977591037,
|
|
"grad_norm": 7.87660026550293,
|
|
"learning_rate": 6.1496875000000006e-06,
|
|
"loss": 0.0737,
|
|
"step": 5325
|
|
},
|
|
{
|
|
"epoch": 2.4976657329598506,
|
|
"grad_norm": 4.01476526260376,
|
|
"learning_rate": 6.141875e-06,
|
|
"loss": 0.0928,
|
|
"step": 5350
|
|
},
|
|
{
|
|
"epoch": 2.5093370681605975,
|
|
"grad_norm": 5.005721569061279,
|
|
"learning_rate": 6.1340625e-06,
|
|
"loss": 0.0717,
|
|
"step": 5375
|
|
},
|
|
{
|
|
"epoch": 2.5210084033613445,
|
|
"grad_norm": 5.76194429397583,
|
|
"learning_rate": 6.12625e-06,
|
|
"loss": 0.0922,
|
|
"step": 5400
|
|
},
|
|
{
|
|
"epoch": 2.5326797385620914,
|
|
"grad_norm": 5.3504157066345215,
|
|
"learning_rate": 6.1184375e-06,
|
|
"loss": 0.0658,
|
|
"step": 5425
|
|
},
|
|
{
|
|
"epoch": 2.5443510737628383,
|
|
"grad_norm": 4.85629415512085,
|
|
"learning_rate": 6.1106250000000005e-06,
|
|
"loss": 0.0825,
|
|
"step": 5450
|
|
},
|
|
{
|
|
"epoch": 2.5560224089635852,
|
|
"grad_norm": 5.944486141204834,
|
|
"learning_rate": 6.1028125e-06,
|
|
"loss": 0.0775,
|
|
"step": 5475
|
|
},
|
|
{
|
|
"epoch": 2.567693744164332,
|
|
"grad_norm": 6.294357776641846,
|
|
"learning_rate": 6.095e-06,
|
|
"loss": 0.0915,
|
|
"step": 5500
|
|
},
|
|
{
|
|
"epoch": 2.5793650793650795,
|
|
"grad_norm": 5.524097919464111,
|
|
"learning_rate": 6.0871875e-06,
|
|
"loss": 0.0662,
|
|
"step": 5525
|
|
},
|
|
{
|
|
"epoch": 2.5910364145658265,
|
|
"grad_norm": 4.0100812911987305,
|
|
"learning_rate": 6.0793750000000006e-06,
|
|
"loss": 0.0914,
|
|
"step": 5550
|
|
},
|
|
{
|
|
"epoch": 2.6027077497665734,
|
|
"grad_norm": 7.9108123779296875,
|
|
"learning_rate": 6.0715625e-06,
|
|
"loss": 0.0774,
|
|
"step": 5575
|
|
},
|
|
{
|
|
"epoch": 2.6143790849673203,
|
|
"grad_norm": 2.5471160411834717,
|
|
"learning_rate": 6.06375e-06,
|
|
"loss": 0.0839,
|
|
"step": 5600
|
|
},
|
|
{
|
|
"epoch": 2.6260504201680672,
|
|
"grad_norm": 3.6380198001861572,
|
|
"learning_rate": 6.0559375e-06,
|
|
"loss": 0.0621,
|
|
"step": 5625
|
|
},
|
|
{
|
|
"epoch": 2.637721755368814,
|
|
"grad_norm": 2.9542012214660645,
|
|
"learning_rate": 6.048125000000001e-06,
|
|
"loss": 0.0784,
|
|
"step": 5650
|
|
},
|
|
{
|
|
"epoch": 2.649393090569561,
|
|
"grad_norm": 4.753948211669922,
|
|
"learning_rate": 6.0403125000000005e-06,
|
|
"loss": 0.0768,
|
|
"step": 5675
|
|
},
|
|
{
|
|
"epoch": 2.661064425770308,
|
|
"grad_norm": 2.410440444946289,
|
|
"learning_rate": 6.0325e-06,
|
|
"loss": 0.088,
|
|
"step": 5700
|
|
},
|
|
{
|
|
"epoch": 2.6727357609710554,
|
|
"grad_norm": 5.084535121917725,
|
|
"learning_rate": 6.0246875e-06,
|
|
"loss": 0.0743,
|
|
"step": 5725
|
|
},
|
|
{
|
|
"epoch": 2.6844070961718023,
|
|
"grad_norm": 1.9251270294189453,
|
|
"learning_rate": 6.016875e-06,
|
|
"loss": 0.0882,
|
|
"step": 5750
|
|
},
|
|
{
|
|
"epoch": 2.696078431372549,
|
|
"grad_norm": 6.866667747497559,
|
|
"learning_rate": 6.0090625000000005e-06,
|
|
"loss": 0.0675,
|
|
"step": 5775
|
|
},
|
|
{
|
|
"epoch": 2.707749766573296,
|
|
"grad_norm": 5.318982124328613,
|
|
"learning_rate": 6.00125e-06,
|
|
"loss": 0.0856,
|
|
"step": 5800
|
|
},
|
|
{
|
|
"epoch": 2.719421101774043,
|
|
"grad_norm": 4.362662315368652,
|
|
"learning_rate": 5.9934375e-06,
|
|
"loss": 0.0787,
|
|
"step": 5825
|
|
},
|
|
{
|
|
"epoch": 2.73109243697479,
|
|
"grad_norm": 3.6322102546691895,
|
|
"learning_rate": 5.985625e-06,
|
|
"loss": 0.0907,
|
|
"step": 5850
|
|
},
|
|
{
|
|
"epoch": 2.742763772175537,
|
|
"grad_norm": 6.347275257110596,
|
|
"learning_rate": 5.977812500000001e-06,
|
|
"loss": 0.0689,
|
|
"step": 5875
|
|
},
|
|
{
|
|
"epoch": 2.754435107376284,
|
|
"grad_norm": 4.315841197967529,
|
|
"learning_rate": 5.9700000000000004e-06,
|
|
"loss": 0.0971,
|
|
"step": 5900
|
|
},
|
|
{
|
|
"epoch": 2.7661064425770308,
|
|
"grad_norm": 3.1454360485076904,
|
|
"learning_rate": 5.9621875e-06,
|
|
"loss": 0.0615,
|
|
"step": 5925
|
|
},
|
|
{
|
|
"epoch": 2.7777777777777777,
|
|
"grad_norm": 3.9786288738250732,
|
|
"learning_rate": 5.954375e-06,
|
|
"loss": 0.0988,
|
|
"step": 5950
|
|
},
|
|
{
|
|
"epoch": 2.7894491129785246,
|
|
"grad_norm": 7.057102680206299,
|
|
"learning_rate": 5.946562500000001e-06,
|
|
"loss": 0.0768,
|
|
"step": 5975
|
|
},
|
|
{
|
|
"epoch": 2.8011204481792715,
|
|
"grad_norm": 4.522549629211426,
|
|
"learning_rate": 5.9387500000000005e-06,
|
|
"loss": 0.0847,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 2.8127917833800185,
|
|
"grad_norm": 6.361202716827393,
|
|
"learning_rate": 5.9309375e-06,
|
|
"loss": 0.0525,
|
|
"step": 6025
|
|
},
|
|
{
|
|
"epoch": 2.8244631185807654,
|
|
"grad_norm": 3.8292720317840576,
|
|
"learning_rate": 5.923125e-06,
|
|
"loss": 0.0841,
|
|
"step": 6050
|
|
},
|
|
{
|
|
"epoch": 2.8361344537815127,
|
|
"grad_norm": 6.834649085998535,
|
|
"learning_rate": 5.9153125e-06,
|
|
"loss": 0.0687,
|
|
"step": 6075
|
|
},
|
|
{
|
|
"epoch": 2.8478057889822597,
|
|
"grad_norm": 6.25474214553833,
|
|
"learning_rate": 5.907500000000001e-06,
|
|
"loss": 0.0895,
|
|
"step": 6100
|
|
},
|
|
{
|
|
"epoch": 2.8594771241830066,
|
|
"grad_norm": 5.775394439697266,
|
|
"learning_rate": 5.8996875000000004e-06,
|
|
"loss": 0.0727,
|
|
"step": 6125
|
|
},
|
|
{
|
|
"epoch": 2.8711484593837535,
|
|
"grad_norm": 4.371216297149658,
|
|
"learning_rate": 5.891875e-06,
|
|
"loss": 0.1085,
|
|
"step": 6150
|
|
},
|
|
{
|
|
"epoch": 2.8828197945845004,
|
|
"grad_norm": 3.050452947616577,
|
|
"learning_rate": 5.8840625e-06,
|
|
"loss": 0.0654,
|
|
"step": 6175
|
|
},
|
|
{
|
|
"epoch": 2.8944911297852474,
|
|
"grad_norm": 3.992262840270996,
|
|
"learning_rate": 5.876250000000001e-06,
|
|
"loss": 0.0945,
|
|
"step": 6200
|
|
},
|
|
{
|
|
"epoch": 2.9061624649859943,
|
|
"grad_norm": 5.945260047912598,
|
|
"learning_rate": 5.8684375e-06,
|
|
"loss": 0.0724,
|
|
"step": 6225
|
|
},
|
|
{
|
|
"epoch": 2.917833800186741,
|
|
"grad_norm": 3.371884822845459,
|
|
"learning_rate": 5.860625e-06,
|
|
"loss": 0.0961,
|
|
"step": 6250
|
|
},
|
|
{
|
|
"epoch": 2.9295051353874886,
|
|
"grad_norm": 4.842737674713135,
|
|
"learning_rate": 5.8528125e-06,
|
|
"loss": 0.0748,
|
|
"step": 6275
|
|
},
|
|
{
|
|
"epoch": 2.9411764705882355,
|
|
"grad_norm": 3.629974603652954,
|
|
"learning_rate": 5.845000000000001e-06,
|
|
"loss": 0.0897,
|
|
"step": 6300
|
|
},
|
|
{
|
|
"epoch": 2.9528478057889824,
|
|
"grad_norm": 8.20695972442627,
|
|
"learning_rate": 5.8371875e-06,
|
|
"loss": 0.0718,
|
|
"step": 6325
|
|
},
|
|
{
|
|
"epoch": 2.9645191409897294,
|
|
"grad_norm": 3.662733554840088,
|
|
"learning_rate": 5.8296875e-06,
|
|
"loss": 0.0885,
|
|
"step": 6350
|
|
},
|
|
{
|
|
"epoch": 2.9761904761904763,
|
|
"grad_norm": 7.424181938171387,
|
|
"learning_rate": 5.821875e-06,
|
|
"loss": 0.0631,
|
|
"step": 6375
|
|
},
|
|
{
|
|
"epoch": 2.987861811391223,
|
|
"grad_norm": 3.9309329986572266,
|
|
"learning_rate": 5.814062500000001e-06,
|
|
"loss": 0.0894,
|
|
"step": 6400
|
|
},
|
|
{
|
|
"epoch": 2.99953314659197,
|
|
"grad_norm": 10.30614185333252,
|
|
"learning_rate": 5.8062500000000005e-06,
|
|
"loss": 0.087,
|
|
"step": 6425
|
|
},
|
|
{
|
|
"epoch": 3.011204481792717,
|
|
"grad_norm": 4.544281005859375,
|
|
"learning_rate": 5.7984375e-06,
|
|
"loss": 0.04,
|
|
"step": 6450
|
|
},
|
|
{
|
|
"epoch": 3.022875816993464,
|
|
"grad_norm": 9.070793151855469,
|
|
"learning_rate": 5.790625e-06,
|
|
"loss": 0.0373,
|
|
"step": 6475
|
|
},
|
|
{
|
|
"epoch": 3.034547152194211,
|
|
"grad_norm": 7.368350982666016,
|
|
"learning_rate": 5.782812500000001e-06,
|
|
"loss": 0.0485,
|
|
"step": 6500
|
|
},
|
|
{
|
|
"epoch": 3.046218487394958,
|
|
"grad_norm": 6.376898765563965,
|
|
"learning_rate": 5.775000000000001e-06,
|
|
"loss": 0.0474,
|
|
"step": 6525
|
|
},
|
|
{
|
|
"epoch": 3.0578898225957047,
|
|
"grad_norm": 5.481170654296875,
|
|
"learning_rate": 5.7671875e-06,
|
|
"loss": 0.0458,
|
|
"step": 6550
|
|
},
|
|
{
|
|
"epoch": 3.069561157796452,
|
|
"grad_norm": 10.094844818115234,
|
|
"learning_rate": 5.759375e-06,
|
|
"loss": 0.0451,
|
|
"step": 6575
|
|
},
|
|
{
|
|
"epoch": 3.081232492997199,
|
|
"grad_norm": 0.9469685554504395,
|
|
"learning_rate": 5.7515625e-06,
|
|
"loss": 0.0465,
|
|
"step": 6600
|
|
},
|
|
{
|
|
"epoch": 3.092903828197946,
|
|
"grad_norm": 4.808952331542969,
|
|
"learning_rate": 5.743750000000001e-06,
|
|
"loss": 0.0441,
|
|
"step": 6625
|
|
},
|
|
{
|
|
"epoch": 3.104575163398693,
|
|
"grad_norm": 8.131449699401855,
|
|
"learning_rate": 5.7359375e-06,
|
|
"loss": 0.0396,
|
|
"step": 6650
|
|
},
|
|
{
|
|
"epoch": 3.11624649859944,
|
|
"grad_norm": 6.750060081481934,
|
|
"learning_rate": 5.728125e-06,
|
|
"loss": 0.0611,
|
|
"step": 6675
|
|
},
|
|
{
|
|
"epoch": 3.1279178338001867,
|
|
"grad_norm": 6.601670742034912,
|
|
"learning_rate": 5.7203125e-06,
|
|
"loss": 0.0415,
|
|
"step": 6700
|
|
},
|
|
{
|
|
"epoch": 3.1395891690009337,
|
|
"grad_norm": 13.884129524230957,
|
|
"learning_rate": 5.712500000000001e-06,
|
|
"loss": 0.0417,
|
|
"step": 6725
|
|
},
|
|
{
|
|
"epoch": 3.1512605042016806,
|
|
"grad_norm": 7.539254188537598,
|
|
"learning_rate": 5.7046875e-06,
|
|
"loss": 0.0413,
|
|
"step": 6750
|
|
},
|
|
{
|
|
"epoch": 3.1629318394024275,
|
|
"grad_norm": 6.866730213165283,
|
|
"learning_rate": 5.696875e-06,
|
|
"loss": 0.055,
|
|
"step": 6775
|
|
},
|
|
{
|
|
"epoch": 3.1746031746031744,
|
|
"grad_norm": 2.3453876972198486,
|
|
"learning_rate": 5.6890625e-06,
|
|
"loss": 0.0314,
|
|
"step": 6800
|
|
},
|
|
{
|
|
"epoch": 3.186274509803922,
|
|
"grad_norm": 5.64259672164917,
|
|
"learning_rate": 5.681250000000001e-06,
|
|
"loss": 0.0446,
|
|
"step": 6825
|
|
},
|
|
{
|
|
"epoch": 3.1979458450046687,
|
|
"grad_norm": 1.9124208688735962,
|
|
"learning_rate": 5.6734375e-06,
|
|
"loss": 0.0413,
|
|
"step": 6850
|
|
},
|
|
{
|
|
"epoch": 3.2096171802054156,
|
|
"grad_norm": 8.153667449951172,
|
|
"learning_rate": 5.6656250000000005e-06,
|
|
"loss": 0.0482,
|
|
"step": 6875
|
|
},
|
|
{
|
|
"epoch": 3.2212885154061626,
|
|
"grad_norm": 3.0059521198272705,
|
|
"learning_rate": 5.6578125e-06,
|
|
"loss": 0.0391,
|
|
"step": 6900
|
|
},
|
|
{
|
|
"epoch": 3.2329598506069095,
|
|
"grad_norm": 7.093464374542236,
|
|
"learning_rate": 5.65e-06,
|
|
"loss": 0.0473,
|
|
"step": 6925
|
|
},
|
|
{
|
|
"epoch": 3.2446311858076564,
|
|
"grad_norm": 2.2140514850616455,
|
|
"learning_rate": 5.642187500000001e-06,
|
|
"loss": 0.0472,
|
|
"step": 6950
|
|
},
|
|
{
|
|
"epoch": 3.2563025210084033,
|
|
"grad_norm": 5.731634616851807,
|
|
"learning_rate": 5.634375e-06,
|
|
"loss": 0.0454,
|
|
"step": 6975
|
|
},
|
|
{
|
|
"epoch": 3.2679738562091503,
|
|
"grad_norm": 2.6004838943481445,
|
|
"learning_rate": 5.6265625e-06,
|
|
"loss": 0.0379,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 3.279645191409897,
|
|
"grad_norm": 6.271092414855957,
|
|
"learning_rate": 5.61875e-06,
|
|
"loss": 0.0447,
|
|
"step": 7025
|
|
},
|
|
{
|
|
"epoch": 3.291316526610644,
|
|
"grad_norm": 14.229198455810547,
|
|
"learning_rate": 5.610937500000001e-06,
|
|
"loss": 0.0433,
|
|
"step": 7050
|
|
},
|
|
{
|
|
"epoch": 3.302987861811391,
|
|
"grad_norm": 7.76876974105835,
|
|
"learning_rate": 5.603125e-06,
|
|
"loss": 0.0417,
|
|
"step": 7075
|
|
},
|
|
{
|
|
"epoch": 3.314659197012138,
|
|
"grad_norm": 5.000005722045898,
|
|
"learning_rate": 5.5953125000000005e-06,
|
|
"loss": 0.0365,
|
|
"step": 7100
|
|
},
|
|
{
|
|
"epoch": 3.3263305322128853,
|
|
"grad_norm": 5.754938125610352,
|
|
"learning_rate": 5.5875e-06,
|
|
"loss": 0.0483,
|
|
"step": 7125
|
|
},
|
|
{
|
|
"epoch": 3.3380018674136323,
|
|
"grad_norm": 2.836254835128784,
|
|
"learning_rate": 5.579687500000001e-06,
|
|
"loss": 0.0439,
|
|
"step": 7150
|
|
},
|
|
{
|
|
"epoch": 3.349673202614379,
|
|
"grad_norm": 6.115027904510498,
|
|
"learning_rate": 5.571875e-06,
|
|
"loss": 0.0518,
|
|
"step": 7175
|
|
},
|
|
{
|
|
"epoch": 3.361344537815126,
|
|
"grad_norm": 4.021732330322266,
|
|
"learning_rate": 5.5640625000000006e-06,
|
|
"loss": 0.0413,
|
|
"step": 7200
|
|
},
|
|
{
|
|
"epoch": 3.373015873015873,
|
|
"grad_norm": 12.889508247375488,
|
|
"learning_rate": 5.55625e-06,
|
|
"loss": 0.0608,
|
|
"step": 7225
|
|
},
|
|
{
|
|
"epoch": 3.38468720821662,
|
|
"grad_norm": 5.332011699676514,
|
|
"learning_rate": 5.5484375e-06,
|
|
"loss": 0.0458,
|
|
"step": 7250
|
|
},
|
|
{
|
|
"epoch": 3.396358543417367,
|
|
"grad_norm": 9.390791893005371,
|
|
"learning_rate": 5.540625e-06,
|
|
"loss": 0.0418,
|
|
"step": 7275
|
|
},
|
|
{
|
|
"epoch": 3.408029878618114,
|
|
"grad_norm": 5.002245903015137,
|
|
"learning_rate": 5.5328125e-06,
|
|
"loss": 0.0431,
|
|
"step": 7300
|
|
},
|
|
{
|
|
"epoch": 3.4197012138188607,
|
|
"grad_norm": 9.859498023986816,
|
|
"learning_rate": 5.5250000000000005e-06,
|
|
"loss": 0.0511,
|
|
"step": 7325
|
|
},
|
|
{
|
|
"epoch": 3.431372549019608,
|
|
"grad_norm": 12.415291786193848,
|
|
"learning_rate": 5.5171875e-06,
|
|
"loss": 0.0441,
|
|
"step": 7350
|
|
},
|
|
{
|
|
"epoch": 3.443043884220355,
|
|
"grad_norm": 4.821508884429932,
|
|
"learning_rate": 5.509375e-06,
|
|
"loss": 0.0466,
|
|
"step": 7375
|
|
},
|
|
{
|
|
"epoch": 3.454715219421102,
|
|
"grad_norm": 3.222395420074463,
|
|
"learning_rate": 5.5015625e-06,
|
|
"loss": 0.043,
|
|
"step": 7400
|
|
},
|
|
{
|
|
"epoch": 3.466386554621849,
|
|
"grad_norm": 11.607864379882812,
|
|
"learning_rate": 5.4937500000000006e-06,
|
|
"loss": 0.044,
|
|
"step": 7425
|
|
},
|
|
{
|
|
"epoch": 3.478057889822596,
|
|
"grad_norm": 2.7468137741088867,
|
|
"learning_rate": 5.4859375e-06,
|
|
"loss": 0.0494,
|
|
"step": 7450
|
|
},
|
|
{
|
|
"epoch": 3.4897292250233427,
|
|
"grad_norm": 5.353877067565918,
|
|
"learning_rate": 5.478125e-06,
|
|
"loss": 0.0439,
|
|
"step": 7475
|
|
},
|
|
{
|
|
"epoch": 3.5014005602240896,
|
|
"grad_norm": 5.521659851074219,
|
|
"learning_rate": 5.4703125e-06,
|
|
"loss": 0.043,
|
|
"step": 7500
|
|
},
|
|
{
|
|
"epoch": 3.5130718954248366,
|
|
"grad_norm": 11.562368392944336,
|
|
"learning_rate": 5.462500000000001e-06,
|
|
"loss": 0.0611,
|
|
"step": 7525
|
|
},
|
|
{
|
|
"epoch": 3.5247432306255835,
|
|
"grad_norm": 2.4676475524902344,
|
|
"learning_rate": 5.4546875000000004e-06,
|
|
"loss": 0.0372,
|
|
"step": 7550
|
|
},
|
|
{
|
|
"epoch": 3.5364145658263304,
|
|
"grad_norm": 7.735954761505127,
|
|
"learning_rate": 5.446875e-06,
|
|
"loss": 0.0434,
|
|
"step": 7575
|
|
},
|
|
{
|
|
"epoch": 3.5480859010270773,
|
|
"grad_norm": 3.367266893386841,
|
|
"learning_rate": 5.4390625e-06,
|
|
"loss": 0.0488,
|
|
"step": 7600
|
|
},
|
|
{
|
|
"epoch": 3.5597572362278243,
|
|
"grad_norm": 6.3219990730285645,
|
|
"learning_rate": 5.43125e-06,
|
|
"loss": 0.0488,
|
|
"step": 7625
|
|
},
|
|
{
|
|
"epoch": 3.571428571428571,
|
|
"grad_norm": 5.415238857269287,
|
|
"learning_rate": 5.4234375000000005e-06,
|
|
"loss": 0.0481,
|
|
"step": 7650
|
|
},
|
|
{
|
|
"epoch": 3.5830999066293185,
|
|
"grad_norm": 7.6133832931518555,
|
|
"learning_rate": 5.415625e-06,
|
|
"loss": 0.0379,
|
|
"step": 7675
|
|
},
|
|
{
|
|
"epoch": 3.5947712418300655,
|
|
"grad_norm": 3.2077534198760986,
|
|
"learning_rate": 5.4078125e-06,
|
|
"loss": 0.0469,
|
|
"step": 7700
|
|
},
|
|
{
|
|
"epoch": 3.6064425770308124,
|
|
"grad_norm": 9.553058624267578,
|
|
"learning_rate": 5.4e-06,
|
|
"loss": 0.044,
|
|
"step": 7725
|
|
},
|
|
{
|
|
"epoch": 3.6181139122315593,
|
|
"grad_norm": 6.475897312164307,
|
|
"learning_rate": 5.392187500000001e-06,
|
|
"loss": 0.0388,
|
|
"step": 7750
|
|
},
|
|
{
|
|
"epoch": 3.6297852474323062,
|
|
"grad_norm": 7.829625129699707,
|
|
"learning_rate": 5.3843750000000004e-06,
|
|
"loss": 0.0543,
|
|
"step": 7775
|
|
},
|
|
{
|
|
"epoch": 3.641456582633053,
|
|
"grad_norm": 2.857725143432617,
|
|
"learning_rate": 5.3765625e-06,
|
|
"loss": 0.0456,
|
|
"step": 7800
|
|
},
|
|
{
|
|
"epoch": 3.6531279178338,
|
|
"grad_norm": 9.913888931274414,
|
|
"learning_rate": 5.36875e-06,
|
|
"loss": 0.0588,
|
|
"step": 7825
|
|
},
|
|
{
|
|
"epoch": 3.664799253034547,
|
|
"grad_norm": 6.124692916870117,
|
|
"learning_rate": 5.360937500000001e-06,
|
|
"loss": 0.0487,
|
|
"step": 7850
|
|
},
|
|
{
|
|
"epoch": 3.6764705882352944,
|
|
"grad_norm": 5.036586284637451,
|
|
"learning_rate": 5.3531250000000005e-06,
|
|
"loss": 0.0381,
|
|
"step": 7875
|
|
},
|
|
{
|
|
"epoch": 3.6881419234360413,
|
|
"grad_norm": 2.280762195587158,
|
|
"learning_rate": 5.3453125e-06,
|
|
"loss": 0.041,
|
|
"step": 7900
|
|
},
|
|
{
|
|
"epoch": 3.6998132586367882,
|
|
"grad_norm": 5.175332546234131,
|
|
"learning_rate": 5.3375e-06,
|
|
"loss": 0.0439,
|
|
"step": 7925
|
|
},
|
|
{
|
|
"epoch": 3.711484593837535,
|
|
"grad_norm": 6.605205535888672,
|
|
"learning_rate": 5.3296875e-06,
|
|
"loss": 0.052,
|
|
"step": 7950
|
|
},
|
|
{
|
|
"epoch": 3.723155929038282,
|
|
"grad_norm": 8.107190132141113,
|
|
"learning_rate": 5.321875000000001e-06,
|
|
"loss": 0.0513,
|
|
"step": 7975
|
|
},
|
|
{
|
|
"epoch": 3.734827264239029,
|
|
"grad_norm": 4.808969497680664,
|
|
"learning_rate": 5.3140624999999996e-06,
|
|
"loss": 0.0474,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 3.746498599439776,
|
|
"grad_norm": 13.523513793945312,
|
|
"learning_rate": 5.30625e-06,
|
|
"loss": 0.0472,
|
|
"step": 8025
|
|
},
|
|
{
|
|
"epoch": 3.758169934640523,
|
|
"grad_norm": 4.2439751625061035,
|
|
"learning_rate": 5.2984375e-06,
|
|
"loss": 0.0493,
|
|
"step": 8050
|
|
},
|
|
{
|
|
"epoch": 3.7698412698412698,
|
|
"grad_norm": 19.3562068939209,
|
|
"learning_rate": 5.290625000000001e-06,
|
|
"loss": 0.0559,
|
|
"step": 8075
|
|
},
|
|
{
|
|
"epoch": 3.7815126050420167,
|
|
"grad_norm": 5.023294448852539,
|
|
"learning_rate": 5.2828125e-06,
|
|
"loss": 0.0444,
|
|
"step": 8100
|
|
},
|
|
{
|
|
"epoch": 3.7931839402427636,
|
|
"grad_norm": 10.296977996826172,
|
|
"learning_rate": 5.275e-06,
|
|
"loss": 0.0485,
|
|
"step": 8125
|
|
},
|
|
{
|
|
"epoch": 3.8048552754435105,
|
|
"grad_norm": 1.339447259902954,
|
|
"learning_rate": 5.2671875e-06,
|
|
"loss": 0.0408,
|
|
"step": 8150
|
|
},
|
|
{
|
|
"epoch": 3.8165266106442575,
|
|
"grad_norm": 8.966866493225098,
|
|
"learning_rate": 5.259375000000001e-06,
|
|
"loss": 0.0481,
|
|
"step": 8175
|
|
},
|
|
{
|
|
"epoch": 3.828197945845005,
|
|
"grad_norm": 4.252060413360596,
|
|
"learning_rate": 5.251562500000001e-06,
|
|
"loss": 0.0452,
|
|
"step": 8200
|
|
},
|
|
{
|
|
"epoch": 3.8398692810457518,
|
|
"grad_norm": 10.229138374328613,
|
|
"learning_rate": 5.24375e-06,
|
|
"loss": 0.0592,
|
|
"step": 8225
|
|
},
|
|
{
|
|
"epoch": 3.8515406162464987,
|
|
"grad_norm": 1.4391601085662842,
|
|
"learning_rate": 5.2359375e-06,
|
|
"loss": 0.0453,
|
|
"step": 8250
|
|
},
|
|
{
|
|
"epoch": 3.8632119514472456,
|
|
"grad_norm": 6.556412696838379,
|
|
"learning_rate": 5.228125e-06,
|
|
"loss": 0.0436,
|
|
"step": 8275
|
|
},
|
|
{
|
|
"epoch": 3.8748832866479925,
|
|
"grad_norm": 4.541426658630371,
|
|
"learning_rate": 5.220312500000001e-06,
|
|
"loss": 0.0399,
|
|
"step": 8300
|
|
},
|
|
{
|
|
"epoch": 3.8865546218487395,
|
|
"grad_norm": 5.626660346984863,
|
|
"learning_rate": 5.2125e-06,
|
|
"loss": 0.0526,
|
|
"step": 8325
|
|
},
|
|
{
|
|
"epoch": 3.8982259570494864,
|
|
"grad_norm": 3.5642924308776855,
|
|
"learning_rate": 5.2046875e-06,
|
|
"loss": 0.041,
|
|
"step": 8350
|
|
},
|
|
{
|
|
"epoch": 3.9098972922502333,
|
|
"grad_norm": 5.301916599273682,
|
|
"learning_rate": 5.196875e-06,
|
|
"loss": 0.0505,
|
|
"step": 8375
|
|
},
|
|
{
|
|
"epoch": 3.9215686274509802,
|
|
"grad_norm": 4.125392913818359,
|
|
"learning_rate": 5.189062500000001e-06,
|
|
"loss": 0.0413,
|
|
"step": 8400
|
|
},
|
|
{
|
|
"epoch": 3.9332399626517276,
|
|
"grad_norm": 10.192436218261719,
|
|
"learning_rate": 5.18125e-06,
|
|
"loss": 0.0531,
|
|
"step": 8425
|
|
},
|
|
{
|
|
"epoch": 3.9449112978524745,
|
|
"grad_norm": 3.3052845001220703,
|
|
"learning_rate": 5.1734375e-06,
|
|
"loss": 0.0398,
|
|
"step": 8450
|
|
},
|
|
{
|
|
"epoch": 3.9565826330532214,
|
|
"grad_norm": 6.241024494171143,
|
|
"learning_rate": 5.165625e-06,
|
|
"loss": 0.0408,
|
|
"step": 8475
|
|
},
|
|
{
|
|
"epoch": 3.9682539682539684,
|
|
"grad_norm": 6.708200454711914,
|
|
"learning_rate": 5.157812500000001e-06,
|
|
"loss": 0.0516,
|
|
"step": 8500
|
|
},
|
|
{
|
|
"epoch": 3.9799253034547153,
|
|
"grad_norm": 11.930779457092285,
|
|
"learning_rate": 5.15e-06,
|
|
"loss": 0.0452,
|
|
"step": 8525
|
|
},
|
|
{
|
|
"epoch": 3.991596638655462,
|
|
"grad_norm": 7.160813808441162,
|
|
"learning_rate": 5.1421875000000005e-06,
|
|
"loss": 0.0455,
|
|
"step": 8550
|
|
},
|
|
{
|
|
"epoch": 4.003267973856209,
|
|
"grad_norm": 1.401731014251709,
|
|
"learning_rate": 5.134375e-06,
|
|
"loss": 0.0453,
|
|
"step": 8575
|
|
},
|
|
{
|
|
"epoch": 4.014939309056956,
|
|
"grad_norm": 1.890440821647644,
|
|
"learning_rate": 5.1265625e-06,
|
|
"loss": 0.0177,
|
|
"step": 8600
|
|
},
|
|
{
|
|
"epoch": 4.026610644257703,
|
|
"grad_norm": 1.3520216941833496,
|
|
"learning_rate": 5.11875e-06,
|
|
"loss": 0.0305,
|
|
"step": 8625
|
|
},
|
|
{
|
|
"epoch": 4.03828197945845,
|
|
"grad_norm": 4.4095025062561035,
|
|
"learning_rate": 5.1109375e-06,
|
|
"loss": 0.0227,
|
|
"step": 8650
|
|
},
|
|
{
|
|
"epoch": 4.049953314659197,
|
|
"grad_norm": 2.4577364921569824,
|
|
"learning_rate": 5.103125e-06,
|
|
"loss": 0.036,
|
|
"step": 8675
|
|
},
|
|
{
|
|
"epoch": 4.061624649859944,
|
|
"grad_norm": 3.378568410873413,
|
|
"learning_rate": 5.0953125e-06,
|
|
"loss": 0.0235,
|
|
"step": 8700
|
|
},
|
|
{
|
|
"epoch": 4.073295985060691,
|
|
"grad_norm": 1.7685190439224243,
|
|
"learning_rate": 5.0875e-06,
|
|
"loss": 0.031,
|
|
"step": 8725
|
|
},
|
|
{
|
|
"epoch": 4.084967320261438,
|
|
"grad_norm": 1.426932454109192,
|
|
"learning_rate": 5.0796875e-06,
|
|
"loss": 0.0184,
|
|
"step": 8750
|
|
},
|
|
{
|
|
"epoch": 4.0966386554621845,
|
|
"grad_norm": 2.890690803527832,
|
|
"learning_rate": 5.0718750000000005e-06,
|
|
"loss": 0.0319,
|
|
"step": 8775
|
|
},
|
|
{
|
|
"epoch": 4.1083099906629315,
|
|
"grad_norm": 2.438765287399292,
|
|
"learning_rate": 5.0640625e-06,
|
|
"loss": 0.0191,
|
|
"step": 8800
|
|
},
|
|
{
|
|
"epoch": 4.119981325863678,
|
|
"grad_norm": 1.9891207218170166,
|
|
"learning_rate": 5.056250000000001e-06,
|
|
"loss": 0.031,
|
|
"step": 8825
|
|
},
|
|
{
|
|
"epoch": 4.131652661064426,
|
|
"grad_norm": 2.288236141204834,
|
|
"learning_rate": 5.0484375e-06,
|
|
"loss": 0.0174,
|
|
"step": 8850
|
|
},
|
|
{
|
|
"epoch": 4.143323996265173,
|
|
"grad_norm": 2.097827434539795,
|
|
"learning_rate": 5.0406250000000005e-06,
|
|
"loss": 0.0244,
|
|
"step": 8875
|
|
},
|
|
{
|
|
"epoch": 4.15499533146592,
|
|
"grad_norm": 0.7743799090385437,
|
|
"learning_rate": 5.0328125e-06,
|
|
"loss": 0.0209,
|
|
"step": 8900
|
|
},
|
|
{
|
|
"epoch": 4.166666666666667,
|
|
"grad_norm": 2.9196207523345947,
|
|
"learning_rate": 5.025e-06,
|
|
"loss": 0.0275,
|
|
"step": 8925
|
|
},
|
|
{
|
|
"epoch": 4.178338001867414,
|
|
"grad_norm": 1.094561219215393,
|
|
"learning_rate": 5.0171875e-06,
|
|
"loss": 0.026,
|
|
"step": 8950
|
|
},
|
|
{
|
|
"epoch": 4.190009337068161,
|
|
"grad_norm": 2.849806070327759,
|
|
"learning_rate": 5.0096875000000005e-06,
|
|
"loss": 0.0374,
|
|
"step": 8975
|
|
},
|
|
{
|
|
"epoch": 4.201680672268908,
|
|
"grad_norm": 9.60171890258789,
|
|
"learning_rate": 5.001875e-06,
|
|
"loss": 0.0264,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 4.213352007469655,
|
|
"grad_norm": 3.694355010986328,
|
|
"learning_rate": 4.9940625e-06,
|
|
"loss": 0.0355,
|
|
"step": 9025
|
|
},
|
|
{
|
|
"epoch": 4.225023342670402,
|
|
"grad_norm": 5.392662525177002,
|
|
"learning_rate": 4.98625e-06,
|
|
"loss": 0.0242,
|
|
"step": 9050
|
|
},
|
|
{
|
|
"epoch": 4.2366946778711485,
|
|
"grad_norm": 2.1022627353668213,
|
|
"learning_rate": 4.9784375e-06,
|
|
"loss": 0.0327,
|
|
"step": 9075
|
|
},
|
|
{
|
|
"epoch": 4.248366013071895,
|
|
"grad_norm": 3.802021026611328,
|
|
"learning_rate": 4.970625e-06,
|
|
"loss": 0.0211,
|
|
"step": 9100
|
|
},
|
|
{
|
|
"epoch": 4.260037348272642,
|
|
"grad_norm": 0.9553838968276978,
|
|
"learning_rate": 4.9628125e-06,
|
|
"loss": 0.0291,
|
|
"step": 9125
|
|
},
|
|
{
|
|
"epoch": 4.271708683473389,
|
|
"grad_norm": 5.334795951843262,
|
|
"learning_rate": 4.955e-06,
|
|
"loss": 0.0209,
|
|
"step": 9150
|
|
},
|
|
{
|
|
"epoch": 4.283380018674136,
|
|
"grad_norm": 1.5590300559997559,
|
|
"learning_rate": 4.9471875e-06,
|
|
"loss": 0.032,
|
|
"step": 9175
|
|
},
|
|
{
|
|
"epoch": 4.295051353874883,
|
|
"grad_norm": 1.9582746028900146,
|
|
"learning_rate": 4.9393750000000005e-06,
|
|
"loss": 0.0285,
|
|
"step": 9200
|
|
},
|
|
{
|
|
"epoch": 4.30672268907563,
|
|
"grad_norm": 5.399374485015869,
|
|
"learning_rate": 4.9315625e-06,
|
|
"loss": 0.0385,
|
|
"step": 9225
|
|
},
|
|
{
|
|
"epoch": 4.318394024276377,
|
|
"grad_norm": 3.4703786373138428,
|
|
"learning_rate": 4.92375e-06,
|
|
"loss": 0.0185,
|
|
"step": 9250
|
|
},
|
|
{
|
|
"epoch": 4.330065359477124,
|
|
"grad_norm": 3.748854637145996,
|
|
"learning_rate": 4.9159375e-06,
|
|
"loss": 0.0361,
|
|
"step": 9275
|
|
},
|
|
{
|
|
"epoch": 4.341736694677871,
|
|
"grad_norm": 2.3994362354278564,
|
|
"learning_rate": 4.9081250000000005e-06,
|
|
"loss": 0.0257,
|
|
"step": 9300
|
|
},
|
|
{
|
|
"epoch": 4.353408029878618,
|
|
"grad_norm": 1.0170806646347046,
|
|
"learning_rate": 4.9003125e-06,
|
|
"loss": 0.0344,
|
|
"step": 9325
|
|
},
|
|
{
|
|
"epoch": 4.365079365079365,
|
|
"grad_norm": 1.4588912725448608,
|
|
"learning_rate": 4.8925e-06,
|
|
"loss": 0.0225,
|
|
"step": 9350
|
|
},
|
|
{
|
|
"epoch": 4.3767507002801125,
|
|
"grad_norm": 2.9339776039123535,
|
|
"learning_rate": 4.8846875e-06,
|
|
"loss": 0.0317,
|
|
"step": 9375
|
|
},
|
|
{
|
|
"epoch": 4.388422035480859,
|
|
"grad_norm": 3.1433396339416504,
|
|
"learning_rate": 4.876875e-06,
|
|
"loss": 0.0231,
|
|
"step": 9400
|
|
},
|
|
{
|
|
"epoch": 4.400093370681606,
|
|
"grad_norm": 3.174156427383423,
|
|
"learning_rate": 4.8690625000000004e-06,
|
|
"loss": 0.0269,
|
|
"step": 9425
|
|
},
|
|
{
|
|
"epoch": 4.411764705882353,
|
|
"grad_norm": 1.8732781410217285,
|
|
"learning_rate": 4.86125e-06,
|
|
"loss": 0.0305,
|
|
"step": 9450
|
|
},
|
|
{
|
|
"epoch": 4.4234360410831,
|
|
"grad_norm": 1.5500296354293823,
|
|
"learning_rate": 4.8534375e-06,
|
|
"loss": 0.0351,
|
|
"step": 9475
|
|
},
|
|
{
|
|
"epoch": 4.435107376283847,
|
|
"grad_norm": 3.1208136081695557,
|
|
"learning_rate": 4.845625e-06,
|
|
"loss": 0.022,
|
|
"step": 9500
|
|
},
|
|
{
|
|
"epoch": 4.446778711484594,
|
|
"grad_norm": 2.964061975479126,
|
|
"learning_rate": 4.8378125000000005e-06,
|
|
"loss": 0.0383,
|
|
"step": 9525
|
|
},
|
|
{
|
|
"epoch": 4.458450046685341,
|
|
"grad_norm": 4.548437118530273,
|
|
"learning_rate": 4.83e-06,
|
|
"loss": 0.0263,
|
|
"step": 9550
|
|
},
|
|
{
|
|
"epoch": 4.470121381886088,
|
|
"grad_norm": 2.4414591789245605,
|
|
"learning_rate": 4.8221875e-06,
|
|
"loss": 0.0314,
|
|
"step": 9575
|
|
},
|
|
{
|
|
"epoch": 4.481792717086835,
|
|
"grad_norm": 2.8750803470611572,
|
|
"learning_rate": 4.814375e-06,
|
|
"loss": 0.0254,
|
|
"step": 9600
|
|
},
|
|
{
|
|
"epoch": 4.493464052287582,
|
|
"grad_norm": 1.9113430976867676,
|
|
"learning_rate": 4.806562500000001e-06,
|
|
"loss": 0.0365,
|
|
"step": 9625
|
|
},
|
|
{
|
|
"epoch": 4.505135387488329,
|
|
"grad_norm": 1.8737727403640747,
|
|
"learning_rate": 4.7987500000000004e-06,
|
|
"loss": 0.0251,
|
|
"step": 9650
|
|
},
|
|
{
|
|
"epoch": 4.516806722689076,
|
|
"grad_norm": 3.6277358531951904,
|
|
"learning_rate": 4.7909375e-06,
|
|
"loss": 0.0357,
|
|
"step": 9675
|
|
},
|
|
{
|
|
"epoch": 4.5284780578898225,
|
|
"grad_norm": 0.974403440952301,
|
|
"learning_rate": 4.783125e-06,
|
|
"loss": 0.0172,
|
|
"step": 9700
|
|
},
|
|
{
|
|
"epoch": 4.540149393090569,
|
|
"grad_norm": 5.103818893432617,
|
|
"learning_rate": 4.7753125e-06,
|
|
"loss": 0.0262,
|
|
"step": 9725
|
|
},
|
|
{
|
|
"epoch": 4.551820728291316,
|
|
"grad_norm": 4.358363151550293,
|
|
"learning_rate": 4.7675000000000005e-06,
|
|
"loss": 0.0268,
|
|
"step": 9750
|
|
},
|
|
{
|
|
"epoch": 4.563492063492063,
|
|
"grad_norm": 1.1133219003677368,
|
|
"learning_rate": 4.7596875e-06,
|
|
"loss": 0.0371,
|
|
"step": 9775
|
|
},
|
|
{
|
|
"epoch": 4.57516339869281,
|
|
"grad_norm": 2.639396905899048,
|
|
"learning_rate": 4.751875e-06,
|
|
"loss": 0.0277,
|
|
"step": 9800
|
|
},
|
|
{
|
|
"epoch": 4.586834733893557,
|
|
"grad_norm": 0.8341067433357239,
|
|
"learning_rate": 4.7440625e-06,
|
|
"loss": 0.0267,
|
|
"step": 9825
|
|
},
|
|
{
|
|
"epoch": 4.598506069094304,
|
|
"grad_norm": 2.7689151763916016,
|
|
"learning_rate": 4.736250000000001e-06,
|
|
"loss": 0.0211,
|
|
"step": 9850
|
|
},
|
|
{
|
|
"epoch": 4.610177404295051,
|
|
"grad_norm": 3.2999351024627686,
|
|
"learning_rate": 4.7284374999999996e-06,
|
|
"loss": 0.0374,
|
|
"step": 9875
|
|
},
|
|
{
|
|
"epoch": 4.621848739495798,
|
|
"grad_norm": 1.2019790410995483,
|
|
"learning_rate": 4.720625e-06,
|
|
"loss": 0.0181,
|
|
"step": 9900
|
|
},
|
|
{
|
|
"epoch": 4.633520074696545,
|
|
"grad_norm": 2.8706002235412598,
|
|
"learning_rate": 4.7128125e-06,
|
|
"loss": 0.0304,
|
|
"step": 9925
|
|
},
|
|
{
|
|
"epoch": 4.645191409897293,
|
|
"grad_norm": 5.747146129608154,
|
|
"learning_rate": 4.705000000000001e-06,
|
|
"loss": 0.0229,
|
|
"step": 9950
|
|
},
|
|
{
|
|
"epoch": 4.6568627450980395,
|
|
"grad_norm": 1.8742387294769287,
|
|
"learning_rate": 4.6971875000000005e-06,
|
|
"loss": 0.0418,
|
|
"step": 9975
|
|
},
|
|
{
|
|
"epoch": 4.6685340802987865,
|
|
"grad_norm": 1.8577946424484253,
|
|
"learning_rate": 4.689375e-06,
|
|
"loss": 0.0156,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 4.6685340802987865,
|
|
"eval_loss": 0.18312382698059082,
|
|
"eval_runtime": 5443.8581,
|
|
"eval_samples_per_second": 1.729,
|
|
"eval_steps_per_second": 0.216,
|
|
"eval_wer": 0.10030679799773938,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 4.680205415499533,
|
|
"grad_norm": 1.9401777982711792,
|
|
"learning_rate": 4.6815625e-06,
|
|
"loss": 0.0271,
|
|
"step": 10025
|
|
},
|
|
{
|
|
"epoch": 4.69187675070028,
|
|
"grad_norm": 5.094863414764404,
|
|
"learning_rate": 4.67375e-06,
|
|
"loss": 0.0245,
|
|
"step": 10050
|
|
},
|
|
{
|
|
"epoch": 4.703548085901027,
|
|
"grad_norm": 3.0759990215301514,
|
|
"learning_rate": 4.665937500000001e-06,
|
|
"loss": 0.0408,
|
|
"step": 10075
|
|
},
|
|
{
|
|
"epoch": 4.715219421101774,
|
|
"grad_norm": 1.7008417844772339,
|
|
"learning_rate": 4.658125e-06,
|
|
"loss": 0.0259,
|
|
"step": 10100
|
|
},
|
|
{
|
|
"epoch": 4.726890756302521,
|
|
"grad_norm": 2.5551605224609375,
|
|
"learning_rate": 4.6503125e-06,
|
|
"loss": 0.0356,
|
|
"step": 10125
|
|
},
|
|
{
|
|
"epoch": 4.738562091503268,
|
|
"grad_norm": 1.278176188468933,
|
|
"learning_rate": 4.6425e-06,
|
|
"loss": 0.024,
|
|
"step": 10150
|
|
},
|
|
{
|
|
"epoch": 4.750233426704015,
|
|
"grad_norm": 3.679241418838501,
|
|
"learning_rate": 4.634687500000001e-06,
|
|
"loss": 0.038,
|
|
"step": 10175
|
|
},
|
|
{
|
|
"epoch": 4.761904761904762,
|
|
"grad_norm": 3.3556320667266846,
|
|
"learning_rate": 4.626875e-06,
|
|
"loss": 0.0295,
|
|
"step": 10200
|
|
},
|
|
{
|
|
"epoch": 4.773576097105509,
|
|
"grad_norm": 2.3901469707489014,
|
|
"learning_rate": 4.6190625e-06,
|
|
"loss": 0.0326,
|
|
"step": 10225
|
|
},
|
|
{
|
|
"epoch": 4.785247432306256,
|
|
"grad_norm": 2.4672956466674805,
|
|
"learning_rate": 4.61125e-06,
|
|
"loss": 0.0255,
|
|
"step": 10250
|
|
},
|
|
{
|
|
"epoch": 4.796918767507003,
|
|
"grad_norm": 1.856067419052124,
|
|
"learning_rate": 4.603437500000001e-06,
|
|
"loss": 0.0472,
|
|
"step": 10275
|
|
},
|
|
{
|
|
"epoch": 4.80859010270775,
|
|
"grad_norm": 3.6307425498962402,
|
|
"learning_rate": 4.595625e-06,
|
|
"loss": 0.0204,
|
|
"step": 10300
|
|
},
|
|
{
|
|
"epoch": 4.8202614379084965,
|
|
"grad_norm": 3.4470720291137695,
|
|
"learning_rate": 4.5878125e-06,
|
|
"loss": 0.0266,
|
|
"step": 10325
|
|
},
|
|
{
|
|
"epoch": 4.831932773109243,
|
|
"grad_norm": 3.720325231552124,
|
|
"learning_rate": 4.58e-06,
|
|
"loss": 0.0174,
|
|
"step": 10350
|
|
},
|
|
{
|
|
"epoch": 4.84360410830999,
|
|
"grad_norm": 3.147507429122925,
|
|
"learning_rate": 4.572187500000001e-06,
|
|
"loss": 0.0353,
|
|
"step": 10375
|
|
},
|
|
{
|
|
"epoch": 4.855275443510737,
|
|
"grad_norm": 0.47896313667297363,
|
|
"learning_rate": 4.564375e-06,
|
|
"loss": 0.0198,
|
|
"step": 10400
|
|
},
|
|
{
|
|
"epoch": 4.866946778711485,
|
|
"grad_norm": 1.2566039562225342,
|
|
"learning_rate": 4.5565625000000005e-06,
|
|
"loss": 0.0326,
|
|
"step": 10425
|
|
},
|
|
{
|
|
"epoch": 4.878618113912232,
|
|
"grad_norm": 6.644142150878906,
|
|
"learning_rate": 4.54875e-06,
|
|
"loss": 0.0292,
|
|
"step": 10450
|
|
},
|
|
{
|
|
"epoch": 4.890289449112979,
|
|
"grad_norm": 4.639550685882568,
|
|
"learning_rate": 4.5409375e-06,
|
|
"loss": 0.0378,
|
|
"step": 10475
|
|
},
|
|
{
|
|
"epoch": 4.901960784313726,
|
|
"grad_norm": 2.032776117324829,
|
|
"learning_rate": 4.533125e-06,
|
|
"loss": 0.0226,
|
|
"step": 10500
|
|
},
|
|
{
|
|
"epoch": 4.913632119514473,
|
|
"grad_norm": 1.344425916671753,
|
|
"learning_rate": 4.5253125e-06,
|
|
"loss": 0.0368,
|
|
"step": 10525
|
|
},
|
|
{
|
|
"epoch": 4.92530345471522,
|
|
"grad_norm": 0.8881208300590515,
|
|
"learning_rate": 4.5175e-06,
|
|
"loss": 0.0224,
|
|
"step": 10550
|
|
},
|
|
{
|
|
"epoch": 4.936974789915967,
|
|
"grad_norm": 2.743755340576172,
|
|
"learning_rate": 4.5096875e-06,
|
|
"loss": 0.0417,
|
|
"step": 10575
|
|
},
|
|
{
|
|
"epoch": 4.9486461251167135,
|
|
"grad_norm": 1.9883514642715454,
|
|
"learning_rate": 4.501875000000001e-06,
|
|
"loss": 0.0207,
|
|
"step": 10600
|
|
},
|
|
{
|
|
"epoch": 4.9603174603174605,
|
|
"grad_norm": 4.254443168640137,
|
|
"learning_rate": 4.4940625e-06,
|
|
"loss": 0.0344,
|
|
"step": 10625
|
|
},
|
|
{
|
|
"epoch": 4.971988795518207,
|
|
"grad_norm": 2.9644615650177,
|
|
"learning_rate": 4.4862500000000005e-06,
|
|
"loss": 0.0242,
|
|
"step": 10650
|
|
},
|
|
{
|
|
"epoch": 4.983660130718954,
|
|
"grad_norm": 4.65371036529541,
|
|
"learning_rate": 4.4784375e-06,
|
|
"loss": 0.0402,
|
|
"step": 10675
|
|
},
|
|
{
|
|
"epoch": 4.995331465919701,
|
|
"grad_norm": 1.430145025253296,
|
|
"learning_rate": 4.470625000000001e-06,
|
|
"loss": 0.0193,
|
|
"step": 10700
|
|
},
|
|
{
|
|
"epoch": 5.007002801120448,
|
|
"grad_norm": 2.433776378631592,
|
|
"learning_rate": 4.4628125e-06,
|
|
"loss": 0.0217,
|
|
"step": 10725
|
|
},
|
|
{
|
|
"epoch": 5.018674136321195,
|
|
"grad_norm": 0.8967903256416321,
|
|
"learning_rate": 4.4550000000000005e-06,
|
|
"loss": 0.0167,
|
|
"step": 10750
|
|
},
|
|
{
|
|
"epoch": 5.030345471521942,
|
|
"grad_norm": 3.4592394828796387,
|
|
"learning_rate": 4.4471875e-06,
|
|
"loss": 0.0275,
|
|
"step": 10775
|
|
},
|
|
{
|
|
"epoch": 5.042016806722689,
|
|
"grad_norm": 0.6761863827705383,
|
|
"learning_rate": 4.439375e-06,
|
|
"loss": 0.0153,
|
|
"step": 10800
|
|
},
|
|
{
|
|
"epoch": 5.053688141923436,
|
|
"grad_norm": 0.43812835216522217,
|
|
"learning_rate": 4.4315625e-06,
|
|
"loss": 0.0195,
|
|
"step": 10825
|
|
},
|
|
{
|
|
"epoch": 5.065359477124183,
|
|
"grad_norm": 1.3948005437850952,
|
|
"learning_rate": 4.42375e-06,
|
|
"loss": 0.017,
|
|
"step": 10850
|
|
},
|
|
{
|
|
"epoch": 5.07703081232493,
|
|
"grad_norm": 2.06145977973938,
|
|
"learning_rate": 4.4159375000000004e-06,
|
|
"loss": 0.026,
|
|
"step": 10875
|
|
},
|
|
{
|
|
"epoch": 5.088702147525677,
|
|
"grad_norm": 3.0333502292633057,
|
|
"learning_rate": 4.408125e-06,
|
|
"loss": 0.0146,
|
|
"step": 10900
|
|
},
|
|
{
|
|
"epoch": 5.1003734827264235,
|
|
"grad_norm": 2.764770746231079,
|
|
"learning_rate": 4.4003125e-06,
|
|
"loss": 0.0186,
|
|
"step": 10925
|
|
},
|
|
{
|
|
"epoch": 5.1120448179271705,
|
|
"grad_norm": 2.536029577255249,
|
|
"learning_rate": 4.3925e-06,
|
|
"loss": 0.0161,
|
|
"step": 10950
|
|
},
|
|
{
|
|
"epoch": 5.123716153127917,
|
|
"grad_norm": 2.648541212081909,
|
|
"learning_rate": 4.3846875000000005e-06,
|
|
"loss": 0.0217,
|
|
"step": 10975
|
|
},
|
|
{
|
|
"epoch": 5.135387488328665,
|
|
"grad_norm": 4.795249938964844,
|
|
"learning_rate": 4.376875e-06,
|
|
"loss": 0.0116,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 5.147058823529412,
|
|
"grad_norm": 0.7834287285804749,
|
|
"learning_rate": 4.3690625e-06,
|
|
"loss": 0.023,
|
|
"step": 11025
|
|
},
|
|
{
|
|
"epoch": 5.158730158730159,
|
|
"grad_norm": 6.7206010818481445,
|
|
"learning_rate": 4.36125e-06,
|
|
"loss": 0.0188,
|
|
"step": 11050
|
|
},
|
|
{
|
|
"epoch": 5.170401493930906,
|
|
"grad_norm": 0.3784288763999939,
|
|
"learning_rate": 4.353437500000001e-06,
|
|
"loss": 0.0187,
|
|
"step": 11075
|
|
},
|
|
{
|
|
"epoch": 5.182072829131653,
|
|
"grad_norm": 5.990387439727783,
|
|
"learning_rate": 4.3456250000000004e-06,
|
|
"loss": 0.0131,
|
|
"step": 11100
|
|
},
|
|
{
|
|
"epoch": 5.1937441643324,
|
|
"grad_norm": 2.445591449737549,
|
|
"learning_rate": 4.3378125e-06,
|
|
"loss": 0.0293,
|
|
"step": 11125
|
|
},
|
|
{
|
|
"epoch": 5.205415499533147,
|
|
"grad_norm": 3.114480495452881,
|
|
"learning_rate": 4.33e-06,
|
|
"loss": 0.0132,
|
|
"step": 11150
|
|
},
|
|
{
|
|
"epoch": 5.217086834733894,
|
|
"grad_norm": 0.6976014971733093,
|
|
"learning_rate": 4.3221875e-06,
|
|
"loss": 0.0254,
|
|
"step": 11175
|
|
},
|
|
{
|
|
"epoch": 5.228758169934641,
|
|
"grad_norm": 6.35882568359375,
|
|
"learning_rate": 4.3143750000000005e-06,
|
|
"loss": 0.017,
|
|
"step": 11200
|
|
},
|
|
{
|
|
"epoch": 5.2404295051353875,
|
|
"grad_norm": 2.0138509273529053,
|
|
"learning_rate": 4.3065625e-06,
|
|
"loss": 0.0236,
|
|
"step": 11225
|
|
},
|
|
{
|
|
"epoch": 5.2521008403361344,
|
|
"grad_norm": 3.1038243770599365,
|
|
"learning_rate": 4.29875e-06,
|
|
"loss": 0.015,
|
|
"step": 11250
|
|
},
|
|
{
|
|
"epoch": 5.263772175536881,
|
|
"grad_norm": 2.8752825260162354,
|
|
"learning_rate": 4.2909375e-06,
|
|
"loss": 0.0193,
|
|
"step": 11275
|
|
},
|
|
{
|
|
"epoch": 5.275443510737628,
|
|
"grad_norm": 1.1929106712341309,
|
|
"learning_rate": 4.283125000000001e-06,
|
|
"loss": 0.0114,
|
|
"step": 11300
|
|
},
|
|
{
|
|
"epoch": 5.287114845938375,
|
|
"grad_norm": 2.3317930698394775,
|
|
"learning_rate": 4.2753125e-06,
|
|
"loss": 0.0236,
|
|
"step": 11325
|
|
},
|
|
{
|
|
"epoch": 5.298786181139122,
|
|
"grad_norm": 2.8288731575012207,
|
|
"learning_rate": 4.2675e-06,
|
|
"loss": 0.0182,
|
|
"step": 11350
|
|
},
|
|
{
|
|
"epoch": 5.310457516339869,
|
|
"grad_norm": 4.622555255889893,
|
|
"learning_rate": 4.2596875e-06,
|
|
"loss": 0.0317,
|
|
"step": 11375
|
|
},
|
|
{
|
|
"epoch": 5.322128851540616,
|
|
"grad_norm": 0.40077078342437744,
|
|
"learning_rate": 4.251875000000001e-06,
|
|
"loss": 0.0117,
|
|
"step": 11400
|
|
},
|
|
{
|
|
"epoch": 5.333800186741363,
|
|
"grad_norm": 0.6447650194168091,
|
|
"learning_rate": 4.2440625000000005e-06,
|
|
"loss": 0.0213,
|
|
"step": 11425
|
|
},
|
|
{
|
|
"epoch": 5.34547152194211,
|
|
"grad_norm": 4.721693992614746,
|
|
"learning_rate": 4.23625e-06,
|
|
"loss": 0.0145,
|
|
"step": 11450
|
|
},
|
|
{
|
|
"epoch": 5.357142857142857,
|
|
"grad_norm": 4.699221134185791,
|
|
"learning_rate": 4.2284375e-06,
|
|
"loss": 0.0213,
|
|
"step": 11475
|
|
},
|
|
{
|
|
"epoch": 5.368814192343605,
|
|
"grad_norm": 1.0902756452560425,
|
|
"learning_rate": 4.220625e-06,
|
|
"loss": 0.0135,
|
|
"step": 11500
|
|
},
|
|
{
|
|
"epoch": 5.3804855275443515,
|
|
"grad_norm": 3.20731258392334,
|
|
"learning_rate": 4.212812500000001e-06,
|
|
"loss": 0.0192,
|
|
"step": 11525
|
|
},
|
|
{
|
|
"epoch": 5.392156862745098,
|
|
"grad_norm": 3.498342752456665,
|
|
"learning_rate": 4.2049999999999996e-06,
|
|
"loss": 0.0131,
|
|
"step": 11550
|
|
},
|
|
{
|
|
"epoch": 5.403828197945845,
|
|
"grad_norm": 0.2736945152282715,
|
|
"learning_rate": 4.1971875e-06,
|
|
"loss": 0.0191,
|
|
"step": 11575
|
|
},
|
|
{
|
|
"epoch": 5.415499533146592,
|
|
"grad_norm": 0.33990904688835144,
|
|
"learning_rate": 4.189375e-06,
|
|
"loss": 0.0131,
|
|
"step": 11600
|
|
},
|
|
{
|
|
"epoch": 5.427170868347339,
|
|
"grad_norm": 4.783412456512451,
|
|
"learning_rate": 4.181562500000001e-06,
|
|
"loss": 0.0235,
|
|
"step": 11625
|
|
},
|
|
{
|
|
"epoch": 5.438842203548086,
|
|
"grad_norm": 4.162958145141602,
|
|
"learning_rate": 4.17375e-06,
|
|
"loss": 0.0168,
|
|
"step": 11650
|
|
},
|
|
{
|
|
"epoch": 5.450513538748833,
|
|
"grad_norm": 2.0236053466796875,
|
|
"learning_rate": 4.1659375e-06,
|
|
"loss": 0.0321,
|
|
"step": 11675
|
|
},
|
|
{
|
|
"epoch": 5.46218487394958,
|
|
"grad_norm": 5.59421443939209,
|
|
"learning_rate": 4.158125e-06,
|
|
"loss": 0.0116,
|
|
"step": 11700
|
|
},
|
|
{
|
|
"epoch": 5.473856209150327,
|
|
"grad_norm": 1.2957547903060913,
|
|
"learning_rate": 4.150312500000001e-06,
|
|
"loss": 0.0187,
|
|
"step": 11725
|
|
},
|
|
{
|
|
"epoch": 5.485527544351074,
|
|
"grad_norm": 2.75832462310791,
|
|
"learning_rate": 4.1425e-06,
|
|
"loss": 0.0189,
|
|
"step": 11750
|
|
},
|
|
{
|
|
"epoch": 5.497198879551821,
|
|
"grad_norm": 2.776012659072876,
|
|
"learning_rate": 4.1346875e-06,
|
|
"loss": 0.0235,
|
|
"step": 11775
|
|
},
|
|
{
|
|
"epoch": 5.508870214752568,
|
|
"grad_norm": 4.952755451202393,
|
|
"learning_rate": 4.126875e-06,
|
|
"loss": 0.024,
|
|
"step": 11800
|
|
},
|
|
{
|
|
"epoch": 5.520541549953315,
|
|
"grad_norm": 1.2413980960845947,
|
|
"learning_rate": 4.1190625e-06,
|
|
"loss": 0.0248,
|
|
"step": 11825
|
|
},
|
|
{
|
|
"epoch": 5.5322128851540615,
|
|
"grad_norm": 4.319220066070557,
|
|
"learning_rate": 4.111250000000001e-06,
|
|
"loss": 0.0176,
|
|
"step": 11850
|
|
},
|
|
{
|
|
"epoch": 5.543884220354808,
|
|
"grad_norm": 2.8155884742736816,
|
|
"learning_rate": 4.1034375e-06,
|
|
"loss": 0.0226,
|
|
"step": 11875
|
|
},
|
|
{
|
|
"epoch": 5.555555555555555,
|
|
"grad_norm": 3.499506950378418,
|
|
"learning_rate": 4.095625e-06,
|
|
"loss": 0.0118,
|
|
"step": 11900
|
|
},
|
|
{
|
|
"epoch": 5.567226890756302,
|
|
"grad_norm": 3.1512813568115234,
|
|
"learning_rate": 4.0878125e-06,
|
|
"loss": 0.0157,
|
|
"step": 11925
|
|
},
|
|
{
|
|
"epoch": 5.578898225957049,
|
|
"grad_norm": 4.766519546508789,
|
|
"learning_rate": 4.080000000000001e-06,
|
|
"loss": 0.0184,
|
|
"step": 11950
|
|
},
|
|
{
|
|
"epoch": 5.590569561157796,
|
|
"grad_norm": 2.901200294494629,
|
|
"learning_rate": 4.0721875e-06,
|
|
"loss": 0.0198,
|
|
"step": 11975
|
|
},
|
|
{
|
|
"epoch": 5.602240896358543,
|
|
"grad_norm": 2.888226270675659,
|
|
"learning_rate": 4.064375e-06,
|
|
"loss": 0.0191,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 5.61391223155929,
|
|
"grad_norm": 1.0496464967727661,
|
|
"learning_rate": 4.0565625e-06,
|
|
"loss": 0.0219,
|
|
"step": 12025
|
|
},
|
|
{
|
|
"epoch": 5.625583566760037,
|
|
"grad_norm": 0.7852717041969299,
|
|
"learning_rate": 4.048750000000001e-06,
|
|
"loss": 0.0177,
|
|
"step": 12050
|
|
},
|
|
{
|
|
"epoch": 5.637254901960784,
|
|
"grad_norm": 2.5804331302642822,
|
|
"learning_rate": 4.0409375e-06,
|
|
"loss": 0.0241,
|
|
"step": 12075
|
|
},
|
|
{
|
|
"epoch": 5.648926237161532,
|
|
"grad_norm": 1.4901975393295288,
|
|
"learning_rate": 4.0331250000000005e-06,
|
|
"loss": 0.0183,
|
|
"step": 12100
|
|
},
|
|
{
|
|
"epoch": 5.660597572362279,
|
|
"grad_norm": 3.2913360595703125,
|
|
"learning_rate": 4.0253125e-06,
|
|
"loss": 0.0194,
|
|
"step": 12125
|
|
},
|
|
{
|
|
"epoch": 5.6722689075630255,
|
|
"grad_norm": 3.6681406497955322,
|
|
"learning_rate": 4.0175e-06,
|
|
"loss": 0.0149,
|
|
"step": 12150
|
|
},
|
|
{
|
|
"epoch": 5.683940242763772,
|
|
"grad_norm": 2.2549614906311035,
|
|
"learning_rate": 4.0096875e-06,
|
|
"loss": 0.023,
|
|
"step": 12175
|
|
},
|
|
{
|
|
"epoch": 5.695611577964519,
|
|
"grad_norm": 10.556268692016602,
|
|
"learning_rate": 4.001875e-06,
|
|
"loss": 0.0179,
|
|
"step": 12200
|
|
},
|
|
{
|
|
"epoch": 5.707282913165266,
|
|
"grad_norm": 2.5755960941314697,
|
|
"learning_rate": 3.9940625e-06,
|
|
"loss": 0.0215,
|
|
"step": 12225
|
|
},
|
|
{
|
|
"epoch": 5.718954248366013,
|
|
"grad_norm": 1.8617174625396729,
|
|
"learning_rate": 3.98625e-06,
|
|
"loss": 0.017,
|
|
"step": 12250
|
|
},
|
|
{
|
|
"epoch": 5.73062558356676,
|
|
"grad_norm": 3.4985668659210205,
|
|
"learning_rate": 3.9784375e-06,
|
|
"loss": 0.0299,
|
|
"step": 12275
|
|
},
|
|
{
|
|
"epoch": 5.742296918767507,
|
|
"grad_norm": 9.18930435180664,
|
|
"learning_rate": 3.970625e-06,
|
|
"loss": 0.0157,
|
|
"step": 12300
|
|
},
|
|
{
|
|
"epoch": 5.753968253968254,
|
|
"grad_norm": 3.4537224769592285,
|
|
"learning_rate": 3.9628125000000004e-06,
|
|
"loss": 0.0231,
|
|
"step": 12325
|
|
},
|
|
{
|
|
"epoch": 5.765639589169001,
|
|
"grad_norm": 3.9688210487365723,
|
|
"learning_rate": 3.955e-06,
|
|
"loss": 0.0205,
|
|
"step": 12350
|
|
},
|
|
{
|
|
"epoch": 5.777310924369748,
|
|
"grad_norm": 1.72626793384552,
|
|
"learning_rate": 3.9471875e-06,
|
|
"loss": 0.0225,
|
|
"step": 12375
|
|
},
|
|
{
|
|
"epoch": 5.788982259570495,
|
|
"grad_norm": 2.350785493850708,
|
|
"learning_rate": 3.939375e-06,
|
|
"loss": 0.0137,
|
|
"step": 12400
|
|
},
|
|
{
|
|
"epoch": 5.800653594771242,
|
|
"grad_norm": 3.5603792667388916,
|
|
"learning_rate": 3.931875e-06,
|
|
"loss": 0.0252,
|
|
"step": 12425
|
|
},
|
|
{
|
|
"epoch": 5.812324929971989,
|
|
"grad_norm": 0.25176432728767395,
|
|
"learning_rate": 3.9240625e-06,
|
|
"loss": 0.0202,
|
|
"step": 12450
|
|
},
|
|
{
|
|
"epoch": 5.8239962651727355,
|
|
"grad_norm": 4.103959083557129,
|
|
"learning_rate": 3.916250000000001e-06,
|
|
"loss": 0.0201,
|
|
"step": 12475
|
|
},
|
|
{
|
|
"epoch": 5.835667600373482,
|
|
"grad_norm": 5.1462202072143555,
|
|
"learning_rate": 3.9084375e-06,
|
|
"loss": 0.014,
|
|
"step": 12500
|
|
},
|
|
{
|
|
"epoch": 5.847338935574229,
|
|
"grad_norm": 1.2658880949020386,
|
|
"learning_rate": 3.9006250000000005e-06,
|
|
"loss": 0.0224,
|
|
"step": 12525
|
|
},
|
|
{
|
|
"epoch": 5.859010270774976,
|
|
"grad_norm": 1.1250051259994507,
|
|
"learning_rate": 3.8928125e-06,
|
|
"loss": 0.013,
|
|
"step": 12550
|
|
},
|
|
{
|
|
"epoch": 5.870681605975724,
|
|
"grad_norm": 0.4671033024787903,
|
|
"learning_rate": 3.885e-06,
|
|
"loss": 0.0222,
|
|
"step": 12575
|
|
},
|
|
{
|
|
"epoch": 5.882352941176471,
|
|
"grad_norm": 4.812198638916016,
|
|
"learning_rate": 3.8771875e-06,
|
|
"loss": 0.013,
|
|
"step": 12600
|
|
},
|
|
{
|
|
"epoch": 5.894024276377218,
|
|
"grad_norm": 6.955799102783203,
|
|
"learning_rate": 3.869375e-06,
|
|
"loss": 0.0216,
|
|
"step": 12625
|
|
},
|
|
{
|
|
"epoch": 5.905695611577965,
|
|
"grad_norm": 9.038055419921875,
|
|
"learning_rate": 3.8615625e-06,
|
|
"loss": 0.0147,
|
|
"step": 12650
|
|
},
|
|
{
|
|
"epoch": 5.917366946778712,
|
|
"grad_norm": 2.660266399383545,
|
|
"learning_rate": 3.85375e-06,
|
|
"loss": 0.0179,
|
|
"step": 12675
|
|
},
|
|
{
|
|
"epoch": 5.929038281979459,
|
|
"grad_norm": 3.986816167831421,
|
|
"learning_rate": 3.8459375e-06,
|
|
"loss": 0.0175,
|
|
"step": 12700
|
|
},
|
|
{
|
|
"epoch": 5.940709617180206,
|
|
"grad_norm": 1.7132847309112549,
|
|
"learning_rate": 3.838125e-06,
|
|
"loss": 0.0243,
|
|
"step": 12725
|
|
},
|
|
{
|
|
"epoch": 5.9523809523809526,
|
|
"grad_norm": 1.878422498703003,
|
|
"learning_rate": 3.8303125000000004e-06,
|
|
"loss": 0.0191,
|
|
"step": 12750
|
|
},
|
|
{
|
|
"epoch": 5.9640522875816995,
|
|
"grad_norm": 1.7959318161010742,
|
|
"learning_rate": 3.8225e-06,
|
|
"loss": 0.0285,
|
|
"step": 12775
|
|
},
|
|
{
|
|
"epoch": 5.975723622782446,
|
|
"grad_norm": 7.094222068786621,
|
|
"learning_rate": 3.8146875e-06,
|
|
"loss": 0.019,
|
|
"step": 12800
|
|
},
|
|
{
|
|
"epoch": 5.987394957983193,
|
|
"grad_norm": 2.3803250789642334,
|
|
"learning_rate": 3.806875e-06,
|
|
"loss": 0.027,
|
|
"step": 12825
|
|
},
|
|
{
|
|
"epoch": 5.99906629318394,
|
|
"grad_norm": 5.211573123931885,
|
|
"learning_rate": 3.7990625e-06,
|
|
"loss": 0.0239,
|
|
"step": 12850
|
|
},
|
|
{
|
|
"epoch": 6.010737628384687,
|
|
"grad_norm": 2.943984270095825,
|
|
"learning_rate": 3.7912500000000003e-06,
|
|
"loss": 0.0156,
|
|
"step": 12875
|
|
},
|
|
{
|
|
"epoch": 6.022408963585434,
|
|
"grad_norm": 2.1434903144836426,
|
|
"learning_rate": 3.7834375000000006e-06,
|
|
"loss": 0.0147,
|
|
"step": 12900
|
|
},
|
|
{
|
|
"epoch": 6.034080298786181,
|
|
"grad_norm": 0.33908581733703613,
|
|
"learning_rate": 3.775625e-06,
|
|
"loss": 0.0089,
|
|
"step": 12925
|
|
},
|
|
{
|
|
"epoch": 6.045751633986928,
|
|
"grad_norm": 7.022948265075684,
|
|
"learning_rate": 3.7678125e-06,
|
|
"loss": 0.0165,
|
|
"step": 12950
|
|
},
|
|
{
|
|
"epoch": 6.057422969187675,
|
|
"grad_norm": 1.1056474447250366,
|
|
"learning_rate": 3.7600000000000004e-06,
|
|
"loss": 0.0154,
|
|
"step": 12975
|
|
},
|
|
{
|
|
"epoch": 6.069094304388422,
|
|
"grad_norm": 3.8569955825805664,
|
|
"learning_rate": 3.7521875000000007e-06,
|
|
"loss": 0.0149,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 6.080765639589169,
|
|
"grad_norm": 2.6585159301757812,
|
|
"learning_rate": 3.744375e-06,
|
|
"loss": 0.0147,
|
|
"step": 13025
|
|
},
|
|
{
|
|
"epoch": 6.092436974789916,
|
|
"grad_norm": 3.2226240634918213,
|
|
"learning_rate": 3.7365625000000003e-06,
|
|
"loss": 0.0119,
|
|
"step": 13050
|
|
},
|
|
{
|
|
"epoch": 6.104108309990663,
|
|
"grad_norm": 1.7068639993667603,
|
|
"learning_rate": 3.7290625000000003e-06,
|
|
"loss": 0.013,
|
|
"step": 13075
|
|
},
|
|
{
|
|
"epoch": 6.1157796451914095,
|
|
"grad_norm": 0.6673070788383484,
|
|
"learning_rate": 3.7212500000000006e-06,
|
|
"loss": 0.0097,
|
|
"step": 13100
|
|
},
|
|
{
|
|
"epoch": 6.127450980392156,
|
|
"grad_norm": 0.041345566511154175,
|
|
"learning_rate": 3.7134375e-06,
|
|
"loss": 0.0115,
|
|
"step": 13125
|
|
},
|
|
{
|
|
"epoch": 6.139122315592904,
|
|
"grad_norm": 0.7863844037055969,
|
|
"learning_rate": 3.705625e-06,
|
|
"loss": 0.0126,
|
|
"step": 13150
|
|
},
|
|
{
|
|
"epoch": 6.150793650793651,
|
|
"grad_norm": 2.8898963928222656,
|
|
"learning_rate": 3.6978125000000004e-06,
|
|
"loss": 0.0151,
|
|
"step": 13175
|
|
},
|
|
{
|
|
"epoch": 6.162464985994398,
|
|
"grad_norm": 4.4687299728393555,
|
|
"learning_rate": 3.6900000000000002e-06,
|
|
"loss": 0.0145,
|
|
"step": 13200
|
|
},
|
|
{
|
|
"epoch": 6.174136321195145,
|
|
"grad_norm": 2.697178602218628,
|
|
"learning_rate": 3.6821875e-06,
|
|
"loss": 0.0136,
|
|
"step": 13225
|
|
},
|
|
{
|
|
"epoch": 6.185807656395892,
|
|
"grad_norm": 7.974569320678711,
|
|
"learning_rate": 3.674375e-06,
|
|
"loss": 0.0129,
|
|
"step": 13250
|
|
},
|
|
{
|
|
"epoch": 6.197478991596639,
|
|
"grad_norm": 0.35622915625572205,
|
|
"learning_rate": 3.6665625e-06,
|
|
"loss": 0.0168,
|
|
"step": 13275
|
|
},
|
|
{
|
|
"epoch": 6.209150326797386,
|
|
"grad_norm": 6.407752990722656,
|
|
"learning_rate": 3.6587500000000003e-06,
|
|
"loss": 0.0135,
|
|
"step": 13300
|
|
},
|
|
{
|
|
"epoch": 6.220821661998133,
|
|
"grad_norm": 0.7060608267784119,
|
|
"learning_rate": 3.6509374999999997e-06,
|
|
"loss": 0.0113,
|
|
"step": 13325
|
|
},
|
|
{
|
|
"epoch": 6.23249299719888,
|
|
"grad_norm": 0.6277226805686951,
|
|
"learning_rate": 3.643125e-06,
|
|
"loss": 0.0124,
|
|
"step": 13350
|
|
},
|
|
{
|
|
"epoch": 6.2441643323996265,
|
|
"grad_norm": 3.3179471492767334,
|
|
"learning_rate": 3.6353125e-06,
|
|
"loss": 0.0128,
|
|
"step": 13375
|
|
},
|
|
{
|
|
"epoch": 6.2558356676003735,
|
|
"grad_norm": 2.2590084075927734,
|
|
"learning_rate": 3.6275000000000004e-06,
|
|
"loss": 0.0137,
|
|
"step": 13400
|
|
},
|
|
{
|
|
"epoch": 6.26750700280112,
|
|
"grad_norm": 0.3138970136642456,
|
|
"learning_rate": 3.6196875000000007e-06,
|
|
"loss": 0.0105,
|
|
"step": 13425
|
|
},
|
|
{
|
|
"epoch": 6.279178338001867,
|
|
"grad_norm": 4.370602607727051,
|
|
"learning_rate": 3.611875e-06,
|
|
"loss": 0.0106,
|
|
"step": 13450
|
|
},
|
|
{
|
|
"epoch": 6.290849673202614,
|
|
"grad_norm": 0.7395113706588745,
|
|
"learning_rate": 3.6040625000000003e-06,
|
|
"loss": 0.0074,
|
|
"step": 13475
|
|
},
|
|
{
|
|
"epoch": 6.302521008403361,
|
|
"grad_norm": 4.352066516876221,
|
|
"learning_rate": 3.5962500000000005e-06,
|
|
"loss": 0.0148,
|
|
"step": 13500
|
|
},
|
|
{
|
|
"epoch": 6.314192343604108,
|
|
"grad_norm": 4.510288715362549,
|
|
"learning_rate": 3.5884375000000003e-06,
|
|
"loss": 0.0127,
|
|
"step": 13525
|
|
},
|
|
{
|
|
"epoch": 6.325863678804855,
|
|
"grad_norm": 6.977252960205078,
|
|
"learning_rate": 3.580625e-06,
|
|
"loss": 0.0163,
|
|
"step": 13550
|
|
},
|
|
{
|
|
"epoch": 6.337535014005602,
|
|
"grad_norm": 3.6490318775177,
|
|
"learning_rate": 3.5728125e-06,
|
|
"loss": 0.0127,
|
|
"step": 13575
|
|
},
|
|
{
|
|
"epoch": 6.349206349206349,
|
|
"grad_norm": 2.546675682067871,
|
|
"learning_rate": 3.565e-06,
|
|
"loss": 0.0123,
|
|
"step": 13600
|
|
},
|
|
{
|
|
"epoch": 6.360877684407096,
|
|
"grad_norm": 9.396807670593262,
|
|
"learning_rate": 3.5571875000000004e-06,
|
|
"loss": 0.0155,
|
|
"step": 13625
|
|
},
|
|
{
|
|
"epoch": 6.372549019607844,
|
|
"grad_norm": 5.870864391326904,
|
|
"learning_rate": 3.549375e-06,
|
|
"loss": 0.0153,
|
|
"step": 13650
|
|
},
|
|
{
|
|
"epoch": 6.3842203548085905,
|
|
"grad_norm": 0.08212006092071533,
|
|
"learning_rate": 3.5415625e-06,
|
|
"loss": 0.0113,
|
|
"step": 13675
|
|
},
|
|
{
|
|
"epoch": 6.395891690009337,
|
|
"grad_norm": 1.5073678493499756,
|
|
"learning_rate": 3.5337500000000003e-06,
|
|
"loss": 0.0148,
|
|
"step": 13700
|
|
},
|
|
{
|
|
"epoch": 6.407563025210084,
|
|
"grad_norm": 1.6626029014587402,
|
|
"learning_rate": 3.5259375000000005e-06,
|
|
"loss": 0.0206,
|
|
"step": 13725
|
|
},
|
|
{
|
|
"epoch": 6.419234360410831,
|
|
"grad_norm": 5.561774730682373,
|
|
"learning_rate": 3.518125e-06,
|
|
"loss": 0.0219,
|
|
"step": 13750
|
|
},
|
|
{
|
|
"epoch": 6.430905695611578,
|
|
"grad_norm": 3.8881995677948,
|
|
"learning_rate": 3.5103125e-06,
|
|
"loss": 0.0194,
|
|
"step": 13775
|
|
},
|
|
{
|
|
"epoch": 6.442577030812325,
|
|
"grad_norm": 6.549841403961182,
|
|
"learning_rate": 3.5025000000000003e-06,
|
|
"loss": 0.014,
|
|
"step": 13800
|
|
},
|
|
{
|
|
"epoch": 6.454248366013072,
|
|
"grad_norm": 0.38337138295173645,
|
|
"learning_rate": 3.4946875000000006e-06,
|
|
"loss": 0.0125,
|
|
"step": 13825
|
|
},
|
|
{
|
|
"epoch": 6.465919701213819,
|
|
"grad_norm": 7.83842658996582,
|
|
"learning_rate": 3.486875e-06,
|
|
"loss": 0.0117,
|
|
"step": 13850
|
|
},
|
|
{
|
|
"epoch": 6.477591036414566,
|
|
"grad_norm": 2.4902381896972656,
|
|
"learning_rate": 3.4790625e-06,
|
|
"loss": 0.0154,
|
|
"step": 13875
|
|
},
|
|
{
|
|
"epoch": 6.489262371615313,
|
|
"grad_norm": 2.7067272663116455,
|
|
"learning_rate": 3.47125e-06,
|
|
"loss": 0.0145,
|
|
"step": 13900
|
|
},
|
|
{
|
|
"epoch": 6.50093370681606,
|
|
"grad_norm": 0.13609760999679565,
|
|
"learning_rate": 3.4634375000000002e-06,
|
|
"loss": 0.013,
|
|
"step": 13925
|
|
},
|
|
{
|
|
"epoch": 6.512605042016807,
|
|
"grad_norm": 7.123418807983398,
|
|
"learning_rate": 3.4556249999999996e-06,
|
|
"loss": 0.0135,
|
|
"step": 13950
|
|
},
|
|
{
|
|
"epoch": 6.524276377217554,
|
|
"grad_norm": 2.5809738636016846,
|
|
"learning_rate": 3.4478125e-06,
|
|
"loss": 0.013,
|
|
"step": 13975
|
|
},
|
|
{
|
|
"epoch": 6.5359477124183005,
|
|
"grad_norm": 3.8302764892578125,
|
|
"learning_rate": 3.44e-06,
|
|
"loss": 0.0123,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 6.5476190476190474,
|
|
"grad_norm": 0.38114723563194275,
|
|
"learning_rate": 3.4321875000000003e-06,
|
|
"loss": 0.0123,
|
|
"step": 14025
|
|
},
|
|
{
|
|
"epoch": 6.559290382819794,
|
|
"grad_norm": 1.6169977188110352,
|
|
"learning_rate": 3.4243750000000006e-06,
|
|
"loss": 0.0139,
|
|
"step": 14050
|
|
},
|
|
{
|
|
"epoch": 6.570961718020541,
|
|
"grad_norm": 0.527672529220581,
|
|
"learning_rate": 3.4165625e-06,
|
|
"loss": 0.0137,
|
|
"step": 14075
|
|
},
|
|
{
|
|
"epoch": 6.582633053221288,
|
|
"grad_norm": 4.989930152893066,
|
|
"learning_rate": 3.40875e-06,
|
|
"loss": 0.0108,
|
|
"step": 14100
|
|
},
|
|
{
|
|
"epoch": 6.594304388422035,
|
|
"grad_norm": 0.13473570346832275,
|
|
"learning_rate": 3.4009375000000004e-06,
|
|
"loss": 0.0159,
|
|
"step": 14125
|
|
},
|
|
{
|
|
"epoch": 6.605975723622782,
|
|
"grad_norm": 3.537700891494751,
|
|
"learning_rate": 3.3931250000000007e-06,
|
|
"loss": 0.0201,
|
|
"step": 14150
|
|
},
|
|
{
|
|
"epoch": 6.617647058823529,
|
|
"grad_norm": 2.97955060005188,
|
|
"learning_rate": 3.3853125e-06,
|
|
"loss": 0.0106,
|
|
"step": 14175
|
|
},
|
|
{
|
|
"epoch": 6.629318394024276,
|
|
"grad_norm": 0.7965431809425354,
|
|
"learning_rate": 3.3775000000000003e-06,
|
|
"loss": 0.016,
|
|
"step": 14200
|
|
},
|
|
{
|
|
"epoch": 6.640989729225024,
|
|
"grad_norm": 2.8173744678497314,
|
|
"learning_rate": 3.3696875e-06,
|
|
"loss": 0.0169,
|
|
"step": 14225
|
|
},
|
|
{
|
|
"epoch": 6.652661064425771,
|
|
"grad_norm": 0.6731769442558289,
|
|
"learning_rate": 3.3618750000000003e-06,
|
|
"loss": 0.0118,
|
|
"step": 14250
|
|
},
|
|
{
|
|
"epoch": 6.664332399626518,
|
|
"grad_norm": 3.4868786334991455,
|
|
"learning_rate": 3.3540624999999997e-06,
|
|
"loss": 0.0149,
|
|
"step": 14275
|
|
},
|
|
{
|
|
"epoch": 6.6760037348272645,
|
|
"grad_norm": 7.706684112548828,
|
|
"learning_rate": 3.34625e-06,
|
|
"loss": 0.0211,
|
|
"step": 14300
|
|
},
|
|
{
|
|
"epoch": 6.687675070028011,
|
|
"grad_norm": 3.9315950870513916,
|
|
"learning_rate": 3.3384375e-06,
|
|
"loss": 0.0122,
|
|
"step": 14325
|
|
},
|
|
{
|
|
"epoch": 6.699346405228758,
|
|
"grad_norm": 0.4107113778591156,
|
|
"learning_rate": 3.3306250000000004e-06,
|
|
"loss": 0.0132,
|
|
"step": 14350
|
|
},
|
|
{
|
|
"epoch": 6.711017740429505,
|
|
"grad_norm": 0.8928655982017517,
|
|
"learning_rate": 3.3228125e-06,
|
|
"loss": 0.0132,
|
|
"step": 14375
|
|
},
|
|
{
|
|
"epoch": 6.722689075630252,
|
|
"grad_norm": 0.32426151633262634,
|
|
"learning_rate": 3.315e-06,
|
|
"loss": 0.0121,
|
|
"step": 14400
|
|
},
|
|
{
|
|
"epoch": 6.734360410830999,
|
|
"grad_norm": 0.9105150103569031,
|
|
"learning_rate": 3.3071875000000003e-06,
|
|
"loss": 0.0134,
|
|
"step": 14425
|
|
},
|
|
{
|
|
"epoch": 6.746031746031746,
|
|
"grad_norm": 4.484381198883057,
|
|
"learning_rate": 3.2993750000000005e-06,
|
|
"loss": 0.012,
|
|
"step": 14450
|
|
},
|
|
{
|
|
"epoch": 6.757703081232493,
|
|
"grad_norm": 1.9971312284469604,
|
|
"learning_rate": 3.2915625e-06,
|
|
"loss": 0.0129,
|
|
"step": 14475
|
|
},
|
|
{
|
|
"epoch": 6.76937441643324,
|
|
"grad_norm": 5.409192085266113,
|
|
"learning_rate": 3.28375e-06,
|
|
"loss": 0.0142,
|
|
"step": 14500
|
|
},
|
|
{
|
|
"epoch": 6.781045751633987,
|
|
"grad_norm": 0.35186824202537537,
|
|
"learning_rate": 3.2759375000000003e-06,
|
|
"loss": 0.0155,
|
|
"step": 14525
|
|
},
|
|
{
|
|
"epoch": 6.792717086834734,
|
|
"grad_norm": 8.460790634155273,
|
|
"learning_rate": 3.268125e-06,
|
|
"loss": 0.0119,
|
|
"step": 14550
|
|
},
|
|
{
|
|
"epoch": 6.804388422035481,
|
|
"grad_norm": 2.0943799018859863,
|
|
"learning_rate": 3.2603125e-06,
|
|
"loss": 0.0121,
|
|
"step": 14575
|
|
},
|
|
{
|
|
"epoch": 6.816059757236228,
|
|
"grad_norm": 7.832350730895996,
|
|
"learning_rate": 3.2525e-06,
|
|
"loss": 0.0189,
|
|
"step": 14600
|
|
},
|
|
{
|
|
"epoch": 6.8277310924369745,
|
|
"grad_norm": 0.39846083521842957,
|
|
"learning_rate": 3.2446875e-06,
|
|
"loss": 0.0134,
|
|
"step": 14625
|
|
},
|
|
{
|
|
"epoch": 6.839402427637721,
|
|
"grad_norm": 7.057211875915527,
|
|
"learning_rate": 3.2368750000000002e-06,
|
|
"loss": 0.0133,
|
|
"step": 14650
|
|
},
|
|
{
|
|
"epoch": 6.851073762838468,
|
|
"grad_norm": 0.2746317684650421,
|
|
"learning_rate": 3.2290625000000005e-06,
|
|
"loss": 0.0137,
|
|
"step": 14675
|
|
},
|
|
{
|
|
"epoch": 6.862745098039216,
|
|
"grad_norm": 5.0260396003723145,
|
|
"learning_rate": 3.22125e-06,
|
|
"loss": 0.0173,
|
|
"step": 14700
|
|
},
|
|
{
|
|
"epoch": 6.874416433239963,
|
|
"grad_norm": 4.125462532043457,
|
|
"learning_rate": 3.2134375e-06,
|
|
"loss": 0.0176,
|
|
"step": 14725
|
|
},
|
|
{
|
|
"epoch": 6.88608776844071,
|
|
"grad_norm": 6.75167179107666,
|
|
"learning_rate": 3.2056250000000003e-06,
|
|
"loss": 0.0145,
|
|
"step": 14750
|
|
},
|
|
{
|
|
"epoch": 6.897759103641457,
|
|
"grad_norm": 1.9314770698547363,
|
|
"learning_rate": 3.1978125000000006e-06,
|
|
"loss": 0.013,
|
|
"step": 14775
|
|
},
|
|
{
|
|
"epoch": 6.909430438842204,
|
|
"grad_norm": 2.6127073764801025,
|
|
"learning_rate": 3.19e-06,
|
|
"loss": 0.0105,
|
|
"step": 14800
|
|
},
|
|
{
|
|
"epoch": 6.921101774042951,
|
|
"grad_norm": 1.5826447010040283,
|
|
"learning_rate": 3.1821875e-06,
|
|
"loss": 0.0127,
|
|
"step": 14825
|
|
},
|
|
{
|
|
"epoch": 6.932773109243698,
|
|
"grad_norm": 0.6113137006759644,
|
|
"learning_rate": 3.1743750000000004e-06,
|
|
"loss": 0.0126,
|
|
"step": 14850
|
|
},
|
|
{
|
|
"epoch": 6.944444444444445,
|
|
"grad_norm": 3.849680185317993,
|
|
"learning_rate": 3.1665625000000002e-06,
|
|
"loss": 0.015,
|
|
"step": 14875
|
|
},
|
|
{
|
|
"epoch": 6.956115779645192,
|
|
"grad_norm": 5.097741603851318,
|
|
"learning_rate": 3.15875e-06,
|
|
"loss": 0.0072,
|
|
"step": 14900
|
|
},
|
|
{
|
|
"epoch": 6.9677871148459385,
|
|
"grad_norm": 4.420420169830322,
|
|
"learning_rate": 3.1509375000000003e-06,
|
|
"loss": 0.0135,
|
|
"step": 14925
|
|
},
|
|
{
|
|
"epoch": 6.979458450046685,
|
|
"grad_norm": 3.5658352375030518,
|
|
"learning_rate": 3.143125e-06,
|
|
"loss": 0.012,
|
|
"step": 14950
|
|
},
|
|
{
|
|
"epoch": 6.991129785247432,
|
|
"grad_norm": 0.9113016128540039,
|
|
"learning_rate": 3.1353125000000003e-06,
|
|
"loss": 0.008,
|
|
"step": 14975
|
|
},
|
|
{
|
|
"epoch": 7.002801120448179,
|
|
"grad_norm": 2.622985363006592,
|
|
"learning_rate": 3.1274999999999997e-06,
|
|
"loss": 0.0189,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 7.002801120448179,
|
|
"eval_loss": 0.19959864020347595,
|
|
"eval_runtime": 5339.534,
|
|
"eval_samples_per_second": 1.763,
|
|
"eval_steps_per_second": 0.22,
|
|
"eval_wer": 0.09801388664621347,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 7.014472455648926,
|
|
"grad_norm": 0.3408145606517792,
|
|
"learning_rate": 3.1196875e-06,
|
|
"loss": 0.0108,
|
|
"step": 15025
|
|
},
|
|
{
|
|
"epoch": 7.026143790849673,
|
|
"grad_norm": 1.0346554517745972,
|
|
"learning_rate": 3.111875e-06,
|
|
"loss": 0.0135,
|
|
"step": 15050
|
|
},
|
|
{
|
|
"epoch": 7.03781512605042,
|
|
"grad_norm": 0.08663380146026611,
|
|
"learning_rate": 3.1040625e-06,
|
|
"loss": 0.0088,
|
|
"step": 15075
|
|
},
|
|
{
|
|
"epoch": 7.049486461251167,
|
|
"grad_norm": 0.6362659335136414,
|
|
"learning_rate": 3.0962500000000002e-06,
|
|
"loss": 0.0072,
|
|
"step": 15100
|
|
},
|
|
{
|
|
"epoch": 7.061157796451914,
|
|
"grad_norm": 0.1300945281982422,
|
|
"learning_rate": 3.0884375e-06,
|
|
"loss": 0.0074,
|
|
"step": 15125
|
|
},
|
|
{
|
|
"epoch": 7.072829131652661,
|
|
"grad_norm": 4.269519805908203,
|
|
"learning_rate": 3.0806250000000003e-06,
|
|
"loss": 0.0158,
|
|
"step": 15150
|
|
},
|
|
{
|
|
"epoch": 7.084500466853408,
|
|
"grad_norm": 5.42411994934082,
|
|
"learning_rate": 3.0728125e-06,
|
|
"loss": 0.0112,
|
|
"step": 15175
|
|
},
|
|
{
|
|
"epoch": 7.096171802054155,
|
|
"grad_norm": 0.7867230176925659,
|
|
"learning_rate": 3.0650000000000003e-06,
|
|
"loss": 0.0204,
|
|
"step": 15200
|
|
},
|
|
{
|
|
"epoch": 7.107843137254902,
|
|
"grad_norm": 0.30360114574432373,
|
|
"learning_rate": 3.0571875e-06,
|
|
"loss": 0.01,
|
|
"step": 15225
|
|
},
|
|
{
|
|
"epoch": 7.1195144724556485,
|
|
"grad_norm": 3.1817336082458496,
|
|
"learning_rate": 3.0493750000000003e-06,
|
|
"loss": 0.0126,
|
|
"step": 15250
|
|
},
|
|
{
|
|
"epoch": 7.131185807656396,
|
|
"grad_norm": 1.1198678016662598,
|
|
"learning_rate": 3.0415625e-06,
|
|
"loss": 0.0059,
|
|
"step": 15275
|
|
},
|
|
{
|
|
"epoch": 7.142857142857143,
|
|
"grad_norm": 1.7146356105804443,
|
|
"learning_rate": 3.03375e-06,
|
|
"loss": 0.0101,
|
|
"step": 15300
|
|
},
|
|
{
|
|
"epoch": 7.15452847805789,
|
|
"grad_norm": 0.09200263023376465,
|
|
"learning_rate": 3.0259375e-06,
|
|
"loss": 0.0098,
|
|
"step": 15325
|
|
},
|
|
{
|
|
"epoch": 7.166199813258637,
|
|
"grad_norm": 2.3543612957000732,
|
|
"learning_rate": 3.018125e-06,
|
|
"loss": 0.0172,
|
|
"step": 15350
|
|
},
|
|
{
|
|
"epoch": 7.177871148459384,
|
|
"grad_norm": 0.4582861661911011,
|
|
"learning_rate": 3.0103125000000002e-06,
|
|
"loss": 0.0082,
|
|
"step": 15375
|
|
},
|
|
{
|
|
"epoch": 7.189542483660131,
|
|
"grad_norm": 3.5268125534057617,
|
|
"learning_rate": 3.0025e-06,
|
|
"loss": 0.015,
|
|
"step": 15400
|
|
},
|
|
{
|
|
"epoch": 7.201213818860878,
|
|
"grad_norm": 3.0377275943756104,
|
|
"learning_rate": 2.9946875000000003e-06,
|
|
"loss": 0.0079,
|
|
"step": 15425
|
|
},
|
|
{
|
|
"epoch": 7.212885154061625,
|
|
"grad_norm": 1.1198071241378784,
|
|
"learning_rate": 2.986875e-06,
|
|
"loss": 0.0175,
|
|
"step": 15450
|
|
},
|
|
{
|
|
"epoch": 7.224556489262372,
|
|
"grad_norm": 0.33140361309051514,
|
|
"learning_rate": 2.9790625000000003e-06,
|
|
"loss": 0.0078,
|
|
"step": 15475
|
|
},
|
|
{
|
|
"epoch": 7.236227824463119,
|
|
"grad_norm": 2.4766924381256104,
|
|
"learning_rate": 2.97125e-06,
|
|
"loss": 0.0162,
|
|
"step": 15500
|
|
},
|
|
{
|
|
"epoch": 7.2478991596638656,
|
|
"grad_norm": 0.24502252042293549,
|
|
"learning_rate": 2.9634375000000004e-06,
|
|
"loss": 0.0083,
|
|
"step": 15525
|
|
},
|
|
{
|
|
"epoch": 7.2595704948646125,
|
|
"grad_norm": 0.837504506111145,
|
|
"learning_rate": 2.955625e-06,
|
|
"loss": 0.0145,
|
|
"step": 15550
|
|
},
|
|
{
|
|
"epoch": 7.271241830065359,
|
|
"grad_norm": 0.06051575765013695,
|
|
"learning_rate": 2.9478125000000004e-06,
|
|
"loss": 0.0071,
|
|
"step": 15575
|
|
},
|
|
{
|
|
"epoch": 7.282913165266106,
|
|
"grad_norm": 2.0014054775238037,
|
|
"learning_rate": 2.9400000000000002e-06,
|
|
"loss": 0.0146,
|
|
"step": 15600
|
|
},
|
|
{
|
|
"epoch": 7.294584500466853,
|
|
"grad_norm": 0.5527703762054443,
|
|
"learning_rate": 2.9321875e-06,
|
|
"loss": 0.0093,
|
|
"step": 15625
|
|
},
|
|
{
|
|
"epoch": 7.3062558356676,
|
|
"grad_norm": 0.5724664330482483,
|
|
"learning_rate": 2.924375e-06,
|
|
"loss": 0.0147,
|
|
"step": 15650
|
|
},
|
|
{
|
|
"epoch": 7.317927170868347,
|
|
"grad_norm": 0.18703560531139374,
|
|
"learning_rate": 2.9165625e-06,
|
|
"loss": 0.0129,
|
|
"step": 15675
|
|
},
|
|
{
|
|
"epoch": 7.329598506069094,
|
|
"grad_norm": 2.189370632171631,
|
|
"learning_rate": 2.90875e-06,
|
|
"loss": 0.0133,
|
|
"step": 15700
|
|
},
|
|
{
|
|
"epoch": 7.341269841269841,
|
|
"grad_norm": 0.19430263340473175,
|
|
"learning_rate": 2.9009375e-06,
|
|
"loss": 0.0109,
|
|
"step": 15725
|
|
},
|
|
{
|
|
"epoch": 7.352941176470588,
|
|
"grad_norm": 2.84920597076416,
|
|
"learning_rate": 2.893125e-06,
|
|
"loss": 0.0099,
|
|
"step": 15750
|
|
},
|
|
{
|
|
"epoch": 7.364612511671335,
|
|
"grad_norm": 1.7251280546188354,
|
|
"learning_rate": 2.8853125e-06,
|
|
"loss": 0.0074,
|
|
"step": 15775
|
|
},
|
|
{
|
|
"epoch": 7.376283846872083,
|
|
"grad_norm": 0.5282366871833801,
|
|
"learning_rate": 2.8775e-06,
|
|
"loss": 0.0184,
|
|
"step": 15800
|
|
},
|
|
{
|
|
"epoch": 7.3879551820728295,
|
|
"grad_norm": 7.1876420974731445,
|
|
"learning_rate": 2.8696875000000002e-06,
|
|
"loss": 0.011,
|
|
"step": 15825
|
|
},
|
|
{
|
|
"epoch": 7.3996265172735765,
|
|
"grad_norm": 0.14123250544071198,
|
|
"learning_rate": 2.861875e-06,
|
|
"loss": 0.0115,
|
|
"step": 15850
|
|
},
|
|
{
|
|
"epoch": 7.411297852474323,
|
|
"grad_norm": 0.15596270561218262,
|
|
"learning_rate": 2.8540625000000003e-06,
|
|
"loss": 0.0117,
|
|
"step": 15875
|
|
},
|
|
{
|
|
"epoch": 7.42296918767507,
|
|
"grad_norm": 0.23796889185905457,
|
|
"learning_rate": 2.8462500000000005e-06,
|
|
"loss": 0.0153,
|
|
"step": 15900
|
|
},
|
|
{
|
|
"epoch": 7.434640522875817,
|
|
"grad_norm": 3.0602948665618896,
|
|
"learning_rate": 2.8384375000000003e-06,
|
|
"loss": 0.0116,
|
|
"step": 15925
|
|
},
|
|
{
|
|
"epoch": 7.446311858076564,
|
|
"grad_norm": 0.32861384749412537,
|
|
"learning_rate": 2.830625e-06,
|
|
"loss": 0.0121,
|
|
"step": 15950
|
|
},
|
|
{
|
|
"epoch": 7.457983193277311,
|
|
"grad_norm": 0.5478546023368835,
|
|
"learning_rate": 2.8228125e-06,
|
|
"loss": 0.0112,
|
|
"step": 15975
|
|
},
|
|
{
|
|
"epoch": 7.469654528478058,
|
|
"grad_norm": 0.2652440667152405,
|
|
"learning_rate": 2.815e-06,
|
|
"loss": 0.0218,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 7.481325863678805,
|
|
"grad_norm": 1.3157047033309937,
|
|
"learning_rate": 2.8071875e-06,
|
|
"loss": 0.0077,
|
|
"step": 16025
|
|
},
|
|
{
|
|
"epoch": 7.492997198879552,
|
|
"grad_norm": 0.28366702795028687,
|
|
"learning_rate": 2.799375e-06,
|
|
"loss": 0.0136,
|
|
"step": 16050
|
|
},
|
|
{
|
|
"epoch": 7.504668534080299,
|
|
"grad_norm": 5.81749963760376,
|
|
"learning_rate": 2.7915625e-06,
|
|
"loss": 0.0083,
|
|
"step": 16075
|
|
},
|
|
{
|
|
"epoch": 7.516339869281046,
|
|
"grad_norm": 2.328474760055542,
|
|
"learning_rate": 2.7837500000000002e-06,
|
|
"loss": 0.0138,
|
|
"step": 16100
|
|
},
|
|
{
|
|
"epoch": 7.528011204481793,
|
|
"grad_norm": 0.3001089096069336,
|
|
"learning_rate": 2.7759375e-06,
|
|
"loss": 0.0063,
|
|
"step": 16125
|
|
},
|
|
{
|
|
"epoch": 7.5396825396825395,
|
|
"grad_norm": 0.8168843388557434,
|
|
"learning_rate": 2.7681250000000003e-06,
|
|
"loss": 0.0153,
|
|
"step": 16150
|
|
},
|
|
{
|
|
"epoch": 7.5513538748832865,
|
|
"grad_norm": 0.5548914074897766,
|
|
"learning_rate": 2.7603125e-06,
|
|
"loss": 0.0075,
|
|
"step": 16175
|
|
},
|
|
{
|
|
"epoch": 7.563025210084033,
|
|
"grad_norm": 1.3669456243515015,
|
|
"learning_rate": 2.7525000000000003e-06,
|
|
"loss": 0.0135,
|
|
"step": 16200
|
|
},
|
|
{
|
|
"epoch": 7.57469654528478,
|
|
"grad_norm": 0.7598258852958679,
|
|
"learning_rate": 2.7446875e-06,
|
|
"loss": 0.0056,
|
|
"step": 16225
|
|
},
|
|
{
|
|
"epoch": 7.586367880485527,
|
|
"grad_norm": 0.4459327757358551,
|
|
"learning_rate": 2.7368750000000004e-06,
|
|
"loss": 0.0102,
|
|
"step": 16250
|
|
},
|
|
{
|
|
"epoch": 7.598039215686274,
|
|
"grad_norm": 0.2884249985218048,
|
|
"learning_rate": 2.7290625e-06,
|
|
"loss": 0.0097,
|
|
"step": 16275
|
|
},
|
|
{
|
|
"epoch": 7.609710550887021,
|
|
"grad_norm": 0.1581568568944931,
|
|
"learning_rate": 2.72125e-06,
|
|
"loss": 0.0121,
|
|
"step": 16300
|
|
},
|
|
{
|
|
"epoch": 7.621381886087768,
|
|
"grad_norm": 0.3975503444671631,
|
|
"learning_rate": 2.7134375e-06,
|
|
"loss": 0.0107,
|
|
"step": 16325
|
|
},
|
|
{
|
|
"epoch": 7.633053221288515,
|
|
"grad_norm": 1.209242582321167,
|
|
"learning_rate": 2.705625e-06,
|
|
"loss": 0.0148,
|
|
"step": 16350
|
|
},
|
|
{
|
|
"epoch": 7.644724556489263,
|
|
"grad_norm": 0.27312788367271423,
|
|
"learning_rate": 2.6978125e-06,
|
|
"loss": 0.0035,
|
|
"step": 16375
|
|
},
|
|
{
|
|
"epoch": 7.65639589169001,
|
|
"grad_norm": 1.0958424806594849,
|
|
"learning_rate": 2.69e-06,
|
|
"loss": 0.0171,
|
|
"step": 16400
|
|
},
|
|
{
|
|
"epoch": 7.668067226890757,
|
|
"grad_norm": 0.2819930911064148,
|
|
"learning_rate": 2.6821875e-06,
|
|
"loss": 0.0056,
|
|
"step": 16425
|
|
},
|
|
{
|
|
"epoch": 7.6797385620915035,
|
|
"grad_norm": 0.7688259482383728,
|
|
"learning_rate": 2.674375e-06,
|
|
"loss": 0.0097,
|
|
"step": 16450
|
|
},
|
|
{
|
|
"epoch": 7.69140989729225,
|
|
"grad_norm": 0.7461805939674377,
|
|
"learning_rate": 2.6665625e-06,
|
|
"loss": 0.0097,
|
|
"step": 16475
|
|
},
|
|
{
|
|
"epoch": 7.703081232492997,
|
|
"grad_norm": 0.4246864914894104,
|
|
"learning_rate": 2.65875e-06,
|
|
"loss": 0.0142,
|
|
"step": 16500
|
|
},
|
|
{
|
|
"epoch": 7.714752567693744,
|
|
"grad_norm": 0.32187584042549133,
|
|
"learning_rate": 2.6509375000000004e-06,
|
|
"loss": 0.0069,
|
|
"step": 16525
|
|
},
|
|
{
|
|
"epoch": 7.726423902894491,
|
|
"grad_norm": 0.9666975736618042,
|
|
"learning_rate": 2.643125e-06,
|
|
"loss": 0.0139,
|
|
"step": 16550
|
|
},
|
|
{
|
|
"epoch": 7.738095238095238,
|
|
"grad_norm": 0.4001488983631134,
|
|
"learning_rate": 2.6353125000000004e-06,
|
|
"loss": 0.0078,
|
|
"step": 16575
|
|
},
|
|
{
|
|
"epoch": 7.749766573295985,
|
|
"grad_norm": 1.532838225364685,
|
|
"learning_rate": 2.6275000000000003e-06,
|
|
"loss": 0.0131,
|
|
"step": 16600
|
|
},
|
|
{
|
|
"epoch": 7.761437908496732,
|
|
"grad_norm": 5.185120582580566,
|
|
"learning_rate": 2.6196875e-06,
|
|
"loss": 0.0086,
|
|
"step": 16625
|
|
},
|
|
{
|
|
"epoch": 7.773109243697479,
|
|
"grad_norm": 2.4308927059173584,
|
|
"learning_rate": 2.611875e-06,
|
|
"loss": 0.0143,
|
|
"step": 16650
|
|
},
|
|
{
|
|
"epoch": 7.784780578898226,
|
|
"grad_norm": 3.3174314498901367,
|
|
"learning_rate": 2.6040625e-06,
|
|
"loss": 0.0069,
|
|
"step": 16675
|
|
},
|
|
{
|
|
"epoch": 7.796451914098973,
|
|
"grad_norm": 1.5750664472579956,
|
|
"learning_rate": 2.59625e-06,
|
|
"loss": 0.0223,
|
|
"step": 16700
|
|
},
|
|
{
|
|
"epoch": 7.80812324929972,
|
|
"grad_norm": 0.2341316193342209,
|
|
"learning_rate": 2.5884375e-06,
|
|
"loss": 0.0054,
|
|
"step": 16725
|
|
},
|
|
{
|
|
"epoch": 7.819794584500467,
|
|
"grad_norm": 1.4788146018981934,
|
|
"learning_rate": 2.580625e-06,
|
|
"loss": 0.0136,
|
|
"step": 16750
|
|
},
|
|
{
|
|
"epoch": 7.8314659197012135,
|
|
"grad_norm": 1.1940587759017944,
|
|
"learning_rate": 2.5728125e-06,
|
|
"loss": 0.006,
|
|
"step": 16775
|
|
},
|
|
{
|
|
"epoch": 7.8431372549019605,
|
|
"grad_norm": 1.383323073387146,
|
|
"learning_rate": 2.565e-06,
|
|
"loss": 0.0115,
|
|
"step": 16800
|
|
},
|
|
{
|
|
"epoch": 7.854808590102707,
|
|
"grad_norm": 0.6506948471069336,
|
|
"learning_rate": 2.5571875000000002e-06,
|
|
"loss": 0.008,
|
|
"step": 16825
|
|
},
|
|
{
|
|
"epoch": 7.866479925303455,
|
|
"grad_norm": 1.9429287910461426,
|
|
"learning_rate": 2.549375e-06,
|
|
"loss": 0.018,
|
|
"step": 16850
|
|
},
|
|
{
|
|
"epoch": 7.878151260504202,
|
|
"grad_norm": 1.7122925519943237,
|
|
"learning_rate": 2.5415625000000003e-06,
|
|
"loss": 0.011,
|
|
"step": 16875
|
|
},
|
|
{
|
|
"epoch": 7.889822595704949,
|
|
"grad_norm": 0.38340166211128235,
|
|
"learning_rate": 2.53375e-06,
|
|
"loss": 0.0163,
|
|
"step": 16900
|
|
},
|
|
{
|
|
"epoch": 7.901493930905696,
|
|
"grad_norm": 4.114197731018066,
|
|
"learning_rate": 2.5259375000000003e-06,
|
|
"loss": 0.0114,
|
|
"step": 16925
|
|
},
|
|
{
|
|
"epoch": 7.913165266106443,
|
|
"grad_norm": 2.0275721549987793,
|
|
"learning_rate": 2.518125e-06,
|
|
"loss": 0.0131,
|
|
"step": 16950
|
|
},
|
|
{
|
|
"epoch": 7.92483660130719,
|
|
"grad_norm": 3.8220088481903076,
|
|
"learning_rate": 2.5103125000000004e-06,
|
|
"loss": 0.0126,
|
|
"step": 16975
|
|
},
|
|
{
|
|
"epoch": 7.936507936507937,
|
|
"grad_norm": 2.1761302947998047,
|
|
"learning_rate": 2.5025e-06,
|
|
"loss": 0.0164,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 7.948179271708684,
|
|
"grad_norm": 6.142478942871094,
|
|
"learning_rate": 2.4946875e-06,
|
|
"loss": 0.0063,
|
|
"step": 17025
|
|
},
|
|
{
|
|
"epoch": 7.959850606909431,
|
|
"grad_norm": 2.5636518001556396,
|
|
"learning_rate": 2.486875e-06,
|
|
"loss": 0.0142,
|
|
"step": 17050
|
|
},
|
|
{
|
|
"epoch": 7.9715219421101775,
|
|
"grad_norm": 0.4117043614387512,
|
|
"learning_rate": 2.4790625e-06,
|
|
"loss": 0.0082,
|
|
"step": 17075
|
|
},
|
|
{
|
|
"epoch": 7.983193277310924,
|
|
"grad_norm": 2.189190626144409,
|
|
"learning_rate": 2.47125e-06,
|
|
"loss": 0.0139,
|
|
"step": 17100
|
|
},
|
|
{
|
|
"epoch": 7.994864612511671,
|
|
"grad_norm": 2.0030508041381836,
|
|
"learning_rate": 2.4634375e-06,
|
|
"loss": 0.0084,
|
|
"step": 17125
|
|
},
|
|
{
|
|
"epoch": 8.006535947712418,
|
|
"grad_norm": 0.26142677664756775,
|
|
"learning_rate": 2.4556250000000003e-06,
|
|
"loss": 0.0091,
|
|
"step": 17150
|
|
},
|
|
{
|
|
"epoch": 8.018207282913165,
|
|
"grad_norm": 0.4059411585330963,
|
|
"learning_rate": 2.4478125e-06,
|
|
"loss": 0.0055,
|
|
"step": 17175
|
|
},
|
|
{
|
|
"epoch": 8.029878618113912,
|
|
"grad_norm": 0.09706517308950424,
|
|
"learning_rate": 2.4400000000000004e-06,
|
|
"loss": 0.0126,
|
|
"step": 17200
|
|
},
|
|
{
|
|
"epoch": 8.041549953314659,
|
|
"grad_norm": 6.117686748504639,
|
|
"learning_rate": 2.4321875e-06,
|
|
"loss": 0.0077,
|
|
"step": 17225
|
|
},
|
|
{
|
|
"epoch": 8.053221288515406,
|
|
"grad_norm": 1.4079279899597168,
|
|
"learning_rate": 2.4243750000000004e-06,
|
|
"loss": 0.0099,
|
|
"step": 17250
|
|
},
|
|
{
|
|
"epoch": 8.064892623716153,
|
|
"grad_norm": 0.329962819814682,
|
|
"learning_rate": 2.4165625e-06,
|
|
"loss": 0.0101,
|
|
"step": 17275
|
|
},
|
|
{
|
|
"epoch": 8.0765639589169,
|
|
"grad_norm": 0.14133867621421814,
|
|
"learning_rate": 2.4087500000000004e-06,
|
|
"loss": 0.0166,
|
|
"step": 17300
|
|
},
|
|
{
|
|
"epoch": 8.088235294117647,
|
|
"grad_norm": 0.3688430190086365,
|
|
"learning_rate": 2.4009375000000003e-06,
|
|
"loss": 0.0074,
|
|
"step": 17325
|
|
},
|
|
{
|
|
"epoch": 8.099906629318394,
|
|
"grad_norm": 2.421675205230713,
|
|
"learning_rate": 2.393125e-06,
|
|
"loss": 0.0119,
|
|
"step": 17350
|
|
},
|
|
{
|
|
"epoch": 8.11157796451914,
|
|
"grad_norm": 0.39900481700897217,
|
|
"learning_rate": 2.3853125e-06,
|
|
"loss": 0.0087,
|
|
"step": 17375
|
|
},
|
|
{
|
|
"epoch": 8.123249299719888,
|
|
"grad_norm": 9.060256004333496,
|
|
"learning_rate": 2.3778125000000004e-06,
|
|
"loss": 0.0193,
|
|
"step": 17400
|
|
},
|
|
{
|
|
"epoch": 8.134920634920634,
|
|
"grad_norm": 3.6718103885650635,
|
|
"learning_rate": 2.37e-06,
|
|
"loss": 0.0034,
|
|
"step": 17425
|
|
},
|
|
{
|
|
"epoch": 8.146591970121381,
|
|
"grad_norm": 1.8826625347137451,
|
|
"learning_rate": 2.3621875e-06,
|
|
"loss": 0.0108,
|
|
"step": 17450
|
|
},
|
|
{
|
|
"epoch": 8.158263305322128,
|
|
"grad_norm": 5.267531871795654,
|
|
"learning_rate": 2.354375e-06,
|
|
"loss": 0.0051,
|
|
"step": 17475
|
|
},
|
|
{
|
|
"epoch": 8.169934640522875,
|
|
"grad_norm": 1.7462451457977295,
|
|
"learning_rate": 2.3465625e-06,
|
|
"loss": 0.0098,
|
|
"step": 17500
|
|
},
|
|
{
|
|
"epoch": 8.181605975723622,
|
|
"grad_norm": 3.9560678005218506,
|
|
"learning_rate": 2.33875e-06,
|
|
"loss": 0.0074,
|
|
"step": 17525
|
|
},
|
|
{
|
|
"epoch": 8.193277310924369,
|
|
"grad_norm": 0.9454758763313293,
|
|
"learning_rate": 2.3309375e-06,
|
|
"loss": 0.0124,
|
|
"step": 17550
|
|
},
|
|
{
|
|
"epoch": 8.204948646125116,
|
|
"grad_norm": 4.392622947692871,
|
|
"learning_rate": 2.323125e-06,
|
|
"loss": 0.0092,
|
|
"step": 17575
|
|
},
|
|
{
|
|
"epoch": 8.216619981325863,
|
|
"grad_norm": 0.15422876179218292,
|
|
"learning_rate": 2.3153125e-06,
|
|
"loss": 0.0112,
|
|
"step": 17600
|
|
},
|
|
{
|
|
"epoch": 8.22829131652661,
|
|
"grad_norm": 4.900958061218262,
|
|
"learning_rate": 2.3075e-06,
|
|
"loss": 0.0062,
|
|
"step": 17625
|
|
},
|
|
{
|
|
"epoch": 8.239962651727357,
|
|
"grad_norm": 0.7558678388595581,
|
|
"learning_rate": 2.2996875e-06,
|
|
"loss": 0.0202,
|
|
"step": 17650
|
|
},
|
|
{
|
|
"epoch": 8.251633986928105,
|
|
"grad_norm": 1.5306479930877686,
|
|
"learning_rate": 2.2918750000000004e-06,
|
|
"loss": 0.0045,
|
|
"step": 17675
|
|
},
|
|
{
|
|
"epoch": 8.263305322128852,
|
|
"grad_norm": 0.20347028970718384,
|
|
"learning_rate": 2.2840625e-06,
|
|
"loss": 0.0078,
|
|
"step": 17700
|
|
},
|
|
{
|
|
"epoch": 8.2749766573296,
|
|
"grad_norm": 1.3202959299087524,
|
|
"learning_rate": 2.2762500000000004e-06,
|
|
"loss": 0.0086,
|
|
"step": 17725
|
|
},
|
|
{
|
|
"epoch": 8.286647992530346,
|
|
"grad_norm": 0.9320886731147766,
|
|
"learning_rate": 2.2684375000000003e-06,
|
|
"loss": 0.0108,
|
|
"step": 17750
|
|
},
|
|
{
|
|
"epoch": 8.298319327731093,
|
|
"grad_norm": 3.197519063949585,
|
|
"learning_rate": 2.260625e-06,
|
|
"loss": 0.0077,
|
|
"step": 17775
|
|
},
|
|
{
|
|
"epoch": 8.30999066293184,
|
|
"grad_norm": 1.24087655544281,
|
|
"learning_rate": 2.2528125e-06,
|
|
"loss": 0.0132,
|
|
"step": 17800
|
|
},
|
|
{
|
|
"epoch": 8.321661998132587,
|
|
"grad_norm": 5.21552848815918,
|
|
"learning_rate": 2.245e-06,
|
|
"loss": 0.0063,
|
|
"step": 17825
|
|
},
|
|
{
|
|
"epoch": 8.333333333333334,
|
|
"grad_norm": 1.1258479356765747,
|
|
"learning_rate": 2.2371875e-06,
|
|
"loss": 0.0163,
|
|
"step": 17850
|
|
},
|
|
{
|
|
"epoch": 8.34500466853408,
|
|
"grad_norm": 4.139800071716309,
|
|
"learning_rate": 2.229375e-06,
|
|
"loss": 0.0056,
|
|
"step": 17875
|
|
},
|
|
{
|
|
"epoch": 8.356676003734828,
|
|
"grad_norm": 0.32767072319984436,
|
|
"learning_rate": 2.2215625e-06,
|
|
"loss": 0.0075,
|
|
"step": 17900
|
|
},
|
|
{
|
|
"epoch": 8.368347338935575,
|
|
"grad_norm": 1.2744501829147339,
|
|
"learning_rate": 2.21375e-06,
|
|
"loss": 0.007,
|
|
"step": 17925
|
|
},
|
|
{
|
|
"epoch": 8.380018674136322,
|
|
"grad_norm": 1.00862717628479,
|
|
"learning_rate": 2.2059375e-06,
|
|
"loss": 0.0154,
|
|
"step": 17950
|
|
},
|
|
{
|
|
"epoch": 8.391690009337069,
|
|
"grad_norm": 0.4900611340999603,
|
|
"learning_rate": 2.1981250000000002e-06,
|
|
"loss": 0.0085,
|
|
"step": 17975
|
|
},
|
|
{
|
|
"epoch": 8.403361344537815,
|
|
"grad_norm": 3.439101219177246,
|
|
"learning_rate": 2.1903125e-06,
|
|
"loss": 0.0193,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 8.415032679738562,
|
|
"grad_norm": 0.17414799332618713,
|
|
"learning_rate": 2.1825000000000003e-06,
|
|
"loss": 0.0092,
|
|
"step": 18025
|
|
},
|
|
{
|
|
"epoch": 8.42670401493931,
|
|
"grad_norm": 0.3426636755466461,
|
|
"learning_rate": 2.1746875e-06,
|
|
"loss": 0.0149,
|
|
"step": 18050
|
|
},
|
|
{
|
|
"epoch": 8.438375350140056,
|
|
"grad_norm": 0.722213089466095,
|
|
"learning_rate": 2.1668750000000003e-06,
|
|
"loss": 0.007,
|
|
"step": 18075
|
|
},
|
|
{
|
|
"epoch": 8.450046685340803,
|
|
"grad_norm": 0.1770373433828354,
|
|
"learning_rate": 2.1590625e-06,
|
|
"loss": 0.0106,
|
|
"step": 18100
|
|
},
|
|
{
|
|
"epoch": 8.46171802054155,
|
|
"grad_norm": 1.1311222314834595,
|
|
"learning_rate": 2.15125e-06,
|
|
"loss": 0.0105,
|
|
"step": 18125
|
|
},
|
|
{
|
|
"epoch": 8.473389355742297,
|
|
"grad_norm": 3.256246328353882,
|
|
"learning_rate": 2.1434374999999998e-06,
|
|
"loss": 0.0117,
|
|
"step": 18150
|
|
},
|
|
{
|
|
"epoch": 8.485060690943044,
|
|
"grad_norm": 2.979933977127075,
|
|
"learning_rate": 2.135625e-06,
|
|
"loss": 0.0065,
|
|
"step": 18175
|
|
},
|
|
{
|
|
"epoch": 8.49673202614379,
|
|
"grad_norm": 5.483570098876953,
|
|
"learning_rate": 2.1278125e-06,
|
|
"loss": 0.0117,
|
|
"step": 18200
|
|
},
|
|
{
|
|
"epoch": 8.508403361344538,
|
|
"grad_norm": 0.9835972189903259,
|
|
"learning_rate": 2.12e-06,
|
|
"loss": 0.0068,
|
|
"step": 18225
|
|
},
|
|
{
|
|
"epoch": 8.520074696545285,
|
|
"grad_norm": 0.053934112191200256,
|
|
"learning_rate": 2.1121875e-06,
|
|
"loss": 0.0064,
|
|
"step": 18250
|
|
},
|
|
{
|
|
"epoch": 8.531746031746032,
|
|
"grad_norm": 0.5441355109214783,
|
|
"learning_rate": 2.104375e-06,
|
|
"loss": 0.0076,
|
|
"step": 18275
|
|
},
|
|
{
|
|
"epoch": 8.543417366946779,
|
|
"grad_norm": 2.9546403884887695,
|
|
"learning_rate": 2.0965625000000003e-06,
|
|
"loss": 0.0132,
|
|
"step": 18300
|
|
},
|
|
{
|
|
"epoch": 8.555088702147525,
|
|
"grad_norm": 1.0984193086624146,
|
|
"learning_rate": 2.08875e-06,
|
|
"loss": 0.006,
|
|
"step": 18325
|
|
},
|
|
{
|
|
"epoch": 8.566760037348272,
|
|
"grad_norm": 0.058905456215143204,
|
|
"learning_rate": 2.0809375000000004e-06,
|
|
"loss": 0.0086,
|
|
"step": 18350
|
|
},
|
|
{
|
|
"epoch": 8.57843137254902,
|
|
"grad_norm": 1.6285549402236938,
|
|
"learning_rate": 2.073125e-06,
|
|
"loss": 0.0055,
|
|
"step": 18375
|
|
},
|
|
{
|
|
"epoch": 8.590102707749766,
|
|
"grad_norm": 2.7696332931518555,
|
|
"learning_rate": 2.0653125000000004e-06,
|
|
"loss": 0.0093,
|
|
"step": 18400
|
|
},
|
|
{
|
|
"epoch": 8.601774042950513,
|
|
"grad_norm": 0.25450074672698975,
|
|
"learning_rate": 2.0575e-06,
|
|
"loss": 0.0027,
|
|
"step": 18425
|
|
},
|
|
{
|
|
"epoch": 8.61344537815126,
|
|
"grad_norm": 0.13693825900554657,
|
|
"learning_rate": 2.0496875e-06,
|
|
"loss": 0.0154,
|
|
"step": 18450
|
|
},
|
|
{
|
|
"epoch": 8.625116713352007,
|
|
"grad_norm": 0.07830255478620529,
|
|
"learning_rate": 2.041875e-06,
|
|
"loss": 0.0063,
|
|
"step": 18475
|
|
},
|
|
{
|
|
"epoch": 8.636788048552754,
|
|
"grad_norm": 3.4713680744171143,
|
|
"learning_rate": 2.0340625e-06,
|
|
"loss": 0.0222,
|
|
"step": 18500
|
|
},
|
|
{
|
|
"epoch": 8.6484593837535,
|
|
"grad_norm": 0.6259431838989258,
|
|
"learning_rate": 2.02625e-06,
|
|
"loss": 0.0088,
|
|
"step": 18525
|
|
},
|
|
{
|
|
"epoch": 8.660130718954248,
|
|
"grad_norm": 2.6433568000793457,
|
|
"learning_rate": 2.0184375e-06,
|
|
"loss": 0.0104,
|
|
"step": 18550
|
|
},
|
|
{
|
|
"epoch": 8.671802054154995,
|
|
"grad_norm": 4.890020370483398,
|
|
"learning_rate": 2.010625e-06,
|
|
"loss": 0.0049,
|
|
"step": 18575
|
|
},
|
|
{
|
|
"epoch": 8.683473389355742,
|
|
"grad_norm": 0.13995322585105896,
|
|
"learning_rate": 2.0028125e-06,
|
|
"loss": 0.0132,
|
|
"step": 18600
|
|
},
|
|
{
|
|
"epoch": 8.695144724556489,
|
|
"grad_norm": 3.4847798347473145,
|
|
"learning_rate": 1.995e-06,
|
|
"loss": 0.0083,
|
|
"step": 18625
|
|
},
|
|
{
|
|
"epoch": 8.706816059757235,
|
|
"grad_norm": 0.990917980670929,
|
|
"learning_rate": 1.9871875e-06,
|
|
"loss": 0.0067,
|
|
"step": 18650
|
|
},
|
|
{
|
|
"epoch": 8.718487394957982,
|
|
"grad_norm": 3.6839520931243896,
|
|
"learning_rate": 1.979375e-06,
|
|
"loss": 0.0063,
|
|
"step": 18675
|
|
},
|
|
{
|
|
"epoch": 8.73015873015873,
|
|
"grad_norm": 2.492249011993408,
|
|
"learning_rate": 1.9715625000000002e-06,
|
|
"loss": 0.01,
|
|
"step": 18700
|
|
},
|
|
{
|
|
"epoch": 8.741830065359476,
|
|
"grad_norm": 1.4780845642089844,
|
|
"learning_rate": 1.96375e-06,
|
|
"loss": 0.0058,
|
|
"step": 18725
|
|
},
|
|
{
|
|
"epoch": 8.753501400560225,
|
|
"grad_norm": 0.1961933821439743,
|
|
"learning_rate": 1.9559375000000003e-06,
|
|
"loss": 0.01,
|
|
"step": 18750
|
|
},
|
|
{
|
|
"epoch": 8.76517273576097,
|
|
"grad_norm": 1.2011662721633911,
|
|
"learning_rate": 1.948125e-06,
|
|
"loss": 0.0075,
|
|
"step": 18775
|
|
},
|
|
{
|
|
"epoch": 8.776844070961719,
|
|
"grad_norm": 1.7209523916244507,
|
|
"learning_rate": 1.9403125000000003e-06,
|
|
"loss": 0.014,
|
|
"step": 18800
|
|
},
|
|
{
|
|
"epoch": 8.788515406162466,
|
|
"grad_norm": 0.40629979968070984,
|
|
"learning_rate": 1.9325e-06,
|
|
"loss": 0.0047,
|
|
"step": 18825
|
|
},
|
|
{
|
|
"epoch": 8.800186741363213,
|
|
"grad_norm": 1.7460086345672607,
|
|
"learning_rate": 1.9246875e-06,
|
|
"loss": 0.0075,
|
|
"step": 18850
|
|
},
|
|
{
|
|
"epoch": 8.81185807656396,
|
|
"grad_norm": 0.1296474188566208,
|
|
"learning_rate": 1.9168749999999998e-06,
|
|
"loss": 0.004,
|
|
"step": 18875
|
|
},
|
|
{
|
|
"epoch": 8.823529411764707,
|
|
"grad_norm": 3.629750967025757,
|
|
"learning_rate": 1.9090625e-06,
|
|
"loss": 0.0118,
|
|
"step": 18900
|
|
},
|
|
{
|
|
"epoch": 8.835200746965453,
|
|
"grad_norm": 0.17966805398464203,
|
|
"learning_rate": 1.9012500000000002e-06,
|
|
"loss": 0.007,
|
|
"step": 18925
|
|
},
|
|
{
|
|
"epoch": 8.8468720821662,
|
|
"grad_norm": 1.5354282855987549,
|
|
"learning_rate": 1.8934375e-06,
|
|
"loss": 0.0185,
|
|
"step": 18950
|
|
},
|
|
{
|
|
"epoch": 8.858543417366947,
|
|
"grad_norm": 4.741540908813477,
|
|
"learning_rate": 1.8856250000000003e-06,
|
|
"loss": 0.0086,
|
|
"step": 18975
|
|
},
|
|
{
|
|
"epoch": 8.870214752567694,
|
|
"grad_norm": 0.33261170983314514,
|
|
"learning_rate": 1.8778125e-06,
|
|
"loss": 0.0145,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 8.881886087768441,
|
|
"grad_norm": 0.1079050749540329,
|
|
"learning_rate": 1.8700000000000003e-06,
|
|
"loss": 0.0084,
|
|
"step": 19025
|
|
},
|
|
{
|
|
"epoch": 8.893557422969188,
|
|
"grad_norm": 0.7272719740867615,
|
|
"learning_rate": 1.8621875000000001e-06,
|
|
"loss": 0.0065,
|
|
"step": 19050
|
|
},
|
|
{
|
|
"epoch": 8.905228758169935,
|
|
"grad_norm": 2.8611772060394287,
|
|
"learning_rate": 1.8543750000000001e-06,
|
|
"loss": 0.0085,
|
|
"step": 19075
|
|
},
|
|
{
|
|
"epoch": 8.916900093370682,
|
|
"grad_norm": 0.06473066657781601,
|
|
"learning_rate": 1.8465625e-06,
|
|
"loss": 0.0118,
|
|
"step": 19100
|
|
},
|
|
{
|
|
"epoch": 8.928571428571429,
|
|
"grad_norm": 0.5498653054237366,
|
|
"learning_rate": 1.8387500000000002e-06,
|
|
"loss": 0.0071,
|
|
"step": 19125
|
|
},
|
|
{
|
|
"epoch": 8.940242763772176,
|
|
"grad_norm": 0.0627368837594986,
|
|
"learning_rate": 1.8309375e-06,
|
|
"loss": 0.0094,
|
|
"step": 19150
|
|
},
|
|
{
|
|
"epoch": 8.951914098972923,
|
|
"grad_norm": 2.6687510013580322,
|
|
"learning_rate": 1.8231250000000002e-06,
|
|
"loss": 0.0054,
|
|
"step": 19175
|
|
},
|
|
{
|
|
"epoch": 8.96358543417367,
|
|
"grad_norm": 1.0596753358840942,
|
|
"learning_rate": 1.8153125e-06,
|
|
"loss": 0.0069,
|
|
"step": 19200
|
|
},
|
|
{
|
|
"epoch": 8.975256769374417,
|
|
"grad_norm": 1.5671969652175903,
|
|
"learning_rate": 1.8075000000000003e-06,
|
|
"loss": 0.0041,
|
|
"step": 19225
|
|
},
|
|
{
|
|
"epoch": 8.986928104575163,
|
|
"grad_norm": 3.649564266204834,
|
|
"learning_rate": 1.7996875e-06,
|
|
"loss": 0.0123,
|
|
"step": 19250
|
|
},
|
|
{
|
|
"epoch": 8.99859943977591,
|
|
"grad_norm": 4.41900110244751,
|
|
"learning_rate": 1.791875e-06,
|
|
"loss": 0.0074,
|
|
"step": 19275
|
|
},
|
|
{
|
|
"epoch": 9.010270774976657,
|
|
"grad_norm": 1.1093493700027466,
|
|
"learning_rate": 1.7840625e-06,
|
|
"loss": 0.0056,
|
|
"step": 19300
|
|
},
|
|
{
|
|
"epoch": 9.021942110177404,
|
|
"grad_norm": 2.091535806655884,
|
|
"learning_rate": 1.7762500000000001e-06,
|
|
"loss": 0.007,
|
|
"step": 19325
|
|
},
|
|
{
|
|
"epoch": 9.033613445378151,
|
|
"grad_norm": 4.006499767303467,
|
|
"learning_rate": 1.7684375e-06,
|
|
"loss": 0.0095,
|
|
"step": 19350
|
|
},
|
|
{
|
|
"epoch": 9.045284780578898,
|
|
"grad_norm": 0.13656963407993317,
|
|
"learning_rate": 1.7606250000000002e-06,
|
|
"loss": 0.0095,
|
|
"step": 19375
|
|
},
|
|
{
|
|
"epoch": 9.056956115779645,
|
|
"grad_norm": 2.558016538619995,
|
|
"learning_rate": 1.7528125e-06,
|
|
"loss": 0.0125,
|
|
"step": 19400
|
|
},
|
|
{
|
|
"epoch": 9.068627450980392,
|
|
"grad_norm": 0.15220613777637482,
|
|
"learning_rate": 1.745e-06,
|
|
"loss": 0.0074,
|
|
"step": 19425
|
|
},
|
|
{
|
|
"epoch": 9.080298786181139,
|
|
"grad_norm": 2.0453782081604004,
|
|
"learning_rate": 1.7371874999999998e-06,
|
|
"loss": 0.0063,
|
|
"step": 19450
|
|
},
|
|
{
|
|
"epoch": 9.091970121381886,
|
|
"grad_norm": 5.893077373504639,
|
|
"learning_rate": 1.729375e-06,
|
|
"loss": 0.0093,
|
|
"step": 19475
|
|
},
|
|
{
|
|
"epoch": 9.103641456582633,
|
|
"grad_norm": 1.2152618169784546,
|
|
"learning_rate": 1.7215624999999999e-06,
|
|
"loss": 0.0058,
|
|
"step": 19500
|
|
},
|
|
{
|
|
"epoch": 9.11531279178338,
|
|
"grad_norm": 0.9248460531234741,
|
|
"learning_rate": 1.7137500000000001e-06,
|
|
"loss": 0.0095,
|
|
"step": 19525
|
|
},
|
|
{
|
|
"epoch": 9.126984126984127,
|
|
"grad_norm": 0.35303401947021484,
|
|
"learning_rate": 1.7059375000000003e-06,
|
|
"loss": 0.0074,
|
|
"step": 19550
|
|
},
|
|
{
|
|
"epoch": 9.138655462184873,
|
|
"grad_norm": 0.8544372320175171,
|
|
"learning_rate": 1.6981250000000002e-06,
|
|
"loss": 0.0116,
|
|
"step": 19575
|
|
},
|
|
{
|
|
"epoch": 9.15032679738562,
|
|
"grad_norm": 0.38176584243774414,
|
|
"learning_rate": 1.6903125000000002e-06,
|
|
"loss": 0.0131,
|
|
"step": 19600
|
|
},
|
|
{
|
|
"epoch": 9.161998132586367,
|
|
"grad_norm": 3.0445823669433594,
|
|
"learning_rate": 1.6825e-06,
|
|
"loss": 0.0073,
|
|
"step": 19625
|
|
},
|
|
{
|
|
"epoch": 9.173669467787114,
|
|
"grad_norm": 0.1117783859372139,
|
|
"learning_rate": 1.6746875000000002e-06,
|
|
"loss": 0.0067,
|
|
"step": 19650
|
|
},
|
|
{
|
|
"epoch": 9.185340802987861,
|
|
"grad_norm": 1.0190069675445557,
|
|
"learning_rate": 1.666875e-06,
|
|
"loss": 0.0096,
|
|
"step": 19675
|
|
},
|
|
{
|
|
"epoch": 9.197012138188608,
|
|
"grad_norm": 0.07005083560943604,
|
|
"learning_rate": 1.6590625000000003e-06,
|
|
"loss": 0.0125,
|
|
"step": 19700
|
|
},
|
|
{
|
|
"epoch": 9.208683473389355,
|
|
"grad_norm": 1.7159433364868164,
|
|
"learning_rate": 1.65125e-06,
|
|
"loss": 0.0078,
|
|
"step": 19725
|
|
},
|
|
{
|
|
"epoch": 9.220354808590102,
|
|
"grad_norm": 0.325469046831131,
|
|
"learning_rate": 1.6437500000000001e-06,
|
|
"loss": 0.0068,
|
|
"step": 19750
|
|
},
|
|
{
|
|
"epoch": 9.232026143790849,
|
|
"grad_norm": 0.516471803188324,
|
|
"learning_rate": 1.6359375e-06,
|
|
"loss": 0.0064,
|
|
"step": 19775
|
|
},
|
|
{
|
|
"epoch": 9.243697478991596,
|
|
"grad_norm": 0.18785762786865234,
|
|
"learning_rate": 1.6281250000000002e-06,
|
|
"loss": 0.0083,
|
|
"step": 19800
|
|
},
|
|
{
|
|
"epoch": 9.255368814192344,
|
|
"grad_norm": 7.891486167907715,
|
|
"learning_rate": 1.6203125e-06,
|
|
"loss": 0.0075,
|
|
"step": 19825
|
|
},
|
|
{
|
|
"epoch": 9.267040149393091,
|
|
"grad_norm": 0.1949397474527359,
|
|
"learning_rate": 1.6125e-06,
|
|
"loss": 0.0079,
|
|
"step": 19850
|
|
},
|
|
{
|
|
"epoch": 9.278711484593838,
|
|
"grad_norm": 3.992004632949829,
|
|
"learning_rate": 1.6046875e-06,
|
|
"loss": 0.0111,
|
|
"step": 19875
|
|
},
|
|
{
|
|
"epoch": 9.290382819794585,
|
|
"grad_norm": 1.5781553983688354,
|
|
"learning_rate": 1.596875e-06,
|
|
"loss": 0.005,
|
|
"step": 19900
|
|
},
|
|
{
|
|
"epoch": 9.302054154995332,
|
|
"grad_norm": 0.28434544801712036,
|
|
"learning_rate": 1.5890624999999999e-06,
|
|
"loss": 0.0121,
|
|
"step": 19925
|
|
},
|
|
{
|
|
"epoch": 9.313725490196079,
|
|
"grad_norm": 0.04462061822414398,
|
|
"learning_rate": 1.5812500000000001e-06,
|
|
"loss": 0.01,
|
|
"step": 19950
|
|
},
|
|
{
|
|
"epoch": 9.325396825396826,
|
|
"grad_norm": 0.1574648916721344,
|
|
"learning_rate": 1.5734375e-06,
|
|
"loss": 0.0047,
|
|
"step": 19975
|
|
},
|
|
{
|
|
"epoch": 9.337068160597573,
|
|
"grad_norm": 3.997842788696289,
|
|
"learning_rate": 1.5656250000000002e-06,
|
|
"loss": 0.0052,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 9.337068160597573,
|
|
"eval_loss": 0.20788420736789703,
|
|
"eval_runtime": 5359.6354,
|
|
"eval_samples_per_second": 1.756,
|
|
"eval_steps_per_second": 0.22,
|
|
"eval_wer": 0.09564023897949298,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 9.34873949579832,
|
|
"grad_norm": 0.07767148315906525,
|
|
"learning_rate": 1.5578125000000002e-06,
|
|
"loss": 0.0071,
|
|
"step": 20025
|
|
},
|
|
{
|
|
"epoch": 9.360410830999067,
|
|
"grad_norm": 0.03903215005993843,
|
|
"learning_rate": 1.55e-06,
|
|
"loss": 0.0056,
|
|
"step": 20050
|
|
},
|
|
{
|
|
"epoch": 9.372082166199814,
|
|
"grad_norm": 5.785853385925293,
|
|
"learning_rate": 1.5421875e-06,
|
|
"loss": 0.0087,
|
|
"step": 20075
|
|
},
|
|
{
|
|
"epoch": 9.38375350140056,
|
|
"grad_norm": 0.3732275366783142,
|
|
"learning_rate": 1.534375e-06,
|
|
"loss": 0.0079,
|
|
"step": 20100
|
|
},
|
|
{
|
|
"epoch": 9.395424836601308,
|
|
"grad_norm": 0.7682175636291504,
|
|
"learning_rate": 1.5265625e-06,
|
|
"loss": 0.0113,
|
|
"step": 20125
|
|
},
|
|
{
|
|
"epoch": 9.407096171802054,
|
|
"grad_norm": 0.3410235345363617,
|
|
"learning_rate": 1.51875e-06,
|
|
"loss": 0.0058,
|
|
"step": 20150
|
|
},
|
|
{
|
|
"epoch": 9.418767507002801,
|
|
"grad_norm": 6.657580375671387,
|
|
"learning_rate": 1.5109375e-06,
|
|
"loss": 0.0071,
|
|
"step": 20175
|
|
},
|
|
{
|
|
"epoch": 9.430438842203548,
|
|
"grad_norm": 1.331062912940979,
|
|
"learning_rate": 1.5031250000000001e-06,
|
|
"loss": 0.0064,
|
|
"step": 20200
|
|
},
|
|
{
|
|
"epoch": 9.442110177404295,
|
|
"grad_norm": 0.032236941158771515,
|
|
"learning_rate": 1.4953125e-06,
|
|
"loss": 0.0073,
|
|
"step": 20225
|
|
},
|
|
{
|
|
"epoch": 9.453781512605042,
|
|
"grad_norm": 3.0221993923187256,
|
|
"learning_rate": 1.4875e-06,
|
|
"loss": 0.0053,
|
|
"step": 20250
|
|
},
|
|
{
|
|
"epoch": 9.465452847805789,
|
|
"grad_norm": 3.605405569076538,
|
|
"learning_rate": 1.4796875e-06,
|
|
"loss": 0.0127,
|
|
"step": 20275
|
|
},
|
|
{
|
|
"epoch": 9.477124183006536,
|
|
"grad_norm": 0.11760404706001282,
|
|
"learning_rate": 1.471875e-06,
|
|
"loss": 0.0067,
|
|
"step": 20300
|
|
},
|
|
{
|
|
"epoch": 9.488795518207283,
|
|
"grad_norm": 2.5941033363342285,
|
|
"learning_rate": 1.4640625000000002e-06,
|
|
"loss": 0.0072,
|
|
"step": 20325
|
|
},
|
|
{
|
|
"epoch": 9.50046685340803,
|
|
"grad_norm": 5.615445613861084,
|
|
"learning_rate": 1.4562500000000002e-06,
|
|
"loss": 0.0116,
|
|
"step": 20350
|
|
},
|
|
{
|
|
"epoch": 9.512138188608777,
|
|
"grad_norm": 0.22926200926303864,
|
|
"learning_rate": 1.4484375e-06,
|
|
"loss": 0.0082,
|
|
"step": 20375
|
|
},
|
|
{
|
|
"epoch": 9.523809523809524,
|
|
"grad_norm": 0.8798258304595947,
|
|
"learning_rate": 1.440625e-06,
|
|
"loss": 0.0036,
|
|
"step": 20400
|
|
},
|
|
{
|
|
"epoch": 9.53548085901027,
|
|
"grad_norm": 2.885864496231079,
|
|
"learning_rate": 1.4328125e-06,
|
|
"loss": 0.0076,
|
|
"step": 20425
|
|
},
|
|
{
|
|
"epoch": 9.547152194211018,
|
|
"grad_norm": 2.137159824371338,
|
|
"learning_rate": 1.4250000000000001e-06,
|
|
"loss": 0.0136,
|
|
"step": 20450
|
|
},
|
|
{
|
|
"epoch": 9.558823529411764,
|
|
"grad_norm": 0.8029230237007141,
|
|
"learning_rate": 1.4171875000000001e-06,
|
|
"loss": 0.0067,
|
|
"step": 20475
|
|
},
|
|
{
|
|
"epoch": 9.570494864612511,
|
|
"grad_norm": 0.14883463084697723,
|
|
"learning_rate": 1.4093750000000002e-06,
|
|
"loss": 0.0095,
|
|
"step": 20500
|
|
},
|
|
{
|
|
"epoch": 9.582166199813258,
|
|
"grad_norm": 10.01462459564209,
|
|
"learning_rate": 1.4015625000000002e-06,
|
|
"loss": 0.0069,
|
|
"step": 20525
|
|
},
|
|
{
|
|
"epoch": 9.593837535014005,
|
|
"grad_norm": 1.3217803239822388,
|
|
"learning_rate": 1.39375e-06,
|
|
"loss": 0.0058,
|
|
"step": 20550
|
|
},
|
|
{
|
|
"epoch": 9.605508870214752,
|
|
"grad_norm": 5.664200782775879,
|
|
"learning_rate": 1.3859375e-06,
|
|
"loss": 0.0086,
|
|
"step": 20575
|
|
},
|
|
{
|
|
"epoch": 9.6171802054155,
|
|
"grad_norm": 3.8610246181488037,
|
|
"learning_rate": 1.378125e-06,
|
|
"loss": 0.0085,
|
|
"step": 20600
|
|
},
|
|
{
|
|
"epoch": 9.628851540616246,
|
|
"grad_norm": 0.11504428088665009,
|
|
"learning_rate": 1.3703125e-06,
|
|
"loss": 0.006,
|
|
"step": 20625
|
|
},
|
|
{
|
|
"epoch": 9.640522875816993,
|
|
"grad_norm": 0.9499320387840271,
|
|
"learning_rate": 1.3625e-06,
|
|
"loss": 0.0061,
|
|
"step": 20650
|
|
},
|
|
{
|
|
"epoch": 9.65219421101774,
|
|
"grad_norm": 0.2915020287036896,
|
|
"learning_rate": 1.3546875e-06,
|
|
"loss": 0.0059,
|
|
"step": 20675
|
|
},
|
|
{
|
|
"epoch": 9.663865546218487,
|
|
"grad_norm": 1.0401633977890015,
|
|
"learning_rate": 1.3468750000000001e-06,
|
|
"loss": 0.0089,
|
|
"step": 20700
|
|
},
|
|
{
|
|
"epoch": 9.675536881419234,
|
|
"grad_norm": 0.8448579907417297,
|
|
"learning_rate": 1.3390625e-06,
|
|
"loss": 0.0115,
|
|
"step": 20725
|
|
},
|
|
{
|
|
"epoch": 9.68720821661998,
|
|
"grad_norm": 0.22094358503818512,
|
|
"learning_rate": 1.33125e-06,
|
|
"loss": 0.0092,
|
|
"step": 20750
|
|
},
|
|
{
|
|
"epoch": 9.698879551820728,
|
|
"grad_norm": 5.222804546356201,
|
|
"learning_rate": 1.3234375e-06,
|
|
"loss": 0.008,
|
|
"step": 20775
|
|
},
|
|
{
|
|
"epoch": 9.710550887021475,
|
|
"grad_norm": 0.35005176067352295,
|
|
"learning_rate": 1.315625e-06,
|
|
"loss": 0.008,
|
|
"step": 20800
|
|
},
|
|
{
|
|
"epoch": 9.722222222222221,
|
|
"grad_norm": 1.9022040367126465,
|
|
"learning_rate": 1.3078125e-06,
|
|
"loss": 0.0045,
|
|
"step": 20825
|
|
},
|
|
{
|
|
"epoch": 9.733893557422968,
|
|
"grad_norm": 2.139233350753784,
|
|
"learning_rate": 1.3e-06,
|
|
"loss": 0.0054,
|
|
"step": 20850
|
|
},
|
|
{
|
|
"epoch": 9.745564892623715,
|
|
"grad_norm": 4.1651482582092285,
|
|
"learning_rate": 1.2921875e-06,
|
|
"loss": 0.0053,
|
|
"step": 20875
|
|
},
|
|
{
|
|
"epoch": 9.757236227824464,
|
|
"grad_norm": 2.2410309314727783,
|
|
"learning_rate": 1.284375e-06,
|
|
"loss": 0.0062,
|
|
"step": 20900
|
|
},
|
|
{
|
|
"epoch": 9.768907563025211,
|
|
"grad_norm": 6.774412631988525,
|
|
"learning_rate": 1.2765625e-06,
|
|
"loss": 0.0082,
|
|
"step": 20925
|
|
},
|
|
{
|
|
"epoch": 9.780578898225958,
|
|
"grad_norm": 6.076715469360352,
|
|
"learning_rate": 1.2687500000000001e-06,
|
|
"loss": 0.0066,
|
|
"step": 20950
|
|
},
|
|
{
|
|
"epoch": 9.792250233426705,
|
|
"grad_norm": 0.3256414234638214,
|
|
"learning_rate": 1.2609375000000002e-06,
|
|
"loss": 0.0058,
|
|
"step": 20975
|
|
},
|
|
{
|
|
"epoch": 9.803921568627452,
|
|
"grad_norm": 0.03926245495676994,
|
|
"learning_rate": 1.2531250000000002e-06,
|
|
"loss": 0.0081,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 9.815592903828199,
|
|
"grad_norm": 14.117586135864258,
|
|
"learning_rate": 1.2453125000000002e-06,
|
|
"loss": 0.0031,
|
|
"step": 21025
|
|
},
|
|
{
|
|
"epoch": 9.827264239028946,
|
|
"grad_norm": 0.3628706634044647,
|
|
"learning_rate": 1.2375000000000002e-06,
|
|
"loss": 0.009,
|
|
"step": 21050
|
|
},
|
|
{
|
|
"epoch": 9.838935574229692,
|
|
"grad_norm": 4.5739827156066895,
|
|
"learning_rate": 1.2296875e-06,
|
|
"loss": 0.0038,
|
|
"step": 21075
|
|
},
|
|
{
|
|
"epoch": 9.85060690943044,
|
|
"grad_norm": 0.16507214307785034,
|
|
"learning_rate": 1.221875e-06,
|
|
"loss": 0.0101,
|
|
"step": 21100
|
|
},
|
|
{
|
|
"epoch": 9.862278244631186,
|
|
"grad_norm": 7.189055919647217,
|
|
"learning_rate": 1.2140625e-06,
|
|
"loss": 0.0078,
|
|
"step": 21125
|
|
},
|
|
{
|
|
"epoch": 9.873949579831933,
|
|
"grad_norm": 5.090521812438965,
|
|
"learning_rate": 1.20625e-06,
|
|
"loss": 0.0083,
|
|
"step": 21150
|
|
},
|
|
{
|
|
"epoch": 9.88562091503268,
|
|
"grad_norm": 2.5861196517944336,
|
|
"learning_rate": 1.1984375000000001e-06,
|
|
"loss": 0.0047,
|
|
"step": 21175
|
|
},
|
|
{
|
|
"epoch": 9.897292250233427,
|
|
"grad_norm": 0.09222248196601868,
|
|
"learning_rate": 1.1906250000000001e-06,
|
|
"loss": 0.0099,
|
|
"step": 21200
|
|
},
|
|
{
|
|
"epoch": 9.908963585434174,
|
|
"grad_norm": 4.3550004959106445,
|
|
"learning_rate": 1.1828125000000002e-06,
|
|
"loss": 0.008,
|
|
"step": 21225
|
|
},
|
|
{
|
|
"epoch": 9.920634920634921,
|
|
"grad_norm": 0.043043483048677444,
|
|
"learning_rate": 1.175e-06,
|
|
"loss": 0.0083,
|
|
"step": 21250
|
|
},
|
|
{
|
|
"epoch": 9.932306255835668,
|
|
"grad_norm": 0.07931485772132874,
|
|
"learning_rate": 1.1671875e-06,
|
|
"loss": 0.0082,
|
|
"step": 21275
|
|
},
|
|
{
|
|
"epoch": 9.943977591036415,
|
|
"grad_norm": 0.08949258923530579,
|
|
"learning_rate": 1.159375e-06,
|
|
"loss": 0.0075,
|
|
"step": 21300
|
|
},
|
|
{
|
|
"epoch": 9.955648926237162,
|
|
"grad_norm": 0.6334654092788696,
|
|
"learning_rate": 1.1515625e-06,
|
|
"loss": 0.0049,
|
|
"step": 21325
|
|
},
|
|
{
|
|
"epoch": 9.967320261437909,
|
|
"grad_norm": 1.3037919998168945,
|
|
"learning_rate": 1.14375e-06,
|
|
"loss": 0.0069,
|
|
"step": 21350
|
|
},
|
|
{
|
|
"epoch": 9.978991596638656,
|
|
"grad_norm": 4.038185119628906,
|
|
"learning_rate": 1.1359375e-06,
|
|
"loss": 0.0099,
|
|
"step": 21375
|
|
},
|
|
{
|
|
"epoch": 9.990662931839402,
|
|
"grad_norm": 0.7718151211738586,
|
|
"learning_rate": 1.128125e-06,
|
|
"loss": 0.0063,
|
|
"step": 21400
|
|
},
|
|
{
|
|
"epoch": 10.00233426704015,
|
|
"grad_norm": 1.5029001235961914,
|
|
"learning_rate": 1.1203125e-06,
|
|
"loss": 0.013,
|
|
"step": 21425
|
|
},
|
|
{
|
|
"epoch": 10.014005602240896,
|
|
"grad_norm": 1.8782932758331299,
|
|
"learning_rate": 1.1125e-06,
|
|
"loss": 0.0048,
|
|
"step": 21450
|
|
},
|
|
{
|
|
"epoch": 10.025676937441643,
|
|
"grad_norm": 1.411063313484192,
|
|
"learning_rate": 1.1046875e-06,
|
|
"loss": 0.0098,
|
|
"step": 21475
|
|
},
|
|
{
|
|
"epoch": 10.03734827264239,
|
|
"grad_norm": 0.08655665069818497,
|
|
"learning_rate": 1.096875e-06,
|
|
"loss": 0.0047,
|
|
"step": 21500
|
|
},
|
|
{
|
|
"epoch": 10.049019607843137,
|
|
"grad_norm": 0.7511602640151978,
|
|
"learning_rate": 1.0890625e-06,
|
|
"loss": 0.0077,
|
|
"step": 21525
|
|
},
|
|
{
|
|
"epoch": 10.060690943043884,
|
|
"grad_norm": 2.6931838989257812,
|
|
"learning_rate": 1.08125e-06,
|
|
"loss": 0.0036,
|
|
"step": 21550
|
|
},
|
|
{
|
|
"epoch": 10.072362278244631,
|
|
"grad_norm": 1.6642050743103027,
|
|
"learning_rate": 1.0734375e-06,
|
|
"loss": 0.009,
|
|
"step": 21575
|
|
},
|
|
{
|
|
"epoch": 10.084033613445378,
|
|
"grad_norm": 5.0585503578186035,
|
|
"learning_rate": 1.065625e-06,
|
|
"loss": 0.0054,
|
|
"step": 21600
|
|
},
|
|
{
|
|
"epoch": 10.095704948646125,
|
|
"grad_norm": 1.2419428825378418,
|
|
"learning_rate": 1.0578125e-06,
|
|
"loss": 0.0049,
|
|
"step": 21625
|
|
},
|
|
{
|
|
"epoch": 10.107376283846872,
|
|
"grad_norm": 0.0439959391951561,
|
|
"learning_rate": 1.0500000000000001e-06,
|
|
"loss": 0.006,
|
|
"step": 21650
|
|
},
|
|
{
|
|
"epoch": 10.119047619047619,
|
|
"grad_norm": 0.7057489156723022,
|
|
"learning_rate": 1.0421875000000001e-06,
|
|
"loss": 0.0069,
|
|
"step": 21675
|
|
},
|
|
{
|
|
"epoch": 10.130718954248366,
|
|
"grad_norm": 0.08024278283119202,
|
|
"learning_rate": 1.0343750000000002e-06,
|
|
"loss": 0.0028,
|
|
"step": 21700
|
|
},
|
|
{
|
|
"epoch": 10.142390289449112,
|
|
"grad_norm": 0.21487966179847717,
|
|
"learning_rate": 1.0265625000000002e-06,
|
|
"loss": 0.0033,
|
|
"step": 21725
|
|
},
|
|
{
|
|
"epoch": 10.15406162464986,
|
|
"grad_norm": 0.19071203470230103,
|
|
"learning_rate": 1.01875e-06,
|
|
"loss": 0.0036,
|
|
"step": 21750
|
|
},
|
|
{
|
|
"epoch": 10.165732959850606,
|
|
"grad_norm": 0.27428773045539856,
|
|
"learning_rate": 1.0109375e-06,
|
|
"loss": 0.0103,
|
|
"step": 21775
|
|
},
|
|
{
|
|
"epoch": 10.177404295051353,
|
|
"grad_norm": 1.418234944343567,
|
|
"learning_rate": 1.003125e-06,
|
|
"loss": 0.0058,
|
|
"step": 21800
|
|
},
|
|
{
|
|
"epoch": 10.1890756302521,
|
|
"grad_norm": 0.8873878717422485,
|
|
"learning_rate": 9.953125e-07,
|
|
"loss": 0.0139,
|
|
"step": 21825
|
|
},
|
|
{
|
|
"epoch": 10.200746965452847,
|
|
"grad_norm": 0.583903431892395,
|
|
"learning_rate": 9.875e-07,
|
|
"loss": 0.0033,
|
|
"step": 21850
|
|
},
|
|
{
|
|
"epoch": 10.212418300653594,
|
|
"grad_norm": 0.245305597782135,
|
|
"learning_rate": 9.796875e-07,
|
|
"loss": 0.0097,
|
|
"step": 21875
|
|
},
|
|
{
|
|
"epoch": 10.224089635854341,
|
|
"grad_norm": 3.605557441711426,
|
|
"learning_rate": 9.718750000000001e-07,
|
|
"loss": 0.0073,
|
|
"step": 21900
|
|
},
|
|
{
|
|
"epoch": 10.235760971055088,
|
|
"grad_norm": 0.19548866152763367,
|
|
"learning_rate": 9.640625000000001e-07,
|
|
"loss": 0.0063,
|
|
"step": 21925
|
|
},
|
|
{
|
|
"epoch": 10.247432306255835,
|
|
"grad_norm": 0.807528555393219,
|
|
"learning_rate": 9.5625e-07,
|
|
"loss": 0.0033,
|
|
"step": 21950
|
|
},
|
|
{
|
|
"epoch": 10.259103641456583,
|
|
"grad_norm": 0.32932284474372864,
|
|
"learning_rate": 9.484375e-07,
|
|
"loss": 0.0047,
|
|
"step": 21975
|
|
},
|
|
{
|
|
"epoch": 10.27077497665733,
|
|
"grad_norm": 0.1796354502439499,
|
|
"learning_rate": 9.40625e-07,
|
|
"loss": 0.0091,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 10.282446311858077,
|
|
"grad_norm": 1.3611717224121094,
|
|
"learning_rate": 9.328125e-07,
|
|
"loss": 0.0075,
|
|
"step": 22025
|
|
},
|
|
{
|
|
"epoch": 10.294117647058824,
|
|
"grad_norm": 0.0766756534576416,
|
|
"learning_rate": 9.25e-07,
|
|
"loss": 0.0055,
|
|
"step": 22050
|
|
},
|
|
{
|
|
"epoch": 10.305788982259571,
|
|
"grad_norm": 0.7175803184509277,
|
|
"learning_rate": 9.171875e-07,
|
|
"loss": 0.01,
|
|
"step": 22075
|
|
},
|
|
{
|
|
"epoch": 10.317460317460318,
|
|
"grad_norm": 0.23721574246883392,
|
|
"learning_rate": 9.09375e-07,
|
|
"loss": 0.0039,
|
|
"step": 22100
|
|
},
|
|
{
|
|
"epoch": 10.329131652661065,
|
|
"grad_norm": 0.23020412027835846,
|
|
"learning_rate": 9.015625e-07,
|
|
"loss": 0.0052,
|
|
"step": 22125
|
|
},
|
|
{
|
|
"epoch": 10.340802987861812,
|
|
"grad_norm": 1.632659912109375,
|
|
"learning_rate": 8.9375e-07,
|
|
"loss": 0.0058,
|
|
"step": 22150
|
|
},
|
|
{
|
|
"epoch": 10.352474323062559,
|
|
"grad_norm": 1.804761528968811,
|
|
"learning_rate": 8.859374999999999e-07,
|
|
"loss": 0.0055,
|
|
"step": 22175
|
|
},
|
|
{
|
|
"epoch": 10.364145658263306,
|
|
"grad_norm": 0.08536524325609207,
|
|
"learning_rate": 8.781250000000002e-07,
|
|
"loss": 0.0026,
|
|
"step": 22200
|
|
},
|
|
{
|
|
"epoch": 10.375816993464053,
|
|
"grad_norm": 1.1236313581466675,
|
|
"learning_rate": 8.703125000000001e-07,
|
|
"loss": 0.0097,
|
|
"step": 22225
|
|
},
|
|
{
|
|
"epoch": 10.3874883286648,
|
|
"grad_norm": 0.6604540944099426,
|
|
"learning_rate": 8.625000000000001e-07,
|
|
"loss": 0.0041,
|
|
"step": 22250
|
|
},
|
|
{
|
|
"epoch": 10.399159663865547,
|
|
"grad_norm": 0.28819596767425537,
|
|
"learning_rate": 8.546875000000001e-07,
|
|
"loss": 0.0035,
|
|
"step": 22275
|
|
},
|
|
{
|
|
"epoch": 10.410830999066294,
|
|
"grad_norm": 0.0771021619439125,
|
|
"learning_rate": 8.468750000000002e-07,
|
|
"loss": 0.0045,
|
|
"step": 22300
|
|
},
|
|
{
|
|
"epoch": 10.42250233426704,
|
|
"grad_norm": 1.4627320766448975,
|
|
"learning_rate": 8.390625000000001e-07,
|
|
"loss": 0.0084,
|
|
"step": 22325
|
|
},
|
|
{
|
|
"epoch": 10.434173669467787,
|
|
"grad_norm": 1.7134203910827637,
|
|
"learning_rate": 8.312500000000001e-07,
|
|
"loss": 0.0053,
|
|
"step": 22350
|
|
},
|
|
{
|
|
"epoch": 10.445845004668534,
|
|
"grad_norm": 0.29320698976516724,
|
|
"learning_rate": 8.234375000000001e-07,
|
|
"loss": 0.0095,
|
|
"step": 22375
|
|
},
|
|
{
|
|
"epoch": 10.457516339869281,
|
|
"grad_norm": 4.652510643005371,
|
|
"learning_rate": 8.15625e-07,
|
|
"loss": 0.0033,
|
|
"step": 22400
|
|
},
|
|
{
|
|
"epoch": 10.469187675070028,
|
|
"grad_norm": 0.07530553638935089,
|
|
"learning_rate": 8.078125e-07,
|
|
"loss": 0.0135,
|
|
"step": 22425
|
|
},
|
|
{
|
|
"epoch": 10.480859010270775,
|
|
"grad_norm": 5.351443290710449,
|
|
"learning_rate": 8.000000000000001e-07,
|
|
"loss": 0.0051,
|
|
"step": 22450
|
|
},
|
|
{
|
|
"epoch": 10.492530345471522,
|
|
"grad_norm": 1.0543556213378906,
|
|
"learning_rate": 7.921875000000001e-07,
|
|
"loss": 0.0126,
|
|
"step": 22475
|
|
},
|
|
{
|
|
"epoch": 10.504201680672269,
|
|
"grad_norm": 0.12515470385551453,
|
|
"learning_rate": 7.84375e-07,
|
|
"loss": 0.004,
|
|
"step": 22500
|
|
},
|
|
{
|
|
"epoch": 10.515873015873016,
|
|
"grad_norm": 0.6163919568061829,
|
|
"learning_rate": 7.765625e-07,
|
|
"loss": 0.0062,
|
|
"step": 22525
|
|
},
|
|
{
|
|
"epoch": 10.527544351073763,
|
|
"grad_norm": 1.1225354671478271,
|
|
"learning_rate": 7.6875e-07,
|
|
"loss": 0.0046,
|
|
"step": 22550
|
|
},
|
|
{
|
|
"epoch": 10.53921568627451,
|
|
"grad_norm": 1.0655065774917603,
|
|
"learning_rate": 7.609375e-07,
|
|
"loss": 0.0049,
|
|
"step": 22575
|
|
},
|
|
{
|
|
"epoch": 10.550887021475257,
|
|
"grad_norm": 2.211533308029175,
|
|
"learning_rate": 7.53125e-07,
|
|
"loss": 0.0034,
|
|
"step": 22600
|
|
},
|
|
{
|
|
"epoch": 10.562558356676004,
|
|
"grad_norm": 0.19352863729000092,
|
|
"learning_rate": 7.453125e-07,
|
|
"loss": 0.0048,
|
|
"step": 22625
|
|
},
|
|
{
|
|
"epoch": 10.57422969187675,
|
|
"grad_norm": 0.6760672926902771,
|
|
"learning_rate": 7.375e-07,
|
|
"loss": 0.0041,
|
|
"step": 22650
|
|
},
|
|
{
|
|
"epoch": 10.585901027077497,
|
|
"grad_norm": 1.2626034021377563,
|
|
"learning_rate": 7.296875000000001e-07,
|
|
"loss": 0.0089,
|
|
"step": 22675
|
|
},
|
|
{
|
|
"epoch": 10.597572362278244,
|
|
"grad_norm": 4.768553256988525,
|
|
"learning_rate": 7.218750000000001e-07,
|
|
"loss": 0.0047,
|
|
"step": 22700
|
|
},
|
|
{
|
|
"epoch": 10.609243697478991,
|
|
"grad_norm": 0.35717836022377014,
|
|
"learning_rate": 7.140625000000001e-07,
|
|
"loss": 0.0077,
|
|
"step": 22725
|
|
},
|
|
{
|
|
"epoch": 10.620915032679738,
|
|
"grad_norm": 0.17013007402420044,
|
|
"learning_rate": 7.0625e-07,
|
|
"loss": 0.0047,
|
|
"step": 22750
|
|
},
|
|
{
|
|
"epoch": 10.632586367880485,
|
|
"grad_norm": 0.8509282469749451,
|
|
"learning_rate": 6.984375e-07,
|
|
"loss": 0.0101,
|
|
"step": 22775
|
|
},
|
|
{
|
|
"epoch": 10.644257703081232,
|
|
"grad_norm": 3.9586565494537354,
|
|
"learning_rate": 6.906250000000001e-07,
|
|
"loss": 0.0025,
|
|
"step": 22800
|
|
},
|
|
{
|
|
"epoch": 10.655929038281979,
|
|
"grad_norm": 0.7251598238945007,
|
|
"learning_rate": 6.828125000000001e-07,
|
|
"loss": 0.0079,
|
|
"step": 22825
|
|
},
|
|
{
|
|
"epoch": 10.667600373482726,
|
|
"grad_norm": 0.039994291961193085,
|
|
"learning_rate": 6.75e-07,
|
|
"loss": 0.0048,
|
|
"step": 22850
|
|
},
|
|
{
|
|
"epoch": 10.679271708683473,
|
|
"grad_norm": 1.6940975189208984,
|
|
"learning_rate": 6.671875e-07,
|
|
"loss": 0.0095,
|
|
"step": 22875
|
|
},
|
|
{
|
|
"epoch": 10.69094304388422,
|
|
"grad_norm": 3.833244562149048,
|
|
"learning_rate": 6.59375e-07,
|
|
"loss": 0.004,
|
|
"step": 22900
|
|
},
|
|
{
|
|
"epoch": 10.702614379084967,
|
|
"grad_norm": 0.6546738743782043,
|
|
"learning_rate": 6.515625e-07,
|
|
"loss": 0.0084,
|
|
"step": 22925
|
|
},
|
|
{
|
|
"epoch": 10.714285714285714,
|
|
"grad_norm": 0.04062287509441376,
|
|
"learning_rate": 6.4375e-07,
|
|
"loss": 0.0059,
|
|
"step": 22950
|
|
},
|
|
{
|
|
"epoch": 10.72595704948646,
|
|
"grad_norm": 0.1445113569498062,
|
|
"learning_rate": 6.359375e-07,
|
|
"loss": 0.0135,
|
|
"step": 22975
|
|
},
|
|
{
|
|
"epoch": 10.73762838468721,
|
|
"grad_norm": 0.029490185901522636,
|
|
"learning_rate": 6.28125e-07,
|
|
"loss": 0.0033,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 10.749299719887954,
|
|
"grad_norm": 1.9724853038787842,
|
|
"learning_rate": 6.203125e-07,
|
|
"loss": 0.0078,
|
|
"step": 23025
|
|
},
|
|
{
|
|
"epoch": 10.760971055088703,
|
|
"grad_norm": 0.8680882453918457,
|
|
"learning_rate": 6.125000000000001e-07,
|
|
"loss": 0.0035,
|
|
"step": 23050
|
|
},
|
|
{
|
|
"epoch": 10.77264239028945,
|
|
"grad_norm": 0.0470956526696682,
|
|
"learning_rate": 6.046875000000001e-07,
|
|
"loss": 0.0055,
|
|
"step": 23075
|
|
},
|
|
{
|
|
"epoch": 10.784313725490197,
|
|
"grad_norm": 0.06983581185340881,
|
|
"learning_rate": 5.96875e-07,
|
|
"loss": 0.0058,
|
|
"step": 23100
|
|
},
|
|
{
|
|
"epoch": 10.795985060690944,
|
|
"grad_norm": 0.3825051784515381,
|
|
"learning_rate": 5.890625e-07,
|
|
"loss": 0.0168,
|
|
"step": 23125
|
|
},
|
|
{
|
|
"epoch": 10.80765639589169,
|
|
"grad_norm": 2.345949649810791,
|
|
"learning_rate": 5.8125e-07,
|
|
"loss": 0.0038,
|
|
"step": 23150
|
|
},
|
|
{
|
|
"epoch": 10.819327731092438,
|
|
"grad_norm": 1.4795840978622437,
|
|
"learning_rate": 5.734375000000001e-07,
|
|
"loss": 0.0095,
|
|
"step": 23175
|
|
},
|
|
{
|
|
"epoch": 10.830999066293185,
|
|
"grad_norm": 0.686439037322998,
|
|
"learning_rate": 5.65625e-07,
|
|
"loss": 0.0091,
|
|
"step": 23200
|
|
},
|
|
{
|
|
"epoch": 10.842670401493931,
|
|
"grad_norm": 0.14041809737682343,
|
|
"learning_rate": 5.578125e-07,
|
|
"loss": 0.01,
|
|
"step": 23225
|
|
},
|
|
{
|
|
"epoch": 10.854341736694678,
|
|
"grad_norm": 4.803620338439941,
|
|
"learning_rate": 5.5e-07,
|
|
"loss": 0.0057,
|
|
"step": 23250
|
|
},
|
|
{
|
|
"epoch": 10.866013071895425,
|
|
"grad_norm": 0.07831548154354095,
|
|
"learning_rate": 5.421874999999999e-07,
|
|
"loss": 0.0062,
|
|
"step": 23275
|
|
},
|
|
{
|
|
"epoch": 10.877684407096172,
|
|
"grad_norm": 1.9763298034667969,
|
|
"learning_rate": 5.343750000000001e-07,
|
|
"loss": 0.0038,
|
|
"step": 23300
|
|
},
|
|
{
|
|
"epoch": 10.88935574229692,
|
|
"grad_norm": 0.3448634445667267,
|
|
"learning_rate": 5.265625000000001e-07,
|
|
"loss": 0.0082,
|
|
"step": 23325
|
|
},
|
|
{
|
|
"epoch": 10.901027077497666,
|
|
"grad_norm": 0.04117899760603905,
|
|
"learning_rate": 5.1875e-07,
|
|
"loss": 0.0031,
|
|
"step": 23350
|
|
},
|
|
{
|
|
"epoch": 10.912698412698413,
|
|
"grad_norm": 0.21676640212535858,
|
|
"learning_rate": 5.109375e-07,
|
|
"loss": 0.0049,
|
|
"step": 23375
|
|
},
|
|
{
|
|
"epoch": 10.92436974789916,
|
|
"grad_norm": 3.311768054962158,
|
|
"learning_rate": 5.031250000000001e-07,
|
|
"loss": 0.007,
|
|
"step": 23400
|
|
},
|
|
{
|
|
"epoch": 10.936041083099907,
|
|
"grad_norm": 2.6684231758117676,
|
|
"learning_rate": 4.953125000000001e-07,
|
|
"loss": 0.0054,
|
|
"step": 23425
|
|
},
|
|
{
|
|
"epoch": 10.947712418300654,
|
|
"grad_norm": 0.7720322012901306,
|
|
"learning_rate": 4.875e-07,
|
|
"loss": 0.0052,
|
|
"step": 23450
|
|
},
|
|
{
|
|
"epoch": 10.9593837535014,
|
|
"grad_norm": 1.1452654600143433,
|
|
"learning_rate": 4.796875e-07,
|
|
"loss": 0.0059,
|
|
"step": 23475
|
|
},
|
|
{
|
|
"epoch": 10.971055088702148,
|
|
"grad_norm": 0.04618614539504051,
|
|
"learning_rate": 4.71875e-07,
|
|
"loss": 0.0063,
|
|
"step": 23500
|
|
},
|
|
{
|
|
"epoch": 10.982726423902895,
|
|
"grad_norm": 2.9821794033050537,
|
|
"learning_rate": 4.640625e-07,
|
|
"loss": 0.0042,
|
|
"step": 23525
|
|
},
|
|
{
|
|
"epoch": 10.994397759103641,
|
|
"grad_norm": 3.0062246322631836,
|
|
"learning_rate": 4.5624999999999997e-07,
|
|
"loss": 0.0077,
|
|
"step": 23550
|
|
},
|
|
{
|
|
"epoch": 11.006069094304388,
|
|
"grad_norm": 2.828244209289551,
|
|
"learning_rate": 4.484375e-07,
|
|
"loss": 0.0078,
|
|
"step": 23575
|
|
},
|
|
{
|
|
"epoch": 11.017740429505135,
|
|
"grad_norm": 4.736670970916748,
|
|
"learning_rate": 4.4062499999999996e-07,
|
|
"loss": 0.0074,
|
|
"step": 23600
|
|
},
|
|
{
|
|
"epoch": 11.029411764705882,
|
|
"grad_norm": 1.2825249433517456,
|
|
"learning_rate": 4.3281250000000004e-07,
|
|
"loss": 0.0111,
|
|
"step": 23625
|
|
},
|
|
{
|
|
"epoch": 11.04108309990663,
|
|
"grad_norm": 2.528594732284546,
|
|
"learning_rate": 4.2500000000000006e-07,
|
|
"loss": 0.006,
|
|
"step": 23650
|
|
},
|
|
{
|
|
"epoch": 11.052754435107376,
|
|
"grad_norm": 0.05410047993063927,
|
|
"learning_rate": 4.1718750000000003e-07,
|
|
"loss": 0.0059,
|
|
"step": 23675
|
|
},
|
|
{
|
|
"epoch": 11.064425770308123,
|
|
"grad_norm": 1.0883435010910034,
|
|
"learning_rate": 4.0937500000000005e-07,
|
|
"loss": 0.0031,
|
|
"step": 23700
|
|
},
|
|
{
|
|
"epoch": 11.07609710550887,
|
|
"grad_norm": 1.926758885383606,
|
|
"learning_rate": 4.015625e-07,
|
|
"loss": 0.0095,
|
|
"step": 23725
|
|
},
|
|
{
|
|
"epoch": 11.087768440709617,
|
|
"grad_norm": 0.09137524664402008,
|
|
"learning_rate": 3.940625e-07,
|
|
"loss": 0.0083,
|
|
"step": 23750
|
|
},
|
|
{
|
|
"epoch": 11.099439775910364,
|
|
"grad_norm": 0.14325258135795593,
|
|
"learning_rate": 3.8625e-07,
|
|
"loss": 0.0039,
|
|
"step": 23775
|
|
},
|
|
{
|
|
"epoch": 11.11111111111111,
|
|
"grad_norm": 0.04979300498962402,
|
|
"learning_rate": 3.7843750000000003e-07,
|
|
"loss": 0.0052,
|
|
"step": 23800
|
|
},
|
|
{
|
|
"epoch": 11.122782446311858,
|
|
"grad_norm": 0.8827780485153198,
|
|
"learning_rate": 3.70625e-07,
|
|
"loss": 0.0046,
|
|
"step": 23825
|
|
},
|
|
{
|
|
"epoch": 11.134453781512605,
|
|
"grad_norm": 2.0285470485687256,
|
|
"learning_rate": 3.628125e-07,
|
|
"loss": 0.002,
|
|
"step": 23850
|
|
},
|
|
{
|
|
"epoch": 11.146125116713351,
|
|
"grad_norm": 0.05097728595137596,
|
|
"learning_rate": 3.5500000000000004e-07,
|
|
"loss": 0.0082,
|
|
"step": 23875
|
|
},
|
|
{
|
|
"epoch": 11.157796451914098,
|
|
"grad_norm": 1.0123631954193115,
|
|
"learning_rate": 3.471875e-07,
|
|
"loss": 0.002,
|
|
"step": 23900
|
|
},
|
|
{
|
|
"epoch": 11.169467787114845,
|
|
"grad_norm": 0.06493563950061798,
|
|
"learning_rate": 3.3937500000000003e-07,
|
|
"loss": 0.0065,
|
|
"step": 23925
|
|
},
|
|
{
|
|
"epoch": 11.181139122315592,
|
|
"grad_norm": 0.05671960860490799,
|
|
"learning_rate": 3.315625e-07,
|
|
"loss": 0.0074,
|
|
"step": 23950
|
|
},
|
|
{
|
|
"epoch": 11.19281045751634,
|
|
"grad_norm": 0.06837425380945206,
|
|
"learning_rate": 3.2375e-07,
|
|
"loss": 0.0062,
|
|
"step": 23975
|
|
},
|
|
{
|
|
"epoch": 11.204481792717086,
|
|
"grad_norm": 0.38731399178504944,
|
|
"learning_rate": 3.159375e-07,
|
|
"loss": 0.0057,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 11.216153127917833,
|
|
"grad_norm": 1.3272087574005127,
|
|
"learning_rate": 3.084375e-07,
|
|
"loss": 0.0071,
|
|
"step": 24025
|
|
},
|
|
{
|
|
"epoch": 11.22782446311858,
|
|
"grad_norm": 0.15108473598957062,
|
|
"learning_rate": 3.00625e-07,
|
|
"loss": 0.0066,
|
|
"step": 24050
|
|
},
|
|
{
|
|
"epoch": 11.239495798319327,
|
|
"grad_norm": 1.1661783456802368,
|
|
"learning_rate": 2.9281250000000006e-07,
|
|
"loss": 0.007,
|
|
"step": 24075
|
|
},
|
|
{
|
|
"epoch": 11.251167133520074,
|
|
"grad_norm": 0.22821743786334991,
|
|
"learning_rate": 2.85e-07,
|
|
"loss": 0.0045,
|
|
"step": 24100
|
|
},
|
|
{
|
|
"epoch": 11.262838468720823,
|
|
"grad_norm": 0.8526090383529663,
|
|
"learning_rate": 2.771875e-07,
|
|
"loss": 0.0056,
|
|
"step": 24125
|
|
},
|
|
{
|
|
"epoch": 11.27450980392157,
|
|
"grad_norm": 0.3976341784000397,
|
|
"learning_rate": 2.69375e-07,
|
|
"loss": 0.0076,
|
|
"step": 24150
|
|
},
|
|
{
|
|
"epoch": 11.286181139122316,
|
|
"grad_norm": 0.0741284042596817,
|
|
"learning_rate": 2.615625e-07,
|
|
"loss": 0.0079,
|
|
"step": 24175
|
|
},
|
|
{
|
|
"epoch": 11.297852474323063,
|
|
"grad_norm": 0.057843729853630066,
|
|
"learning_rate": 2.5375e-07,
|
|
"loss": 0.0042,
|
|
"step": 24200
|
|
},
|
|
{
|
|
"epoch": 11.30952380952381,
|
|
"grad_norm": 1.2884389162063599,
|
|
"learning_rate": 2.4593750000000003e-07,
|
|
"loss": 0.0062,
|
|
"step": 24225
|
|
},
|
|
{
|
|
"epoch": 11.321195144724557,
|
|
"grad_norm": 4.889528274536133,
|
|
"learning_rate": 2.3812500000000002e-07,
|
|
"loss": 0.0062,
|
|
"step": 24250
|
|
},
|
|
{
|
|
"epoch": 11.332866479925304,
|
|
"grad_norm": 0.019720420241355896,
|
|
"learning_rate": 2.3031250000000002e-07,
|
|
"loss": 0.0062,
|
|
"step": 24275
|
|
},
|
|
{
|
|
"epoch": 11.344537815126051,
|
|
"grad_norm": 0.22723744809627533,
|
|
"learning_rate": 2.2250000000000001e-07,
|
|
"loss": 0.0035,
|
|
"step": 24300
|
|
},
|
|
{
|
|
"epoch": 11.356209150326798,
|
|
"grad_norm": 0.05429434776306152,
|
|
"learning_rate": 2.146875e-07,
|
|
"loss": 0.009,
|
|
"step": 24325
|
|
},
|
|
{
|
|
"epoch": 11.367880485527545,
|
|
"grad_norm": 0.10212606936693192,
|
|
"learning_rate": 2.06875e-07,
|
|
"loss": 0.0058,
|
|
"step": 24350
|
|
},
|
|
{
|
|
"epoch": 11.379551820728292,
|
|
"grad_norm": 1.9913432598114014,
|
|
"learning_rate": 1.9906250000000003e-07,
|
|
"loss": 0.0097,
|
|
"step": 24375
|
|
},
|
|
{
|
|
"epoch": 11.391223155929039,
|
|
"grad_norm": 0.32066085934638977,
|
|
"learning_rate": 1.9125e-07,
|
|
"loss": 0.0039,
|
|
"step": 24400
|
|
},
|
|
{
|
|
"epoch": 11.402894491129786,
|
|
"grad_norm": 0.029058467596769333,
|
|
"learning_rate": 1.8343750000000002e-07,
|
|
"loss": 0.004,
|
|
"step": 24425
|
|
},
|
|
{
|
|
"epoch": 11.414565826330533,
|
|
"grad_norm": 1.6832449436187744,
|
|
"learning_rate": 1.75625e-07,
|
|
"loss": 0.004,
|
|
"step": 24450
|
|
},
|
|
{
|
|
"epoch": 11.42623716153128,
|
|
"grad_norm": 0.04252633824944496,
|
|
"learning_rate": 1.678125e-07,
|
|
"loss": 0.0093,
|
|
"step": 24475
|
|
},
|
|
{
|
|
"epoch": 11.437908496732026,
|
|
"grad_norm": 4.079598903656006,
|
|
"learning_rate": 1.6e-07,
|
|
"loss": 0.0073,
|
|
"step": 24500
|
|
},
|
|
{
|
|
"epoch": 11.449579831932773,
|
|
"grad_norm": 2.663240432739258,
|
|
"learning_rate": 1.521875e-07,
|
|
"loss": 0.003,
|
|
"step": 24525
|
|
},
|
|
{
|
|
"epoch": 11.46125116713352,
|
|
"grad_norm": 0.06202975660562515,
|
|
"learning_rate": 1.44375e-07,
|
|
"loss": 0.0051,
|
|
"step": 24550
|
|
},
|
|
{
|
|
"epoch": 11.472922502334267,
|
|
"grad_norm": 0.111959308385849,
|
|
"learning_rate": 1.3656250000000002e-07,
|
|
"loss": 0.0067,
|
|
"step": 24575
|
|
},
|
|
{
|
|
"epoch": 11.484593837535014,
|
|
"grad_norm": 0.02303888648748398,
|
|
"learning_rate": 1.2875e-07,
|
|
"loss": 0.0074,
|
|
"step": 24600
|
|
},
|
|
{
|
|
"epoch": 11.496265172735761,
|
|
"grad_norm": 0.43197059631347656,
|
|
"learning_rate": 1.209375e-07,
|
|
"loss": 0.0071,
|
|
"step": 24625
|
|
},
|
|
{
|
|
"epoch": 11.507936507936508,
|
|
"grad_norm": 3.6830546855926514,
|
|
"learning_rate": 1.1312500000000002e-07,
|
|
"loss": 0.0068,
|
|
"step": 24650
|
|
},
|
|
{
|
|
"epoch": 11.519607843137255,
|
|
"grad_norm": 0.06097732484340668,
|
|
"learning_rate": 1.0531250000000001e-07,
|
|
"loss": 0.0089,
|
|
"step": 24675
|
|
},
|
|
{
|
|
"epoch": 11.531279178338002,
|
|
"grad_norm": 0.06942930817604065,
|
|
"learning_rate": 9.75e-08,
|
|
"loss": 0.003,
|
|
"step": 24700
|
|
},
|
|
{
|
|
"epoch": 11.542950513538749,
|
|
"grad_norm": 2.829679012298584,
|
|
"learning_rate": 8.96875e-08,
|
|
"loss": 0.0075,
|
|
"step": 24725
|
|
},
|
|
{
|
|
"epoch": 11.554621848739496,
|
|
"grad_norm": 3.9653916358947754,
|
|
"learning_rate": 8.187500000000001e-08,
|
|
"loss": 0.0063,
|
|
"step": 24750
|
|
},
|
|
{
|
|
"epoch": 11.566293183940243,
|
|
"grad_norm": 0.29860720038414,
|
|
"learning_rate": 7.40625e-08,
|
|
"loss": 0.0068,
|
|
"step": 24775
|
|
},
|
|
{
|
|
"epoch": 11.57796451914099,
|
|
"grad_norm": 0.04515097290277481,
|
|
"learning_rate": 6.625e-08,
|
|
"loss": 0.0041,
|
|
"step": 24800
|
|
},
|
|
{
|
|
"epoch": 11.589635854341736,
|
|
"grad_norm": 0.026890119537711143,
|
|
"learning_rate": 5.843750000000001e-08,
|
|
"loss": 0.0051,
|
|
"step": 24825
|
|
},
|
|
{
|
|
"epoch": 11.601307189542483,
|
|
"grad_norm": 0.44632381200790405,
|
|
"learning_rate": 5.0625e-08,
|
|
"loss": 0.0047,
|
|
"step": 24850
|
|
},
|
|
{
|
|
"epoch": 11.61297852474323,
|
|
"grad_norm": 0.17215296626091003,
|
|
"learning_rate": 4.28125e-08,
|
|
"loss": 0.0079,
|
|
"step": 24875
|
|
},
|
|
{
|
|
"epoch": 11.624649859943977,
|
|
"grad_norm": 2.4952566623687744,
|
|
"learning_rate": 3.5e-08,
|
|
"loss": 0.0043,
|
|
"step": 24900
|
|
},
|
|
{
|
|
"epoch": 11.636321195144724,
|
|
"grad_norm": 2.0370965003967285,
|
|
"learning_rate": 2.7187499999999998e-08,
|
|
"loss": 0.0075,
|
|
"step": 24925
|
|
},
|
|
{
|
|
"epoch": 11.647992530345471,
|
|
"grad_norm": 2.6626877784729004,
|
|
"learning_rate": 1.9375e-08,
|
|
"loss": 0.0057,
|
|
"step": 24950
|
|
},
|
|
{
|
|
"epoch": 11.659663865546218,
|
|
"grad_norm": 0.16452664136886597,
|
|
"learning_rate": 1.1562500000000002e-08,
|
|
"loss": 0.0091,
|
|
"step": 24975
|
|
},
|
|
{
|
|
"epoch": 11.671335200746965,
|
|
"grad_norm": 0.11460210382938385,
|
|
"learning_rate": 3.75e-09,
|
|
"loss": 0.0035,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 11.671335200746965,
|
|
"eval_loss": 0.20881079137325287,
|
|
"eval_runtime": 5358.5345,
|
|
"eval_samples_per_second": 1.757,
|
|
"eval_steps_per_second": 0.22,
|
|
"eval_wer": 0.09323429678669466,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 11.671335200746965,
|
|
"step": 25000,
|
|
"total_flos": 4.081858297380864e+20,
|
|
"train_loss": 0.052512485960870985,
|
|
"train_runtime": 210665.5128,
|
|
"train_samples_per_second": 1.899,
|
|
"train_steps_per_second": 0.119
|
|
}
|
|
],
|
|
"logging_steps": 25,
|
|
"max_steps": 25000,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 12,
|
|
"save_steps": 5000,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 4.081858297380864e+20,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|