whisper-medium-pt-cv16-fleurs2-lr / trainer_state.json
fsicoli's picture
End of training
0934507 verified
{
"best_metric": 0.16011084616184235,
"best_model_checkpoint": "d:\\whisper-medium-pt-cv16-fleurs2-lr\\checkpoint-5000",
"epoch": 11.671335200746965,
"eval_steps": 5000,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011671335200746966,
"grad_norm": 18.578170776367188,
"learning_rate": 2.875e-08,
"loss": 0.7382,
"step": 25
},
{
"epoch": 0.02334267040149393,
"grad_norm": 37.2661247253418,
"learning_rate": 5.8750000000000007e-08,
"loss": 1.2823,
"step": 50
},
{
"epoch": 0.0350140056022409,
"grad_norm": 14.59357738494873,
"learning_rate": 9e-08,
"loss": 0.7512,
"step": 75
},
{
"epoch": 0.04668534080298786,
"grad_norm": 37.37008285522461,
"learning_rate": 1.2125e-07,
"loss": 1.2251,
"step": 100
},
{
"epoch": 0.05835667600373483,
"grad_norm": 14.07325553894043,
"learning_rate": 1.5250000000000002e-07,
"loss": 0.6841,
"step": 125
},
{
"epoch": 0.0700280112044818,
"grad_norm": 29.391014099121094,
"learning_rate": 1.8375000000000001e-07,
"loss": 1.1131,
"step": 150
},
{
"epoch": 0.08169934640522876,
"grad_norm": 12.7340087890625,
"learning_rate": 2.15e-07,
"loss": 0.5693,
"step": 175
},
{
"epoch": 0.09337068160597572,
"grad_norm": 26.895967483520508,
"learning_rate": 2.4624999999999997e-07,
"loss": 0.7876,
"step": 200
},
{
"epoch": 0.10504201680672269,
"grad_norm": 8.66826057434082,
"learning_rate": 2.7750000000000004e-07,
"loss": 0.2992,
"step": 225
},
{
"epoch": 0.11671335200746966,
"grad_norm": 25.45290756225586,
"learning_rate": 3.0875e-07,
"loss": 0.4386,
"step": 250
},
{
"epoch": 0.1283846872082166,
"grad_norm": 10.706913948059082,
"learning_rate": 3.4e-07,
"loss": 0.214,
"step": 275
},
{
"epoch": 0.1400560224089636,
"grad_norm": 19.541522979736328,
"learning_rate": 3.7125000000000005e-07,
"loss": 0.345,
"step": 300
},
{
"epoch": 0.15172735760971054,
"grad_norm": 6.879134654998779,
"learning_rate": 4.025e-07,
"loss": 0.2445,
"step": 325
},
{
"epoch": 0.16339869281045752,
"grad_norm": 23.4116268157959,
"learning_rate": 4.3375000000000003e-07,
"loss": 0.3121,
"step": 350
},
{
"epoch": 0.17507002801120447,
"grad_norm": 7.257847785949707,
"learning_rate": 4.65e-07,
"loss": 0.2013,
"step": 375
},
{
"epoch": 0.18674136321195145,
"grad_norm": 11.849136352539062,
"learning_rate": 4.9625e-07,
"loss": 0.2626,
"step": 400
},
{
"epoch": 0.1984126984126984,
"grad_norm": 7.970053195953369,
"learning_rate": 5.275e-07,
"loss": 0.2131,
"step": 425
},
{
"epoch": 0.21008403361344538,
"grad_norm": 19.375301361083984,
"learning_rate": 5.587499999999999e-07,
"loss": 0.2652,
"step": 450
},
{
"epoch": 0.22175536881419233,
"grad_norm": 10.925929069519043,
"learning_rate": 5.9e-07,
"loss": 0.2046,
"step": 475
},
{
"epoch": 0.2334267040149393,
"grad_norm": 20.24220085144043,
"learning_rate": 6.212500000000001e-07,
"loss": 0.2571,
"step": 500
},
{
"epoch": 0.24509803921568626,
"grad_norm": 8.06639575958252,
"learning_rate": 6.525000000000001e-07,
"loss": 0.2157,
"step": 525
},
{
"epoch": 0.2567693744164332,
"grad_norm": 16.64718246459961,
"learning_rate": 6.8375e-07,
"loss": 0.2745,
"step": 550
},
{
"epoch": 0.2684407096171802,
"grad_norm": 8.912382125854492,
"learning_rate": 7.15e-07,
"loss": 0.1999,
"step": 575
},
{
"epoch": 0.2801120448179272,
"grad_norm": 16.385353088378906,
"learning_rate": 7.462500000000001e-07,
"loss": 0.2234,
"step": 600
},
{
"epoch": 0.29178338001867415,
"grad_norm": 4.625432968139648,
"learning_rate": 7.775e-07,
"loss": 0.1847,
"step": 625
},
{
"epoch": 0.3034547152194211,
"grad_norm": 30.452251434326172,
"learning_rate": 8.0875e-07,
"loss": 0.2338,
"step": 650
},
{
"epoch": 0.31512605042016806,
"grad_norm": 9.238980293273926,
"learning_rate": 8.4e-07,
"loss": 0.1883,
"step": 675
},
{
"epoch": 0.32679738562091504,
"grad_norm": 19.22330093383789,
"learning_rate": 8.7125e-07,
"loss": 0.2387,
"step": 700
},
{
"epoch": 0.338468720821662,
"grad_norm": 3.236504316329956,
"learning_rate": 9.025e-07,
"loss": 0.162,
"step": 725
},
{
"epoch": 0.35014005602240894,
"grad_norm": 18.75830078125,
"learning_rate": 9.337500000000001e-07,
"loss": 0.2217,
"step": 750
},
{
"epoch": 0.3618113912231559,
"grad_norm": 4.1586198806762695,
"learning_rate": 9.65e-07,
"loss": 0.1739,
"step": 775
},
{
"epoch": 0.3734827264239029,
"grad_norm": 16.235074996948242,
"learning_rate": 9.9625e-07,
"loss": 0.227,
"step": 800
},
{
"epoch": 0.3851540616246499,
"grad_norm": 7.601698398590088,
"learning_rate": 1.0275e-06,
"loss": 0.1764,
"step": 825
},
{
"epoch": 0.3968253968253968,
"grad_norm": 17.789981842041016,
"learning_rate": 1.05875e-06,
"loss": 0.2138,
"step": 850
},
{
"epoch": 0.4084967320261438,
"grad_norm": 5.891432285308838,
"learning_rate": 1.0900000000000002e-06,
"loss": 0.1753,
"step": 875
},
{
"epoch": 0.42016806722689076,
"grad_norm": 21.724348068237305,
"learning_rate": 1.12125e-06,
"loss": 0.206,
"step": 900
},
{
"epoch": 0.43183940242763774,
"grad_norm": 5.313950538635254,
"learning_rate": 1.1525000000000002e-06,
"loss": 0.167,
"step": 925
},
{
"epoch": 0.44351073762838467,
"grad_norm": 11.306236267089844,
"learning_rate": 1.18375e-06,
"loss": 0.1974,
"step": 950
},
{
"epoch": 0.45518207282913165,
"grad_norm": 8.582784652709961,
"learning_rate": 1.215e-06,
"loss": 0.192,
"step": 975
},
{
"epoch": 0.4668534080298786,
"grad_norm": 14.138503074645996,
"learning_rate": 1.24625e-06,
"loss": 0.1969,
"step": 1000
},
{
"epoch": 0.4785247432306256,
"grad_norm": 5.119744777679443,
"learning_rate": 1.2775e-06,
"loss": 0.1781,
"step": 1025
},
{
"epoch": 0.49019607843137253,
"grad_norm": 16.845916748046875,
"learning_rate": 1.3087500000000002e-06,
"loss": 0.1714,
"step": 1050
},
{
"epoch": 0.5018674136321195,
"grad_norm": 8.487195014953613,
"learning_rate": 1.34e-06,
"loss": 0.1784,
"step": 1075
},
{
"epoch": 0.5135387488328664,
"grad_norm": 10.874380111694336,
"learning_rate": 1.3712500000000002e-06,
"loss": 0.2085,
"step": 1100
},
{
"epoch": 0.5252100840336135,
"grad_norm": 5.2850661277771,
"learning_rate": 1.4025e-06,
"loss": 0.1731,
"step": 1125
},
{
"epoch": 0.5368814192343604,
"grad_norm": 12.621524810791016,
"learning_rate": 1.43375e-06,
"loss": 0.2066,
"step": 1150
},
{
"epoch": 0.5485527544351074,
"grad_norm": 6.700886249542236,
"learning_rate": 1.465e-06,
"loss": 0.1599,
"step": 1175
},
{
"epoch": 0.5602240896358543,
"grad_norm": 13.19000244140625,
"learning_rate": 1.49625e-06,
"loss": 0.1982,
"step": 1200
},
{
"epoch": 0.5718954248366013,
"grad_norm": 5.436820030212402,
"learning_rate": 1.5275000000000002e-06,
"loss": 0.1671,
"step": 1225
},
{
"epoch": 0.5835667600373483,
"grad_norm": 14.979127883911133,
"learning_rate": 1.5587500000000001e-06,
"loss": 0.192,
"step": 1250
},
{
"epoch": 0.5952380952380952,
"grad_norm": 4.490325450897217,
"learning_rate": 1.5900000000000002e-06,
"loss": 0.1731,
"step": 1275
},
{
"epoch": 0.6069094304388422,
"grad_norm": 17.244354248046875,
"learning_rate": 1.6212500000000001e-06,
"loss": 0.1763,
"step": 1300
},
{
"epoch": 0.6185807656395892,
"grad_norm": 8.09378433227539,
"learning_rate": 1.6525000000000003e-06,
"loss": 0.1511,
"step": 1325
},
{
"epoch": 0.6302521008403361,
"grad_norm": 13.42496395111084,
"learning_rate": 1.68375e-06,
"loss": 0.2064,
"step": 1350
},
{
"epoch": 0.6419234360410832,
"grad_norm": 7.0393385887146,
"learning_rate": 1.7149999999999999e-06,
"loss": 0.1703,
"step": 1375
},
{
"epoch": 0.6535947712418301,
"grad_norm": 11.562192916870117,
"learning_rate": 1.74625e-06,
"loss": 0.1743,
"step": 1400
},
{
"epoch": 0.665266106442577,
"grad_norm": 8.296894073486328,
"learning_rate": 1.7775e-06,
"loss": 0.1762,
"step": 1425
},
{
"epoch": 0.676937441643324,
"grad_norm": 15.146247863769531,
"learning_rate": 1.80875e-06,
"loss": 0.1877,
"step": 1450
},
{
"epoch": 0.688608776844071,
"grad_norm": 6.557362079620361,
"learning_rate": 1.84e-06,
"loss": 0.1948,
"step": 1475
},
{
"epoch": 0.7002801120448179,
"grad_norm": 9.744128227233887,
"learning_rate": 1.87125e-06,
"loss": 0.188,
"step": 1500
},
{
"epoch": 0.7119514472455649,
"grad_norm": 6.373684883117676,
"learning_rate": 1.9025000000000002e-06,
"loss": 0.1795,
"step": 1525
},
{
"epoch": 0.7236227824463118,
"grad_norm": 12.318848609924316,
"learning_rate": 1.9337500000000003e-06,
"loss": 0.16,
"step": 1550
},
{
"epoch": 0.7352941176470589,
"grad_norm": 8.32919979095459,
"learning_rate": 1.9650000000000002e-06,
"loss": 0.1559,
"step": 1575
},
{
"epoch": 0.7469654528478058,
"grad_norm": 14.127927780151367,
"learning_rate": 1.99625e-06,
"loss": 0.18,
"step": 1600
},
{
"epoch": 0.7586367880485527,
"grad_norm": 4.867166519165039,
"learning_rate": 2.0275e-06,
"loss": 0.1502,
"step": 1625
},
{
"epoch": 0.7703081232492998,
"grad_norm": 10.735671043395996,
"learning_rate": 2.0587500000000004e-06,
"loss": 0.1798,
"step": 1650
},
{
"epoch": 0.7819794584500467,
"grad_norm": 7.60561990737915,
"learning_rate": 2.09e-06,
"loss": 0.1532,
"step": 1675
},
{
"epoch": 0.7936507936507936,
"grad_norm": 14.279719352722168,
"learning_rate": 2.12125e-06,
"loss": 0.1761,
"step": 1700
},
{
"epoch": 0.8053221288515406,
"grad_norm": 6.583901882171631,
"learning_rate": 2.1525e-06,
"loss": 0.162,
"step": 1725
},
{
"epoch": 0.8169934640522876,
"grad_norm": 12.237863540649414,
"learning_rate": 2.18375e-06,
"loss": 0.1644,
"step": 1750
},
{
"epoch": 0.8286647992530346,
"grad_norm": 5.648594379425049,
"learning_rate": 2.215e-06,
"loss": 0.142,
"step": 1775
},
{
"epoch": 0.8403361344537815,
"grad_norm": 15.333111763000488,
"learning_rate": 2.24625e-06,
"loss": 0.1932,
"step": 1800
},
{
"epoch": 0.8520074696545284,
"grad_norm": 7.390342712402344,
"learning_rate": 2.2775000000000002e-06,
"loss": 0.1663,
"step": 1825
},
{
"epoch": 0.8636788048552755,
"grad_norm": 15.47307300567627,
"learning_rate": 2.30875e-06,
"loss": 0.1864,
"step": 1850
},
{
"epoch": 0.8753501400560224,
"grad_norm": 3.5743496417999268,
"learning_rate": 2.34e-06,
"loss": 0.1625,
"step": 1875
},
{
"epoch": 0.8870214752567693,
"grad_norm": 12.931510925292969,
"learning_rate": 2.3712500000000004e-06,
"loss": 0.1674,
"step": 1900
},
{
"epoch": 0.8986928104575164,
"grad_norm": 10.445046424865723,
"learning_rate": 2.4025000000000003e-06,
"loss": 0.1551,
"step": 1925
},
{
"epoch": 0.9103641456582633,
"grad_norm": 15.884492874145508,
"learning_rate": 2.43375e-06,
"loss": 0.1797,
"step": 1950
},
{
"epoch": 0.9220354808590103,
"grad_norm": 3.6354966163635254,
"learning_rate": 2.465e-06,
"loss": 0.1726,
"step": 1975
},
{
"epoch": 0.9337068160597572,
"grad_norm": 10.952392578125,
"learning_rate": 2.49625e-06,
"loss": 0.1579,
"step": 2000
},
{
"epoch": 0.9453781512605042,
"grad_norm": 5.575680255889893,
"learning_rate": 2.5275e-06,
"loss": 0.1607,
"step": 2025
},
{
"epoch": 0.9570494864612512,
"grad_norm": 9.334450721740723,
"learning_rate": 2.55875e-06,
"loss": 0.1819,
"step": 2050
},
{
"epoch": 0.9687208216619981,
"grad_norm": 5.0021443367004395,
"learning_rate": 2.59e-06,
"loss": 0.1721,
"step": 2075
},
{
"epoch": 0.9803921568627451,
"grad_norm": 11.442158699035645,
"learning_rate": 2.62125e-06,
"loss": 0.1696,
"step": 2100
},
{
"epoch": 0.9920634920634921,
"grad_norm": 7.345489978790283,
"learning_rate": 2.6525e-06,
"loss": 0.1547,
"step": 2125
},
{
"epoch": 1.003734827264239,
"grad_norm": 3.4940426349639893,
"learning_rate": 2.6837500000000004e-06,
"loss": 0.1642,
"step": 2150
},
{
"epoch": 1.015406162464986,
"grad_norm": 5.598668098449707,
"learning_rate": 2.7150000000000003e-06,
"loss": 0.0863,
"step": 2175
},
{
"epoch": 1.0270774976657329,
"grad_norm": 3.780421495437622,
"learning_rate": 2.74625e-06,
"loss": 0.136,
"step": 2200
},
{
"epoch": 1.03874883286648,
"grad_norm": 3.801037311553955,
"learning_rate": 2.7775e-06,
"loss": 0.0962,
"step": 2225
},
{
"epoch": 1.050420168067227,
"grad_norm": 4.065105438232422,
"learning_rate": 2.8087500000000004e-06,
"loss": 0.1418,
"step": 2250
},
{
"epoch": 1.0620915032679739,
"grad_norm": 4.889001369476318,
"learning_rate": 2.8400000000000003e-06,
"loss": 0.1161,
"step": 2275
},
{
"epoch": 1.0737628384687208,
"grad_norm": 6.3269944190979,
"learning_rate": 2.87125e-06,
"loss": 0.1363,
"step": 2300
},
{
"epoch": 1.0854341736694677,
"grad_norm": 6.666966438293457,
"learning_rate": 2.9025e-06,
"loss": 0.1177,
"step": 2325
},
{
"epoch": 1.0971055088702149,
"grad_norm": 3.8940012454986572,
"learning_rate": 2.93375e-06,
"loss": 0.1297,
"step": 2350
},
{
"epoch": 1.1087768440709618,
"grad_norm": 5.189432144165039,
"learning_rate": 2.965e-06,
"loss": 0.1149,
"step": 2375
},
{
"epoch": 1.1204481792717087,
"grad_norm": 4.866479873657227,
"learning_rate": 2.99625e-06,
"loss": 0.1341,
"step": 2400
},
{
"epoch": 1.1321195144724556,
"grad_norm": 7.036620140075684,
"learning_rate": 3.0275000000000002e-06,
"loss": 0.102,
"step": 2425
},
{
"epoch": 1.1437908496732025,
"grad_norm": 4.124939441680908,
"learning_rate": 3.05875e-06,
"loss": 0.1483,
"step": 2450
},
{
"epoch": 1.1554621848739495,
"grad_norm": 6.803956508636475,
"learning_rate": 3.09e-06,
"loss": 0.095,
"step": 2475
},
{
"epoch": 1.1671335200746966,
"grad_norm": 4.783279895782471,
"learning_rate": 3.1212500000000004e-06,
"loss": 0.1458,
"step": 2500
},
{
"epoch": 1.1788048552754435,
"grad_norm": 5.567852020263672,
"learning_rate": 3.1525e-06,
"loss": 0.104,
"step": 2525
},
{
"epoch": 1.1904761904761905,
"grad_norm": 4.637757301330566,
"learning_rate": 3.18375e-06,
"loss": 0.1335,
"step": 2550
},
{
"epoch": 1.2021475256769374,
"grad_norm": 7.655948162078857,
"learning_rate": 3.215e-06,
"loss": 0.1229,
"step": 2575
},
{
"epoch": 1.2138188608776843,
"grad_norm": 4.771119117736816,
"learning_rate": 3.24625e-06,
"loss": 0.1543,
"step": 2600
},
{
"epoch": 1.2254901960784315,
"grad_norm": 5.286261558532715,
"learning_rate": 3.2775e-06,
"loss": 0.1093,
"step": 2625
},
{
"epoch": 1.2371615312791784,
"grad_norm": 4.742598533630371,
"learning_rate": 3.30875e-06,
"loss": 0.1587,
"step": 2650
},
{
"epoch": 1.2488328664799253,
"grad_norm": 7.248344898223877,
"learning_rate": 3.34e-06,
"loss": 0.0946,
"step": 2675
},
{
"epoch": 1.2605042016806722,
"grad_norm": 4.244619846343994,
"learning_rate": 3.37125e-06,
"loss": 0.1408,
"step": 2700
},
{
"epoch": 1.2721755368814192,
"grad_norm": 4.698862075805664,
"learning_rate": 3.4025e-06,
"loss": 0.1115,
"step": 2725
},
{
"epoch": 1.283846872082166,
"grad_norm": 5.453229904174805,
"learning_rate": 3.4337500000000004e-06,
"loss": 0.1377,
"step": 2750
},
{
"epoch": 1.2955182072829132,
"grad_norm": 5.513113498687744,
"learning_rate": 3.4650000000000003e-06,
"loss": 0.1052,
"step": 2775
},
{
"epoch": 1.3071895424836601,
"grad_norm": 4.546627521514893,
"learning_rate": 3.49625e-06,
"loss": 0.1326,
"step": 2800
},
{
"epoch": 1.318860877684407,
"grad_norm": 7.161789894104004,
"learning_rate": 3.5275e-06,
"loss": 0.106,
"step": 2825
},
{
"epoch": 1.330532212885154,
"grad_norm": 4.164399147033691,
"learning_rate": 3.5587500000000004e-06,
"loss": 0.1358,
"step": 2850
},
{
"epoch": 1.3422035480859011,
"grad_norm": 5.428344249725342,
"learning_rate": 3.5900000000000004e-06,
"loss": 0.0949,
"step": 2875
},
{
"epoch": 1.353874883286648,
"grad_norm": 6.0663275718688965,
"learning_rate": 3.6212500000000003e-06,
"loss": 0.1361,
"step": 2900
},
{
"epoch": 1.365546218487395,
"grad_norm": 6.05164909362793,
"learning_rate": 3.6525e-06,
"loss": 0.1098,
"step": 2925
},
{
"epoch": 1.377217553688142,
"grad_norm": 5.027311325073242,
"learning_rate": 3.6837500000000005e-06,
"loss": 0.1299,
"step": 2950
},
{
"epoch": 1.3888888888888888,
"grad_norm": 7.7624006271362305,
"learning_rate": 3.7150000000000004e-06,
"loss": 0.1109,
"step": 2975
},
{
"epoch": 1.4005602240896358,
"grad_norm": 5.019223213195801,
"learning_rate": 3.7462500000000003e-06,
"loss": 0.1648,
"step": 3000
},
{
"epoch": 1.4122315592903827,
"grad_norm": 6.924857139587402,
"learning_rate": 3.7775000000000007e-06,
"loss": 0.1078,
"step": 3025
},
{
"epoch": 1.4239028944911298,
"grad_norm": 6.098647594451904,
"learning_rate": 3.8087500000000006e-06,
"loss": 0.1511,
"step": 3050
},
{
"epoch": 1.4355742296918768,
"grad_norm": 8.425399780273438,
"learning_rate": 3.84e-06,
"loss": 0.1191,
"step": 3075
},
{
"epoch": 1.4472455648926237,
"grad_norm": 5.609083652496338,
"learning_rate": 3.8712499999999996e-06,
"loss": 0.142,
"step": 3100
},
{
"epoch": 1.4589169000933706,
"grad_norm": 6.1147050857543945,
"learning_rate": 3.9025e-06,
"loss": 0.0996,
"step": 3125
},
{
"epoch": 1.4705882352941178,
"grad_norm": 2.957465648651123,
"learning_rate": 3.93375e-06,
"loss": 0.1439,
"step": 3150
},
{
"epoch": 1.4822595704948647,
"grad_norm": 3.472870111465454,
"learning_rate": 3.965e-06,
"loss": 0.0993,
"step": 3175
},
{
"epoch": 1.4939309056956116,
"grad_norm": 5.71575927734375,
"learning_rate": 3.99625e-06,
"loss": 0.1438,
"step": 3200
},
{
"epoch": 1.5056022408963585,
"grad_norm": 3.8211705684661865,
"learning_rate": 4.0275e-06,
"loss": 0.0941,
"step": 3225
},
{
"epoch": 1.5172735760971054,
"grad_norm": 5.43381929397583,
"learning_rate": 4.05875e-06,
"loss": 0.1565,
"step": 3250
},
{
"epoch": 1.5289449112978524,
"grad_norm": 5.146786212921143,
"learning_rate": 4.09e-06,
"loss": 0.1125,
"step": 3275
},
{
"epoch": 1.5406162464985993,
"grad_norm": 4.331883430480957,
"learning_rate": 4.12125e-06,
"loss": 0.1393,
"step": 3300
},
{
"epoch": 1.5522875816993464,
"grad_norm": 7.666718482971191,
"learning_rate": 4.1525000000000005e-06,
"loss": 0.116,
"step": 3325
},
{
"epoch": 1.5639589169000934,
"grad_norm": 3.27604079246521,
"learning_rate": 4.18375e-06,
"loss": 0.1487,
"step": 3350
},
{
"epoch": 1.5756302521008403,
"grad_norm": 8.548047065734863,
"learning_rate": 4.215e-06,
"loss": 0.114,
"step": 3375
},
{
"epoch": 1.5873015873015874,
"grad_norm": 3.315171003341675,
"learning_rate": 4.24625e-06,
"loss": 0.1372,
"step": 3400
},
{
"epoch": 1.5989729225023344,
"grad_norm": 6.508883953094482,
"learning_rate": 4.2775e-06,
"loss": 0.1054,
"step": 3425
},
{
"epoch": 1.6106442577030813,
"grad_norm": 4.305449485778809,
"learning_rate": 4.30875e-06,
"loss": 0.1449,
"step": 3450
},
{
"epoch": 1.6223155929038282,
"grad_norm": 8.238191604614258,
"learning_rate": 4.34e-06,
"loss": 0.1052,
"step": 3475
},
{
"epoch": 1.6339869281045751,
"grad_norm": 3.1781492233276367,
"learning_rate": 4.371250000000001e-06,
"loss": 0.1462,
"step": 3500
},
{
"epoch": 1.645658263305322,
"grad_norm": 5.8778557777404785,
"learning_rate": 4.402500000000001e-06,
"loss": 0.1213,
"step": 3525
},
{
"epoch": 1.657329598506069,
"grad_norm": 3.6947333812713623,
"learning_rate": 4.4337500000000005e-06,
"loss": 0.1626,
"step": 3550
},
{
"epoch": 1.669000933706816,
"grad_norm": 7.086148738861084,
"learning_rate": 4.4650000000000004e-06,
"loss": 0.1254,
"step": 3575
},
{
"epoch": 1.680672268907563,
"grad_norm": 4.603717803955078,
"learning_rate": 4.49625e-06,
"loss": 0.1403,
"step": 3600
},
{
"epoch": 1.69234360410831,
"grad_norm": 4.92815637588501,
"learning_rate": 4.5275e-06,
"loss": 0.1032,
"step": 3625
},
{
"epoch": 1.7040149393090571,
"grad_norm": 3.701477289199829,
"learning_rate": 4.55875e-06,
"loss": 0.1349,
"step": 3650
},
{
"epoch": 1.715686274509804,
"grad_norm": 13.451651573181152,
"learning_rate": 4.590000000000001e-06,
"loss": 0.1238,
"step": 3675
},
{
"epoch": 1.727357609710551,
"grad_norm": 5.369861125946045,
"learning_rate": 4.62125e-06,
"loss": 0.1596,
"step": 3700
},
{
"epoch": 1.739028944911298,
"grad_norm": 6.7364702224731445,
"learning_rate": 4.6525e-06,
"loss": 0.1288,
"step": 3725
},
{
"epoch": 1.7507002801120448,
"grad_norm": 5.925997734069824,
"learning_rate": 4.68375e-06,
"loss": 0.1462,
"step": 3750
},
{
"epoch": 1.7623716153127917,
"grad_norm": 4.7654829025268555,
"learning_rate": 4.715e-06,
"loss": 0.1146,
"step": 3775
},
{
"epoch": 1.7740429505135387,
"grad_norm": 3.232302188873291,
"learning_rate": 4.74625e-06,
"loss": 0.1341,
"step": 3800
},
{
"epoch": 1.7857142857142856,
"grad_norm": 6.663305282592773,
"learning_rate": 4.7775e-06,
"loss": 0.1047,
"step": 3825
},
{
"epoch": 1.7973856209150327,
"grad_norm": 4.3404340744018555,
"learning_rate": 4.80875e-06,
"loss": 0.1425,
"step": 3850
},
{
"epoch": 1.8090569561157797,
"grad_norm": 7.439436912536621,
"learning_rate": 4.84e-06,
"loss": 0.1056,
"step": 3875
},
{
"epoch": 1.8207282913165266,
"grad_norm": 4.493560314178467,
"learning_rate": 4.87125e-06,
"loss": 0.152,
"step": 3900
},
{
"epoch": 1.8323996265172737,
"grad_norm": 8.294795036315918,
"learning_rate": 4.9025e-06,
"loss": 0.1048,
"step": 3925
},
{
"epoch": 1.8440709617180207,
"grad_norm": 4.7361884117126465,
"learning_rate": 4.93375e-06,
"loss": 0.1424,
"step": 3950
},
{
"epoch": 1.8557422969187676,
"grad_norm": 6.927464485168457,
"learning_rate": 4.965e-06,
"loss": 0.0982,
"step": 3975
},
{
"epoch": 1.8674136321195145,
"grad_norm": 6.300534248352051,
"learning_rate": 4.996250000000001e-06,
"loss": 0.1273,
"step": 4000
},
{
"epoch": 1.8790849673202614,
"grad_norm": 4.410505294799805,
"learning_rate": 5.0275000000000006e-06,
"loss": 0.1223,
"step": 4025
},
{
"epoch": 1.8907563025210083,
"grad_norm": 7.067946434020996,
"learning_rate": 5.0587500000000005e-06,
"loss": 0.1447,
"step": 4050
},
{
"epoch": 1.9024276377217553,
"grad_norm": 5.033799171447754,
"learning_rate": 5.09e-06,
"loss": 0.1066,
"step": 4075
},
{
"epoch": 1.9140989729225022,
"grad_norm": 4.994957447052002,
"learning_rate": 5.12e-06,
"loss": 0.14,
"step": 4100
},
{
"epoch": 1.9257703081232493,
"grad_norm": 7.51298189163208,
"learning_rate": 5.151250000000001e-06,
"loss": 0.1065,
"step": 4125
},
{
"epoch": 1.9374416433239963,
"grad_norm": 4.488656997680664,
"learning_rate": 5.182500000000001e-06,
"loss": 0.1495,
"step": 4150
},
{
"epoch": 1.9491129785247432,
"grad_norm": 6.328264236450195,
"learning_rate": 5.213750000000001e-06,
"loss": 0.1047,
"step": 4175
},
{
"epoch": 1.9607843137254903,
"grad_norm": 3.9831016063690186,
"learning_rate": 5.245e-06,
"loss": 0.1483,
"step": 4200
},
{
"epoch": 1.9724556489262373,
"grad_norm": 9.204850196838379,
"learning_rate": 5.27625e-06,
"loss": 0.1076,
"step": 4225
},
{
"epoch": 1.9841269841269842,
"grad_norm": 4.560220241546631,
"learning_rate": 5.3075e-06,
"loss": 0.1319,
"step": 4250
},
{
"epoch": 1.995798319327731,
"grad_norm": 6.272380828857422,
"learning_rate": 5.33875e-06,
"loss": 0.1127,
"step": 4275
},
{
"epoch": 2.007469654528478,
"grad_norm": 2.819856882095337,
"learning_rate": 5.37e-06,
"loss": 0.0898,
"step": 4300
},
{
"epoch": 2.019140989729225,
"grad_norm": 3.3237640857696533,
"learning_rate": 5.40125e-06,
"loss": 0.0556,
"step": 4325
},
{
"epoch": 2.030812324929972,
"grad_norm": 2.4771931171417236,
"learning_rate": 5.4325e-06,
"loss": 0.0807,
"step": 4350
},
{
"epoch": 2.042483660130719,
"grad_norm": 3.640773296356201,
"learning_rate": 5.46375e-06,
"loss": 0.0528,
"step": 4375
},
{
"epoch": 2.0541549953314657,
"grad_norm": 4.329100131988525,
"learning_rate": 5.495e-06,
"loss": 0.0852,
"step": 4400
},
{
"epoch": 2.065826330532213,
"grad_norm": 4.2357916831970215,
"learning_rate": 5.52625e-06,
"loss": 0.0473,
"step": 4425
},
{
"epoch": 2.07749766573296,
"grad_norm": 4.033267974853516,
"learning_rate": 5.557500000000001e-06,
"loss": 0.0802,
"step": 4450
},
{
"epoch": 2.089169000933707,
"grad_norm": 6.817841529846191,
"learning_rate": 5.5887500000000005e-06,
"loss": 0.0602,
"step": 4475
},
{
"epoch": 2.100840336134454,
"grad_norm": 4.568445682525635,
"learning_rate": 5.62e-06,
"loss": 0.083,
"step": 4500
},
{
"epoch": 2.112511671335201,
"grad_norm": 8.219367980957031,
"learning_rate": 5.65125e-06,
"loss": 0.066,
"step": 4525
},
{
"epoch": 2.1241830065359477,
"grad_norm": 1.9845637083053589,
"learning_rate": 5.6825e-06,
"loss": 0.0878,
"step": 4550
},
{
"epoch": 2.1358543417366946,
"grad_norm": 7.169174671173096,
"learning_rate": 5.71375e-06,
"loss": 0.0651,
"step": 4575
},
{
"epoch": 2.1475256769374416,
"grad_norm": 3.6445248126983643,
"learning_rate": 5.745e-06,
"loss": 0.1006,
"step": 4600
},
{
"epoch": 2.1591970121381885,
"grad_norm": 4.955069541931152,
"learning_rate": 5.776250000000001e-06,
"loss": 0.0572,
"step": 4625
},
{
"epoch": 2.1708683473389354,
"grad_norm": 3.909029245376587,
"learning_rate": 5.807500000000001e-06,
"loss": 0.0841,
"step": 4650
},
{
"epoch": 2.1825396825396823,
"grad_norm": 2.1477255821228027,
"learning_rate": 5.838750000000001e-06,
"loss": 0.0717,
"step": 4675
},
{
"epoch": 2.1942110177404297,
"grad_norm": 5.743031978607178,
"learning_rate": 5.8700000000000005e-06,
"loss": 0.0808,
"step": 4700
},
{
"epoch": 2.2058823529411766,
"grad_norm": 4.362875461578369,
"learning_rate": 5.9012500000000005e-06,
"loss": 0.0609,
"step": 4725
},
{
"epoch": 2.2175536881419236,
"grad_norm": 3.8198697566986084,
"learning_rate": 5.9325e-06,
"loss": 0.0892,
"step": 4750
},
{
"epoch": 2.2292250233426705,
"grad_norm": 3.9962849617004395,
"learning_rate": 5.96375e-06,
"loss": 0.0656,
"step": 4775
},
{
"epoch": 2.2408963585434174,
"grad_norm": 2.613006830215454,
"learning_rate": 5.995e-06,
"loss": 0.0812,
"step": 4800
},
{
"epoch": 2.2525676937441643,
"grad_norm": 5.6209540367126465,
"learning_rate": 6.02625e-06,
"loss": 0.0814,
"step": 4825
},
{
"epoch": 2.2642390289449112,
"grad_norm": 4.349456787109375,
"learning_rate": 6.0575e-06,
"loss": 0.084,
"step": 4850
},
{
"epoch": 2.275910364145658,
"grad_norm": 6.475245952606201,
"learning_rate": 6.08875e-06,
"loss": 0.0798,
"step": 4875
},
{
"epoch": 2.287581699346405,
"grad_norm": 2.503551959991455,
"learning_rate": 6.12e-06,
"loss": 0.0836,
"step": 4900
},
{
"epoch": 2.299253034547152,
"grad_norm": 7.365092754364014,
"learning_rate": 6.15125e-06,
"loss": 0.07,
"step": 4925
},
{
"epoch": 2.310924369747899,
"grad_norm": 4.494143486022949,
"learning_rate": 6.1825e-06,
"loss": 0.1019,
"step": 4950
},
{
"epoch": 2.3225957049486463,
"grad_norm": 9.088369369506836,
"learning_rate": 6.2137500000000004e-06,
"loss": 0.0741,
"step": 4975
},
{
"epoch": 2.3342670401493932,
"grad_norm": 3.5182809829711914,
"learning_rate": 6.245e-06,
"loss": 0.0856,
"step": 5000
},
{
"epoch": 2.3342670401493932,
"eval_loss": 0.16011084616184235,
"eval_runtime": 6234.0593,
"eval_samples_per_second": 1.51,
"eval_steps_per_second": 0.189,
"eval_wer": 0.103035685451316,
"step": 5000
},
{
"epoch": 2.34593837535014,
"grad_norm": 3.4111175537109375,
"learning_rate": 6.2434375e-06,
"loss": 0.0694,
"step": 5025
},
{
"epoch": 2.357609710550887,
"grad_norm": 5.693511486053467,
"learning_rate": 6.235625e-06,
"loss": 0.0952,
"step": 5050
},
{
"epoch": 2.369281045751634,
"grad_norm": 7.079166412353516,
"learning_rate": 6.2278125e-06,
"loss": 0.0605,
"step": 5075
},
{
"epoch": 2.380952380952381,
"grad_norm": 2.9197869300842285,
"learning_rate": 6.22e-06,
"loss": 0.0941,
"step": 5100
},
{
"epoch": 2.392623716153128,
"grad_norm": 6.79939603805542,
"learning_rate": 6.2121875e-06,
"loss": 0.0725,
"step": 5125
},
{
"epoch": 2.404295051353875,
"grad_norm": 6.609640121459961,
"learning_rate": 6.204375e-06,
"loss": 0.0976,
"step": 5150
},
{
"epoch": 2.4159663865546217,
"grad_norm": 7.591739654541016,
"learning_rate": 6.196562500000001e-06,
"loss": 0.0697,
"step": 5175
},
{
"epoch": 2.4276377217553686,
"grad_norm": 7.580626964569092,
"learning_rate": 6.18875e-06,
"loss": 0.0921,
"step": 5200
},
{
"epoch": 2.439309056956116,
"grad_norm": 7.333129405975342,
"learning_rate": 6.1809375000000005e-06,
"loss": 0.0727,
"step": 5225
},
{
"epoch": 2.450980392156863,
"grad_norm": 6.632033348083496,
"learning_rate": 6.173125e-06,
"loss": 0.0891,
"step": 5250
},
{
"epoch": 2.46265172735761,
"grad_norm": 5.526509761810303,
"learning_rate": 6.165312500000001e-06,
"loss": 0.077,
"step": 5275
},
{
"epoch": 2.4743230625583568,
"grad_norm": 6.0735602378845215,
"learning_rate": 6.1575e-06,
"loss": 0.101,
"step": 5300
},
{
"epoch": 2.4859943977591037,
"grad_norm": 7.87660026550293,
"learning_rate": 6.1496875000000006e-06,
"loss": 0.0737,
"step": 5325
},
{
"epoch": 2.4976657329598506,
"grad_norm": 4.01476526260376,
"learning_rate": 6.141875e-06,
"loss": 0.0928,
"step": 5350
},
{
"epoch": 2.5093370681605975,
"grad_norm": 5.005721569061279,
"learning_rate": 6.1340625e-06,
"loss": 0.0717,
"step": 5375
},
{
"epoch": 2.5210084033613445,
"grad_norm": 5.76194429397583,
"learning_rate": 6.12625e-06,
"loss": 0.0922,
"step": 5400
},
{
"epoch": 2.5326797385620914,
"grad_norm": 5.3504157066345215,
"learning_rate": 6.1184375e-06,
"loss": 0.0658,
"step": 5425
},
{
"epoch": 2.5443510737628383,
"grad_norm": 4.85629415512085,
"learning_rate": 6.1106250000000005e-06,
"loss": 0.0825,
"step": 5450
},
{
"epoch": 2.5560224089635852,
"grad_norm": 5.944486141204834,
"learning_rate": 6.1028125e-06,
"loss": 0.0775,
"step": 5475
},
{
"epoch": 2.567693744164332,
"grad_norm": 6.294357776641846,
"learning_rate": 6.095e-06,
"loss": 0.0915,
"step": 5500
},
{
"epoch": 2.5793650793650795,
"grad_norm": 5.524097919464111,
"learning_rate": 6.0871875e-06,
"loss": 0.0662,
"step": 5525
},
{
"epoch": 2.5910364145658265,
"grad_norm": 4.0100812911987305,
"learning_rate": 6.0793750000000006e-06,
"loss": 0.0914,
"step": 5550
},
{
"epoch": 2.6027077497665734,
"grad_norm": 7.9108123779296875,
"learning_rate": 6.0715625e-06,
"loss": 0.0774,
"step": 5575
},
{
"epoch": 2.6143790849673203,
"grad_norm": 2.5471160411834717,
"learning_rate": 6.06375e-06,
"loss": 0.0839,
"step": 5600
},
{
"epoch": 2.6260504201680672,
"grad_norm": 3.6380198001861572,
"learning_rate": 6.0559375e-06,
"loss": 0.0621,
"step": 5625
},
{
"epoch": 2.637721755368814,
"grad_norm": 2.9542012214660645,
"learning_rate": 6.048125000000001e-06,
"loss": 0.0784,
"step": 5650
},
{
"epoch": 2.649393090569561,
"grad_norm": 4.753948211669922,
"learning_rate": 6.0403125000000005e-06,
"loss": 0.0768,
"step": 5675
},
{
"epoch": 2.661064425770308,
"grad_norm": 2.410440444946289,
"learning_rate": 6.0325e-06,
"loss": 0.088,
"step": 5700
},
{
"epoch": 2.6727357609710554,
"grad_norm": 5.084535121917725,
"learning_rate": 6.0246875e-06,
"loss": 0.0743,
"step": 5725
},
{
"epoch": 2.6844070961718023,
"grad_norm": 1.9251270294189453,
"learning_rate": 6.016875e-06,
"loss": 0.0882,
"step": 5750
},
{
"epoch": 2.696078431372549,
"grad_norm": 6.866667747497559,
"learning_rate": 6.0090625000000005e-06,
"loss": 0.0675,
"step": 5775
},
{
"epoch": 2.707749766573296,
"grad_norm": 5.318982124328613,
"learning_rate": 6.00125e-06,
"loss": 0.0856,
"step": 5800
},
{
"epoch": 2.719421101774043,
"grad_norm": 4.362662315368652,
"learning_rate": 5.9934375e-06,
"loss": 0.0787,
"step": 5825
},
{
"epoch": 2.73109243697479,
"grad_norm": 3.6322102546691895,
"learning_rate": 5.985625e-06,
"loss": 0.0907,
"step": 5850
},
{
"epoch": 2.742763772175537,
"grad_norm": 6.347275257110596,
"learning_rate": 5.977812500000001e-06,
"loss": 0.0689,
"step": 5875
},
{
"epoch": 2.754435107376284,
"grad_norm": 4.315841197967529,
"learning_rate": 5.9700000000000004e-06,
"loss": 0.0971,
"step": 5900
},
{
"epoch": 2.7661064425770308,
"grad_norm": 3.1454360485076904,
"learning_rate": 5.9621875e-06,
"loss": 0.0615,
"step": 5925
},
{
"epoch": 2.7777777777777777,
"grad_norm": 3.9786288738250732,
"learning_rate": 5.954375e-06,
"loss": 0.0988,
"step": 5950
},
{
"epoch": 2.7894491129785246,
"grad_norm": 7.057102680206299,
"learning_rate": 5.946562500000001e-06,
"loss": 0.0768,
"step": 5975
},
{
"epoch": 2.8011204481792715,
"grad_norm": 4.522549629211426,
"learning_rate": 5.9387500000000005e-06,
"loss": 0.0847,
"step": 6000
},
{
"epoch": 2.8127917833800185,
"grad_norm": 6.361202716827393,
"learning_rate": 5.9309375e-06,
"loss": 0.0525,
"step": 6025
},
{
"epoch": 2.8244631185807654,
"grad_norm": 3.8292720317840576,
"learning_rate": 5.923125e-06,
"loss": 0.0841,
"step": 6050
},
{
"epoch": 2.8361344537815127,
"grad_norm": 6.834649085998535,
"learning_rate": 5.9153125e-06,
"loss": 0.0687,
"step": 6075
},
{
"epoch": 2.8478057889822597,
"grad_norm": 6.25474214553833,
"learning_rate": 5.907500000000001e-06,
"loss": 0.0895,
"step": 6100
},
{
"epoch": 2.8594771241830066,
"grad_norm": 5.775394439697266,
"learning_rate": 5.8996875000000004e-06,
"loss": 0.0727,
"step": 6125
},
{
"epoch": 2.8711484593837535,
"grad_norm": 4.371216297149658,
"learning_rate": 5.891875e-06,
"loss": 0.1085,
"step": 6150
},
{
"epoch": 2.8828197945845004,
"grad_norm": 3.050452947616577,
"learning_rate": 5.8840625e-06,
"loss": 0.0654,
"step": 6175
},
{
"epoch": 2.8944911297852474,
"grad_norm": 3.992262840270996,
"learning_rate": 5.876250000000001e-06,
"loss": 0.0945,
"step": 6200
},
{
"epoch": 2.9061624649859943,
"grad_norm": 5.945260047912598,
"learning_rate": 5.8684375e-06,
"loss": 0.0724,
"step": 6225
},
{
"epoch": 2.917833800186741,
"grad_norm": 3.371884822845459,
"learning_rate": 5.860625e-06,
"loss": 0.0961,
"step": 6250
},
{
"epoch": 2.9295051353874886,
"grad_norm": 4.842737674713135,
"learning_rate": 5.8528125e-06,
"loss": 0.0748,
"step": 6275
},
{
"epoch": 2.9411764705882355,
"grad_norm": 3.629974603652954,
"learning_rate": 5.845000000000001e-06,
"loss": 0.0897,
"step": 6300
},
{
"epoch": 2.9528478057889824,
"grad_norm": 8.20695972442627,
"learning_rate": 5.8371875e-06,
"loss": 0.0718,
"step": 6325
},
{
"epoch": 2.9645191409897294,
"grad_norm": 3.662733554840088,
"learning_rate": 5.8296875e-06,
"loss": 0.0885,
"step": 6350
},
{
"epoch": 2.9761904761904763,
"grad_norm": 7.424181938171387,
"learning_rate": 5.821875e-06,
"loss": 0.0631,
"step": 6375
},
{
"epoch": 2.987861811391223,
"grad_norm": 3.9309329986572266,
"learning_rate": 5.814062500000001e-06,
"loss": 0.0894,
"step": 6400
},
{
"epoch": 2.99953314659197,
"grad_norm": 10.30614185333252,
"learning_rate": 5.8062500000000005e-06,
"loss": 0.087,
"step": 6425
},
{
"epoch": 3.011204481792717,
"grad_norm": 4.544281005859375,
"learning_rate": 5.7984375e-06,
"loss": 0.04,
"step": 6450
},
{
"epoch": 3.022875816993464,
"grad_norm": 9.070793151855469,
"learning_rate": 5.790625e-06,
"loss": 0.0373,
"step": 6475
},
{
"epoch": 3.034547152194211,
"grad_norm": 7.368350982666016,
"learning_rate": 5.782812500000001e-06,
"loss": 0.0485,
"step": 6500
},
{
"epoch": 3.046218487394958,
"grad_norm": 6.376898765563965,
"learning_rate": 5.775000000000001e-06,
"loss": 0.0474,
"step": 6525
},
{
"epoch": 3.0578898225957047,
"grad_norm": 5.481170654296875,
"learning_rate": 5.7671875e-06,
"loss": 0.0458,
"step": 6550
},
{
"epoch": 3.069561157796452,
"grad_norm": 10.094844818115234,
"learning_rate": 5.759375e-06,
"loss": 0.0451,
"step": 6575
},
{
"epoch": 3.081232492997199,
"grad_norm": 0.9469685554504395,
"learning_rate": 5.7515625e-06,
"loss": 0.0465,
"step": 6600
},
{
"epoch": 3.092903828197946,
"grad_norm": 4.808952331542969,
"learning_rate": 5.743750000000001e-06,
"loss": 0.0441,
"step": 6625
},
{
"epoch": 3.104575163398693,
"grad_norm": 8.131449699401855,
"learning_rate": 5.7359375e-06,
"loss": 0.0396,
"step": 6650
},
{
"epoch": 3.11624649859944,
"grad_norm": 6.750060081481934,
"learning_rate": 5.728125e-06,
"loss": 0.0611,
"step": 6675
},
{
"epoch": 3.1279178338001867,
"grad_norm": 6.601670742034912,
"learning_rate": 5.7203125e-06,
"loss": 0.0415,
"step": 6700
},
{
"epoch": 3.1395891690009337,
"grad_norm": 13.884129524230957,
"learning_rate": 5.712500000000001e-06,
"loss": 0.0417,
"step": 6725
},
{
"epoch": 3.1512605042016806,
"grad_norm": 7.539254188537598,
"learning_rate": 5.7046875e-06,
"loss": 0.0413,
"step": 6750
},
{
"epoch": 3.1629318394024275,
"grad_norm": 6.866730213165283,
"learning_rate": 5.696875e-06,
"loss": 0.055,
"step": 6775
},
{
"epoch": 3.1746031746031744,
"grad_norm": 2.3453876972198486,
"learning_rate": 5.6890625e-06,
"loss": 0.0314,
"step": 6800
},
{
"epoch": 3.186274509803922,
"grad_norm": 5.64259672164917,
"learning_rate": 5.681250000000001e-06,
"loss": 0.0446,
"step": 6825
},
{
"epoch": 3.1979458450046687,
"grad_norm": 1.9124208688735962,
"learning_rate": 5.6734375e-06,
"loss": 0.0413,
"step": 6850
},
{
"epoch": 3.2096171802054156,
"grad_norm": 8.153667449951172,
"learning_rate": 5.6656250000000005e-06,
"loss": 0.0482,
"step": 6875
},
{
"epoch": 3.2212885154061626,
"grad_norm": 3.0059521198272705,
"learning_rate": 5.6578125e-06,
"loss": 0.0391,
"step": 6900
},
{
"epoch": 3.2329598506069095,
"grad_norm": 7.093464374542236,
"learning_rate": 5.65e-06,
"loss": 0.0473,
"step": 6925
},
{
"epoch": 3.2446311858076564,
"grad_norm": 2.2140514850616455,
"learning_rate": 5.642187500000001e-06,
"loss": 0.0472,
"step": 6950
},
{
"epoch": 3.2563025210084033,
"grad_norm": 5.731634616851807,
"learning_rate": 5.634375e-06,
"loss": 0.0454,
"step": 6975
},
{
"epoch": 3.2679738562091503,
"grad_norm": 2.6004838943481445,
"learning_rate": 5.6265625e-06,
"loss": 0.0379,
"step": 7000
},
{
"epoch": 3.279645191409897,
"grad_norm": 6.271092414855957,
"learning_rate": 5.61875e-06,
"loss": 0.0447,
"step": 7025
},
{
"epoch": 3.291316526610644,
"grad_norm": 14.229198455810547,
"learning_rate": 5.610937500000001e-06,
"loss": 0.0433,
"step": 7050
},
{
"epoch": 3.302987861811391,
"grad_norm": 7.76876974105835,
"learning_rate": 5.603125e-06,
"loss": 0.0417,
"step": 7075
},
{
"epoch": 3.314659197012138,
"grad_norm": 5.000005722045898,
"learning_rate": 5.5953125000000005e-06,
"loss": 0.0365,
"step": 7100
},
{
"epoch": 3.3263305322128853,
"grad_norm": 5.754938125610352,
"learning_rate": 5.5875e-06,
"loss": 0.0483,
"step": 7125
},
{
"epoch": 3.3380018674136323,
"grad_norm": 2.836254835128784,
"learning_rate": 5.579687500000001e-06,
"loss": 0.0439,
"step": 7150
},
{
"epoch": 3.349673202614379,
"grad_norm": 6.115027904510498,
"learning_rate": 5.571875e-06,
"loss": 0.0518,
"step": 7175
},
{
"epoch": 3.361344537815126,
"grad_norm": 4.021732330322266,
"learning_rate": 5.5640625000000006e-06,
"loss": 0.0413,
"step": 7200
},
{
"epoch": 3.373015873015873,
"grad_norm": 12.889508247375488,
"learning_rate": 5.55625e-06,
"loss": 0.0608,
"step": 7225
},
{
"epoch": 3.38468720821662,
"grad_norm": 5.332011699676514,
"learning_rate": 5.5484375e-06,
"loss": 0.0458,
"step": 7250
},
{
"epoch": 3.396358543417367,
"grad_norm": 9.390791893005371,
"learning_rate": 5.540625e-06,
"loss": 0.0418,
"step": 7275
},
{
"epoch": 3.408029878618114,
"grad_norm": 5.002245903015137,
"learning_rate": 5.5328125e-06,
"loss": 0.0431,
"step": 7300
},
{
"epoch": 3.4197012138188607,
"grad_norm": 9.859498023986816,
"learning_rate": 5.5250000000000005e-06,
"loss": 0.0511,
"step": 7325
},
{
"epoch": 3.431372549019608,
"grad_norm": 12.415291786193848,
"learning_rate": 5.5171875e-06,
"loss": 0.0441,
"step": 7350
},
{
"epoch": 3.443043884220355,
"grad_norm": 4.821508884429932,
"learning_rate": 5.509375e-06,
"loss": 0.0466,
"step": 7375
},
{
"epoch": 3.454715219421102,
"grad_norm": 3.222395420074463,
"learning_rate": 5.5015625e-06,
"loss": 0.043,
"step": 7400
},
{
"epoch": 3.466386554621849,
"grad_norm": 11.607864379882812,
"learning_rate": 5.4937500000000006e-06,
"loss": 0.044,
"step": 7425
},
{
"epoch": 3.478057889822596,
"grad_norm": 2.7468137741088867,
"learning_rate": 5.4859375e-06,
"loss": 0.0494,
"step": 7450
},
{
"epoch": 3.4897292250233427,
"grad_norm": 5.353877067565918,
"learning_rate": 5.478125e-06,
"loss": 0.0439,
"step": 7475
},
{
"epoch": 3.5014005602240896,
"grad_norm": 5.521659851074219,
"learning_rate": 5.4703125e-06,
"loss": 0.043,
"step": 7500
},
{
"epoch": 3.5130718954248366,
"grad_norm": 11.562368392944336,
"learning_rate": 5.462500000000001e-06,
"loss": 0.0611,
"step": 7525
},
{
"epoch": 3.5247432306255835,
"grad_norm": 2.4676475524902344,
"learning_rate": 5.4546875000000004e-06,
"loss": 0.0372,
"step": 7550
},
{
"epoch": 3.5364145658263304,
"grad_norm": 7.735954761505127,
"learning_rate": 5.446875e-06,
"loss": 0.0434,
"step": 7575
},
{
"epoch": 3.5480859010270773,
"grad_norm": 3.367266893386841,
"learning_rate": 5.4390625e-06,
"loss": 0.0488,
"step": 7600
},
{
"epoch": 3.5597572362278243,
"grad_norm": 6.3219990730285645,
"learning_rate": 5.43125e-06,
"loss": 0.0488,
"step": 7625
},
{
"epoch": 3.571428571428571,
"grad_norm": 5.415238857269287,
"learning_rate": 5.4234375000000005e-06,
"loss": 0.0481,
"step": 7650
},
{
"epoch": 3.5830999066293185,
"grad_norm": 7.6133832931518555,
"learning_rate": 5.415625e-06,
"loss": 0.0379,
"step": 7675
},
{
"epoch": 3.5947712418300655,
"grad_norm": 3.2077534198760986,
"learning_rate": 5.4078125e-06,
"loss": 0.0469,
"step": 7700
},
{
"epoch": 3.6064425770308124,
"grad_norm": 9.553058624267578,
"learning_rate": 5.4e-06,
"loss": 0.044,
"step": 7725
},
{
"epoch": 3.6181139122315593,
"grad_norm": 6.475897312164307,
"learning_rate": 5.392187500000001e-06,
"loss": 0.0388,
"step": 7750
},
{
"epoch": 3.6297852474323062,
"grad_norm": 7.829625129699707,
"learning_rate": 5.3843750000000004e-06,
"loss": 0.0543,
"step": 7775
},
{
"epoch": 3.641456582633053,
"grad_norm": 2.857725143432617,
"learning_rate": 5.3765625e-06,
"loss": 0.0456,
"step": 7800
},
{
"epoch": 3.6531279178338,
"grad_norm": 9.913888931274414,
"learning_rate": 5.36875e-06,
"loss": 0.0588,
"step": 7825
},
{
"epoch": 3.664799253034547,
"grad_norm": 6.124692916870117,
"learning_rate": 5.360937500000001e-06,
"loss": 0.0487,
"step": 7850
},
{
"epoch": 3.6764705882352944,
"grad_norm": 5.036586284637451,
"learning_rate": 5.3531250000000005e-06,
"loss": 0.0381,
"step": 7875
},
{
"epoch": 3.6881419234360413,
"grad_norm": 2.280762195587158,
"learning_rate": 5.3453125e-06,
"loss": 0.041,
"step": 7900
},
{
"epoch": 3.6998132586367882,
"grad_norm": 5.175332546234131,
"learning_rate": 5.3375e-06,
"loss": 0.0439,
"step": 7925
},
{
"epoch": 3.711484593837535,
"grad_norm": 6.605205535888672,
"learning_rate": 5.3296875e-06,
"loss": 0.052,
"step": 7950
},
{
"epoch": 3.723155929038282,
"grad_norm": 8.107190132141113,
"learning_rate": 5.321875000000001e-06,
"loss": 0.0513,
"step": 7975
},
{
"epoch": 3.734827264239029,
"grad_norm": 4.808969497680664,
"learning_rate": 5.3140624999999996e-06,
"loss": 0.0474,
"step": 8000
},
{
"epoch": 3.746498599439776,
"grad_norm": 13.523513793945312,
"learning_rate": 5.30625e-06,
"loss": 0.0472,
"step": 8025
},
{
"epoch": 3.758169934640523,
"grad_norm": 4.2439751625061035,
"learning_rate": 5.2984375e-06,
"loss": 0.0493,
"step": 8050
},
{
"epoch": 3.7698412698412698,
"grad_norm": 19.3562068939209,
"learning_rate": 5.290625000000001e-06,
"loss": 0.0559,
"step": 8075
},
{
"epoch": 3.7815126050420167,
"grad_norm": 5.023294448852539,
"learning_rate": 5.2828125e-06,
"loss": 0.0444,
"step": 8100
},
{
"epoch": 3.7931839402427636,
"grad_norm": 10.296977996826172,
"learning_rate": 5.275e-06,
"loss": 0.0485,
"step": 8125
},
{
"epoch": 3.8048552754435105,
"grad_norm": 1.339447259902954,
"learning_rate": 5.2671875e-06,
"loss": 0.0408,
"step": 8150
},
{
"epoch": 3.8165266106442575,
"grad_norm": 8.966866493225098,
"learning_rate": 5.259375000000001e-06,
"loss": 0.0481,
"step": 8175
},
{
"epoch": 3.828197945845005,
"grad_norm": 4.252060413360596,
"learning_rate": 5.251562500000001e-06,
"loss": 0.0452,
"step": 8200
},
{
"epoch": 3.8398692810457518,
"grad_norm": 10.229138374328613,
"learning_rate": 5.24375e-06,
"loss": 0.0592,
"step": 8225
},
{
"epoch": 3.8515406162464987,
"grad_norm": 1.4391601085662842,
"learning_rate": 5.2359375e-06,
"loss": 0.0453,
"step": 8250
},
{
"epoch": 3.8632119514472456,
"grad_norm": 6.556412696838379,
"learning_rate": 5.228125e-06,
"loss": 0.0436,
"step": 8275
},
{
"epoch": 3.8748832866479925,
"grad_norm": 4.541426658630371,
"learning_rate": 5.220312500000001e-06,
"loss": 0.0399,
"step": 8300
},
{
"epoch": 3.8865546218487395,
"grad_norm": 5.626660346984863,
"learning_rate": 5.2125e-06,
"loss": 0.0526,
"step": 8325
},
{
"epoch": 3.8982259570494864,
"grad_norm": 3.5642924308776855,
"learning_rate": 5.2046875e-06,
"loss": 0.041,
"step": 8350
},
{
"epoch": 3.9098972922502333,
"grad_norm": 5.301916599273682,
"learning_rate": 5.196875e-06,
"loss": 0.0505,
"step": 8375
},
{
"epoch": 3.9215686274509802,
"grad_norm": 4.125392913818359,
"learning_rate": 5.189062500000001e-06,
"loss": 0.0413,
"step": 8400
},
{
"epoch": 3.9332399626517276,
"grad_norm": 10.192436218261719,
"learning_rate": 5.18125e-06,
"loss": 0.0531,
"step": 8425
},
{
"epoch": 3.9449112978524745,
"grad_norm": 3.3052845001220703,
"learning_rate": 5.1734375e-06,
"loss": 0.0398,
"step": 8450
},
{
"epoch": 3.9565826330532214,
"grad_norm": 6.241024494171143,
"learning_rate": 5.165625e-06,
"loss": 0.0408,
"step": 8475
},
{
"epoch": 3.9682539682539684,
"grad_norm": 6.708200454711914,
"learning_rate": 5.157812500000001e-06,
"loss": 0.0516,
"step": 8500
},
{
"epoch": 3.9799253034547153,
"grad_norm": 11.930779457092285,
"learning_rate": 5.15e-06,
"loss": 0.0452,
"step": 8525
},
{
"epoch": 3.991596638655462,
"grad_norm": 7.160813808441162,
"learning_rate": 5.1421875000000005e-06,
"loss": 0.0455,
"step": 8550
},
{
"epoch": 4.003267973856209,
"grad_norm": 1.401731014251709,
"learning_rate": 5.134375e-06,
"loss": 0.0453,
"step": 8575
},
{
"epoch": 4.014939309056956,
"grad_norm": 1.890440821647644,
"learning_rate": 5.1265625e-06,
"loss": 0.0177,
"step": 8600
},
{
"epoch": 4.026610644257703,
"grad_norm": 1.3520216941833496,
"learning_rate": 5.11875e-06,
"loss": 0.0305,
"step": 8625
},
{
"epoch": 4.03828197945845,
"grad_norm": 4.4095025062561035,
"learning_rate": 5.1109375e-06,
"loss": 0.0227,
"step": 8650
},
{
"epoch": 4.049953314659197,
"grad_norm": 2.4577364921569824,
"learning_rate": 5.103125e-06,
"loss": 0.036,
"step": 8675
},
{
"epoch": 4.061624649859944,
"grad_norm": 3.378568410873413,
"learning_rate": 5.0953125e-06,
"loss": 0.0235,
"step": 8700
},
{
"epoch": 4.073295985060691,
"grad_norm": 1.7685190439224243,
"learning_rate": 5.0875e-06,
"loss": 0.031,
"step": 8725
},
{
"epoch": 4.084967320261438,
"grad_norm": 1.426932454109192,
"learning_rate": 5.0796875e-06,
"loss": 0.0184,
"step": 8750
},
{
"epoch": 4.0966386554621845,
"grad_norm": 2.890690803527832,
"learning_rate": 5.0718750000000005e-06,
"loss": 0.0319,
"step": 8775
},
{
"epoch": 4.1083099906629315,
"grad_norm": 2.438765287399292,
"learning_rate": 5.0640625e-06,
"loss": 0.0191,
"step": 8800
},
{
"epoch": 4.119981325863678,
"grad_norm": 1.9891207218170166,
"learning_rate": 5.056250000000001e-06,
"loss": 0.031,
"step": 8825
},
{
"epoch": 4.131652661064426,
"grad_norm": 2.288236141204834,
"learning_rate": 5.0484375e-06,
"loss": 0.0174,
"step": 8850
},
{
"epoch": 4.143323996265173,
"grad_norm": 2.097827434539795,
"learning_rate": 5.0406250000000005e-06,
"loss": 0.0244,
"step": 8875
},
{
"epoch": 4.15499533146592,
"grad_norm": 0.7743799090385437,
"learning_rate": 5.0328125e-06,
"loss": 0.0209,
"step": 8900
},
{
"epoch": 4.166666666666667,
"grad_norm": 2.9196207523345947,
"learning_rate": 5.025e-06,
"loss": 0.0275,
"step": 8925
},
{
"epoch": 4.178338001867414,
"grad_norm": 1.094561219215393,
"learning_rate": 5.0171875e-06,
"loss": 0.026,
"step": 8950
},
{
"epoch": 4.190009337068161,
"grad_norm": 2.849806070327759,
"learning_rate": 5.0096875000000005e-06,
"loss": 0.0374,
"step": 8975
},
{
"epoch": 4.201680672268908,
"grad_norm": 9.60171890258789,
"learning_rate": 5.001875e-06,
"loss": 0.0264,
"step": 9000
},
{
"epoch": 4.213352007469655,
"grad_norm": 3.694355010986328,
"learning_rate": 4.9940625e-06,
"loss": 0.0355,
"step": 9025
},
{
"epoch": 4.225023342670402,
"grad_norm": 5.392662525177002,
"learning_rate": 4.98625e-06,
"loss": 0.0242,
"step": 9050
},
{
"epoch": 4.2366946778711485,
"grad_norm": 2.1022627353668213,
"learning_rate": 4.9784375e-06,
"loss": 0.0327,
"step": 9075
},
{
"epoch": 4.248366013071895,
"grad_norm": 3.802021026611328,
"learning_rate": 4.970625e-06,
"loss": 0.0211,
"step": 9100
},
{
"epoch": 4.260037348272642,
"grad_norm": 0.9553838968276978,
"learning_rate": 4.9628125e-06,
"loss": 0.0291,
"step": 9125
},
{
"epoch": 4.271708683473389,
"grad_norm": 5.334795951843262,
"learning_rate": 4.955e-06,
"loss": 0.0209,
"step": 9150
},
{
"epoch": 4.283380018674136,
"grad_norm": 1.5590300559997559,
"learning_rate": 4.9471875e-06,
"loss": 0.032,
"step": 9175
},
{
"epoch": 4.295051353874883,
"grad_norm": 1.9582746028900146,
"learning_rate": 4.9393750000000005e-06,
"loss": 0.0285,
"step": 9200
},
{
"epoch": 4.30672268907563,
"grad_norm": 5.399374485015869,
"learning_rate": 4.9315625e-06,
"loss": 0.0385,
"step": 9225
},
{
"epoch": 4.318394024276377,
"grad_norm": 3.4703786373138428,
"learning_rate": 4.92375e-06,
"loss": 0.0185,
"step": 9250
},
{
"epoch": 4.330065359477124,
"grad_norm": 3.748854637145996,
"learning_rate": 4.9159375e-06,
"loss": 0.0361,
"step": 9275
},
{
"epoch": 4.341736694677871,
"grad_norm": 2.3994362354278564,
"learning_rate": 4.9081250000000005e-06,
"loss": 0.0257,
"step": 9300
},
{
"epoch": 4.353408029878618,
"grad_norm": 1.0170806646347046,
"learning_rate": 4.9003125e-06,
"loss": 0.0344,
"step": 9325
},
{
"epoch": 4.365079365079365,
"grad_norm": 1.4588912725448608,
"learning_rate": 4.8925e-06,
"loss": 0.0225,
"step": 9350
},
{
"epoch": 4.3767507002801125,
"grad_norm": 2.9339776039123535,
"learning_rate": 4.8846875e-06,
"loss": 0.0317,
"step": 9375
},
{
"epoch": 4.388422035480859,
"grad_norm": 3.1433396339416504,
"learning_rate": 4.876875e-06,
"loss": 0.0231,
"step": 9400
},
{
"epoch": 4.400093370681606,
"grad_norm": 3.174156427383423,
"learning_rate": 4.8690625000000004e-06,
"loss": 0.0269,
"step": 9425
},
{
"epoch": 4.411764705882353,
"grad_norm": 1.8732781410217285,
"learning_rate": 4.86125e-06,
"loss": 0.0305,
"step": 9450
},
{
"epoch": 4.4234360410831,
"grad_norm": 1.5500296354293823,
"learning_rate": 4.8534375e-06,
"loss": 0.0351,
"step": 9475
},
{
"epoch": 4.435107376283847,
"grad_norm": 3.1208136081695557,
"learning_rate": 4.845625e-06,
"loss": 0.022,
"step": 9500
},
{
"epoch": 4.446778711484594,
"grad_norm": 2.964061975479126,
"learning_rate": 4.8378125000000005e-06,
"loss": 0.0383,
"step": 9525
},
{
"epoch": 4.458450046685341,
"grad_norm": 4.548437118530273,
"learning_rate": 4.83e-06,
"loss": 0.0263,
"step": 9550
},
{
"epoch": 4.470121381886088,
"grad_norm": 2.4414591789245605,
"learning_rate": 4.8221875e-06,
"loss": 0.0314,
"step": 9575
},
{
"epoch": 4.481792717086835,
"grad_norm": 2.8750803470611572,
"learning_rate": 4.814375e-06,
"loss": 0.0254,
"step": 9600
},
{
"epoch": 4.493464052287582,
"grad_norm": 1.9113430976867676,
"learning_rate": 4.806562500000001e-06,
"loss": 0.0365,
"step": 9625
},
{
"epoch": 4.505135387488329,
"grad_norm": 1.8737727403640747,
"learning_rate": 4.7987500000000004e-06,
"loss": 0.0251,
"step": 9650
},
{
"epoch": 4.516806722689076,
"grad_norm": 3.6277358531951904,
"learning_rate": 4.7909375e-06,
"loss": 0.0357,
"step": 9675
},
{
"epoch": 4.5284780578898225,
"grad_norm": 0.974403440952301,
"learning_rate": 4.783125e-06,
"loss": 0.0172,
"step": 9700
},
{
"epoch": 4.540149393090569,
"grad_norm": 5.103818893432617,
"learning_rate": 4.7753125e-06,
"loss": 0.0262,
"step": 9725
},
{
"epoch": 4.551820728291316,
"grad_norm": 4.358363151550293,
"learning_rate": 4.7675000000000005e-06,
"loss": 0.0268,
"step": 9750
},
{
"epoch": 4.563492063492063,
"grad_norm": 1.1133219003677368,
"learning_rate": 4.7596875e-06,
"loss": 0.0371,
"step": 9775
},
{
"epoch": 4.57516339869281,
"grad_norm": 2.639396905899048,
"learning_rate": 4.751875e-06,
"loss": 0.0277,
"step": 9800
},
{
"epoch": 4.586834733893557,
"grad_norm": 0.8341067433357239,
"learning_rate": 4.7440625e-06,
"loss": 0.0267,
"step": 9825
},
{
"epoch": 4.598506069094304,
"grad_norm": 2.7689151763916016,
"learning_rate": 4.736250000000001e-06,
"loss": 0.0211,
"step": 9850
},
{
"epoch": 4.610177404295051,
"grad_norm": 3.2999351024627686,
"learning_rate": 4.7284374999999996e-06,
"loss": 0.0374,
"step": 9875
},
{
"epoch": 4.621848739495798,
"grad_norm": 1.2019790410995483,
"learning_rate": 4.720625e-06,
"loss": 0.0181,
"step": 9900
},
{
"epoch": 4.633520074696545,
"grad_norm": 2.8706002235412598,
"learning_rate": 4.7128125e-06,
"loss": 0.0304,
"step": 9925
},
{
"epoch": 4.645191409897293,
"grad_norm": 5.747146129608154,
"learning_rate": 4.705000000000001e-06,
"loss": 0.0229,
"step": 9950
},
{
"epoch": 4.6568627450980395,
"grad_norm": 1.8742387294769287,
"learning_rate": 4.6971875000000005e-06,
"loss": 0.0418,
"step": 9975
},
{
"epoch": 4.6685340802987865,
"grad_norm": 1.8577946424484253,
"learning_rate": 4.689375e-06,
"loss": 0.0156,
"step": 10000
},
{
"epoch": 4.6685340802987865,
"eval_loss": 0.18312382698059082,
"eval_runtime": 5443.8581,
"eval_samples_per_second": 1.729,
"eval_steps_per_second": 0.216,
"eval_wer": 0.10030679799773938,
"step": 10000
},
{
"epoch": 4.680205415499533,
"grad_norm": 1.9401777982711792,
"learning_rate": 4.6815625e-06,
"loss": 0.0271,
"step": 10025
},
{
"epoch": 4.69187675070028,
"grad_norm": 5.094863414764404,
"learning_rate": 4.67375e-06,
"loss": 0.0245,
"step": 10050
},
{
"epoch": 4.703548085901027,
"grad_norm": 3.0759990215301514,
"learning_rate": 4.665937500000001e-06,
"loss": 0.0408,
"step": 10075
},
{
"epoch": 4.715219421101774,
"grad_norm": 1.7008417844772339,
"learning_rate": 4.658125e-06,
"loss": 0.0259,
"step": 10100
},
{
"epoch": 4.726890756302521,
"grad_norm": 2.5551605224609375,
"learning_rate": 4.6503125e-06,
"loss": 0.0356,
"step": 10125
},
{
"epoch": 4.738562091503268,
"grad_norm": 1.278176188468933,
"learning_rate": 4.6425e-06,
"loss": 0.024,
"step": 10150
},
{
"epoch": 4.750233426704015,
"grad_norm": 3.679241418838501,
"learning_rate": 4.634687500000001e-06,
"loss": 0.038,
"step": 10175
},
{
"epoch": 4.761904761904762,
"grad_norm": 3.3556320667266846,
"learning_rate": 4.626875e-06,
"loss": 0.0295,
"step": 10200
},
{
"epoch": 4.773576097105509,
"grad_norm": 2.3901469707489014,
"learning_rate": 4.6190625e-06,
"loss": 0.0326,
"step": 10225
},
{
"epoch": 4.785247432306256,
"grad_norm": 2.4672956466674805,
"learning_rate": 4.61125e-06,
"loss": 0.0255,
"step": 10250
},
{
"epoch": 4.796918767507003,
"grad_norm": 1.856067419052124,
"learning_rate": 4.603437500000001e-06,
"loss": 0.0472,
"step": 10275
},
{
"epoch": 4.80859010270775,
"grad_norm": 3.6307425498962402,
"learning_rate": 4.595625e-06,
"loss": 0.0204,
"step": 10300
},
{
"epoch": 4.8202614379084965,
"grad_norm": 3.4470720291137695,
"learning_rate": 4.5878125e-06,
"loss": 0.0266,
"step": 10325
},
{
"epoch": 4.831932773109243,
"grad_norm": 3.720325231552124,
"learning_rate": 4.58e-06,
"loss": 0.0174,
"step": 10350
},
{
"epoch": 4.84360410830999,
"grad_norm": 3.147507429122925,
"learning_rate": 4.572187500000001e-06,
"loss": 0.0353,
"step": 10375
},
{
"epoch": 4.855275443510737,
"grad_norm": 0.47896313667297363,
"learning_rate": 4.564375e-06,
"loss": 0.0198,
"step": 10400
},
{
"epoch": 4.866946778711485,
"grad_norm": 1.2566039562225342,
"learning_rate": 4.5565625000000005e-06,
"loss": 0.0326,
"step": 10425
},
{
"epoch": 4.878618113912232,
"grad_norm": 6.644142150878906,
"learning_rate": 4.54875e-06,
"loss": 0.0292,
"step": 10450
},
{
"epoch": 4.890289449112979,
"grad_norm": 4.639550685882568,
"learning_rate": 4.5409375e-06,
"loss": 0.0378,
"step": 10475
},
{
"epoch": 4.901960784313726,
"grad_norm": 2.032776117324829,
"learning_rate": 4.533125e-06,
"loss": 0.0226,
"step": 10500
},
{
"epoch": 4.913632119514473,
"grad_norm": 1.344425916671753,
"learning_rate": 4.5253125e-06,
"loss": 0.0368,
"step": 10525
},
{
"epoch": 4.92530345471522,
"grad_norm": 0.8881208300590515,
"learning_rate": 4.5175e-06,
"loss": 0.0224,
"step": 10550
},
{
"epoch": 4.936974789915967,
"grad_norm": 2.743755340576172,
"learning_rate": 4.5096875e-06,
"loss": 0.0417,
"step": 10575
},
{
"epoch": 4.9486461251167135,
"grad_norm": 1.9883514642715454,
"learning_rate": 4.501875000000001e-06,
"loss": 0.0207,
"step": 10600
},
{
"epoch": 4.9603174603174605,
"grad_norm": 4.254443168640137,
"learning_rate": 4.4940625e-06,
"loss": 0.0344,
"step": 10625
},
{
"epoch": 4.971988795518207,
"grad_norm": 2.9644615650177,
"learning_rate": 4.4862500000000005e-06,
"loss": 0.0242,
"step": 10650
},
{
"epoch": 4.983660130718954,
"grad_norm": 4.65371036529541,
"learning_rate": 4.4784375e-06,
"loss": 0.0402,
"step": 10675
},
{
"epoch": 4.995331465919701,
"grad_norm": 1.430145025253296,
"learning_rate": 4.470625000000001e-06,
"loss": 0.0193,
"step": 10700
},
{
"epoch": 5.007002801120448,
"grad_norm": 2.433776378631592,
"learning_rate": 4.4628125e-06,
"loss": 0.0217,
"step": 10725
},
{
"epoch": 5.018674136321195,
"grad_norm": 0.8967903256416321,
"learning_rate": 4.4550000000000005e-06,
"loss": 0.0167,
"step": 10750
},
{
"epoch": 5.030345471521942,
"grad_norm": 3.4592394828796387,
"learning_rate": 4.4471875e-06,
"loss": 0.0275,
"step": 10775
},
{
"epoch": 5.042016806722689,
"grad_norm": 0.6761863827705383,
"learning_rate": 4.439375e-06,
"loss": 0.0153,
"step": 10800
},
{
"epoch": 5.053688141923436,
"grad_norm": 0.43812835216522217,
"learning_rate": 4.4315625e-06,
"loss": 0.0195,
"step": 10825
},
{
"epoch": 5.065359477124183,
"grad_norm": 1.3948005437850952,
"learning_rate": 4.42375e-06,
"loss": 0.017,
"step": 10850
},
{
"epoch": 5.07703081232493,
"grad_norm": 2.06145977973938,
"learning_rate": 4.4159375000000004e-06,
"loss": 0.026,
"step": 10875
},
{
"epoch": 5.088702147525677,
"grad_norm": 3.0333502292633057,
"learning_rate": 4.408125e-06,
"loss": 0.0146,
"step": 10900
},
{
"epoch": 5.1003734827264235,
"grad_norm": 2.764770746231079,
"learning_rate": 4.4003125e-06,
"loss": 0.0186,
"step": 10925
},
{
"epoch": 5.1120448179271705,
"grad_norm": 2.536029577255249,
"learning_rate": 4.3925e-06,
"loss": 0.0161,
"step": 10950
},
{
"epoch": 5.123716153127917,
"grad_norm": 2.648541212081909,
"learning_rate": 4.3846875000000005e-06,
"loss": 0.0217,
"step": 10975
},
{
"epoch": 5.135387488328665,
"grad_norm": 4.795249938964844,
"learning_rate": 4.376875e-06,
"loss": 0.0116,
"step": 11000
},
{
"epoch": 5.147058823529412,
"grad_norm": 0.7834287285804749,
"learning_rate": 4.3690625e-06,
"loss": 0.023,
"step": 11025
},
{
"epoch": 5.158730158730159,
"grad_norm": 6.7206010818481445,
"learning_rate": 4.36125e-06,
"loss": 0.0188,
"step": 11050
},
{
"epoch": 5.170401493930906,
"grad_norm": 0.3784288763999939,
"learning_rate": 4.353437500000001e-06,
"loss": 0.0187,
"step": 11075
},
{
"epoch": 5.182072829131653,
"grad_norm": 5.990387439727783,
"learning_rate": 4.3456250000000004e-06,
"loss": 0.0131,
"step": 11100
},
{
"epoch": 5.1937441643324,
"grad_norm": 2.445591449737549,
"learning_rate": 4.3378125e-06,
"loss": 0.0293,
"step": 11125
},
{
"epoch": 5.205415499533147,
"grad_norm": 3.114480495452881,
"learning_rate": 4.33e-06,
"loss": 0.0132,
"step": 11150
},
{
"epoch": 5.217086834733894,
"grad_norm": 0.6976014971733093,
"learning_rate": 4.3221875e-06,
"loss": 0.0254,
"step": 11175
},
{
"epoch": 5.228758169934641,
"grad_norm": 6.35882568359375,
"learning_rate": 4.3143750000000005e-06,
"loss": 0.017,
"step": 11200
},
{
"epoch": 5.2404295051353875,
"grad_norm": 2.0138509273529053,
"learning_rate": 4.3065625e-06,
"loss": 0.0236,
"step": 11225
},
{
"epoch": 5.2521008403361344,
"grad_norm": 3.1038243770599365,
"learning_rate": 4.29875e-06,
"loss": 0.015,
"step": 11250
},
{
"epoch": 5.263772175536881,
"grad_norm": 2.8752825260162354,
"learning_rate": 4.2909375e-06,
"loss": 0.0193,
"step": 11275
},
{
"epoch": 5.275443510737628,
"grad_norm": 1.1929106712341309,
"learning_rate": 4.283125000000001e-06,
"loss": 0.0114,
"step": 11300
},
{
"epoch": 5.287114845938375,
"grad_norm": 2.3317930698394775,
"learning_rate": 4.2753125e-06,
"loss": 0.0236,
"step": 11325
},
{
"epoch": 5.298786181139122,
"grad_norm": 2.8288731575012207,
"learning_rate": 4.2675e-06,
"loss": 0.0182,
"step": 11350
},
{
"epoch": 5.310457516339869,
"grad_norm": 4.622555255889893,
"learning_rate": 4.2596875e-06,
"loss": 0.0317,
"step": 11375
},
{
"epoch": 5.322128851540616,
"grad_norm": 0.40077078342437744,
"learning_rate": 4.251875000000001e-06,
"loss": 0.0117,
"step": 11400
},
{
"epoch": 5.333800186741363,
"grad_norm": 0.6447650194168091,
"learning_rate": 4.2440625000000005e-06,
"loss": 0.0213,
"step": 11425
},
{
"epoch": 5.34547152194211,
"grad_norm": 4.721693992614746,
"learning_rate": 4.23625e-06,
"loss": 0.0145,
"step": 11450
},
{
"epoch": 5.357142857142857,
"grad_norm": 4.699221134185791,
"learning_rate": 4.2284375e-06,
"loss": 0.0213,
"step": 11475
},
{
"epoch": 5.368814192343605,
"grad_norm": 1.0902756452560425,
"learning_rate": 4.220625e-06,
"loss": 0.0135,
"step": 11500
},
{
"epoch": 5.3804855275443515,
"grad_norm": 3.20731258392334,
"learning_rate": 4.212812500000001e-06,
"loss": 0.0192,
"step": 11525
},
{
"epoch": 5.392156862745098,
"grad_norm": 3.498342752456665,
"learning_rate": 4.2049999999999996e-06,
"loss": 0.0131,
"step": 11550
},
{
"epoch": 5.403828197945845,
"grad_norm": 0.2736945152282715,
"learning_rate": 4.1971875e-06,
"loss": 0.0191,
"step": 11575
},
{
"epoch": 5.415499533146592,
"grad_norm": 0.33990904688835144,
"learning_rate": 4.189375e-06,
"loss": 0.0131,
"step": 11600
},
{
"epoch": 5.427170868347339,
"grad_norm": 4.783412456512451,
"learning_rate": 4.181562500000001e-06,
"loss": 0.0235,
"step": 11625
},
{
"epoch": 5.438842203548086,
"grad_norm": 4.162958145141602,
"learning_rate": 4.17375e-06,
"loss": 0.0168,
"step": 11650
},
{
"epoch": 5.450513538748833,
"grad_norm": 2.0236053466796875,
"learning_rate": 4.1659375e-06,
"loss": 0.0321,
"step": 11675
},
{
"epoch": 5.46218487394958,
"grad_norm": 5.59421443939209,
"learning_rate": 4.158125e-06,
"loss": 0.0116,
"step": 11700
},
{
"epoch": 5.473856209150327,
"grad_norm": 1.2957547903060913,
"learning_rate": 4.150312500000001e-06,
"loss": 0.0187,
"step": 11725
},
{
"epoch": 5.485527544351074,
"grad_norm": 2.75832462310791,
"learning_rate": 4.1425e-06,
"loss": 0.0189,
"step": 11750
},
{
"epoch": 5.497198879551821,
"grad_norm": 2.776012659072876,
"learning_rate": 4.1346875e-06,
"loss": 0.0235,
"step": 11775
},
{
"epoch": 5.508870214752568,
"grad_norm": 4.952755451202393,
"learning_rate": 4.126875e-06,
"loss": 0.024,
"step": 11800
},
{
"epoch": 5.520541549953315,
"grad_norm": 1.2413980960845947,
"learning_rate": 4.1190625e-06,
"loss": 0.0248,
"step": 11825
},
{
"epoch": 5.5322128851540615,
"grad_norm": 4.319220066070557,
"learning_rate": 4.111250000000001e-06,
"loss": 0.0176,
"step": 11850
},
{
"epoch": 5.543884220354808,
"grad_norm": 2.8155884742736816,
"learning_rate": 4.1034375e-06,
"loss": 0.0226,
"step": 11875
},
{
"epoch": 5.555555555555555,
"grad_norm": 3.499506950378418,
"learning_rate": 4.095625e-06,
"loss": 0.0118,
"step": 11900
},
{
"epoch": 5.567226890756302,
"grad_norm": 3.1512813568115234,
"learning_rate": 4.0878125e-06,
"loss": 0.0157,
"step": 11925
},
{
"epoch": 5.578898225957049,
"grad_norm": 4.766519546508789,
"learning_rate": 4.080000000000001e-06,
"loss": 0.0184,
"step": 11950
},
{
"epoch": 5.590569561157796,
"grad_norm": 2.901200294494629,
"learning_rate": 4.0721875e-06,
"loss": 0.0198,
"step": 11975
},
{
"epoch": 5.602240896358543,
"grad_norm": 2.888226270675659,
"learning_rate": 4.064375e-06,
"loss": 0.0191,
"step": 12000
},
{
"epoch": 5.61391223155929,
"grad_norm": 1.0496464967727661,
"learning_rate": 4.0565625e-06,
"loss": 0.0219,
"step": 12025
},
{
"epoch": 5.625583566760037,
"grad_norm": 0.7852717041969299,
"learning_rate": 4.048750000000001e-06,
"loss": 0.0177,
"step": 12050
},
{
"epoch": 5.637254901960784,
"grad_norm": 2.5804331302642822,
"learning_rate": 4.0409375e-06,
"loss": 0.0241,
"step": 12075
},
{
"epoch": 5.648926237161532,
"grad_norm": 1.4901975393295288,
"learning_rate": 4.0331250000000005e-06,
"loss": 0.0183,
"step": 12100
},
{
"epoch": 5.660597572362279,
"grad_norm": 3.2913360595703125,
"learning_rate": 4.0253125e-06,
"loss": 0.0194,
"step": 12125
},
{
"epoch": 5.6722689075630255,
"grad_norm": 3.6681406497955322,
"learning_rate": 4.0175e-06,
"loss": 0.0149,
"step": 12150
},
{
"epoch": 5.683940242763772,
"grad_norm": 2.2549614906311035,
"learning_rate": 4.0096875e-06,
"loss": 0.023,
"step": 12175
},
{
"epoch": 5.695611577964519,
"grad_norm": 10.556268692016602,
"learning_rate": 4.001875e-06,
"loss": 0.0179,
"step": 12200
},
{
"epoch": 5.707282913165266,
"grad_norm": 2.5755960941314697,
"learning_rate": 3.9940625e-06,
"loss": 0.0215,
"step": 12225
},
{
"epoch": 5.718954248366013,
"grad_norm": 1.8617174625396729,
"learning_rate": 3.98625e-06,
"loss": 0.017,
"step": 12250
},
{
"epoch": 5.73062558356676,
"grad_norm": 3.4985668659210205,
"learning_rate": 3.9784375e-06,
"loss": 0.0299,
"step": 12275
},
{
"epoch": 5.742296918767507,
"grad_norm": 9.18930435180664,
"learning_rate": 3.970625e-06,
"loss": 0.0157,
"step": 12300
},
{
"epoch": 5.753968253968254,
"grad_norm": 3.4537224769592285,
"learning_rate": 3.9628125000000004e-06,
"loss": 0.0231,
"step": 12325
},
{
"epoch": 5.765639589169001,
"grad_norm": 3.9688210487365723,
"learning_rate": 3.955e-06,
"loss": 0.0205,
"step": 12350
},
{
"epoch": 5.777310924369748,
"grad_norm": 1.72626793384552,
"learning_rate": 3.9471875e-06,
"loss": 0.0225,
"step": 12375
},
{
"epoch": 5.788982259570495,
"grad_norm": 2.350785493850708,
"learning_rate": 3.939375e-06,
"loss": 0.0137,
"step": 12400
},
{
"epoch": 5.800653594771242,
"grad_norm": 3.5603792667388916,
"learning_rate": 3.931875e-06,
"loss": 0.0252,
"step": 12425
},
{
"epoch": 5.812324929971989,
"grad_norm": 0.25176432728767395,
"learning_rate": 3.9240625e-06,
"loss": 0.0202,
"step": 12450
},
{
"epoch": 5.8239962651727355,
"grad_norm": 4.103959083557129,
"learning_rate": 3.916250000000001e-06,
"loss": 0.0201,
"step": 12475
},
{
"epoch": 5.835667600373482,
"grad_norm": 5.1462202072143555,
"learning_rate": 3.9084375e-06,
"loss": 0.014,
"step": 12500
},
{
"epoch": 5.847338935574229,
"grad_norm": 1.2658880949020386,
"learning_rate": 3.9006250000000005e-06,
"loss": 0.0224,
"step": 12525
},
{
"epoch": 5.859010270774976,
"grad_norm": 1.1250051259994507,
"learning_rate": 3.8928125e-06,
"loss": 0.013,
"step": 12550
},
{
"epoch": 5.870681605975724,
"grad_norm": 0.4671033024787903,
"learning_rate": 3.885e-06,
"loss": 0.0222,
"step": 12575
},
{
"epoch": 5.882352941176471,
"grad_norm": 4.812198638916016,
"learning_rate": 3.8771875e-06,
"loss": 0.013,
"step": 12600
},
{
"epoch": 5.894024276377218,
"grad_norm": 6.955799102783203,
"learning_rate": 3.869375e-06,
"loss": 0.0216,
"step": 12625
},
{
"epoch": 5.905695611577965,
"grad_norm": 9.038055419921875,
"learning_rate": 3.8615625e-06,
"loss": 0.0147,
"step": 12650
},
{
"epoch": 5.917366946778712,
"grad_norm": 2.660266399383545,
"learning_rate": 3.85375e-06,
"loss": 0.0179,
"step": 12675
},
{
"epoch": 5.929038281979459,
"grad_norm": 3.986816167831421,
"learning_rate": 3.8459375e-06,
"loss": 0.0175,
"step": 12700
},
{
"epoch": 5.940709617180206,
"grad_norm": 1.7132847309112549,
"learning_rate": 3.838125e-06,
"loss": 0.0243,
"step": 12725
},
{
"epoch": 5.9523809523809526,
"grad_norm": 1.878422498703003,
"learning_rate": 3.8303125000000004e-06,
"loss": 0.0191,
"step": 12750
},
{
"epoch": 5.9640522875816995,
"grad_norm": 1.7959318161010742,
"learning_rate": 3.8225e-06,
"loss": 0.0285,
"step": 12775
},
{
"epoch": 5.975723622782446,
"grad_norm": 7.094222068786621,
"learning_rate": 3.8146875e-06,
"loss": 0.019,
"step": 12800
},
{
"epoch": 5.987394957983193,
"grad_norm": 2.3803250789642334,
"learning_rate": 3.806875e-06,
"loss": 0.027,
"step": 12825
},
{
"epoch": 5.99906629318394,
"grad_norm": 5.211573123931885,
"learning_rate": 3.7990625e-06,
"loss": 0.0239,
"step": 12850
},
{
"epoch": 6.010737628384687,
"grad_norm": 2.943984270095825,
"learning_rate": 3.7912500000000003e-06,
"loss": 0.0156,
"step": 12875
},
{
"epoch": 6.022408963585434,
"grad_norm": 2.1434903144836426,
"learning_rate": 3.7834375000000006e-06,
"loss": 0.0147,
"step": 12900
},
{
"epoch": 6.034080298786181,
"grad_norm": 0.33908581733703613,
"learning_rate": 3.775625e-06,
"loss": 0.0089,
"step": 12925
},
{
"epoch": 6.045751633986928,
"grad_norm": 7.022948265075684,
"learning_rate": 3.7678125e-06,
"loss": 0.0165,
"step": 12950
},
{
"epoch": 6.057422969187675,
"grad_norm": 1.1056474447250366,
"learning_rate": 3.7600000000000004e-06,
"loss": 0.0154,
"step": 12975
},
{
"epoch": 6.069094304388422,
"grad_norm": 3.8569955825805664,
"learning_rate": 3.7521875000000007e-06,
"loss": 0.0149,
"step": 13000
},
{
"epoch": 6.080765639589169,
"grad_norm": 2.6585159301757812,
"learning_rate": 3.744375e-06,
"loss": 0.0147,
"step": 13025
},
{
"epoch": 6.092436974789916,
"grad_norm": 3.2226240634918213,
"learning_rate": 3.7365625000000003e-06,
"loss": 0.0119,
"step": 13050
},
{
"epoch": 6.104108309990663,
"grad_norm": 1.7068639993667603,
"learning_rate": 3.7290625000000003e-06,
"loss": 0.013,
"step": 13075
},
{
"epoch": 6.1157796451914095,
"grad_norm": 0.6673070788383484,
"learning_rate": 3.7212500000000006e-06,
"loss": 0.0097,
"step": 13100
},
{
"epoch": 6.127450980392156,
"grad_norm": 0.041345566511154175,
"learning_rate": 3.7134375e-06,
"loss": 0.0115,
"step": 13125
},
{
"epoch": 6.139122315592904,
"grad_norm": 0.7863844037055969,
"learning_rate": 3.705625e-06,
"loss": 0.0126,
"step": 13150
},
{
"epoch": 6.150793650793651,
"grad_norm": 2.8898963928222656,
"learning_rate": 3.6978125000000004e-06,
"loss": 0.0151,
"step": 13175
},
{
"epoch": 6.162464985994398,
"grad_norm": 4.4687299728393555,
"learning_rate": 3.6900000000000002e-06,
"loss": 0.0145,
"step": 13200
},
{
"epoch": 6.174136321195145,
"grad_norm": 2.697178602218628,
"learning_rate": 3.6821875e-06,
"loss": 0.0136,
"step": 13225
},
{
"epoch": 6.185807656395892,
"grad_norm": 7.974569320678711,
"learning_rate": 3.674375e-06,
"loss": 0.0129,
"step": 13250
},
{
"epoch": 6.197478991596639,
"grad_norm": 0.35622915625572205,
"learning_rate": 3.6665625e-06,
"loss": 0.0168,
"step": 13275
},
{
"epoch": 6.209150326797386,
"grad_norm": 6.407752990722656,
"learning_rate": 3.6587500000000003e-06,
"loss": 0.0135,
"step": 13300
},
{
"epoch": 6.220821661998133,
"grad_norm": 0.7060608267784119,
"learning_rate": 3.6509374999999997e-06,
"loss": 0.0113,
"step": 13325
},
{
"epoch": 6.23249299719888,
"grad_norm": 0.6277226805686951,
"learning_rate": 3.643125e-06,
"loss": 0.0124,
"step": 13350
},
{
"epoch": 6.2441643323996265,
"grad_norm": 3.3179471492767334,
"learning_rate": 3.6353125e-06,
"loss": 0.0128,
"step": 13375
},
{
"epoch": 6.2558356676003735,
"grad_norm": 2.2590084075927734,
"learning_rate": 3.6275000000000004e-06,
"loss": 0.0137,
"step": 13400
},
{
"epoch": 6.26750700280112,
"grad_norm": 0.3138970136642456,
"learning_rate": 3.6196875000000007e-06,
"loss": 0.0105,
"step": 13425
},
{
"epoch": 6.279178338001867,
"grad_norm": 4.370602607727051,
"learning_rate": 3.611875e-06,
"loss": 0.0106,
"step": 13450
},
{
"epoch": 6.290849673202614,
"grad_norm": 0.7395113706588745,
"learning_rate": 3.6040625000000003e-06,
"loss": 0.0074,
"step": 13475
},
{
"epoch": 6.302521008403361,
"grad_norm": 4.352066516876221,
"learning_rate": 3.5962500000000005e-06,
"loss": 0.0148,
"step": 13500
},
{
"epoch": 6.314192343604108,
"grad_norm": 4.510288715362549,
"learning_rate": 3.5884375000000003e-06,
"loss": 0.0127,
"step": 13525
},
{
"epoch": 6.325863678804855,
"grad_norm": 6.977252960205078,
"learning_rate": 3.580625e-06,
"loss": 0.0163,
"step": 13550
},
{
"epoch": 6.337535014005602,
"grad_norm": 3.6490318775177,
"learning_rate": 3.5728125e-06,
"loss": 0.0127,
"step": 13575
},
{
"epoch": 6.349206349206349,
"grad_norm": 2.546675682067871,
"learning_rate": 3.565e-06,
"loss": 0.0123,
"step": 13600
},
{
"epoch": 6.360877684407096,
"grad_norm": 9.396807670593262,
"learning_rate": 3.5571875000000004e-06,
"loss": 0.0155,
"step": 13625
},
{
"epoch": 6.372549019607844,
"grad_norm": 5.870864391326904,
"learning_rate": 3.549375e-06,
"loss": 0.0153,
"step": 13650
},
{
"epoch": 6.3842203548085905,
"grad_norm": 0.08212006092071533,
"learning_rate": 3.5415625e-06,
"loss": 0.0113,
"step": 13675
},
{
"epoch": 6.395891690009337,
"grad_norm": 1.5073678493499756,
"learning_rate": 3.5337500000000003e-06,
"loss": 0.0148,
"step": 13700
},
{
"epoch": 6.407563025210084,
"grad_norm": 1.6626029014587402,
"learning_rate": 3.5259375000000005e-06,
"loss": 0.0206,
"step": 13725
},
{
"epoch": 6.419234360410831,
"grad_norm": 5.561774730682373,
"learning_rate": 3.518125e-06,
"loss": 0.0219,
"step": 13750
},
{
"epoch": 6.430905695611578,
"grad_norm": 3.8881995677948,
"learning_rate": 3.5103125e-06,
"loss": 0.0194,
"step": 13775
},
{
"epoch": 6.442577030812325,
"grad_norm": 6.549841403961182,
"learning_rate": 3.5025000000000003e-06,
"loss": 0.014,
"step": 13800
},
{
"epoch": 6.454248366013072,
"grad_norm": 0.38337138295173645,
"learning_rate": 3.4946875000000006e-06,
"loss": 0.0125,
"step": 13825
},
{
"epoch": 6.465919701213819,
"grad_norm": 7.83842658996582,
"learning_rate": 3.486875e-06,
"loss": 0.0117,
"step": 13850
},
{
"epoch": 6.477591036414566,
"grad_norm": 2.4902381896972656,
"learning_rate": 3.4790625e-06,
"loss": 0.0154,
"step": 13875
},
{
"epoch": 6.489262371615313,
"grad_norm": 2.7067272663116455,
"learning_rate": 3.47125e-06,
"loss": 0.0145,
"step": 13900
},
{
"epoch": 6.50093370681606,
"grad_norm": 0.13609760999679565,
"learning_rate": 3.4634375000000002e-06,
"loss": 0.013,
"step": 13925
},
{
"epoch": 6.512605042016807,
"grad_norm": 7.123418807983398,
"learning_rate": 3.4556249999999996e-06,
"loss": 0.0135,
"step": 13950
},
{
"epoch": 6.524276377217554,
"grad_norm": 2.5809738636016846,
"learning_rate": 3.4478125e-06,
"loss": 0.013,
"step": 13975
},
{
"epoch": 6.5359477124183005,
"grad_norm": 3.8302764892578125,
"learning_rate": 3.44e-06,
"loss": 0.0123,
"step": 14000
},
{
"epoch": 6.5476190476190474,
"grad_norm": 0.38114723563194275,
"learning_rate": 3.4321875000000003e-06,
"loss": 0.0123,
"step": 14025
},
{
"epoch": 6.559290382819794,
"grad_norm": 1.6169977188110352,
"learning_rate": 3.4243750000000006e-06,
"loss": 0.0139,
"step": 14050
},
{
"epoch": 6.570961718020541,
"grad_norm": 0.527672529220581,
"learning_rate": 3.4165625e-06,
"loss": 0.0137,
"step": 14075
},
{
"epoch": 6.582633053221288,
"grad_norm": 4.989930152893066,
"learning_rate": 3.40875e-06,
"loss": 0.0108,
"step": 14100
},
{
"epoch": 6.594304388422035,
"grad_norm": 0.13473570346832275,
"learning_rate": 3.4009375000000004e-06,
"loss": 0.0159,
"step": 14125
},
{
"epoch": 6.605975723622782,
"grad_norm": 3.537700891494751,
"learning_rate": 3.3931250000000007e-06,
"loss": 0.0201,
"step": 14150
},
{
"epoch": 6.617647058823529,
"grad_norm": 2.97955060005188,
"learning_rate": 3.3853125e-06,
"loss": 0.0106,
"step": 14175
},
{
"epoch": 6.629318394024276,
"grad_norm": 0.7965431809425354,
"learning_rate": 3.3775000000000003e-06,
"loss": 0.016,
"step": 14200
},
{
"epoch": 6.640989729225024,
"grad_norm": 2.8173744678497314,
"learning_rate": 3.3696875e-06,
"loss": 0.0169,
"step": 14225
},
{
"epoch": 6.652661064425771,
"grad_norm": 0.6731769442558289,
"learning_rate": 3.3618750000000003e-06,
"loss": 0.0118,
"step": 14250
},
{
"epoch": 6.664332399626518,
"grad_norm": 3.4868786334991455,
"learning_rate": 3.3540624999999997e-06,
"loss": 0.0149,
"step": 14275
},
{
"epoch": 6.6760037348272645,
"grad_norm": 7.706684112548828,
"learning_rate": 3.34625e-06,
"loss": 0.0211,
"step": 14300
},
{
"epoch": 6.687675070028011,
"grad_norm": 3.9315950870513916,
"learning_rate": 3.3384375e-06,
"loss": 0.0122,
"step": 14325
},
{
"epoch": 6.699346405228758,
"grad_norm": 0.4107113778591156,
"learning_rate": 3.3306250000000004e-06,
"loss": 0.0132,
"step": 14350
},
{
"epoch": 6.711017740429505,
"grad_norm": 0.8928655982017517,
"learning_rate": 3.3228125e-06,
"loss": 0.0132,
"step": 14375
},
{
"epoch": 6.722689075630252,
"grad_norm": 0.32426151633262634,
"learning_rate": 3.315e-06,
"loss": 0.0121,
"step": 14400
},
{
"epoch": 6.734360410830999,
"grad_norm": 0.9105150103569031,
"learning_rate": 3.3071875000000003e-06,
"loss": 0.0134,
"step": 14425
},
{
"epoch": 6.746031746031746,
"grad_norm": 4.484381198883057,
"learning_rate": 3.2993750000000005e-06,
"loss": 0.012,
"step": 14450
},
{
"epoch": 6.757703081232493,
"grad_norm": 1.9971312284469604,
"learning_rate": 3.2915625e-06,
"loss": 0.0129,
"step": 14475
},
{
"epoch": 6.76937441643324,
"grad_norm": 5.409192085266113,
"learning_rate": 3.28375e-06,
"loss": 0.0142,
"step": 14500
},
{
"epoch": 6.781045751633987,
"grad_norm": 0.35186824202537537,
"learning_rate": 3.2759375000000003e-06,
"loss": 0.0155,
"step": 14525
},
{
"epoch": 6.792717086834734,
"grad_norm": 8.460790634155273,
"learning_rate": 3.268125e-06,
"loss": 0.0119,
"step": 14550
},
{
"epoch": 6.804388422035481,
"grad_norm": 2.0943799018859863,
"learning_rate": 3.2603125e-06,
"loss": 0.0121,
"step": 14575
},
{
"epoch": 6.816059757236228,
"grad_norm": 7.832350730895996,
"learning_rate": 3.2525e-06,
"loss": 0.0189,
"step": 14600
},
{
"epoch": 6.8277310924369745,
"grad_norm": 0.39846083521842957,
"learning_rate": 3.2446875e-06,
"loss": 0.0134,
"step": 14625
},
{
"epoch": 6.839402427637721,
"grad_norm": 7.057211875915527,
"learning_rate": 3.2368750000000002e-06,
"loss": 0.0133,
"step": 14650
},
{
"epoch": 6.851073762838468,
"grad_norm": 0.2746317684650421,
"learning_rate": 3.2290625000000005e-06,
"loss": 0.0137,
"step": 14675
},
{
"epoch": 6.862745098039216,
"grad_norm": 5.0260396003723145,
"learning_rate": 3.22125e-06,
"loss": 0.0173,
"step": 14700
},
{
"epoch": 6.874416433239963,
"grad_norm": 4.125462532043457,
"learning_rate": 3.2134375e-06,
"loss": 0.0176,
"step": 14725
},
{
"epoch": 6.88608776844071,
"grad_norm": 6.75167179107666,
"learning_rate": 3.2056250000000003e-06,
"loss": 0.0145,
"step": 14750
},
{
"epoch": 6.897759103641457,
"grad_norm": 1.9314770698547363,
"learning_rate": 3.1978125000000006e-06,
"loss": 0.013,
"step": 14775
},
{
"epoch": 6.909430438842204,
"grad_norm": 2.6127073764801025,
"learning_rate": 3.19e-06,
"loss": 0.0105,
"step": 14800
},
{
"epoch": 6.921101774042951,
"grad_norm": 1.5826447010040283,
"learning_rate": 3.1821875e-06,
"loss": 0.0127,
"step": 14825
},
{
"epoch": 6.932773109243698,
"grad_norm": 0.6113137006759644,
"learning_rate": 3.1743750000000004e-06,
"loss": 0.0126,
"step": 14850
},
{
"epoch": 6.944444444444445,
"grad_norm": 3.849680185317993,
"learning_rate": 3.1665625000000002e-06,
"loss": 0.015,
"step": 14875
},
{
"epoch": 6.956115779645192,
"grad_norm": 5.097741603851318,
"learning_rate": 3.15875e-06,
"loss": 0.0072,
"step": 14900
},
{
"epoch": 6.9677871148459385,
"grad_norm": 4.420420169830322,
"learning_rate": 3.1509375000000003e-06,
"loss": 0.0135,
"step": 14925
},
{
"epoch": 6.979458450046685,
"grad_norm": 3.5658352375030518,
"learning_rate": 3.143125e-06,
"loss": 0.012,
"step": 14950
},
{
"epoch": 6.991129785247432,
"grad_norm": 0.9113016128540039,
"learning_rate": 3.1353125000000003e-06,
"loss": 0.008,
"step": 14975
},
{
"epoch": 7.002801120448179,
"grad_norm": 2.622985363006592,
"learning_rate": 3.1274999999999997e-06,
"loss": 0.0189,
"step": 15000
},
{
"epoch": 7.002801120448179,
"eval_loss": 0.19959864020347595,
"eval_runtime": 5339.534,
"eval_samples_per_second": 1.763,
"eval_steps_per_second": 0.22,
"eval_wer": 0.09801388664621347,
"step": 15000
},
{
"epoch": 7.014472455648926,
"grad_norm": 0.3408145606517792,
"learning_rate": 3.1196875e-06,
"loss": 0.0108,
"step": 15025
},
{
"epoch": 7.026143790849673,
"grad_norm": 1.0346554517745972,
"learning_rate": 3.111875e-06,
"loss": 0.0135,
"step": 15050
},
{
"epoch": 7.03781512605042,
"grad_norm": 0.08663380146026611,
"learning_rate": 3.1040625e-06,
"loss": 0.0088,
"step": 15075
},
{
"epoch": 7.049486461251167,
"grad_norm": 0.6362659335136414,
"learning_rate": 3.0962500000000002e-06,
"loss": 0.0072,
"step": 15100
},
{
"epoch": 7.061157796451914,
"grad_norm": 0.1300945281982422,
"learning_rate": 3.0884375e-06,
"loss": 0.0074,
"step": 15125
},
{
"epoch": 7.072829131652661,
"grad_norm": 4.269519805908203,
"learning_rate": 3.0806250000000003e-06,
"loss": 0.0158,
"step": 15150
},
{
"epoch": 7.084500466853408,
"grad_norm": 5.42411994934082,
"learning_rate": 3.0728125e-06,
"loss": 0.0112,
"step": 15175
},
{
"epoch": 7.096171802054155,
"grad_norm": 0.7867230176925659,
"learning_rate": 3.0650000000000003e-06,
"loss": 0.0204,
"step": 15200
},
{
"epoch": 7.107843137254902,
"grad_norm": 0.30360114574432373,
"learning_rate": 3.0571875e-06,
"loss": 0.01,
"step": 15225
},
{
"epoch": 7.1195144724556485,
"grad_norm": 3.1817336082458496,
"learning_rate": 3.0493750000000003e-06,
"loss": 0.0126,
"step": 15250
},
{
"epoch": 7.131185807656396,
"grad_norm": 1.1198678016662598,
"learning_rate": 3.0415625e-06,
"loss": 0.0059,
"step": 15275
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.7146356105804443,
"learning_rate": 3.03375e-06,
"loss": 0.0101,
"step": 15300
},
{
"epoch": 7.15452847805789,
"grad_norm": 0.09200263023376465,
"learning_rate": 3.0259375e-06,
"loss": 0.0098,
"step": 15325
},
{
"epoch": 7.166199813258637,
"grad_norm": 2.3543612957000732,
"learning_rate": 3.018125e-06,
"loss": 0.0172,
"step": 15350
},
{
"epoch": 7.177871148459384,
"grad_norm": 0.4582861661911011,
"learning_rate": 3.0103125000000002e-06,
"loss": 0.0082,
"step": 15375
},
{
"epoch": 7.189542483660131,
"grad_norm": 3.5268125534057617,
"learning_rate": 3.0025e-06,
"loss": 0.015,
"step": 15400
},
{
"epoch": 7.201213818860878,
"grad_norm": 3.0377275943756104,
"learning_rate": 2.9946875000000003e-06,
"loss": 0.0079,
"step": 15425
},
{
"epoch": 7.212885154061625,
"grad_norm": 1.1198071241378784,
"learning_rate": 2.986875e-06,
"loss": 0.0175,
"step": 15450
},
{
"epoch": 7.224556489262372,
"grad_norm": 0.33140361309051514,
"learning_rate": 2.9790625000000003e-06,
"loss": 0.0078,
"step": 15475
},
{
"epoch": 7.236227824463119,
"grad_norm": 2.4766924381256104,
"learning_rate": 2.97125e-06,
"loss": 0.0162,
"step": 15500
},
{
"epoch": 7.2478991596638656,
"grad_norm": 0.24502252042293549,
"learning_rate": 2.9634375000000004e-06,
"loss": 0.0083,
"step": 15525
},
{
"epoch": 7.2595704948646125,
"grad_norm": 0.837504506111145,
"learning_rate": 2.955625e-06,
"loss": 0.0145,
"step": 15550
},
{
"epoch": 7.271241830065359,
"grad_norm": 0.06051575765013695,
"learning_rate": 2.9478125000000004e-06,
"loss": 0.0071,
"step": 15575
},
{
"epoch": 7.282913165266106,
"grad_norm": 2.0014054775238037,
"learning_rate": 2.9400000000000002e-06,
"loss": 0.0146,
"step": 15600
},
{
"epoch": 7.294584500466853,
"grad_norm": 0.5527703762054443,
"learning_rate": 2.9321875e-06,
"loss": 0.0093,
"step": 15625
},
{
"epoch": 7.3062558356676,
"grad_norm": 0.5724664330482483,
"learning_rate": 2.924375e-06,
"loss": 0.0147,
"step": 15650
},
{
"epoch": 7.317927170868347,
"grad_norm": 0.18703560531139374,
"learning_rate": 2.9165625e-06,
"loss": 0.0129,
"step": 15675
},
{
"epoch": 7.329598506069094,
"grad_norm": 2.189370632171631,
"learning_rate": 2.90875e-06,
"loss": 0.0133,
"step": 15700
},
{
"epoch": 7.341269841269841,
"grad_norm": 0.19430263340473175,
"learning_rate": 2.9009375e-06,
"loss": 0.0109,
"step": 15725
},
{
"epoch": 7.352941176470588,
"grad_norm": 2.84920597076416,
"learning_rate": 2.893125e-06,
"loss": 0.0099,
"step": 15750
},
{
"epoch": 7.364612511671335,
"grad_norm": 1.7251280546188354,
"learning_rate": 2.8853125e-06,
"loss": 0.0074,
"step": 15775
},
{
"epoch": 7.376283846872083,
"grad_norm": 0.5282366871833801,
"learning_rate": 2.8775e-06,
"loss": 0.0184,
"step": 15800
},
{
"epoch": 7.3879551820728295,
"grad_norm": 7.1876420974731445,
"learning_rate": 2.8696875000000002e-06,
"loss": 0.011,
"step": 15825
},
{
"epoch": 7.3996265172735765,
"grad_norm": 0.14123250544071198,
"learning_rate": 2.861875e-06,
"loss": 0.0115,
"step": 15850
},
{
"epoch": 7.411297852474323,
"grad_norm": 0.15596270561218262,
"learning_rate": 2.8540625000000003e-06,
"loss": 0.0117,
"step": 15875
},
{
"epoch": 7.42296918767507,
"grad_norm": 0.23796889185905457,
"learning_rate": 2.8462500000000005e-06,
"loss": 0.0153,
"step": 15900
},
{
"epoch": 7.434640522875817,
"grad_norm": 3.0602948665618896,
"learning_rate": 2.8384375000000003e-06,
"loss": 0.0116,
"step": 15925
},
{
"epoch": 7.446311858076564,
"grad_norm": 0.32861384749412537,
"learning_rate": 2.830625e-06,
"loss": 0.0121,
"step": 15950
},
{
"epoch": 7.457983193277311,
"grad_norm": 0.5478546023368835,
"learning_rate": 2.8228125e-06,
"loss": 0.0112,
"step": 15975
},
{
"epoch": 7.469654528478058,
"grad_norm": 0.2652440667152405,
"learning_rate": 2.815e-06,
"loss": 0.0218,
"step": 16000
},
{
"epoch": 7.481325863678805,
"grad_norm": 1.3157047033309937,
"learning_rate": 2.8071875e-06,
"loss": 0.0077,
"step": 16025
},
{
"epoch": 7.492997198879552,
"grad_norm": 0.28366702795028687,
"learning_rate": 2.799375e-06,
"loss": 0.0136,
"step": 16050
},
{
"epoch": 7.504668534080299,
"grad_norm": 5.81749963760376,
"learning_rate": 2.7915625e-06,
"loss": 0.0083,
"step": 16075
},
{
"epoch": 7.516339869281046,
"grad_norm": 2.328474760055542,
"learning_rate": 2.7837500000000002e-06,
"loss": 0.0138,
"step": 16100
},
{
"epoch": 7.528011204481793,
"grad_norm": 0.3001089096069336,
"learning_rate": 2.7759375e-06,
"loss": 0.0063,
"step": 16125
},
{
"epoch": 7.5396825396825395,
"grad_norm": 0.8168843388557434,
"learning_rate": 2.7681250000000003e-06,
"loss": 0.0153,
"step": 16150
},
{
"epoch": 7.5513538748832865,
"grad_norm": 0.5548914074897766,
"learning_rate": 2.7603125e-06,
"loss": 0.0075,
"step": 16175
},
{
"epoch": 7.563025210084033,
"grad_norm": 1.3669456243515015,
"learning_rate": 2.7525000000000003e-06,
"loss": 0.0135,
"step": 16200
},
{
"epoch": 7.57469654528478,
"grad_norm": 0.7598258852958679,
"learning_rate": 2.7446875e-06,
"loss": 0.0056,
"step": 16225
},
{
"epoch": 7.586367880485527,
"grad_norm": 0.4459327757358551,
"learning_rate": 2.7368750000000004e-06,
"loss": 0.0102,
"step": 16250
},
{
"epoch": 7.598039215686274,
"grad_norm": 0.2884249985218048,
"learning_rate": 2.7290625e-06,
"loss": 0.0097,
"step": 16275
},
{
"epoch": 7.609710550887021,
"grad_norm": 0.1581568568944931,
"learning_rate": 2.72125e-06,
"loss": 0.0121,
"step": 16300
},
{
"epoch": 7.621381886087768,
"grad_norm": 0.3975503444671631,
"learning_rate": 2.7134375e-06,
"loss": 0.0107,
"step": 16325
},
{
"epoch": 7.633053221288515,
"grad_norm": 1.209242582321167,
"learning_rate": 2.705625e-06,
"loss": 0.0148,
"step": 16350
},
{
"epoch": 7.644724556489263,
"grad_norm": 0.27312788367271423,
"learning_rate": 2.6978125e-06,
"loss": 0.0035,
"step": 16375
},
{
"epoch": 7.65639589169001,
"grad_norm": 1.0958424806594849,
"learning_rate": 2.69e-06,
"loss": 0.0171,
"step": 16400
},
{
"epoch": 7.668067226890757,
"grad_norm": 0.2819930911064148,
"learning_rate": 2.6821875e-06,
"loss": 0.0056,
"step": 16425
},
{
"epoch": 7.6797385620915035,
"grad_norm": 0.7688259482383728,
"learning_rate": 2.674375e-06,
"loss": 0.0097,
"step": 16450
},
{
"epoch": 7.69140989729225,
"grad_norm": 0.7461805939674377,
"learning_rate": 2.6665625e-06,
"loss": 0.0097,
"step": 16475
},
{
"epoch": 7.703081232492997,
"grad_norm": 0.4246864914894104,
"learning_rate": 2.65875e-06,
"loss": 0.0142,
"step": 16500
},
{
"epoch": 7.714752567693744,
"grad_norm": 0.32187584042549133,
"learning_rate": 2.6509375000000004e-06,
"loss": 0.0069,
"step": 16525
},
{
"epoch": 7.726423902894491,
"grad_norm": 0.9666975736618042,
"learning_rate": 2.643125e-06,
"loss": 0.0139,
"step": 16550
},
{
"epoch": 7.738095238095238,
"grad_norm": 0.4001488983631134,
"learning_rate": 2.6353125000000004e-06,
"loss": 0.0078,
"step": 16575
},
{
"epoch": 7.749766573295985,
"grad_norm": 1.532838225364685,
"learning_rate": 2.6275000000000003e-06,
"loss": 0.0131,
"step": 16600
},
{
"epoch": 7.761437908496732,
"grad_norm": 5.185120582580566,
"learning_rate": 2.6196875e-06,
"loss": 0.0086,
"step": 16625
},
{
"epoch": 7.773109243697479,
"grad_norm": 2.4308927059173584,
"learning_rate": 2.611875e-06,
"loss": 0.0143,
"step": 16650
},
{
"epoch": 7.784780578898226,
"grad_norm": 3.3174314498901367,
"learning_rate": 2.6040625e-06,
"loss": 0.0069,
"step": 16675
},
{
"epoch": 7.796451914098973,
"grad_norm": 1.5750664472579956,
"learning_rate": 2.59625e-06,
"loss": 0.0223,
"step": 16700
},
{
"epoch": 7.80812324929972,
"grad_norm": 0.2341316193342209,
"learning_rate": 2.5884375e-06,
"loss": 0.0054,
"step": 16725
},
{
"epoch": 7.819794584500467,
"grad_norm": 1.4788146018981934,
"learning_rate": 2.580625e-06,
"loss": 0.0136,
"step": 16750
},
{
"epoch": 7.8314659197012135,
"grad_norm": 1.1940587759017944,
"learning_rate": 2.5728125e-06,
"loss": 0.006,
"step": 16775
},
{
"epoch": 7.8431372549019605,
"grad_norm": 1.383323073387146,
"learning_rate": 2.565e-06,
"loss": 0.0115,
"step": 16800
},
{
"epoch": 7.854808590102707,
"grad_norm": 0.6506948471069336,
"learning_rate": 2.5571875000000002e-06,
"loss": 0.008,
"step": 16825
},
{
"epoch": 7.866479925303455,
"grad_norm": 1.9429287910461426,
"learning_rate": 2.549375e-06,
"loss": 0.018,
"step": 16850
},
{
"epoch": 7.878151260504202,
"grad_norm": 1.7122925519943237,
"learning_rate": 2.5415625000000003e-06,
"loss": 0.011,
"step": 16875
},
{
"epoch": 7.889822595704949,
"grad_norm": 0.38340166211128235,
"learning_rate": 2.53375e-06,
"loss": 0.0163,
"step": 16900
},
{
"epoch": 7.901493930905696,
"grad_norm": 4.114197731018066,
"learning_rate": 2.5259375000000003e-06,
"loss": 0.0114,
"step": 16925
},
{
"epoch": 7.913165266106443,
"grad_norm": 2.0275721549987793,
"learning_rate": 2.518125e-06,
"loss": 0.0131,
"step": 16950
},
{
"epoch": 7.92483660130719,
"grad_norm": 3.8220088481903076,
"learning_rate": 2.5103125000000004e-06,
"loss": 0.0126,
"step": 16975
},
{
"epoch": 7.936507936507937,
"grad_norm": 2.1761302947998047,
"learning_rate": 2.5025e-06,
"loss": 0.0164,
"step": 17000
},
{
"epoch": 7.948179271708684,
"grad_norm": 6.142478942871094,
"learning_rate": 2.4946875e-06,
"loss": 0.0063,
"step": 17025
},
{
"epoch": 7.959850606909431,
"grad_norm": 2.5636518001556396,
"learning_rate": 2.486875e-06,
"loss": 0.0142,
"step": 17050
},
{
"epoch": 7.9715219421101775,
"grad_norm": 0.4117043614387512,
"learning_rate": 2.4790625e-06,
"loss": 0.0082,
"step": 17075
},
{
"epoch": 7.983193277310924,
"grad_norm": 2.189190626144409,
"learning_rate": 2.47125e-06,
"loss": 0.0139,
"step": 17100
},
{
"epoch": 7.994864612511671,
"grad_norm": 2.0030508041381836,
"learning_rate": 2.4634375e-06,
"loss": 0.0084,
"step": 17125
},
{
"epoch": 8.006535947712418,
"grad_norm": 0.26142677664756775,
"learning_rate": 2.4556250000000003e-06,
"loss": 0.0091,
"step": 17150
},
{
"epoch": 8.018207282913165,
"grad_norm": 0.4059411585330963,
"learning_rate": 2.4478125e-06,
"loss": 0.0055,
"step": 17175
},
{
"epoch": 8.029878618113912,
"grad_norm": 0.09706517308950424,
"learning_rate": 2.4400000000000004e-06,
"loss": 0.0126,
"step": 17200
},
{
"epoch": 8.041549953314659,
"grad_norm": 6.117686748504639,
"learning_rate": 2.4321875e-06,
"loss": 0.0077,
"step": 17225
},
{
"epoch": 8.053221288515406,
"grad_norm": 1.4079279899597168,
"learning_rate": 2.4243750000000004e-06,
"loss": 0.0099,
"step": 17250
},
{
"epoch": 8.064892623716153,
"grad_norm": 0.329962819814682,
"learning_rate": 2.4165625e-06,
"loss": 0.0101,
"step": 17275
},
{
"epoch": 8.0765639589169,
"grad_norm": 0.14133867621421814,
"learning_rate": 2.4087500000000004e-06,
"loss": 0.0166,
"step": 17300
},
{
"epoch": 8.088235294117647,
"grad_norm": 0.3688430190086365,
"learning_rate": 2.4009375000000003e-06,
"loss": 0.0074,
"step": 17325
},
{
"epoch": 8.099906629318394,
"grad_norm": 2.421675205230713,
"learning_rate": 2.393125e-06,
"loss": 0.0119,
"step": 17350
},
{
"epoch": 8.11157796451914,
"grad_norm": 0.39900481700897217,
"learning_rate": 2.3853125e-06,
"loss": 0.0087,
"step": 17375
},
{
"epoch": 8.123249299719888,
"grad_norm": 9.060256004333496,
"learning_rate": 2.3778125000000004e-06,
"loss": 0.0193,
"step": 17400
},
{
"epoch": 8.134920634920634,
"grad_norm": 3.6718103885650635,
"learning_rate": 2.37e-06,
"loss": 0.0034,
"step": 17425
},
{
"epoch": 8.146591970121381,
"grad_norm": 1.8826625347137451,
"learning_rate": 2.3621875e-06,
"loss": 0.0108,
"step": 17450
},
{
"epoch": 8.158263305322128,
"grad_norm": 5.267531871795654,
"learning_rate": 2.354375e-06,
"loss": 0.0051,
"step": 17475
},
{
"epoch": 8.169934640522875,
"grad_norm": 1.7462451457977295,
"learning_rate": 2.3465625e-06,
"loss": 0.0098,
"step": 17500
},
{
"epoch": 8.181605975723622,
"grad_norm": 3.9560678005218506,
"learning_rate": 2.33875e-06,
"loss": 0.0074,
"step": 17525
},
{
"epoch": 8.193277310924369,
"grad_norm": 0.9454758763313293,
"learning_rate": 2.3309375e-06,
"loss": 0.0124,
"step": 17550
},
{
"epoch": 8.204948646125116,
"grad_norm": 4.392622947692871,
"learning_rate": 2.323125e-06,
"loss": 0.0092,
"step": 17575
},
{
"epoch": 8.216619981325863,
"grad_norm": 0.15422876179218292,
"learning_rate": 2.3153125e-06,
"loss": 0.0112,
"step": 17600
},
{
"epoch": 8.22829131652661,
"grad_norm": 4.900958061218262,
"learning_rate": 2.3075e-06,
"loss": 0.0062,
"step": 17625
},
{
"epoch": 8.239962651727357,
"grad_norm": 0.7558678388595581,
"learning_rate": 2.2996875e-06,
"loss": 0.0202,
"step": 17650
},
{
"epoch": 8.251633986928105,
"grad_norm": 1.5306479930877686,
"learning_rate": 2.2918750000000004e-06,
"loss": 0.0045,
"step": 17675
},
{
"epoch": 8.263305322128852,
"grad_norm": 0.20347028970718384,
"learning_rate": 2.2840625e-06,
"loss": 0.0078,
"step": 17700
},
{
"epoch": 8.2749766573296,
"grad_norm": 1.3202959299087524,
"learning_rate": 2.2762500000000004e-06,
"loss": 0.0086,
"step": 17725
},
{
"epoch": 8.286647992530346,
"grad_norm": 0.9320886731147766,
"learning_rate": 2.2684375000000003e-06,
"loss": 0.0108,
"step": 17750
},
{
"epoch": 8.298319327731093,
"grad_norm": 3.197519063949585,
"learning_rate": 2.260625e-06,
"loss": 0.0077,
"step": 17775
},
{
"epoch": 8.30999066293184,
"grad_norm": 1.24087655544281,
"learning_rate": 2.2528125e-06,
"loss": 0.0132,
"step": 17800
},
{
"epoch": 8.321661998132587,
"grad_norm": 5.21552848815918,
"learning_rate": 2.245e-06,
"loss": 0.0063,
"step": 17825
},
{
"epoch": 8.333333333333334,
"grad_norm": 1.1258479356765747,
"learning_rate": 2.2371875e-06,
"loss": 0.0163,
"step": 17850
},
{
"epoch": 8.34500466853408,
"grad_norm": 4.139800071716309,
"learning_rate": 2.229375e-06,
"loss": 0.0056,
"step": 17875
},
{
"epoch": 8.356676003734828,
"grad_norm": 0.32767072319984436,
"learning_rate": 2.2215625e-06,
"loss": 0.0075,
"step": 17900
},
{
"epoch": 8.368347338935575,
"grad_norm": 1.2744501829147339,
"learning_rate": 2.21375e-06,
"loss": 0.007,
"step": 17925
},
{
"epoch": 8.380018674136322,
"grad_norm": 1.00862717628479,
"learning_rate": 2.2059375e-06,
"loss": 0.0154,
"step": 17950
},
{
"epoch": 8.391690009337069,
"grad_norm": 0.4900611340999603,
"learning_rate": 2.1981250000000002e-06,
"loss": 0.0085,
"step": 17975
},
{
"epoch": 8.403361344537815,
"grad_norm": 3.439101219177246,
"learning_rate": 2.1903125e-06,
"loss": 0.0193,
"step": 18000
},
{
"epoch": 8.415032679738562,
"grad_norm": 0.17414799332618713,
"learning_rate": 2.1825000000000003e-06,
"loss": 0.0092,
"step": 18025
},
{
"epoch": 8.42670401493931,
"grad_norm": 0.3426636755466461,
"learning_rate": 2.1746875e-06,
"loss": 0.0149,
"step": 18050
},
{
"epoch": 8.438375350140056,
"grad_norm": 0.722213089466095,
"learning_rate": 2.1668750000000003e-06,
"loss": 0.007,
"step": 18075
},
{
"epoch": 8.450046685340803,
"grad_norm": 0.1770373433828354,
"learning_rate": 2.1590625e-06,
"loss": 0.0106,
"step": 18100
},
{
"epoch": 8.46171802054155,
"grad_norm": 1.1311222314834595,
"learning_rate": 2.15125e-06,
"loss": 0.0105,
"step": 18125
},
{
"epoch": 8.473389355742297,
"grad_norm": 3.256246328353882,
"learning_rate": 2.1434374999999998e-06,
"loss": 0.0117,
"step": 18150
},
{
"epoch": 8.485060690943044,
"grad_norm": 2.979933977127075,
"learning_rate": 2.135625e-06,
"loss": 0.0065,
"step": 18175
},
{
"epoch": 8.49673202614379,
"grad_norm": 5.483570098876953,
"learning_rate": 2.1278125e-06,
"loss": 0.0117,
"step": 18200
},
{
"epoch": 8.508403361344538,
"grad_norm": 0.9835972189903259,
"learning_rate": 2.12e-06,
"loss": 0.0068,
"step": 18225
},
{
"epoch": 8.520074696545285,
"grad_norm": 0.053934112191200256,
"learning_rate": 2.1121875e-06,
"loss": 0.0064,
"step": 18250
},
{
"epoch": 8.531746031746032,
"grad_norm": 0.5441355109214783,
"learning_rate": 2.104375e-06,
"loss": 0.0076,
"step": 18275
},
{
"epoch": 8.543417366946779,
"grad_norm": 2.9546403884887695,
"learning_rate": 2.0965625000000003e-06,
"loss": 0.0132,
"step": 18300
},
{
"epoch": 8.555088702147525,
"grad_norm": 1.0984193086624146,
"learning_rate": 2.08875e-06,
"loss": 0.006,
"step": 18325
},
{
"epoch": 8.566760037348272,
"grad_norm": 0.058905456215143204,
"learning_rate": 2.0809375000000004e-06,
"loss": 0.0086,
"step": 18350
},
{
"epoch": 8.57843137254902,
"grad_norm": 1.6285549402236938,
"learning_rate": 2.073125e-06,
"loss": 0.0055,
"step": 18375
},
{
"epoch": 8.590102707749766,
"grad_norm": 2.7696332931518555,
"learning_rate": 2.0653125000000004e-06,
"loss": 0.0093,
"step": 18400
},
{
"epoch": 8.601774042950513,
"grad_norm": 0.25450074672698975,
"learning_rate": 2.0575e-06,
"loss": 0.0027,
"step": 18425
},
{
"epoch": 8.61344537815126,
"grad_norm": 0.13693825900554657,
"learning_rate": 2.0496875e-06,
"loss": 0.0154,
"step": 18450
},
{
"epoch": 8.625116713352007,
"grad_norm": 0.07830255478620529,
"learning_rate": 2.041875e-06,
"loss": 0.0063,
"step": 18475
},
{
"epoch": 8.636788048552754,
"grad_norm": 3.4713680744171143,
"learning_rate": 2.0340625e-06,
"loss": 0.0222,
"step": 18500
},
{
"epoch": 8.6484593837535,
"grad_norm": 0.6259431838989258,
"learning_rate": 2.02625e-06,
"loss": 0.0088,
"step": 18525
},
{
"epoch": 8.660130718954248,
"grad_norm": 2.6433568000793457,
"learning_rate": 2.0184375e-06,
"loss": 0.0104,
"step": 18550
},
{
"epoch": 8.671802054154995,
"grad_norm": 4.890020370483398,
"learning_rate": 2.010625e-06,
"loss": 0.0049,
"step": 18575
},
{
"epoch": 8.683473389355742,
"grad_norm": 0.13995322585105896,
"learning_rate": 2.0028125e-06,
"loss": 0.0132,
"step": 18600
},
{
"epoch": 8.695144724556489,
"grad_norm": 3.4847798347473145,
"learning_rate": 1.995e-06,
"loss": 0.0083,
"step": 18625
},
{
"epoch": 8.706816059757235,
"grad_norm": 0.990917980670929,
"learning_rate": 1.9871875e-06,
"loss": 0.0067,
"step": 18650
},
{
"epoch": 8.718487394957982,
"grad_norm": 3.6839520931243896,
"learning_rate": 1.979375e-06,
"loss": 0.0063,
"step": 18675
},
{
"epoch": 8.73015873015873,
"grad_norm": 2.492249011993408,
"learning_rate": 1.9715625000000002e-06,
"loss": 0.01,
"step": 18700
},
{
"epoch": 8.741830065359476,
"grad_norm": 1.4780845642089844,
"learning_rate": 1.96375e-06,
"loss": 0.0058,
"step": 18725
},
{
"epoch": 8.753501400560225,
"grad_norm": 0.1961933821439743,
"learning_rate": 1.9559375000000003e-06,
"loss": 0.01,
"step": 18750
},
{
"epoch": 8.76517273576097,
"grad_norm": 1.2011662721633911,
"learning_rate": 1.948125e-06,
"loss": 0.0075,
"step": 18775
},
{
"epoch": 8.776844070961719,
"grad_norm": 1.7209523916244507,
"learning_rate": 1.9403125000000003e-06,
"loss": 0.014,
"step": 18800
},
{
"epoch": 8.788515406162466,
"grad_norm": 0.40629979968070984,
"learning_rate": 1.9325e-06,
"loss": 0.0047,
"step": 18825
},
{
"epoch": 8.800186741363213,
"grad_norm": 1.7460086345672607,
"learning_rate": 1.9246875e-06,
"loss": 0.0075,
"step": 18850
},
{
"epoch": 8.81185807656396,
"grad_norm": 0.1296474188566208,
"learning_rate": 1.9168749999999998e-06,
"loss": 0.004,
"step": 18875
},
{
"epoch": 8.823529411764707,
"grad_norm": 3.629750967025757,
"learning_rate": 1.9090625e-06,
"loss": 0.0118,
"step": 18900
},
{
"epoch": 8.835200746965453,
"grad_norm": 0.17966805398464203,
"learning_rate": 1.9012500000000002e-06,
"loss": 0.007,
"step": 18925
},
{
"epoch": 8.8468720821662,
"grad_norm": 1.5354282855987549,
"learning_rate": 1.8934375e-06,
"loss": 0.0185,
"step": 18950
},
{
"epoch": 8.858543417366947,
"grad_norm": 4.741540908813477,
"learning_rate": 1.8856250000000003e-06,
"loss": 0.0086,
"step": 18975
},
{
"epoch": 8.870214752567694,
"grad_norm": 0.33261170983314514,
"learning_rate": 1.8778125e-06,
"loss": 0.0145,
"step": 19000
},
{
"epoch": 8.881886087768441,
"grad_norm": 0.1079050749540329,
"learning_rate": 1.8700000000000003e-06,
"loss": 0.0084,
"step": 19025
},
{
"epoch": 8.893557422969188,
"grad_norm": 0.7272719740867615,
"learning_rate": 1.8621875000000001e-06,
"loss": 0.0065,
"step": 19050
},
{
"epoch": 8.905228758169935,
"grad_norm": 2.8611772060394287,
"learning_rate": 1.8543750000000001e-06,
"loss": 0.0085,
"step": 19075
},
{
"epoch": 8.916900093370682,
"grad_norm": 0.06473066657781601,
"learning_rate": 1.8465625e-06,
"loss": 0.0118,
"step": 19100
},
{
"epoch": 8.928571428571429,
"grad_norm": 0.5498653054237366,
"learning_rate": 1.8387500000000002e-06,
"loss": 0.0071,
"step": 19125
},
{
"epoch": 8.940242763772176,
"grad_norm": 0.0627368837594986,
"learning_rate": 1.8309375e-06,
"loss": 0.0094,
"step": 19150
},
{
"epoch": 8.951914098972923,
"grad_norm": 2.6687510013580322,
"learning_rate": 1.8231250000000002e-06,
"loss": 0.0054,
"step": 19175
},
{
"epoch": 8.96358543417367,
"grad_norm": 1.0596753358840942,
"learning_rate": 1.8153125e-06,
"loss": 0.0069,
"step": 19200
},
{
"epoch": 8.975256769374417,
"grad_norm": 1.5671969652175903,
"learning_rate": 1.8075000000000003e-06,
"loss": 0.0041,
"step": 19225
},
{
"epoch": 8.986928104575163,
"grad_norm": 3.649564266204834,
"learning_rate": 1.7996875e-06,
"loss": 0.0123,
"step": 19250
},
{
"epoch": 8.99859943977591,
"grad_norm": 4.41900110244751,
"learning_rate": 1.791875e-06,
"loss": 0.0074,
"step": 19275
},
{
"epoch": 9.010270774976657,
"grad_norm": 1.1093493700027466,
"learning_rate": 1.7840625e-06,
"loss": 0.0056,
"step": 19300
},
{
"epoch": 9.021942110177404,
"grad_norm": 2.091535806655884,
"learning_rate": 1.7762500000000001e-06,
"loss": 0.007,
"step": 19325
},
{
"epoch": 9.033613445378151,
"grad_norm": 4.006499767303467,
"learning_rate": 1.7684375e-06,
"loss": 0.0095,
"step": 19350
},
{
"epoch": 9.045284780578898,
"grad_norm": 0.13656963407993317,
"learning_rate": 1.7606250000000002e-06,
"loss": 0.0095,
"step": 19375
},
{
"epoch": 9.056956115779645,
"grad_norm": 2.558016538619995,
"learning_rate": 1.7528125e-06,
"loss": 0.0125,
"step": 19400
},
{
"epoch": 9.068627450980392,
"grad_norm": 0.15220613777637482,
"learning_rate": 1.745e-06,
"loss": 0.0074,
"step": 19425
},
{
"epoch": 9.080298786181139,
"grad_norm": 2.0453782081604004,
"learning_rate": 1.7371874999999998e-06,
"loss": 0.0063,
"step": 19450
},
{
"epoch": 9.091970121381886,
"grad_norm": 5.893077373504639,
"learning_rate": 1.729375e-06,
"loss": 0.0093,
"step": 19475
},
{
"epoch": 9.103641456582633,
"grad_norm": 1.2152618169784546,
"learning_rate": 1.7215624999999999e-06,
"loss": 0.0058,
"step": 19500
},
{
"epoch": 9.11531279178338,
"grad_norm": 0.9248460531234741,
"learning_rate": 1.7137500000000001e-06,
"loss": 0.0095,
"step": 19525
},
{
"epoch": 9.126984126984127,
"grad_norm": 0.35303401947021484,
"learning_rate": 1.7059375000000003e-06,
"loss": 0.0074,
"step": 19550
},
{
"epoch": 9.138655462184873,
"grad_norm": 0.8544372320175171,
"learning_rate": 1.6981250000000002e-06,
"loss": 0.0116,
"step": 19575
},
{
"epoch": 9.15032679738562,
"grad_norm": 0.38176584243774414,
"learning_rate": 1.6903125000000002e-06,
"loss": 0.0131,
"step": 19600
},
{
"epoch": 9.161998132586367,
"grad_norm": 3.0445823669433594,
"learning_rate": 1.6825e-06,
"loss": 0.0073,
"step": 19625
},
{
"epoch": 9.173669467787114,
"grad_norm": 0.1117783859372139,
"learning_rate": 1.6746875000000002e-06,
"loss": 0.0067,
"step": 19650
},
{
"epoch": 9.185340802987861,
"grad_norm": 1.0190069675445557,
"learning_rate": 1.666875e-06,
"loss": 0.0096,
"step": 19675
},
{
"epoch": 9.197012138188608,
"grad_norm": 0.07005083560943604,
"learning_rate": 1.6590625000000003e-06,
"loss": 0.0125,
"step": 19700
},
{
"epoch": 9.208683473389355,
"grad_norm": 1.7159433364868164,
"learning_rate": 1.65125e-06,
"loss": 0.0078,
"step": 19725
},
{
"epoch": 9.220354808590102,
"grad_norm": 0.325469046831131,
"learning_rate": 1.6437500000000001e-06,
"loss": 0.0068,
"step": 19750
},
{
"epoch": 9.232026143790849,
"grad_norm": 0.516471803188324,
"learning_rate": 1.6359375e-06,
"loss": 0.0064,
"step": 19775
},
{
"epoch": 9.243697478991596,
"grad_norm": 0.18785762786865234,
"learning_rate": 1.6281250000000002e-06,
"loss": 0.0083,
"step": 19800
},
{
"epoch": 9.255368814192344,
"grad_norm": 7.891486167907715,
"learning_rate": 1.6203125e-06,
"loss": 0.0075,
"step": 19825
},
{
"epoch": 9.267040149393091,
"grad_norm": 0.1949397474527359,
"learning_rate": 1.6125e-06,
"loss": 0.0079,
"step": 19850
},
{
"epoch": 9.278711484593838,
"grad_norm": 3.992004632949829,
"learning_rate": 1.6046875e-06,
"loss": 0.0111,
"step": 19875
},
{
"epoch": 9.290382819794585,
"grad_norm": 1.5781553983688354,
"learning_rate": 1.596875e-06,
"loss": 0.005,
"step": 19900
},
{
"epoch": 9.302054154995332,
"grad_norm": 0.28434544801712036,
"learning_rate": 1.5890624999999999e-06,
"loss": 0.0121,
"step": 19925
},
{
"epoch": 9.313725490196079,
"grad_norm": 0.04462061822414398,
"learning_rate": 1.5812500000000001e-06,
"loss": 0.01,
"step": 19950
},
{
"epoch": 9.325396825396826,
"grad_norm": 0.1574648916721344,
"learning_rate": 1.5734375e-06,
"loss": 0.0047,
"step": 19975
},
{
"epoch": 9.337068160597573,
"grad_norm": 3.997842788696289,
"learning_rate": 1.5656250000000002e-06,
"loss": 0.0052,
"step": 20000
},
{
"epoch": 9.337068160597573,
"eval_loss": 0.20788420736789703,
"eval_runtime": 5359.6354,
"eval_samples_per_second": 1.756,
"eval_steps_per_second": 0.22,
"eval_wer": 0.09564023897949298,
"step": 20000
},
{
"epoch": 9.34873949579832,
"grad_norm": 0.07767148315906525,
"learning_rate": 1.5578125000000002e-06,
"loss": 0.0071,
"step": 20025
},
{
"epoch": 9.360410830999067,
"grad_norm": 0.03903215005993843,
"learning_rate": 1.55e-06,
"loss": 0.0056,
"step": 20050
},
{
"epoch": 9.372082166199814,
"grad_norm": 5.785853385925293,
"learning_rate": 1.5421875e-06,
"loss": 0.0087,
"step": 20075
},
{
"epoch": 9.38375350140056,
"grad_norm": 0.3732275366783142,
"learning_rate": 1.534375e-06,
"loss": 0.0079,
"step": 20100
},
{
"epoch": 9.395424836601308,
"grad_norm": 0.7682175636291504,
"learning_rate": 1.5265625e-06,
"loss": 0.0113,
"step": 20125
},
{
"epoch": 9.407096171802054,
"grad_norm": 0.3410235345363617,
"learning_rate": 1.51875e-06,
"loss": 0.0058,
"step": 20150
},
{
"epoch": 9.418767507002801,
"grad_norm": 6.657580375671387,
"learning_rate": 1.5109375e-06,
"loss": 0.0071,
"step": 20175
},
{
"epoch": 9.430438842203548,
"grad_norm": 1.331062912940979,
"learning_rate": 1.5031250000000001e-06,
"loss": 0.0064,
"step": 20200
},
{
"epoch": 9.442110177404295,
"grad_norm": 0.032236941158771515,
"learning_rate": 1.4953125e-06,
"loss": 0.0073,
"step": 20225
},
{
"epoch": 9.453781512605042,
"grad_norm": 3.0221993923187256,
"learning_rate": 1.4875e-06,
"loss": 0.0053,
"step": 20250
},
{
"epoch": 9.465452847805789,
"grad_norm": 3.605405569076538,
"learning_rate": 1.4796875e-06,
"loss": 0.0127,
"step": 20275
},
{
"epoch": 9.477124183006536,
"grad_norm": 0.11760404706001282,
"learning_rate": 1.471875e-06,
"loss": 0.0067,
"step": 20300
},
{
"epoch": 9.488795518207283,
"grad_norm": 2.5941033363342285,
"learning_rate": 1.4640625000000002e-06,
"loss": 0.0072,
"step": 20325
},
{
"epoch": 9.50046685340803,
"grad_norm": 5.615445613861084,
"learning_rate": 1.4562500000000002e-06,
"loss": 0.0116,
"step": 20350
},
{
"epoch": 9.512138188608777,
"grad_norm": 0.22926200926303864,
"learning_rate": 1.4484375e-06,
"loss": 0.0082,
"step": 20375
},
{
"epoch": 9.523809523809524,
"grad_norm": 0.8798258304595947,
"learning_rate": 1.440625e-06,
"loss": 0.0036,
"step": 20400
},
{
"epoch": 9.53548085901027,
"grad_norm": 2.885864496231079,
"learning_rate": 1.4328125e-06,
"loss": 0.0076,
"step": 20425
},
{
"epoch": 9.547152194211018,
"grad_norm": 2.137159824371338,
"learning_rate": 1.4250000000000001e-06,
"loss": 0.0136,
"step": 20450
},
{
"epoch": 9.558823529411764,
"grad_norm": 0.8029230237007141,
"learning_rate": 1.4171875000000001e-06,
"loss": 0.0067,
"step": 20475
},
{
"epoch": 9.570494864612511,
"grad_norm": 0.14883463084697723,
"learning_rate": 1.4093750000000002e-06,
"loss": 0.0095,
"step": 20500
},
{
"epoch": 9.582166199813258,
"grad_norm": 10.01462459564209,
"learning_rate": 1.4015625000000002e-06,
"loss": 0.0069,
"step": 20525
},
{
"epoch": 9.593837535014005,
"grad_norm": 1.3217803239822388,
"learning_rate": 1.39375e-06,
"loss": 0.0058,
"step": 20550
},
{
"epoch": 9.605508870214752,
"grad_norm": 5.664200782775879,
"learning_rate": 1.3859375e-06,
"loss": 0.0086,
"step": 20575
},
{
"epoch": 9.6171802054155,
"grad_norm": 3.8610246181488037,
"learning_rate": 1.378125e-06,
"loss": 0.0085,
"step": 20600
},
{
"epoch": 9.628851540616246,
"grad_norm": 0.11504428088665009,
"learning_rate": 1.3703125e-06,
"loss": 0.006,
"step": 20625
},
{
"epoch": 9.640522875816993,
"grad_norm": 0.9499320387840271,
"learning_rate": 1.3625e-06,
"loss": 0.0061,
"step": 20650
},
{
"epoch": 9.65219421101774,
"grad_norm": 0.2915020287036896,
"learning_rate": 1.3546875e-06,
"loss": 0.0059,
"step": 20675
},
{
"epoch": 9.663865546218487,
"grad_norm": 1.0401633977890015,
"learning_rate": 1.3468750000000001e-06,
"loss": 0.0089,
"step": 20700
},
{
"epoch": 9.675536881419234,
"grad_norm": 0.8448579907417297,
"learning_rate": 1.3390625e-06,
"loss": 0.0115,
"step": 20725
},
{
"epoch": 9.68720821661998,
"grad_norm": 0.22094358503818512,
"learning_rate": 1.33125e-06,
"loss": 0.0092,
"step": 20750
},
{
"epoch": 9.698879551820728,
"grad_norm": 5.222804546356201,
"learning_rate": 1.3234375e-06,
"loss": 0.008,
"step": 20775
},
{
"epoch": 9.710550887021475,
"grad_norm": 0.35005176067352295,
"learning_rate": 1.315625e-06,
"loss": 0.008,
"step": 20800
},
{
"epoch": 9.722222222222221,
"grad_norm": 1.9022040367126465,
"learning_rate": 1.3078125e-06,
"loss": 0.0045,
"step": 20825
},
{
"epoch": 9.733893557422968,
"grad_norm": 2.139233350753784,
"learning_rate": 1.3e-06,
"loss": 0.0054,
"step": 20850
},
{
"epoch": 9.745564892623715,
"grad_norm": 4.1651482582092285,
"learning_rate": 1.2921875e-06,
"loss": 0.0053,
"step": 20875
},
{
"epoch": 9.757236227824464,
"grad_norm": 2.2410309314727783,
"learning_rate": 1.284375e-06,
"loss": 0.0062,
"step": 20900
},
{
"epoch": 9.768907563025211,
"grad_norm": 6.774412631988525,
"learning_rate": 1.2765625e-06,
"loss": 0.0082,
"step": 20925
},
{
"epoch": 9.780578898225958,
"grad_norm": 6.076715469360352,
"learning_rate": 1.2687500000000001e-06,
"loss": 0.0066,
"step": 20950
},
{
"epoch": 9.792250233426705,
"grad_norm": 0.3256414234638214,
"learning_rate": 1.2609375000000002e-06,
"loss": 0.0058,
"step": 20975
},
{
"epoch": 9.803921568627452,
"grad_norm": 0.03926245495676994,
"learning_rate": 1.2531250000000002e-06,
"loss": 0.0081,
"step": 21000
},
{
"epoch": 9.815592903828199,
"grad_norm": 14.117586135864258,
"learning_rate": 1.2453125000000002e-06,
"loss": 0.0031,
"step": 21025
},
{
"epoch": 9.827264239028946,
"grad_norm": 0.3628706634044647,
"learning_rate": 1.2375000000000002e-06,
"loss": 0.009,
"step": 21050
},
{
"epoch": 9.838935574229692,
"grad_norm": 4.5739827156066895,
"learning_rate": 1.2296875e-06,
"loss": 0.0038,
"step": 21075
},
{
"epoch": 9.85060690943044,
"grad_norm": 0.16507214307785034,
"learning_rate": 1.221875e-06,
"loss": 0.0101,
"step": 21100
},
{
"epoch": 9.862278244631186,
"grad_norm": 7.189055919647217,
"learning_rate": 1.2140625e-06,
"loss": 0.0078,
"step": 21125
},
{
"epoch": 9.873949579831933,
"grad_norm": 5.090521812438965,
"learning_rate": 1.20625e-06,
"loss": 0.0083,
"step": 21150
},
{
"epoch": 9.88562091503268,
"grad_norm": 2.5861196517944336,
"learning_rate": 1.1984375000000001e-06,
"loss": 0.0047,
"step": 21175
},
{
"epoch": 9.897292250233427,
"grad_norm": 0.09222248196601868,
"learning_rate": 1.1906250000000001e-06,
"loss": 0.0099,
"step": 21200
},
{
"epoch": 9.908963585434174,
"grad_norm": 4.3550004959106445,
"learning_rate": 1.1828125000000002e-06,
"loss": 0.008,
"step": 21225
},
{
"epoch": 9.920634920634921,
"grad_norm": 0.043043483048677444,
"learning_rate": 1.175e-06,
"loss": 0.0083,
"step": 21250
},
{
"epoch": 9.932306255835668,
"grad_norm": 0.07931485772132874,
"learning_rate": 1.1671875e-06,
"loss": 0.0082,
"step": 21275
},
{
"epoch": 9.943977591036415,
"grad_norm": 0.08949258923530579,
"learning_rate": 1.159375e-06,
"loss": 0.0075,
"step": 21300
},
{
"epoch": 9.955648926237162,
"grad_norm": 0.6334654092788696,
"learning_rate": 1.1515625e-06,
"loss": 0.0049,
"step": 21325
},
{
"epoch": 9.967320261437909,
"grad_norm": 1.3037919998168945,
"learning_rate": 1.14375e-06,
"loss": 0.0069,
"step": 21350
},
{
"epoch": 9.978991596638656,
"grad_norm": 4.038185119628906,
"learning_rate": 1.1359375e-06,
"loss": 0.0099,
"step": 21375
},
{
"epoch": 9.990662931839402,
"grad_norm": 0.7718151211738586,
"learning_rate": 1.128125e-06,
"loss": 0.0063,
"step": 21400
},
{
"epoch": 10.00233426704015,
"grad_norm": 1.5029001235961914,
"learning_rate": 1.1203125e-06,
"loss": 0.013,
"step": 21425
},
{
"epoch": 10.014005602240896,
"grad_norm": 1.8782932758331299,
"learning_rate": 1.1125e-06,
"loss": 0.0048,
"step": 21450
},
{
"epoch": 10.025676937441643,
"grad_norm": 1.411063313484192,
"learning_rate": 1.1046875e-06,
"loss": 0.0098,
"step": 21475
},
{
"epoch": 10.03734827264239,
"grad_norm": 0.08655665069818497,
"learning_rate": 1.096875e-06,
"loss": 0.0047,
"step": 21500
},
{
"epoch": 10.049019607843137,
"grad_norm": 0.7511602640151978,
"learning_rate": 1.0890625e-06,
"loss": 0.0077,
"step": 21525
},
{
"epoch": 10.060690943043884,
"grad_norm": 2.6931838989257812,
"learning_rate": 1.08125e-06,
"loss": 0.0036,
"step": 21550
},
{
"epoch": 10.072362278244631,
"grad_norm": 1.6642050743103027,
"learning_rate": 1.0734375e-06,
"loss": 0.009,
"step": 21575
},
{
"epoch": 10.084033613445378,
"grad_norm": 5.0585503578186035,
"learning_rate": 1.065625e-06,
"loss": 0.0054,
"step": 21600
},
{
"epoch": 10.095704948646125,
"grad_norm": 1.2419428825378418,
"learning_rate": 1.0578125e-06,
"loss": 0.0049,
"step": 21625
},
{
"epoch": 10.107376283846872,
"grad_norm": 0.0439959391951561,
"learning_rate": 1.0500000000000001e-06,
"loss": 0.006,
"step": 21650
},
{
"epoch": 10.119047619047619,
"grad_norm": 0.7057489156723022,
"learning_rate": 1.0421875000000001e-06,
"loss": 0.0069,
"step": 21675
},
{
"epoch": 10.130718954248366,
"grad_norm": 0.08024278283119202,
"learning_rate": 1.0343750000000002e-06,
"loss": 0.0028,
"step": 21700
},
{
"epoch": 10.142390289449112,
"grad_norm": 0.21487966179847717,
"learning_rate": 1.0265625000000002e-06,
"loss": 0.0033,
"step": 21725
},
{
"epoch": 10.15406162464986,
"grad_norm": 0.19071203470230103,
"learning_rate": 1.01875e-06,
"loss": 0.0036,
"step": 21750
},
{
"epoch": 10.165732959850606,
"grad_norm": 0.27428773045539856,
"learning_rate": 1.0109375e-06,
"loss": 0.0103,
"step": 21775
},
{
"epoch": 10.177404295051353,
"grad_norm": 1.418234944343567,
"learning_rate": 1.003125e-06,
"loss": 0.0058,
"step": 21800
},
{
"epoch": 10.1890756302521,
"grad_norm": 0.8873878717422485,
"learning_rate": 9.953125e-07,
"loss": 0.0139,
"step": 21825
},
{
"epoch": 10.200746965452847,
"grad_norm": 0.583903431892395,
"learning_rate": 9.875e-07,
"loss": 0.0033,
"step": 21850
},
{
"epoch": 10.212418300653594,
"grad_norm": 0.245305597782135,
"learning_rate": 9.796875e-07,
"loss": 0.0097,
"step": 21875
},
{
"epoch": 10.224089635854341,
"grad_norm": 3.605557441711426,
"learning_rate": 9.718750000000001e-07,
"loss": 0.0073,
"step": 21900
},
{
"epoch": 10.235760971055088,
"grad_norm": 0.19548866152763367,
"learning_rate": 9.640625000000001e-07,
"loss": 0.0063,
"step": 21925
},
{
"epoch": 10.247432306255835,
"grad_norm": 0.807528555393219,
"learning_rate": 9.5625e-07,
"loss": 0.0033,
"step": 21950
},
{
"epoch": 10.259103641456583,
"grad_norm": 0.32932284474372864,
"learning_rate": 9.484375e-07,
"loss": 0.0047,
"step": 21975
},
{
"epoch": 10.27077497665733,
"grad_norm": 0.1796354502439499,
"learning_rate": 9.40625e-07,
"loss": 0.0091,
"step": 22000
},
{
"epoch": 10.282446311858077,
"grad_norm": 1.3611717224121094,
"learning_rate": 9.328125e-07,
"loss": 0.0075,
"step": 22025
},
{
"epoch": 10.294117647058824,
"grad_norm": 0.0766756534576416,
"learning_rate": 9.25e-07,
"loss": 0.0055,
"step": 22050
},
{
"epoch": 10.305788982259571,
"grad_norm": 0.7175803184509277,
"learning_rate": 9.171875e-07,
"loss": 0.01,
"step": 22075
},
{
"epoch": 10.317460317460318,
"grad_norm": 0.23721574246883392,
"learning_rate": 9.09375e-07,
"loss": 0.0039,
"step": 22100
},
{
"epoch": 10.329131652661065,
"grad_norm": 0.23020412027835846,
"learning_rate": 9.015625e-07,
"loss": 0.0052,
"step": 22125
},
{
"epoch": 10.340802987861812,
"grad_norm": 1.632659912109375,
"learning_rate": 8.9375e-07,
"loss": 0.0058,
"step": 22150
},
{
"epoch": 10.352474323062559,
"grad_norm": 1.804761528968811,
"learning_rate": 8.859374999999999e-07,
"loss": 0.0055,
"step": 22175
},
{
"epoch": 10.364145658263306,
"grad_norm": 0.08536524325609207,
"learning_rate": 8.781250000000002e-07,
"loss": 0.0026,
"step": 22200
},
{
"epoch": 10.375816993464053,
"grad_norm": 1.1236313581466675,
"learning_rate": 8.703125000000001e-07,
"loss": 0.0097,
"step": 22225
},
{
"epoch": 10.3874883286648,
"grad_norm": 0.6604540944099426,
"learning_rate": 8.625000000000001e-07,
"loss": 0.0041,
"step": 22250
},
{
"epoch": 10.399159663865547,
"grad_norm": 0.28819596767425537,
"learning_rate": 8.546875000000001e-07,
"loss": 0.0035,
"step": 22275
},
{
"epoch": 10.410830999066294,
"grad_norm": 0.0771021619439125,
"learning_rate": 8.468750000000002e-07,
"loss": 0.0045,
"step": 22300
},
{
"epoch": 10.42250233426704,
"grad_norm": 1.4627320766448975,
"learning_rate": 8.390625000000001e-07,
"loss": 0.0084,
"step": 22325
},
{
"epoch": 10.434173669467787,
"grad_norm": 1.7134203910827637,
"learning_rate": 8.312500000000001e-07,
"loss": 0.0053,
"step": 22350
},
{
"epoch": 10.445845004668534,
"grad_norm": 0.29320698976516724,
"learning_rate": 8.234375000000001e-07,
"loss": 0.0095,
"step": 22375
},
{
"epoch": 10.457516339869281,
"grad_norm": 4.652510643005371,
"learning_rate": 8.15625e-07,
"loss": 0.0033,
"step": 22400
},
{
"epoch": 10.469187675070028,
"grad_norm": 0.07530553638935089,
"learning_rate": 8.078125e-07,
"loss": 0.0135,
"step": 22425
},
{
"epoch": 10.480859010270775,
"grad_norm": 5.351443290710449,
"learning_rate": 8.000000000000001e-07,
"loss": 0.0051,
"step": 22450
},
{
"epoch": 10.492530345471522,
"grad_norm": 1.0543556213378906,
"learning_rate": 7.921875000000001e-07,
"loss": 0.0126,
"step": 22475
},
{
"epoch": 10.504201680672269,
"grad_norm": 0.12515470385551453,
"learning_rate": 7.84375e-07,
"loss": 0.004,
"step": 22500
},
{
"epoch": 10.515873015873016,
"grad_norm": 0.6163919568061829,
"learning_rate": 7.765625e-07,
"loss": 0.0062,
"step": 22525
},
{
"epoch": 10.527544351073763,
"grad_norm": 1.1225354671478271,
"learning_rate": 7.6875e-07,
"loss": 0.0046,
"step": 22550
},
{
"epoch": 10.53921568627451,
"grad_norm": 1.0655065774917603,
"learning_rate": 7.609375e-07,
"loss": 0.0049,
"step": 22575
},
{
"epoch": 10.550887021475257,
"grad_norm": 2.211533308029175,
"learning_rate": 7.53125e-07,
"loss": 0.0034,
"step": 22600
},
{
"epoch": 10.562558356676004,
"grad_norm": 0.19352863729000092,
"learning_rate": 7.453125e-07,
"loss": 0.0048,
"step": 22625
},
{
"epoch": 10.57422969187675,
"grad_norm": 0.6760672926902771,
"learning_rate": 7.375e-07,
"loss": 0.0041,
"step": 22650
},
{
"epoch": 10.585901027077497,
"grad_norm": 1.2626034021377563,
"learning_rate": 7.296875000000001e-07,
"loss": 0.0089,
"step": 22675
},
{
"epoch": 10.597572362278244,
"grad_norm": 4.768553256988525,
"learning_rate": 7.218750000000001e-07,
"loss": 0.0047,
"step": 22700
},
{
"epoch": 10.609243697478991,
"grad_norm": 0.35717836022377014,
"learning_rate": 7.140625000000001e-07,
"loss": 0.0077,
"step": 22725
},
{
"epoch": 10.620915032679738,
"grad_norm": 0.17013007402420044,
"learning_rate": 7.0625e-07,
"loss": 0.0047,
"step": 22750
},
{
"epoch": 10.632586367880485,
"grad_norm": 0.8509282469749451,
"learning_rate": 6.984375e-07,
"loss": 0.0101,
"step": 22775
},
{
"epoch": 10.644257703081232,
"grad_norm": 3.9586565494537354,
"learning_rate": 6.906250000000001e-07,
"loss": 0.0025,
"step": 22800
},
{
"epoch": 10.655929038281979,
"grad_norm": 0.7251598238945007,
"learning_rate": 6.828125000000001e-07,
"loss": 0.0079,
"step": 22825
},
{
"epoch": 10.667600373482726,
"grad_norm": 0.039994291961193085,
"learning_rate": 6.75e-07,
"loss": 0.0048,
"step": 22850
},
{
"epoch": 10.679271708683473,
"grad_norm": 1.6940975189208984,
"learning_rate": 6.671875e-07,
"loss": 0.0095,
"step": 22875
},
{
"epoch": 10.69094304388422,
"grad_norm": 3.833244562149048,
"learning_rate": 6.59375e-07,
"loss": 0.004,
"step": 22900
},
{
"epoch": 10.702614379084967,
"grad_norm": 0.6546738743782043,
"learning_rate": 6.515625e-07,
"loss": 0.0084,
"step": 22925
},
{
"epoch": 10.714285714285714,
"grad_norm": 0.04062287509441376,
"learning_rate": 6.4375e-07,
"loss": 0.0059,
"step": 22950
},
{
"epoch": 10.72595704948646,
"grad_norm": 0.1445113569498062,
"learning_rate": 6.359375e-07,
"loss": 0.0135,
"step": 22975
},
{
"epoch": 10.73762838468721,
"grad_norm": 0.029490185901522636,
"learning_rate": 6.28125e-07,
"loss": 0.0033,
"step": 23000
},
{
"epoch": 10.749299719887954,
"grad_norm": 1.9724853038787842,
"learning_rate": 6.203125e-07,
"loss": 0.0078,
"step": 23025
},
{
"epoch": 10.760971055088703,
"grad_norm": 0.8680882453918457,
"learning_rate": 6.125000000000001e-07,
"loss": 0.0035,
"step": 23050
},
{
"epoch": 10.77264239028945,
"grad_norm": 0.0470956526696682,
"learning_rate": 6.046875000000001e-07,
"loss": 0.0055,
"step": 23075
},
{
"epoch": 10.784313725490197,
"grad_norm": 0.06983581185340881,
"learning_rate": 5.96875e-07,
"loss": 0.0058,
"step": 23100
},
{
"epoch": 10.795985060690944,
"grad_norm": 0.3825051784515381,
"learning_rate": 5.890625e-07,
"loss": 0.0168,
"step": 23125
},
{
"epoch": 10.80765639589169,
"grad_norm": 2.345949649810791,
"learning_rate": 5.8125e-07,
"loss": 0.0038,
"step": 23150
},
{
"epoch": 10.819327731092438,
"grad_norm": 1.4795840978622437,
"learning_rate": 5.734375000000001e-07,
"loss": 0.0095,
"step": 23175
},
{
"epoch": 10.830999066293185,
"grad_norm": 0.686439037322998,
"learning_rate": 5.65625e-07,
"loss": 0.0091,
"step": 23200
},
{
"epoch": 10.842670401493931,
"grad_norm": 0.14041809737682343,
"learning_rate": 5.578125e-07,
"loss": 0.01,
"step": 23225
},
{
"epoch": 10.854341736694678,
"grad_norm": 4.803620338439941,
"learning_rate": 5.5e-07,
"loss": 0.0057,
"step": 23250
},
{
"epoch": 10.866013071895425,
"grad_norm": 0.07831548154354095,
"learning_rate": 5.421874999999999e-07,
"loss": 0.0062,
"step": 23275
},
{
"epoch": 10.877684407096172,
"grad_norm": 1.9763298034667969,
"learning_rate": 5.343750000000001e-07,
"loss": 0.0038,
"step": 23300
},
{
"epoch": 10.88935574229692,
"grad_norm": 0.3448634445667267,
"learning_rate": 5.265625000000001e-07,
"loss": 0.0082,
"step": 23325
},
{
"epoch": 10.901027077497666,
"grad_norm": 0.04117899760603905,
"learning_rate": 5.1875e-07,
"loss": 0.0031,
"step": 23350
},
{
"epoch": 10.912698412698413,
"grad_norm": 0.21676640212535858,
"learning_rate": 5.109375e-07,
"loss": 0.0049,
"step": 23375
},
{
"epoch": 10.92436974789916,
"grad_norm": 3.311768054962158,
"learning_rate": 5.031250000000001e-07,
"loss": 0.007,
"step": 23400
},
{
"epoch": 10.936041083099907,
"grad_norm": 2.6684231758117676,
"learning_rate": 4.953125000000001e-07,
"loss": 0.0054,
"step": 23425
},
{
"epoch": 10.947712418300654,
"grad_norm": 0.7720322012901306,
"learning_rate": 4.875e-07,
"loss": 0.0052,
"step": 23450
},
{
"epoch": 10.9593837535014,
"grad_norm": 1.1452654600143433,
"learning_rate": 4.796875e-07,
"loss": 0.0059,
"step": 23475
},
{
"epoch": 10.971055088702148,
"grad_norm": 0.04618614539504051,
"learning_rate": 4.71875e-07,
"loss": 0.0063,
"step": 23500
},
{
"epoch": 10.982726423902895,
"grad_norm": 2.9821794033050537,
"learning_rate": 4.640625e-07,
"loss": 0.0042,
"step": 23525
},
{
"epoch": 10.994397759103641,
"grad_norm": 3.0062246322631836,
"learning_rate": 4.5624999999999997e-07,
"loss": 0.0077,
"step": 23550
},
{
"epoch": 11.006069094304388,
"grad_norm": 2.828244209289551,
"learning_rate": 4.484375e-07,
"loss": 0.0078,
"step": 23575
},
{
"epoch": 11.017740429505135,
"grad_norm": 4.736670970916748,
"learning_rate": 4.4062499999999996e-07,
"loss": 0.0074,
"step": 23600
},
{
"epoch": 11.029411764705882,
"grad_norm": 1.2825249433517456,
"learning_rate": 4.3281250000000004e-07,
"loss": 0.0111,
"step": 23625
},
{
"epoch": 11.04108309990663,
"grad_norm": 2.528594732284546,
"learning_rate": 4.2500000000000006e-07,
"loss": 0.006,
"step": 23650
},
{
"epoch": 11.052754435107376,
"grad_norm": 0.05410047993063927,
"learning_rate": 4.1718750000000003e-07,
"loss": 0.0059,
"step": 23675
},
{
"epoch": 11.064425770308123,
"grad_norm": 1.0883435010910034,
"learning_rate": 4.0937500000000005e-07,
"loss": 0.0031,
"step": 23700
},
{
"epoch": 11.07609710550887,
"grad_norm": 1.926758885383606,
"learning_rate": 4.015625e-07,
"loss": 0.0095,
"step": 23725
},
{
"epoch": 11.087768440709617,
"grad_norm": 0.09137524664402008,
"learning_rate": 3.940625e-07,
"loss": 0.0083,
"step": 23750
},
{
"epoch": 11.099439775910364,
"grad_norm": 0.14325258135795593,
"learning_rate": 3.8625e-07,
"loss": 0.0039,
"step": 23775
},
{
"epoch": 11.11111111111111,
"grad_norm": 0.04979300498962402,
"learning_rate": 3.7843750000000003e-07,
"loss": 0.0052,
"step": 23800
},
{
"epoch": 11.122782446311858,
"grad_norm": 0.8827780485153198,
"learning_rate": 3.70625e-07,
"loss": 0.0046,
"step": 23825
},
{
"epoch": 11.134453781512605,
"grad_norm": 2.0285470485687256,
"learning_rate": 3.628125e-07,
"loss": 0.002,
"step": 23850
},
{
"epoch": 11.146125116713351,
"grad_norm": 0.05097728595137596,
"learning_rate": 3.5500000000000004e-07,
"loss": 0.0082,
"step": 23875
},
{
"epoch": 11.157796451914098,
"grad_norm": 1.0123631954193115,
"learning_rate": 3.471875e-07,
"loss": 0.002,
"step": 23900
},
{
"epoch": 11.169467787114845,
"grad_norm": 0.06493563950061798,
"learning_rate": 3.3937500000000003e-07,
"loss": 0.0065,
"step": 23925
},
{
"epoch": 11.181139122315592,
"grad_norm": 0.05671960860490799,
"learning_rate": 3.315625e-07,
"loss": 0.0074,
"step": 23950
},
{
"epoch": 11.19281045751634,
"grad_norm": 0.06837425380945206,
"learning_rate": 3.2375e-07,
"loss": 0.0062,
"step": 23975
},
{
"epoch": 11.204481792717086,
"grad_norm": 0.38731399178504944,
"learning_rate": 3.159375e-07,
"loss": 0.0057,
"step": 24000
},
{
"epoch": 11.216153127917833,
"grad_norm": 1.3272087574005127,
"learning_rate": 3.084375e-07,
"loss": 0.0071,
"step": 24025
},
{
"epoch": 11.22782446311858,
"grad_norm": 0.15108473598957062,
"learning_rate": 3.00625e-07,
"loss": 0.0066,
"step": 24050
},
{
"epoch": 11.239495798319327,
"grad_norm": 1.1661783456802368,
"learning_rate": 2.9281250000000006e-07,
"loss": 0.007,
"step": 24075
},
{
"epoch": 11.251167133520074,
"grad_norm": 0.22821743786334991,
"learning_rate": 2.85e-07,
"loss": 0.0045,
"step": 24100
},
{
"epoch": 11.262838468720823,
"grad_norm": 0.8526090383529663,
"learning_rate": 2.771875e-07,
"loss": 0.0056,
"step": 24125
},
{
"epoch": 11.27450980392157,
"grad_norm": 0.3976341784000397,
"learning_rate": 2.69375e-07,
"loss": 0.0076,
"step": 24150
},
{
"epoch": 11.286181139122316,
"grad_norm": 0.0741284042596817,
"learning_rate": 2.615625e-07,
"loss": 0.0079,
"step": 24175
},
{
"epoch": 11.297852474323063,
"grad_norm": 0.057843729853630066,
"learning_rate": 2.5375e-07,
"loss": 0.0042,
"step": 24200
},
{
"epoch": 11.30952380952381,
"grad_norm": 1.2884389162063599,
"learning_rate": 2.4593750000000003e-07,
"loss": 0.0062,
"step": 24225
},
{
"epoch": 11.321195144724557,
"grad_norm": 4.889528274536133,
"learning_rate": 2.3812500000000002e-07,
"loss": 0.0062,
"step": 24250
},
{
"epoch": 11.332866479925304,
"grad_norm": 0.019720420241355896,
"learning_rate": 2.3031250000000002e-07,
"loss": 0.0062,
"step": 24275
},
{
"epoch": 11.344537815126051,
"grad_norm": 0.22723744809627533,
"learning_rate": 2.2250000000000001e-07,
"loss": 0.0035,
"step": 24300
},
{
"epoch": 11.356209150326798,
"grad_norm": 0.05429434776306152,
"learning_rate": 2.146875e-07,
"loss": 0.009,
"step": 24325
},
{
"epoch": 11.367880485527545,
"grad_norm": 0.10212606936693192,
"learning_rate": 2.06875e-07,
"loss": 0.0058,
"step": 24350
},
{
"epoch": 11.379551820728292,
"grad_norm": 1.9913432598114014,
"learning_rate": 1.9906250000000003e-07,
"loss": 0.0097,
"step": 24375
},
{
"epoch": 11.391223155929039,
"grad_norm": 0.32066085934638977,
"learning_rate": 1.9125e-07,
"loss": 0.0039,
"step": 24400
},
{
"epoch": 11.402894491129786,
"grad_norm": 0.029058467596769333,
"learning_rate": 1.8343750000000002e-07,
"loss": 0.004,
"step": 24425
},
{
"epoch": 11.414565826330533,
"grad_norm": 1.6832449436187744,
"learning_rate": 1.75625e-07,
"loss": 0.004,
"step": 24450
},
{
"epoch": 11.42623716153128,
"grad_norm": 0.04252633824944496,
"learning_rate": 1.678125e-07,
"loss": 0.0093,
"step": 24475
},
{
"epoch": 11.437908496732026,
"grad_norm": 4.079598903656006,
"learning_rate": 1.6e-07,
"loss": 0.0073,
"step": 24500
},
{
"epoch": 11.449579831932773,
"grad_norm": 2.663240432739258,
"learning_rate": 1.521875e-07,
"loss": 0.003,
"step": 24525
},
{
"epoch": 11.46125116713352,
"grad_norm": 0.06202975660562515,
"learning_rate": 1.44375e-07,
"loss": 0.0051,
"step": 24550
},
{
"epoch": 11.472922502334267,
"grad_norm": 0.111959308385849,
"learning_rate": 1.3656250000000002e-07,
"loss": 0.0067,
"step": 24575
},
{
"epoch": 11.484593837535014,
"grad_norm": 0.02303888648748398,
"learning_rate": 1.2875e-07,
"loss": 0.0074,
"step": 24600
},
{
"epoch": 11.496265172735761,
"grad_norm": 0.43197059631347656,
"learning_rate": 1.209375e-07,
"loss": 0.0071,
"step": 24625
},
{
"epoch": 11.507936507936508,
"grad_norm": 3.6830546855926514,
"learning_rate": 1.1312500000000002e-07,
"loss": 0.0068,
"step": 24650
},
{
"epoch": 11.519607843137255,
"grad_norm": 0.06097732484340668,
"learning_rate": 1.0531250000000001e-07,
"loss": 0.0089,
"step": 24675
},
{
"epoch": 11.531279178338002,
"grad_norm": 0.06942930817604065,
"learning_rate": 9.75e-08,
"loss": 0.003,
"step": 24700
},
{
"epoch": 11.542950513538749,
"grad_norm": 2.829679012298584,
"learning_rate": 8.96875e-08,
"loss": 0.0075,
"step": 24725
},
{
"epoch": 11.554621848739496,
"grad_norm": 3.9653916358947754,
"learning_rate": 8.187500000000001e-08,
"loss": 0.0063,
"step": 24750
},
{
"epoch": 11.566293183940243,
"grad_norm": 0.29860720038414,
"learning_rate": 7.40625e-08,
"loss": 0.0068,
"step": 24775
},
{
"epoch": 11.57796451914099,
"grad_norm": 0.04515097290277481,
"learning_rate": 6.625e-08,
"loss": 0.0041,
"step": 24800
},
{
"epoch": 11.589635854341736,
"grad_norm": 0.026890119537711143,
"learning_rate": 5.843750000000001e-08,
"loss": 0.0051,
"step": 24825
},
{
"epoch": 11.601307189542483,
"grad_norm": 0.44632381200790405,
"learning_rate": 5.0625e-08,
"loss": 0.0047,
"step": 24850
},
{
"epoch": 11.61297852474323,
"grad_norm": 0.17215296626091003,
"learning_rate": 4.28125e-08,
"loss": 0.0079,
"step": 24875
},
{
"epoch": 11.624649859943977,
"grad_norm": 2.4952566623687744,
"learning_rate": 3.5e-08,
"loss": 0.0043,
"step": 24900
},
{
"epoch": 11.636321195144724,
"grad_norm": 2.0370965003967285,
"learning_rate": 2.7187499999999998e-08,
"loss": 0.0075,
"step": 24925
},
{
"epoch": 11.647992530345471,
"grad_norm": 2.6626877784729004,
"learning_rate": 1.9375e-08,
"loss": 0.0057,
"step": 24950
},
{
"epoch": 11.659663865546218,
"grad_norm": 0.16452664136886597,
"learning_rate": 1.1562500000000002e-08,
"loss": 0.0091,
"step": 24975
},
{
"epoch": 11.671335200746965,
"grad_norm": 0.11460210382938385,
"learning_rate": 3.75e-09,
"loss": 0.0035,
"step": 25000
},
{
"epoch": 11.671335200746965,
"eval_loss": 0.20881079137325287,
"eval_runtime": 5358.5345,
"eval_samples_per_second": 1.757,
"eval_steps_per_second": 0.22,
"eval_wer": 0.09323429678669466,
"step": 25000
},
{
"epoch": 11.671335200746965,
"step": 25000,
"total_flos": 4.081858297380864e+20,
"train_loss": 0.052512485960870985,
"train_runtime": 210665.5128,
"train_samples_per_second": 1.899,
"train_steps_per_second": 0.119
}
],
"logging_steps": 25,
"max_steps": 25000,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.081858297380864e+20,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}