{ "best_metric": 0.10594170403587444, "best_model_checkpoint": "d:\\\\whisper-medium-pt-cv18-fleurs2-lr\\checkpoint-5000", "epoch": 11.502185415228894, "eval_steps": 5000, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011502185415228893, "grad_norm": 15.794218063354492, "learning_rate": 2.875e-08, "loss": 0.7429, "step": 25 }, { "epoch": 0.023004370830457786, "grad_norm": 37.62531661987305, "learning_rate": 5.8750000000000007e-08, "loss": 1.3461, "step": 50 }, { "epoch": 0.03450655624568668, "grad_norm": 13.57304573059082, "learning_rate": 9e-08, "loss": 0.7409, "step": 75 }, { "epoch": 0.04600874166091557, "grad_norm": 37.35552215576172, "learning_rate": 1.2125e-07, "loss": 1.2656, "step": 100 }, { "epoch": 0.05751092707614447, "grad_norm": 13.972356796264648, "learning_rate": 1.5250000000000002e-07, "loss": 0.6919, "step": 125 }, { "epoch": 0.06901311249137336, "grad_norm": 28.50897789001465, "learning_rate": 1.8375000000000001e-07, "loss": 1.131, "step": 150 }, { "epoch": 0.08051529790660225, "grad_norm": 12.919734954833984, "learning_rate": 2.15e-07, "loss": 0.5826, "step": 175 }, { "epoch": 0.09201748332183114, "grad_norm": 28.706079483032227, "learning_rate": 2.4624999999999997e-07, "loss": 0.7895, "step": 200 }, { "epoch": 0.10351966873706005, "grad_norm": 8.280942916870117, "learning_rate": 2.7750000000000004e-07, "loss": 0.3197, "step": 225 }, { "epoch": 0.11502185415228894, "grad_norm": 23.930252075195312, "learning_rate": 3.0875e-07, "loss": 0.3855, "step": 250 }, { "epoch": 0.12652403956751782, "grad_norm": 6.460832595825195, "learning_rate": 3.4e-07, "loss": 0.2569, "step": 275 }, { "epoch": 0.13802622498274672, "grad_norm": 22.88783836364746, "learning_rate": 3.7125000000000005e-07, "loss": 0.3504, "step": 300 }, { "epoch": 0.14952841039797563, "grad_norm": 7.947082042694092, "learning_rate": 4.025e-07, "loss": 0.2123, "step": 325 }, { "epoch": 0.1610305958132045, "grad_norm": 17.616596221923828, "learning_rate": 4.3375000000000003e-07, "loss": 0.2902, "step": 350 }, { "epoch": 0.1725327812284334, "grad_norm": 4.91463565826416, "learning_rate": 4.65e-07, "loss": 0.1828, "step": 375 }, { "epoch": 0.18403496664366228, "grad_norm": 18.81287956237793, "learning_rate": 4.9625e-07, "loss": 0.2847, "step": 400 }, { "epoch": 0.1955371520588912, "grad_norm": 6.390377044677734, "learning_rate": 5.275e-07, "loss": 0.2107, "step": 425 }, { "epoch": 0.2070393374741201, "grad_norm": 18.839292526245117, "learning_rate": 5.587499999999999e-07, "loss": 0.234, "step": 450 }, { "epoch": 0.21854152288934897, "grad_norm": 6.151549816131592, "learning_rate": 5.9e-07, "loss": 0.2198, "step": 475 }, { "epoch": 0.23004370830457788, "grad_norm": 13.628652572631836, "learning_rate": 6.212500000000001e-07, "loss": 0.2575, "step": 500 }, { "epoch": 0.24154589371980675, "grad_norm": 5.531817436218262, "learning_rate": 6.525000000000001e-07, "loss": 0.1854, "step": 525 }, { "epoch": 0.25304807913503563, "grad_norm": 14.468728065490723, "learning_rate": 6.8375e-07, "loss": 0.244, "step": 550 }, { "epoch": 0.26455026455026454, "grad_norm": 6.999868869781494, "learning_rate": 7.15e-07, "loss": 0.1865, "step": 575 }, { "epoch": 0.27605244996549344, "grad_norm": 17.996356964111328, "learning_rate": 7.462500000000001e-07, "loss": 0.2358, "step": 600 }, { "epoch": 0.28755463538072235, "grad_norm": 8.172720909118652, "learning_rate": 7.775e-07, "loss": 0.168, "step": 625 }, { "epoch": 0.29905682079595125, "grad_norm": 16.850492477416992, "learning_rate": 8.0875e-07, "loss": 0.2229, "step": 650 }, { "epoch": 0.3105590062111801, "grad_norm": 6.139720916748047, "learning_rate": 8.4e-07, "loss": 0.1813, "step": 675 }, { "epoch": 0.322061191626409, "grad_norm": 13.691137313842773, "learning_rate": 8.7125e-07, "loss": 0.2124, "step": 700 }, { "epoch": 0.3335633770416379, "grad_norm": 6.144876956939697, "learning_rate": 9.025e-07, "loss": 0.1865, "step": 725 }, { "epoch": 0.3450655624568668, "grad_norm": 19.069873809814453, "learning_rate": 9.337500000000001e-07, "loss": 0.1917, "step": 750 }, { "epoch": 0.3565677478720957, "grad_norm": 8.921222686767578, "learning_rate": 9.65e-07, "loss": 0.2001, "step": 775 }, { "epoch": 0.36806993328732457, "grad_norm": 16.66543197631836, "learning_rate": 9.9625e-07, "loss": 0.2174, "step": 800 }, { "epoch": 0.3795721187025535, "grad_norm": 6.839846134185791, "learning_rate": 1.0275e-06, "loss": 0.1836, "step": 825 }, { "epoch": 0.3910743041177824, "grad_norm": 18.923572540283203, "learning_rate": 1.05875e-06, "loss": 0.2126, "step": 850 }, { "epoch": 0.4025764895330113, "grad_norm": 4.898512840270996, "learning_rate": 1.0900000000000002e-06, "loss": 0.1661, "step": 875 }, { "epoch": 0.4140786749482402, "grad_norm": 14.121219635009766, "learning_rate": 1.12125e-06, "loss": 0.1825, "step": 900 }, { "epoch": 0.42558086036346904, "grad_norm": 7.532533645629883, "learning_rate": 1.1525000000000002e-06, "loss": 0.1767, "step": 925 }, { "epoch": 0.43708304577869794, "grad_norm": 12.991471290588379, "learning_rate": 1.18375e-06, "loss": 0.1993, "step": 950 }, { "epoch": 0.44858523119392685, "grad_norm": 6.528143882751465, "learning_rate": 1.215e-06, "loss": 0.1772, "step": 975 }, { "epoch": 0.46008741660915575, "grad_norm": 15.690600395202637, "learning_rate": 1.24625e-06, "loss": 0.184, "step": 1000 }, { "epoch": 0.47158960202438466, "grad_norm": 6.040137767791748, "learning_rate": 1.2775e-06, "loss": 0.1684, "step": 1025 }, { "epoch": 0.4830917874396135, "grad_norm": 12.872380256652832, "learning_rate": 1.3087500000000002e-06, "loss": 0.1934, "step": 1050 }, { "epoch": 0.4945939728548424, "grad_norm": 6.453334331512451, "learning_rate": 1.34e-06, "loss": 0.1671, "step": 1075 }, { "epoch": 0.5060961582700713, "grad_norm": 12.546272277832031, "learning_rate": 1.3712500000000002e-06, "loss": 0.1901, "step": 1100 }, { "epoch": 0.5175983436853002, "grad_norm": 6.841800689697266, "learning_rate": 1.4025e-06, "loss": 0.1726, "step": 1125 }, { "epoch": 0.5291005291005291, "grad_norm": 16.96964454650879, "learning_rate": 1.43375e-06, "loss": 0.2027, "step": 1150 }, { "epoch": 0.540602714515758, "grad_norm": 5.311614036560059, "learning_rate": 1.465e-06, "loss": 0.152, "step": 1175 }, { "epoch": 0.5521048999309869, "grad_norm": 15.524170875549316, "learning_rate": 1.49625e-06, "loss": 0.1986, "step": 1200 }, { "epoch": 0.5636070853462157, "grad_norm": 9.683732986450195, "learning_rate": 1.5275000000000002e-06, "loss": 0.1624, "step": 1225 }, { "epoch": 0.5751092707614447, "grad_norm": 11.873454093933105, "learning_rate": 1.5587500000000001e-06, "loss": 0.1875, "step": 1250 }, { "epoch": 0.5866114561766735, "grad_norm": 5.891846179962158, "learning_rate": 1.5900000000000002e-06, "loss": 0.1603, "step": 1275 }, { "epoch": 0.5981136415919025, "grad_norm": 14.153804779052734, "learning_rate": 1.6212500000000001e-06, "loss": 0.1641, "step": 1300 }, { "epoch": 0.6096158270071314, "grad_norm": 7.2772955894470215, "learning_rate": 1.6525000000000003e-06, "loss": 0.154, "step": 1325 }, { "epoch": 0.6211180124223602, "grad_norm": 14.763301849365234, "learning_rate": 1.68375e-06, "loss": 0.1745, "step": 1350 }, { "epoch": 0.6326201978375892, "grad_norm": 8.715161323547363, "learning_rate": 1.7149999999999999e-06, "loss": 0.1667, "step": 1375 }, { "epoch": 0.644122383252818, "grad_norm": 11.507594108581543, "learning_rate": 1.74625e-06, "loss": 0.17, "step": 1400 }, { "epoch": 0.655624568668047, "grad_norm": 5.30320405960083, "learning_rate": 1.7775e-06, "loss": 0.1735, "step": 1425 }, { "epoch": 0.6671267540832758, "grad_norm": 12.242377281188965, "learning_rate": 1.80875e-06, "loss": 0.1834, "step": 1450 }, { "epoch": 0.6786289394985047, "grad_norm": 7.7493462562561035, "learning_rate": 1.84e-06, "loss": 0.1575, "step": 1475 }, { "epoch": 0.6901311249137336, "grad_norm": 14.08462142944336, "learning_rate": 1.87125e-06, "loss": 0.1818, "step": 1500 }, { "epoch": 0.7016333103289625, "grad_norm": 5.448755264282227, "learning_rate": 1.9025000000000002e-06, "loss": 0.1483, "step": 1525 }, { "epoch": 0.7131354957441914, "grad_norm": 14.599383354187012, "learning_rate": 1.9337500000000003e-06, "loss": 0.1877, "step": 1550 }, { "epoch": 0.7246376811594203, "grad_norm": 6.832576274871826, "learning_rate": 1.9650000000000002e-06, "loss": 0.1639, "step": 1575 }, { "epoch": 0.7361398665746491, "grad_norm": 11.764741897583008, "learning_rate": 1.99625e-06, "loss": 0.1887, "step": 1600 }, { "epoch": 0.7476420519898781, "grad_norm": 5.073885917663574, "learning_rate": 2.0275e-06, "loss": 0.1449, "step": 1625 }, { "epoch": 0.759144237405107, "grad_norm": 11.470847129821777, "learning_rate": 2.0587500000000004e-06, "loss": 0.1776, "step": 1650 }, { "epoch": 0.7706464228203359, "grad_norm": 8.333059310913086, "learning_rate": 2.09e-06, "loss": 0.1572, "step": 1675 }, { "epoch": 0.7821486082355648, "grad_norm": 10.23344612121582, "learning_rate": 2.12125e-06, "loss": 0.2047, "step": 1700 }, { "epoch": 0.7936507936507936, "grad_norm": 7.107337951660156, "learning_rate": 2.1525e-06, "loss": 0.1635, "step": 1725 }, { "epoch": 0.8051529790660226, "grad_norm": 12.670259475708008, "learning_rate": 2.18375e-06, "loss": 0.1784, "step": 1750 }, { "epoch": 0.8166551644812514, "grad_norm": 6.77697229385376, "learning_rate": 2.215e-06, "loss": 0.1526, "step": 1775 }, { "epoch": 0.8281573498964804, "grad_norm": 13.887433052062988, "learning_rate": 2.24625e-06, "loss": 0.1897, "step": 1800 }, { "epoch": 0.8396595353117092, "grad_norm": 4.762125492095947, "learning_rate": 2.2775000000000002e-06, "loss": 0.1464, "step": 1825 }, { "epoch": 0.8511617207269381, "grad_norm": 19.292552947998047, "learning_rate": 2.30875e-06, "loss": 0.174, "step": 1850 }, { "epoch": 0.862663906142167, "grad_norm": 7.12981653213501, "learning_rate": 2.34e-06, "loss": 0.1602, "step": 1875 }, { "epoch": 0.8741660915573959, "grad_norm": 9.92496395111084, "learning_rate": 2.3712500000000004e-06, "loss": 0.1571, "step": 1900 }, { "epoch": 0.8856682769726248, "grad_norm": 6.081151008605957, "learning_rate": 2.4025000000000003e-06, "loss": 0.1608, "step": 1925 }, { "epoch": 0.8971704623878537, "grad_norm": 15.237141609191895, "learning_rate": 2.43375e-06, "loss": 0.1634, "step": 1950 }, { "epoch": 0.9086726478030825, "grad_norm": 6.994187831878662, "learning_rate": 2.465e-06, "loss": 0.1531, "step": 1975 }, { "epoch": 0.9201748332183115, "grad_norm": 16.033370971679688, "learning_rate": 2.49625e-06, "loss": 0.1583, "step": 2000 }, { "epoch": 0.9316770186335404, "grad_norm": 6.178311824798584, "learning_rate": 2.5275e-06, "loss": 0.1502, "step": 2025 }, { "epoch": 0.9431792040487693, "grad_norm": 10.050224304199219, "learning_rate": 2.55875e-06, "loss": 0.1763, "step": 2050 }, { "epoch": 0.9546813894639982, "grad_norm": 5.254738807678223, "learning_rate": 2.59e-06, "loss": 0.1453, "step": 2075 }, { "epoch": 0.966183574879227, "grad_norm": 13.425460815429688, "learning_rate": 2.62125e-06, "loss": 0.1536, "step": 2100 }, { "epoch": 0.977685760294456, "grad_norm": 4.7515459060668945, "learning_rate": 2.6525e-06, "loss": 0.1579, "step": 2125 }, { "epoch": 0.9891879457096848, "grad_norm": 19.656898498535156, "learning_rate": 2.6837500000000004e-06, "loss": 0.1984, "step": 2150 }, { "epoch": 1.0006901311249137, "grad_norm": 2.677222728729248, "learning_rate": 2.7150000000000003e-06, "loss": 0.1498, "step": 2175 }, { "epoch": 1.0121923165401425, "grad_norm": 5.08892297744751, "learning_rate": 2.74625e-06, "loss": 0.1184, "step": 2200 }, { "epoch": 1.0236945019553716, "grad_norm": 3.9588983058929443, "learning_rate": 2.7775e-06, "loss": 0.1467, "step": 2225 }, { "epoch": 1.0351966873706004, "grad_norm": 11.414639472961426, "learning_rate": 2.8087500000000004e-06, "loss": 0.1113, "step": 2250 }, { "epoch": 1.0466988727858293, "grad_norm": 2.633138418197632, "learning_rate": 2.8400000000000003e-06, "loss": 0.1353, "step": 2275 }, { "epoch": 1.0582010582010581, "grad_norm": 5.479538917541504, "learning_rate": 2.87125e-06, "loss": 0.1261, "step": 2300 }, { "epoch": 1.069703243616287, "grad_norm": 3.630627393722534, "learning_rate": 2.9025e-06, "loss": 0.1227, "step": 2325 }, { "epoch": 1.081205429031516, "grad_norm": 8.805641174316406, "learning_rate": 2.93375e-06, "loss": 0.1229, "step": 2350 }, { "epoch": 1.092707614446745, "grad_norm": 3.6392369270324707, "learning_rate": 2.965e-06, "loss": 0.1476, "step": 2375 }, { "epoch": 1.1042097998619738, "grad_norm": 4.040417194366455, "learning_rate": 2.99625e-06, "loss": 0.1146, "step": 2400 }, { "epoch": 1.1157119852772026, "grad_norm": 2.933556318283081, "learning_rate": 3.0275000000000002e-06, "loss": 0.1226, "step": 2425 }, { "epoch": 1.1272141706924317, "grad_norm": 3.0352232456207275, "learning_rate": 3.05875e-06, "loss": 0.1224, "step": 2450 }, { "epoch": 1.1387163561076605, "grad_norm": 3.9333884716033936, "learning_rate": 3.09e-06, "loss": 0.116, "step": 2475 }, { "epoch": 1.1502185415228894, "grad_norm": 4.144917011260986, "learning_rate": 3.1212500000000004e-06, "loss": 0.1101, "step": 2500 }, { "epoch": 1.1617207269381182, "grad_norm": 3.215536117553711, "learning_rate": 3.1525e-06, "loss": 0.1464, "step": 2525 }, { "epoch": 1.173222912353347, "grad_norm": 10.1427640914917, "learning_rate": 3.18375e-06, "loss": 0.122, "step": 2550 }, { "epoch": 1.1847250977685762, "grad_norm": 2.707651138305664, "learning_rate": 3.215e-06, "loss": 0.129, "step": 2575 }, { "epoch": 1.196227283183805, "grad_norm": 4.589003562927246, "learning_rate": 3.24625e-06, "loss": 0.1083, "step": 2600 }, { "epoch": 1.2077294685990339, "grad_norm": 2.6789445877075195, "learning_rate": 3.2775e-06, "loss": 0.1446, "step": 2625 }, { "epoch": 1.2192316540142627, "grad_norm": 7.148416519165039, "learning_rate": 3.30875e-06, "loss": 0.1153, "step": 2650 }, { "epoch": 1.2307338394294915, "grad_norm": 2.8945469856262207, "learning_rate": 3.34e-06, "loss": 0.1267, "step": 2675 }, { "epoch": 1.2422360248447206, "grad_norm": 7.540188312530518, "learning_rate": 3.37125e-06, "loss": 0.1106, "step": 2700 }, { "epoch": 1.2537382102599495, "grad_norm": 3.4765818119049072, "learning_rate": 3.4025e-06, "loss": 0.1377, "step": 2725 }, { "epoch": 1.2652403956751783, "grad_norm": 5.349803447723389, "learning_rate": 3.4337500000000004e-06, "loss": 0.0954, "step": 2750 }, { "epoch": 1.2767425810904072, "grad_norm": 2.526627779006958, "learning_rate": 3.4650000000000003e-06, "loss": 0.1224, "step": 2775 }, { "epoch": 1.288244766505636, "grad_norm": 6.571626663208008, "learning_rate": 3.49625e-06, "loss": 0.118, "step": 2800 }, { "epoch": 1.299746951920865, "grad_norm": 2.319915533065796, "learning_rate": 3.5275e-06, "loss": 0.1581, "step": 2825 }, { "epoch": 1.311249137336094, "grad_norm": 3.2540760040283203, "learning_rate": 3.5587500000000004e-06, "loss": 0.1196, "step": 2850 }, { "epoch": 1.3227513227513228, "grad_norm": 3.912529706954956, "learning_rate": 3.5900000000000004e-06, "loss": 0.146, "step": 2875 }, { "epoch": 1.3342535081665516, "grad_norm": 3.1499977111816406, "learning_rate": 3.6212500000000003e-06, "loss": 0.1158, "step": 2900 }, { "epoch": 1.3457556935817805, "grad_norm": 3.1882896423339844, "learning_rate": 3.6525e-06, "loss": 0.1517, "step": 2925 }, { "epoch": 1.3572578789970096, "grad_norm": 5.0317888259887695, "learning_rate": 3.6837500000000005e-06, "loss": 0.1129, "step": 2950 }, { "epoch": 1.3687600644122382, "grad_norm": 3.961643695831299, "learning_rate": 3.7150000000000004e-06, "loss": 0.1233, "step": 2975 }, { "epoch": 1.3802622498274673, "grad_norm": 5.409346580505371, "learning_rate": 3.7462500000000003e-06, "loss": 0.1302, "step": 3000 }, { "epoch": 1.391764435242696, "grad_norm": 3.978931427001953, "learning_rate": 3.7775000000000007e-06, "loss": 0.1361, "step": 3025 }, { "epoch": 1.403266620657925, "grad_norm": 6.479454040527344, "learning_rate": 3.8087500000000006e-06, "loss": 0.1167, "step": 3050 }, { "epoch": 1.414768806073154, "grad_norm": 3.1381306648254395, "learning_rate": 3.84e-06, "loss": 0.1199, "step": 3075 }, { "epoch": 1.4262709914883827, "grad_norm": 5.639588832855225, "learning_rate": 3.8712499999999996e-06, "loss": 0.0967, "step": 3100 }, { "epoch": 1.4377731769036117, "grad_norm": 3.615877866744995, "learning_rate": 3.9025e-06, "loss": 0.1334, "step": 3125 }, { "epoch": 1.4492753623188406, "grad_norm": 5.772467136383057, "learning_rate": 3.93375e-06, "loss": 0.114, "step": 3150 }, { "epoch": 1.4607775477340694, "grad_norm": 3.489830255508423, "learning_rate": 3.965e-06, "loss": 0.1421, "step": 3175 }, { "epoch": 1.4722797331492985, "grad_norm": 6.027266502380371, "learning_rate": 3.99625e-06, "loss": 0.1212, "step": 3200 }, { "epoch": 1.4837819185645271, "grad_norm": 3.047349452972412, "learning_rate": 4.0275e-06, "loss": 0.122, "step": 3225 }, { "epoch": 1.4952841039797562, "grad_norm": 7.183162689208984, "learning_rate": 4.05875e-06, "loss": 0.1265, "step": 3250 }, { "epoch": 1.506786289394985, "grad_norm": 3.0059525966644287, "learning_rate": 4.09e-06, "loss": 0.1468, "step": 3275 }, { "epoch": 1.518288474810214, "grad_norm": 7.402144908905029, "learning_rate": 4.12125e-06, "loss": 0.1156, "step": 3300 }, { "epoch": 1.529790660225443, "grad_norm": 3.278670072555542, "learning_rate": 4.1525000000000005e-06, "loss": 0.1201, "step": 3325 }, { "epoch": 1.5412928456406716, "grad_norm": 5.25584602355957, "learning_rate": 4.18375e-06, "loss": 0.0967, "step": 3350 }, { "epoch": 1.5527950310559007, "grad_norm": 4.171654224395752, "learning_rate": 4.215e-06, "loss": 0.1219, "step": 3375 }, { "epoch": 1.5642972164711295, "grad_norm": 4.809912204742432, "learning_rate": 4.24625e-06, "loss": 0.1082, "step": 3400 }, { "epoch": 1.5757994018863584, "grad_norm": 4.222274303436279, "learning_rate": 4.2775e-06, "loss": 0.1219, "step": 3425 }, { "epoch": 1.5873015873015874, "grad_norm": 7.9530792236328125, "learning_rate": 4.30875e-06, "loss": 0.1228, "step": 3450 }, { "epoch": 1.598803772716816, "grad_norm": 3.8543384075164795, "learning_rate": 4.34e-06, "loss": 0.1422, "step": 3475 }, { "epoch": 1.6103059581320451, "grad_norm": 7.819809436798096, "learning_rate": 4.371250000000001e-06, "loss": 0.1306, "step": 3500 }, { "epoch": 1.621808143547274, "grad_norm": 3.5284693241119385, "learning_rate": 4.402500000000001e-06, "loss": 0.1196, "step": 3525 }, { "epoch": 1.6333103289625028, "grad_norm": 8.282682418823242, "learning_rate": 4.4337500000000005e-06, "loss": 0.102, "step": 3550 }, { "epoch": 1.644812514377732, "grad_norm": 4.162339210510254, "learning_rate": 4.4650000000000004e-06, "loss": 0.1474, "step": 3575 }, { "epoch": 1.6563146997929605, "grad_norm": 6.563460350036621, "learning_rate": 4.49625e-06, "loss": 0.1218, "step": 3600 }, { "epoch": 1.6678168852081896, "grad_norm": 3.1366829872131348, "learning_rate": 4.5275e-06, "loss": 0.1332, "step": 3625 }, { "epoch": 1.6793190706234185, "grad_norm": 6.515697956085205, "learning_rate": 4.55875e-06, "loss": 0.1108, "step": 3650 }, { "epoch": 1.6908212560386473, "grad_norm": 3.636465072631836, "learning_rate": 4.590000000000001e-06, "loss": 0.1126, "step": 3675 }, { "epoch": 1.7023234414538764, "grad_norm": 5.450216770172119, "learning_rate": 4.62125e-06, "loss": 0.1014, "step": 3700 }, { "epoch": 1.713825626869105, "grad_norm": 3.8343234062194824, "learning_rate": 4.6525e-06, "loss": 0.147, "step": 3725 }, { "epoch": 1.725327812284334, "grad_norm": 6.672384738922119, "learning_rate": 4.68375e-06, "loss": 0.1196, "step": 3750 }, { "epoch": 1.736829997699563, "grad_norm": 3.536450147628784, "learning_rate": 4.715e-06, "loss": 0.1546, "step": 3775 }, { "epoch": 1.7483321831147918, "grad_norm": 4.106471538543701, "learning_rate": 4.74625e-06, "loss": 0.1016, "step": 3800 }, { "epoch": 1.7598343685300208, "grad_norm": 3.1923904418945312, "learning_rate": 4.7775e-06, "loss": 0.1414, "step": 3825 }, { "epoch": 1.7713365539452495, "grad_norm": 5.6106157302856445, "learning_rate": 4.80875e-06, "loss": 0.1113, "step": 3850 }, { "epoch": 1.7828387393604785, "grad_norm": 3.33258056640625, "learning_rate": 4.84e-06, "loss": 0.1228, "step": 3875 }, { "epoch": 1.7943409247757074, "grad_norm": 4.954050064086914, "learning_rate": 4.87125e-06, "loss": 0.1204, "step": 3900 }, { "epoch": 1.8058431101909362, "grad_norm": 3.758305072784424, "learning_rate": 4.9025e-06, "loss": 0.1226, "step": 3925 }, { "epoch": 1.8173452956061653, "grad_norm": 5.375064373016357, "learning_rate": 4.93375e-06, "loss": 0.1344, "step": 3950 }, { "epoch": 1.828847481021394, "grad_norm": 3.2198784351348877, "learning_rate": 4.965e-06, "loss": 0.1352, "step": 3975 }, { "epoch": 1.840349666436623, "grad_norm": 6.347688674926758, "learning_rate": 4.996250000000001e-06, "loss": 0.1218, "step": 4000 }, { "epoch": 1.8518518518518519, "grad_norm": 2.7024085521698, "learning_rate": 5.0275000000000006e-06, "loss": 0.1361, "step": 4025 }, { "epoch": 1.8633540372670807, "grad_norm": 4.011370658874512, "learning_rate": 5.0587500000000005e-06, "loss": 0.1191, "step": 4050 }, { "epoch": 1.8748562226823098, "grad_norm": 3.9904325008392334, "learning_rate": 5.09e-06, "loss": 0.1446, "step": 4075 }, { "epoch": 1.8863584080975384, "grad_norm": 9.331755638122559, "learning_rate": 5.12125e-06, "loss": 0.1054, "step": 4100 }, { "epoch": 1.8978605935127675, "grad_norm": 3.661421060562134, "learning_rate": 5.151250000000001e-06, "loss": 0.1424, "step": 4125 }, { "epoch": 1.9093627789279963, "grad_norm": 5.570228576660156, "learning_rate": 5.182500000000001e-06, "loss": 0.1106, "step": 4150 }, { "epoch": 1.9208649643432252, "grad_norm": 2.8392717838287354, "learning_rate": 5.213750000000001e-06, "loss": 0.1464, "step": 4175 }, { "epoch": 1.9323671497584543, "grad_norm": 3.853571891784668, "learning_rate": 5.245e-06, "loss": 0.1287, "step": 4200 }, { "epoch": 1.9438693351736829, "grad_norm": 3.817902088165283, "learning_rate": 5.27625e-06, "loss": 0.1274, "step": 4225 }, { "epoch": 1.955371520588912, "grad_norm": 7.2849297523498535, "learning_rate": 5.3075e-06, "loss": 0.1192, "step": 4250 }, { "epoch": 1.9668737060041408, "grad_norm": 3.0766196250915527, "learning_rate": 5.33875e-06, "loss": 0.1546, "step": 4275 }, { "epoch": 1.9783758914193696, "grad_norm": 4.272324085235596, "learning_rate": 5.37e-06, "loss": 0.1152, "step": 4300 }, { "epoch": 1.9898780768345987, "grad_norm": 3.574113607406616, "learning_rate": 5.40125e-06, "loss": 0.1537, "step": 4325 }, { "epoch": 2.0013802622498273, "grad_norm": 3.3172659873962402, "learning_rate": 5.4325e-06, "loss": 0.1174, "step": 4350 }, { "epoch": 2.0128824476650564, "grad_norm": 9.897499084472656, "learning_rate": 5.46375e-06, "loss": 0.0623, "step": 4375 }, { "epoch": 2.024384633080285, "grad_norm": 2.7083864212036133, "learning_rate": 5.495e-06, "loss": 0.0892, "step": 4400 }, { "epoch": 2.035886818495514, "grad_norm": 4.11065149307251, "learning_rate": 5.52625e-06, "loss": 0.0634, "step": 4425 }, { "epoch": 2.047389003910743, "grad_norm": 2.104963779449463, "learning_rate": 5.557500000000001e-06, "loss": 0.0661, "step": 4450 }, { "epoch": 2.058891189325972, "grad_norm": 3.848796844482422, "learning_rate": 5.5887500000000005e-06, "loss": 0.0586, "step": 4475 }, { "epoch": 2.070393374741201, "grad_norm": 2.1717464923858643, "learning_rate": 5.62e-06, "loss": 0.0773, "step": 4500 }, { "epoch": 2.0818955601564295, "grad_norm": 4.033133506774902, "learning_rate": 5.65125e-06, "loss": 0.0584, "step": 4525 }, { "epoch": 2.0933977455716586, "grad_norm": 2.5643622875213623, "learning_rate": 5.6825e-06, "loss": 0.0776, "step": 4550 }, { "epoch": 2.1048999309868877, "grad_norm": 6.559327602386475, "learning_rate": 5.71375e-06, "loss": 0.0643, "step": 4575 }, { "epoch": 2.1164021164021163, "grad_norm": 2.1577560901641846, "learning_rate": 5.745e-06, "loss": 0.0687, "step": 4600 }, { "epoch": 2.1279043018173454, "grad_norm": 8.438713073730469, "learning_rate": 5.776250000000001e-06, "loss": 0.072, "step": 4625 }, { "epoch": 2.139406487232574, "grad_norm": 2.4797635078430176, "learning_rate": 5.807500000000001e-06, "loss": 0.087, "step": 4650 }, { "epoch": 2.150908672647803, "grad_norm": 2.543196201324463, "learning_rate": 5.838750000000001e-06, "loss": 0.0586, "step": 4675 }, { "epoch": 2.162410858063032, "grad_norm": 2.4707229137420654, "learning_rate": 5.8700000000000005e-06, "loss": 0.0723, "step": 4700 }, { "epoch": 2.1739130434782608, "grad_norm": 5.645440101623535, "learning_rate": 5.9012500000000005e-06, "loss": 0.0654, "step": 4725 }, { "epoch": 2.18541522889349, "grad_norm": 2.701606512069702, "learning_rate": 5.9325e-06, "loss": 0.0846, "step": 4750 }, { "epoch": 2.1969174143087185, "grad_norm": 3.8023571968078613, "learning_rate": 5.96375e-06, "loss": 0.0581, "step": 4775 }, { "epoch": 2.2084195997239475, "grad_norm": 1.9498426914215088, "learning_rate": 5.995e-06, "loss": 0.0911, "step": 4800 }, { "epoch": 2.2199217851391766, "grad_norm": 2.360180139541626, "learning_rate": 6.02625e-06, "loss": 0.0728, "step": 4825 }, { "epoch": 2.2314239705544052, "grad_norm": 2.91253924369812, "learning_rate": 6.0575e-06, "loss": 0.086, "step": 4850 }, { "epoch": 2.2429261559696343, "grad_norm": 4.982974052429199, "learning_rate": 6.08875e-06, "loss": 0.071, "step": 4875 }, { "epoch": 2.2544283413848634, "grad_norm": 2.393528461456299, "learning_rate": 6.12e-06, "loss": 0.0889, "step": 4900 }, { "epoch": 2.265930526800092, "grad_norm": 3.294156312942505, "learning_rate": 6.15125e-06, "loss": 0.0659, "step": 4925 }, { "epoch": 2.277432712215321, "grad_norm": 2.5716331005096436, "learning_rate": 6.1825e-06, "loss": 0.072, "step": 4950 }, { "epoch": 2.2889348976305497, "grad_norm": 5.017734527587891, "learning_rate": 6.2137500000000004e-06, "loss": 0.0742, "step": 4975 }, { "epoch": 2.3004370830457788, "grad_norm": 2.866231679916382, "learning_rate": 6.245e-06, "loss": 0.0876, "step": 5000 }, { "epoch": 2.3004370830457788, "eval_loss": 0.16616719961166382, "eval_runtime": 5341.8191, "eval_samples_per_second": 1.777, "eval_steps_per_second": 0.222, "eval_wer": 0.10594170403587444, "step": 5000 }, { "epoch": 2.311939268461008, "grad_norm": 6.034395694732666, "learning_rate": 6.2434375e-06, "loss": 0.0644, "step": 5025 }, { "epoch": 2.3234414538762365, "grad_norm": 2.2458648681640625, "learning_rate": 6.235625e-06, "loss": 0.0776, "step": 5050 }, { "epoch": 2.3349436392914655, "grad_norm": 4.230370998382568, "learning_rate": 6.2278125e-06, "loss": 0.0663, "step": 5075 }, { "epoch": 2.346445824706694, "grad_norm": 2.9568865299224854, "learning_rate": 6.22e-06, "loss": 0.0908, "step": 5100 }, { "epoch": 2.3579480101219232, "grad_norm": 3.1515731811523438, "learning_rate": 6.2121875e-06, "loss": 0.0767, "step": 5125 }, { "epoch": 2.3694501955371523, "grad_norm": 3.660957098007202, "learning_rate": 6.204375e-06, "loss": 0.08, "step": 5150 }, { "epoch": 2.380952380952381, "grad_norm": 3.4517126083374023, "learning_rate": 6.196562500000001e-06, "loss": 0.0715, "step": 5175 }, { "epoch": 2.39245456636761, "grad_norm": 3.5403709411621094, "learning_rate": 6.18875e-06, "loss": 0.0749, "step": 5200 }, { "epoch": 2.4039567517828386, "grad_norm": 4.9767866134643555, "learning_rate": 6.1809375000000005e-06, "loss": 0.0757, "step": 5225 }, { "epoch": 2.4154589371980677, "grad_norm": 3.120891809463501, "learning_rate": 6.173125e-06, "loss": 0.0968, "step": 5250 }, { "epoch": 2.4269611226132968, "grad_norm": 3.43932843208313, "learning_rate": 6.165312500000001e-06, "loss": 0.0724, "step": 5275 }, { "epoch": 2.4384633080285254, "grad_norm": 3.4927871227264404, "learning_rate": 6.1575e-06, "loss": 0.0831, "step": 5300 }, { "epoch": 2.4499654934437545, "grad_norm": 3.009047746658325, "learning_rate": 6.1496875000000006e-06, "loss": 0.0695, "step": 5325 }, { "epoch": 2.461467678858983, "grad_norm": 3.073551654815674, "learning_rate": 6.141875e-06, "loss": 0.0893, "step": 5350 }, { "epoch": 2.472969864274212, "grad_norm": 5.566808223724365, "learning_rate": 6.1340625e-06, "loss": 0.0743, "step": 5375 }, { "epoch": 2.4844720496894412, "grad_norm": 2.416825771331787, "learning_rate": 6.12625e-06, "loss": 0.079, "step": 5400 }, { "epoch": 2.49597423510467, "grad_norm": 4.4972357749938965, "learning_rate": 6.1184375e-06, "loss": 0.0712, "step": 5425 }, { "epoch": 2.507476420519899, "grad_norm": 3.5067849159240723, "learning_rate": 6.1106250000000005e-06, "loss": 0.0883, "step": 5450 }, { "epoch": 2.5189786059351276, "grad_norm": 4.83007287979126, "learning_rate": 6.1028125e-06, "loss": 0.0775, "step": 5475 }, { "epoch": 2.5304807913503566, "grad_norm": 3.0138561725616455, "learning_rate": 6.095e-06, "loss": 0.0814, "step": 5500 }, { "epoch": 2.5419829767655857, "grad_norm": 5.821829795837402, "learning_rate": 6.0871875e-06, "loss": 0.0632, "step": 5525 }, { "epoch": 2.5534851621808143, "grad_norm": 2.7620084285736084, "learning_rate": 6.0793750000000006e-06, "loss": 0.0789, "step": 5550 }, { "epoch": 2.5649873475960434, "grad_norm": 5.065167427062988, "learning_rate": 6.0715625e-06, "loss": 0.0758, "step": 5575 }, { "epoch": 2.576489533011272, "grad_norm": 1.777954339981079, "learning_rate": 6.06375e-06, "loss": 0.0881, "step": 5600 }, { "epoch": 2.587991718426501, "grad_norm": 4.819468975067139, "learning_rate": 6.0559375e-06, "loss": 0.0715, "step": 5625 }, { "epoch": 2.59949390384173, "grad_norm": 2.716526985168457, "learning_rate": 6.048125000000001e-06, "loss": 0.0912, "step": 5650 }, { "epoch": 2.610996089256959, "grad_norm": 4.544143199920654, "learning_rate": 6.0403125000000005e-06, "loss": 0.0689, "step": 5675 }, { "epoch": 2.622498274672188, "grad_norm": 2.0918431282043457, "learning_rate": 6.0325e-06, "loss": 0.0832, "step": 5700 }, { "epoch": 2.6340004600874165, "grad_norm": 4.814356803894043, "learning_rate": 6.0246875e-06, "loss": 0.073, "step": 5725 }, { "epoch": 2.6455026455026456, "grad_norm": 3.759373664855957, "learning_rate": 6.016875e-06, "loss": 0.0803, "step": 5750 }, { "epoch": 2.6570048309178746, "grad_norm": 3.2967991828918457, "learning_rate": 6.0090625000000005e-06, "loss": 0.0767, "step": 5775 }, { "epoch": 2.6685070163331033, "grad_norm": 4.567154884338379, "learning_rate": 6.00125e-06, "loss": 0.079, "step": 5800 }, { "epoch": 2.6800092017483323, "grad_norm": 3.424586534500122, "learning_rate": 5.9934375e-06, "loss": 0.0761, "step": 5825 }, { "epoch": 2.691511387163561, "grad_norm": 2.420856475830078, "learning_rate": 5.985625e-06, "loss": 0.0848, "step": 5850 }, { "epoch": 2.70301357257879, "grad_norm": 6.956820487976074, "learning_rate": 5.977812500000001e-06, "loss": 0.0702, "step": 5875 }, { "epoch": 2.714515757994019, "grad_norm": 2.5272533893585205, "learning_rate": 5.9700000000000004e-06, "loss": 0.0842, "step": 5900 }, { "epoch": 2.7260179434092477, "grad_norm": 5.917661190032959, "learning_rate": 5.9621875e-06, "loss": 0.081, "step": 5925 }, { "epoch": 2.7375201288244764, "grad_norm": 2.5169830322265625, "learning_rate": 5.954375e-06, "loss": 0.0929, "step": 5950 }, { "epoch": 2.7490223142397054, "grad_norm": 8.81894588470459, "learning_rate": 5.946562500000001e-06, "loss": 0.0764, "step": 5975 }, { "epoch": 2.7605244996549345, "grad_norm": 3.4220263957977295, "learning_rate": 5.9387500000000005e-06, "loss": 0.0821, "step": 6000 }, { "epoch": 2.7720266850701636, "grad_norm": 3.72196626663208, "learning_rate": 5.9309375e-06, "loss": 0.0773, "step": 6025 }, { "epoch": 2.783528870485392, "grad_norm": 3.5996947288513184, "learning_rate": 5.923125e-06, "loss": 0.0952, "step": 6050 }, { "epoch": 2.795031055900621, "grad_norm": 3.508704423904419, "learning_rate": 5.9153125e-06, "loss": 0.0677, "step": 6075 }, { "epoch": 2.80653324131585, "grad_norm": 3.768465042114258, "learning_rate": 5.907500000000001e-06, "loss": 0.0903, "step": 6100 }, { "epoch": 2.818035426731079, "grad_norm": 7.676156997680664, "learning_rate": 5.8996875000000004e-06, "loss": 0.0889, "step": 6125 }, { "epoch": 2.829537612146308, "grad_norm": 4.185784816741943, "learning_rate": 5.891875e-06, "loss": 0.0901, "step": 6150 }, { "epoch": 2.8410397975615367, "grad_norm": 5.07861328125, "learning_rate": 5.8840625e-06, "loss": 0.0736, "step": 6175 }, { "epoch": 2.8525419829767653, "grad_norm": 2.5589280128479004, "learning_rate": 5.876250000000001e-06, "loss": 0.0726, "step": 6200 }, { "epoch": 2.8640441683919944, "grad_norm": 5.522654056549072, "learning_rate": 5.8684375e-06, "loss": 0.081, "step": 6225 }, { "epoch": 2.8755463538072235, "grad_norm": 2.873734474182129, "learning_rate": 5.860625e-06, "loss": 0.0934, "step": 6250 }, { "epoch": 2.8870485392224525, "grad_norm": 4.135101318359375, "learning_rate": 5.8528125e-06, "loss": 0.0691, "step": 6275 }, { "epoch": 2.898550724637681, "grad_norm": 3.2022476196289062, "learning_rate": 5.845312500000001e-06, "loss": 0.0876, "step": 6300 }, { "epoch": 2.91005291005291, "grad_norm": 4.878911018371582, "learning_rate": 5.8375000000000004e-06, "loss": 0.0784, "step": 6325 }, { "epoch": 2.921555095468139, "grad_norm": 2.7555994987487793, "learning_rate": 5.8296875e-06, "loss": 0.0932, "step": 6350 }, { "epoch": 2.933057280883368, "grad_norm": 4.3659257888793945, "learning_rate": 5.821875e-06, "loss": 0.0678, "step": 6375 }, { "epoch": 2.944559466298597, "grad_norm": 2.2182586193084717, "learning_rate": 5.814062500000001e-06, "loss": 0.0832, "step": 6400 }, { "epoch": 2.9560616517138256, "grad_norm": 4.891880989074707, "learning_rate": 5.8062500000000005e-06, "loss": 0.077, "step": 6425 }, { "epoch": 2.9675638371290542, "grad_norm": 3.2281267642974854, "learning_rate": 5.7984375e-06, "loss": 0.0862, "step": 6450 }, { "epoch": 2.9790660225442833, "grad_norm": 4.891918659210205, "learning_rate": 5.790625e-06, "loss": 0.0713, "step": 6475 }, { "epoch": 2.9905682079595124, "grad_norm": 3.6200342178344727, "learning_rate": 5.782812500000001e-06, "loss": 0.1053, "step": 6500 }, { "epoch": 3.002070393374741, "grad_norm": 2.0583813190460205, "learning_rate": 5.775000000000001e-06, "loss": 0.0679, "step": 6525 }, { "epoch": 3.01357257878997, "grad_norm": 4.742440700531006, "learning_rate": 5.7671875e-06, "loss": 0.0332, "step": 6550 }, { "epoch": 3.025074764205199, "grad_norm": 1.5032464265823364, "learning_rate": 5.759375e-06, "loss": 0.0537, "step": 6575 }, { "epoch": 3.036576949620428, "grad_norm": 2.877703905105591, "learning_rate": 5.7515625e-06, "loss": 0.0373, "step": 6600 }, { "epoch": 3.048079135035657, "grad_norm": 2.60418963432312, "learning_rate": 5.743750000000001e-06, "loss": 0.0441, "step": 6625 }, { "epoch": 3.0595813204508855, "grad_norm": 1.8395668268203735, "learning_rate": 5.7359375e-06, "loss": 0.0348, "step": 6650 }, { "epoch": 3.0710835058661146, "grad_norm": 2.9202868938446045, "learning_rate": 5.728125e-06, "loss": 0.0534, "step": 6675 }, { "epoch": 3.0825856912813436, "grad_norm": 2.7743561267852783, "learning_rate": 5.7203125e-06, "loss": 0.0353, "step": 6700 }, { "epoch": 3.0940878766965723, "grad_norm": 3.1167683601379395, "learning_rate": 5.712500000000001e-06, "loss": 0.0505, "step": 6725 }, { "epoch": 3.1055900621118013, "grad_norm": 4.696991920471191, "learning_rate": 5.7046875e-06, "loss": 0.0321, "step": 6750 }, { "epoch": 3.11709224752703, "grad_norm": 1.5438411235809326, "learning_rate": 5.696875e-06, "loss": 0.0499, "step": 6775 }, { "epoch": 3.128594432942259, "grad_norm": 6.239833831787109, "learning_rate": 5.6890625e-06, "loss": 0.0369, "step": 6800 }, { "epoch": 3.140096618357488, "grad_norm": 4.79664421081543, "learning_rate": 5.681250000000001e-06, "loss": 0.0541, "step": 6825 }, { "epoch": 3.1515988037727167, "grad_norm": 3.2525932788848877, "learning_rate": 5.6734375e-06, "loss": 0.0359, "step": 6850 }, { "epoch": 3.163100989187946, "grad_norm": 2.3709487915039062, "learning_rate": 5.6656250000000005e-06, "loss": 0.0464, "step": 6875 }, { "epoch": 3.1746031746031744, "grad_norm": 3.340402126312256, "learning_rate": 5.6578125e-06, "loss": 0.0331, "step": 6900 }, { "epoch": 3.1861053600184035, "grad_norm": 2.4604740142822266, "learning_rate": 5.65e-06, "loss": 0.0578, "step": 6925 }, { "epoch": 3.1976075454336326, "grad_norm": 4.909114837646484, "learning_rate": 5.642187500000001e-06, "loss": 0.0382, "step": 6950 }, { "epoch": 3.209109730848861, "grad_norm": 4.222381591796875, "learning_rate": 5.634375e-06, "loss": 0.0573, "step": 6975 }, { "epoch": 3.2206119162640903, "grad_norm": 3.5466387271881104, "learning_rate": 5.6265625e-06, "loss": 0.0355, "step": 7000 }, { "epoch": 3.232114101679319, "grad_norm": 4.068739891052246, "learning_rate": 5.61875e-06, "loss": 0.0524, "step": 7025 }, { "epoch": 3.243616287094548, "grad_norm": 3.5331459045410156, "learning_rate": 5.610937500000001e-06, "loss": 0.0388, "step": 7050 }, { "epoch": 3.255118472509777, "grad_norm": 5.634138107299805, "learning_rate": 5.603125e-06, "loss": 0.0476, "step": 7075 }, { "epoch": 3.2666206579250057, "grad_norm": 4.637297630310059, "learning_rate": 5.5953125000000005e-06, "loss": 0.038, "step": 7100 }, { "epoch": 3.2781228433402347, "grad_norm": 2.1430771350860596, "learning_rate": 5.5875e-06, "loss": 0.0536, "step": 7125 }, { "epoch": 3.2896250287554634, "grad_norm": 2.6287930011749268, "learning_rate": 5.579687500000001e-06, "loss": 0.0393, "step": 7150 }, { "epoch": 3.3011272141706924, "grad_norm": 4.748372554779053, "learning_rate": 5.571875e-06, "loss": 0.0424, "step": 7175 }, { "epoch": 3.3126293995859215, "grad_norm": 3.630303382873535, "learning_rate": 5.5640625000000006e-06, "loss": 0.0387, "step": 7200 }, { "epoch": 3.32413158500115, "grad_norm": 4.786473751068115, "learning_rate": 5.55625e-06, "loss": 0.0561, "step": 7225 }, { "epoch": 3.335633770416379, "grad_norm": 2.6991186141967773, "learning_rate": 5.5484375e-06, "loss": 0.0441, "step": 7250 }, { "epoch": 3.347135955831608, "grad_norm": 2.1756906509399414, "learning_rate": 5.540625e-06, "loss": 0.0537, "step": 7275 }, { "epoch": 3.358638141246837, "grad_norm": 1.9589180946350098, "learning_rate": 5.5328125e-06, "loss": 0.0341, "step": 7300 }, { "epoch": 3.370140326662066, "grad_norm": 3.504366159439087, "learning_rate": 5.5250000000000005e-06, "loss": 0.0556, "step": 7325 }, { "epoch": 3.3816425120772946, "grad_norm": 2.558767557144165, "learning_rate": 5.5171875e-06, "loss": 0.0411, "step": 7350 }, { "epoch": 3.3931446974925237, "grad_norm": 2.466121196746826, "learning_rate": 5.509375e-06, "loss": 0.0488, "step": 7375 }, { "epoch": 3.4046468829077523, "grad_norm": 4.102237224578857, "learning_rate": 5.5015625e-06, "loss": 0.0399, "step": 7400 }, { "epoch": 3.4161490683229814, "grad_norm": 2.008484125137329, "learning_rate": 5.4937500000000006e-06, "loss": 0.0612, "step": 7425 }, { "epoch": 3.4276512537382104, "grad_norm": 4.083756446838379, "learning_rate": 5.4859375e-06, "loss": 0.0417, "step": 7450 }, { "epoch": 3.439153439153439, "grad_norm": 2.526481866836548, "learning_rate": 5.478125e-06, "loss": 0.0572, "step": 7475 }, { "epoch": 3.450655624568668, "grad_norm": 5.448037624359131, "learning_rate": 5.4703125e-06, "loss": 0.0487, "step": 7500 }, { "epoch": 3.4621578099838968, "grad_norm": 2.4314920902252197, "learning_rate": 5.462500000000001e-06, "loss": 0.0465, "step": 7525 }, { "epoch": 3.473659995399126, "grad_norm": 2.6774253845214844, "learning_rate": 5.4546875000000004e-06, "loss": 0.0398, "step": 7550 }, { "epoch": 3.485162180814355, "grad_norm": 2.9031014442443848, "learning_rate": 5.446875e-06, "loss": 0.0629, "step": 7575 }, { "epoch": 3.4966643662295835, "grad_norm": 3.0640227794647217, "learning_rate": 5.4390625e-06, "loss": 0.0344, "step": 7600 }, { "epoch": 3.5081665516448126, "grad_norm": 3.262568950653076, "learning_rate": 5.43125e-06, "loss": 0.0538, "step": 7625 }, { "epoch": 3.5196687370600412, "grad_norm": 3.67341685295105, "learning_rate": 5.4234375000000005e-06, "loss": 0.0409, "step": 7650 }, { "epoch": 3.5311709224752703, "grad_norm": 2.3461291790008545, "learning_rate": 5.415625e-06, "loss": 0.0506, "step": 7675 }, { "epoch": 3.542673107890499, "grad_norm": 7.1874589920043945, "learning_rate": 5.4078125e-06, "loss": 0.0462, "step": 7700 }, { "epoch": 3.554175293305728, "grad_norm": 2.4068679809570312, "learning_rate": 5.4e-06, "loss": 0.0517, "step": 7725 }, { "epoch": 3.565677478720957, "grad_norm": 10.364015579223633, "learning_rate": 5.392187500000001e-06, "loss": 0.0459, "step": 7750 }, { "epoch": 3.5771796641361857, "grad_norm": 1.969916582107544, "learning_rate": 5.3843750000000004e-06, "loss": 0.0395, "step": 7775 }, { "epoch": 3.588681849551415, "grad_norm": 2.2081713676452637, "learning_rate": 5.3765625e-06, "loss": 0.0333, "step": 7800 }, { "epoch": 3.6001840349666434, "grad_norm": 2.534886598587036, "learning_rate": 5.36875e-06, "loss": 0.0628, "step": 7825 }, { "epoch": 3.6116862203818725, "grad_norm": 4.844212532043457, "learning_rate": 5.360937500000001e-06, "loss": 0.0399, "step": 7850 }, { "epoch": 3.6231884057971016, "grad_norm": 2.0327091217041016, "learning_rate": 5.3531250000000005e-06, "loss": 0.0479, "step": 7875 }, { "epoch": 3.63469059121233, "grad_norm": 5.35178804397583, "learning_rate": 5.3453125e-06, "loss": 0.0449, "step": 7900 }, { "epoch": 3.6461927766275593, "grad_norm": 1.815317153930664, "learning_rate": 5.3375e-06, "loss": 0.061, "step": 7925 }, { "epoch": 3.657694962042788, "grad_norm": 4.710953235626221, "learning_rate": 5.3296875e-06, "loss": 0.0366, "step": 7950 }, { "epoch": 3.669197147458017, "grad_norm": 2.8497252464294434, "learning_rate": 5.321875000000001e-06, "loss": 0.0523, "step": 7975 }, { "epoch": 3.680699332873246, "grad_norm": 3.102057695388794, "learning_rate": 5.3140624999999996e-06, "loss": 0.0424, "step": 8000 }, { "epoch": 3.6922015182884746, "grad_norm": 2.153207540512085, "learning_rate": 5.30625e-06, "loss": 0.0524, "step": 8025 }, { "epoch": 3.7037037037037037, "grad_norm": 2.5647189617156982, "learning_rate": 5.2984375e-06, "loss": 0.041, "step": 8050 }, { "epoch": 3.7152058891189323, "grad_norm": 3.3755810260772705, "learning_rate": 5.290625000000001e-06, "loss": 0.0574, "step": 8075 }, { "epoch": 3.7267080745341614, "grad_norm": 3.370281457901001, "learning_rate": 5.2828125e-06, "loss": 0.0427, "step": 8100 }, { "epoch": 3.7382102599493905, "grad_norm": 3.2038800716400146, "learning_rate": 5.275e-06, "loss": 0.0544, "step": 8125 }, { "epoch": 3.749712445364619, "grad_norm": 1.8960700035095215, "learning_rate": 5.2671875e-06, "loss": 0.0436, "step": 8150 }, { "epoch": 3.761214630779848, "grad_norm": 3.4356021881103516, "learning_rate": 5.259375000000001e-06, "loss": 0.0593, "step": 8175 }, { "epoch": 3.772716816195077, "grad_norm": 3.514998197555542, "learning_rate": 5.251562500000001e-06, "loss": 0.0361, "step": 8200 }, { "epoch": 3.784219001610306, "grad_norm": 1.705615758895874, "learning_rate": 5.24375e-06, "loss": 0.0551, "step": 8225 }, { "epoch": 3.795721187025535, "grad_norm": 4.432028770446777, "learning_rate": 5.2359375e-06, "loss": 0.0409, "step": 8250 }, { "epoch": 3.8072233724407636, "grad_norm": 1.7736481428146362, "learning_rate": 5.228125e-06, "loss": 0.0431, "step": 8275 }, { "epoch": 3.8187255578559927, "grad_norm": 6.601363658905029, "learning_rate": 5.220312500000001e-06, "loss": 0.0385, "step": 8300 }, { "epoch": 3.8302277432712213, "grad_norm": 1.307138204574585, "learning_rate": 5.2128125e-06, "loss": 0.0551, "step": 8325 }, { "epoch": 3.8417299286864504, "grad_norm": 4.539569854736328, "learning_rate": 5.205e-06, "loss": 0.0414, "step": 8350 }, { "epoch": 3.8532321141016794, "grad_norm": 1.4391796588897705, "learning_rate": 5.1971875e-06, "loss": 0.0468, "step": 8375 }, { "epoch": 3.864734299516908, "grad_norm": 6.971019268035889, "learning_rate": 5.189375000000001e-06, "loss": 0.0399, "step": 8400 }, { "epoch": 3.876236484932137, "grad_norm": 3.1996214389801025, "learning_rate": 5.1815624999999996e-06, "loss": 0.0586, "step": 8425 }, { "epoch": 3.8877386703473658, "grad_norm": 2.8706111907958984, "learning_rate": 5.17375e-06, "loss": 0.0372, "step": 8450 }, { "epoch": 3.899240855762595, "grad_norm": 2.5482466220855713, "learning_rate": 5.1659375e-06, "loss": 0.0588, "step": 8475 }, { "epoch": 3.910743041177824, "grad_norm": 3.1475095748901367, "learning_rate": 5.158125000000001e-06, "loss": 0.0357, "step": 8500 }, { "epoch": 3.9222452265930525, "grad_norm": 3.1267690658569336, "learning_rate": 5.1503125e-06, "loss": 0.063, "step": 8525 }, { "epoch": 3.9337474120082816, "grad_norm": 1.667446255683899, "learning_rate": 5.1425e-06, "loss": 0.041, "step": 8550 }, { "epoch": 3.9452495974235102, "grad_norm": 1.526341438293457, "learning_rate": 5.1346875e-06, "loss": 0.0447, "step": 8575 }, { "epoch": 3.9567517828387393, "grad_norm": 5.230031490325928, "learning_rate": 5.126875000000001e-06, "loss": 0.0429, "step": 8600 }, { "epoch": 3.9682539682539684, "grad_norm": 2.7826850414276123, "learning_rate": 5.1190625e-06, "loss": 0.0492, "step": 8625 }, { "epoch": 3.979756153669197, "grad_norm": 3.674015760421753, "learning_rate": 5.11125e-06, "loss": 0.0391, "step": 8650 }, { "epoch": 3.991258339084426, "grad_norm": 3.0181193351745605, "learning_rate": 5.1034375e-06, "loss": 0.0538, "step": 8675 }, { "epoch": 4.002760524499655, "grad_norm": 0.9257192015647888, "learning_rate": 5.095625e-06, "loss": 0.0428, "step": 8700 }, { "epoch": 4.014262709914884, "grad_norm": 3.171478033065796, "learning_rate": 5.087812500000001e-06, "loss": 0.0175, "step": 8725 }, { "epoch": 4.025764895330113, "grad_norm": 1.8162753582000732, "learning_rate": 5.08e-06, "loss": 0.0279, "step": 8750 }, { "epoch": 4.037267080745342, "grad_norm": 0.8982828855514526, "learning_rate": 5.0721875e-06, "loss": 0.0195, "step": 8775 }, { "epoch": 4.04876926616057, "grad_norm": 0.8609257936477661, "learning_rate": 5.064375e-06, "loss": 0.0386, "step": 8800 }, { "epoch": 4.060271451575799, "grad_norm": 2.8455467224121094, "learning_rate": 5.056562500000001e-06, "loss": 0.0224, "step": 8825 }, { "epoch": 4.071773636991028, "grad_norm": 4.009946346282959, "learning_rate": 5.04875e-06, "loss": 0.0332, "step": 8850 }, { "epoch": 4.083275822406257, "grad_norm": 6.834461688995361, "learning_rate": 5.0409375e-06, "loss": 0.0243, "step": 8875 }, { "epoch": 4.094778007821486, "grad_norm": 2.6603477001190186, "learning_rate": 5.033125e-06, "loss": 0.0337, "step": 8900 }, { "epoch": 4.106280193236715, "grad_norm": 0.7769069075584412, "learning_rate": 5.025312500000001e-06, "loss": 0.0227, "step": 8925 }, { "epoch": 4.117782378651944, "grad_norm": 2.2610023021698, "learning_rate": 5.0175e-06, "loss": 0.0316, "step": 8950 }, { "epoch": 4.129284564067173, "grad_norm": 0.38040891289711, "learning_rate": 5.0096875000000005e-06, "loss": 0.0222, "step": 8975 }, { "epoch": 4.140786749482402, "grad_norm": 3.0205907821655273, "learning_rate": 5.001875e-06, "loss": 0.04, "step": 9000 }, { "epoch": 4.152288934897631, "grad_norm": 2.703075647354126, "learning_rate": 4.9940625e-06, "loss": 0.022, "step": 9025 }, { "epoch": 4.163791120312859, "grad_norm": 1.172072410583496, "learning_rate": 4.98625e-06, "loss": 0.0325, "step": 9050 }, { "epoch": 4.175293305728088, "grad_norm": 2.555593967437744, "learning_rate": 4.9784375e-06, "loss": 0.0222, "step": 9075 }, { "epoch": 4.186795491143317, "grad_norm": 1.3756543397903442, "learning_rate": 4.970625e-06, "loss": 0.035, "step": 9100 }, { "epoch": 4.198297676558546, "grad_norm": 3.3377280235290527, "learning_rate": 4.9628125e-06, "loss": 0.0136, "step": 9125 }, { "epoch": 4.209799861973775, "grad_norm": 3.4057676792144775, "learning_rate": 4.955e-06, "loss": 0.0284, "step": 9150 }, { "epoch": 4.2213020473890035, "grad_norm": 0.6658844351768494, "learning_rate": 4.9471875e-06, "loss": 0.0191, "step": 9175 }, { "epoch": 4.232804232804233, "grad_norm": 3.073923349380493, "learning_rate": 4.9393750000000005e-06, "loss": 0.0365, "step": 9200 }, { "epoch": 4.244306418219462, "grad_norm": 1.391367793083191, "learning_rate": 4.9315625e-06, "loss": 0.0206, "step": 9225 }, { "epoch": 4.255808603634691, "grad_norm": 2.199260711669922, "learning_rate": 4.92375e-06, "loss": 0.0325, "step": 9250 }, { "epoch": 4.26731078904992, "grad_norm": 0.31522706151008606, "learning_rate": 4.9159375e-06, "loss": 0.0191, "step": 9275 }, { "epoch": 4.278812974465148, "grad_norm": 1.3394752740859985, "learning_rate": 4.9081250000000005e-06, "loss": 0.031, "step": 9300 }, { "epoch": 4.290315159880377, "grad_norm": 3.397002696990967, "learning_rate": 4.9003125e-06, "loss": 0.0283, "step": 9325 }, { "epoch": 4.301817345295606, "grad_norm": 1.9605752229690552, "learning_rate": 4.8925e-06, "loss": 0.0334, "step": 9350 }, { "epoch": 4.313319530710835, "grad_norm": 3.3331549167633057, "learning_rate": 4.8846875e-06, "loss": 0.0173, "step": 9375 }, { "epoch": 4.324821716126064, "grad_norm": 1.6721982955932617, "learning_rate": 4.876875e-06, "loss": 0.0335, "step": 9400 }, { "epoch": 4.336323901541292, "grad_norm": 2.483747720718384, "learning_rate": 4.8690625000000004e-06, "loss": 0.0161, "step": 9425 }, { "epoch": 4.3478260869565215, "grad_norm": 2.7441227436065674, "learning_rate": 4.86125e-06, "loss": 0.0299, "step": 9450 }, { "epoch": 4.359328272371751, "grad_norm": 6.056209087371826, "learning_rate": 4.8534375e-06, "loss": 0.0191, "step": 9475 }, { "epoch": 4.37083045778698, "grad_norm": 3.665620803833008, "learning_rate": 4.845625e-06, "loss": 0.0413, "step": 9500 }, { "epoch": 4.382332643202209, "grad_norm": 0.7313005924224854, "learning_rate": 4.8378125000000005e-06, "loss": 0.0279, "step": 9525 }, { "epoch": 4.393834828617437, "grad_norm": 6.102551460266113, "learning_rate": 4.83e-06, "loss": 0.0365, "step": 9550 }, { "epoch": 4.405337014032666, "grad_norm": 3.0939571857452393, "learning_rate": 4.8221875e-06, "loss": 0.0235, "step": 9575 }, { "epoch": 4.416839199447895, "grad_norm": 6.218958854675293, "learning_rate": 4.814375e-06, "loss": 0.04, "step": 9600 }, { "epoch": 4.428341384863124, "grad_norm": 3.746630907058716, "learning_rate": 4.806562500000001e-06, "loss": 0.0252, "step": 9625 }, { "epoch": 4.439843570278353, "grad_norm": 2.458735466003418, "learning_rate": 4.7987500000000004e-06, "loss": 0.0407, "step": 9650 }, { "epoch": 4.451345755693581, "grad_norm": 0.9115240573883057, "learning_rate": 4.7909375e-06, "loss": 0.0216, "step": 9675 }, { "epoch": 4.4628479411088104, "grad_norm": 2.0379064083099365, "learning_rate": 4.783125e-06, "loss": 0.0326, "step": 9700 }, { "epoch": 4.4743501265240395, "grad_norm": 3.2523980140686035, "learning_rate": 4.7753125e-06, "loss": 0.0216, "step": 9725 }, { "epoch": 4.485852311939269, "grad_norm": 0.8420194387435913, "learning_rate": 4.7675000000000005e-06, "loss": 0.0337, "step": 9750 }, { "epoch": 4.497354497354498, "grad_norm": 4.6802167892456055, "learning_rate": 4.7596875e-06, "loss": 0.0245, "step": 9775 }, { "epoch": 4.508856682769727, "grad_norm": 1.313185214996338, "learning_rate": 4.751875e-06, "loss": 0.0261, "step": 9800 }, { "epoch": 4.520358868184955, "grad_norm": 0.6525618433952332, "learning_rate": 4.7440625e-06, "loss": 0.0222, "step": 9825 }, { "epoch": 4.531861053600184, "grad_norm": 1.9755631685256958, "learning_rate": 4.736250000000001e-06, "loss": 0.0328, "step": 9850 }, { "epoch": 4.543363239015413, "grad_norm": 5.463117599487305, "learning_rate": 4.7284374999999996e-06, "loss": 0.0243, "step": 9875 }, { "epoch": 4.554865424430642, "grad_norm": 2.689207077026367, "learning_rate": 4.720625e-06, "loss": 0.0294, "step": 9900 }, { "epoch": 4.566367609845871, "grad_norm": 2.54421067237854, "learning_rate": 4.7128125e-06, "loss": 0.0249, "step": 9925 }, { "epoch": 4.577869795261099, "grad_norm": 9.304972648620605, "learning_rate": 4.705000000000001e-06, "loss": 0.0413, "step": 9950 }, { "epoch": 4.5893719806763285, "grad_norm": 1.8700443506240845, "learning_rate": 4.6971875000000005e-06, "loss": 0.0199, "step": 9975 }, { "epoch": 4.6008741660915575, "grad_norm": 1.8287808895111084, "learning_rate": 4.689375e-06, "loss": 0.0371, "step": 10000 }, { "epoch": 4.6008741660915575, "eval_loss": 0.18389073014259338, "eval_runtime": 5170.7691, "eval_samples_per_second": 1.836, "eval_steps_per_second": 0.23, "eval_wer": 0.09987187700192185, "step": 10000 }, { "epoch": 4.612376351506787, "grad_norm": 4.324032306671143, "learning_rate": 4.6815625e-06, "loss": 0.0254, "step": 10025 }, { "epoch": 4.623878536922016, "grad_norm": 1.790111780166626, "learning_rate": 4.67375e-06, "loss": 0.0317, "step": 10050 }, { "epoch": 4.635380722337244, "grad_norm": 3.236027717590332, "learning_rate": 4.665937500000001e-06, "loss": 0.0169, "step": 10075 }, { "epoch": 4.646882907752473, "grad_norm": 2.1184232234954834, "learning_rate": 4.658125e-06, "loss": 0.0272, "step": 10100 }, { "epoch": 4.658385093167702, "grad_norm": 2.2520174980163574, "learning_rate": 4.6503125e-06, "loss": 0.0195, "step": 10125 }, { "epoch": 4.669887278582931, "grad_norm": 3.1109213829040527, "learning_rate": 4.6425e-06, "loss": 0.0377, "step": 10150 }, { "epoch": 4.68138946399816, "grad_norm": 0.9269886612892151, "learning_rate": 4.634687500000001e-06, "loss": 0.0245, "step": 10175 }, { "epoch": 4.692891649413388, "grad_norm": 2.5646307468414307, "learning_rate": 4.626875e-06, "loss": 0.0335, "step": 10200 }, { "epoch": 4.704393834828617, "grad_norm": 2.609894037246704, "learning_rate": 4.6190625e-06, "loss": 0.0286, "step": 10225 }, { "epoch": 4.7158960202438465, "grad_norm": 4.488738059997559, "learning_rate": 4.61125e-06, "loss": 0.0311, "step": 10250 }, { "epoch": 4.7273982056590755, "grad_norm": 1.5146970748901367, "learning_rate": 4.603437500000001e-06, "loss": 0.0193, "step": 10275 }, { "epoch": 4.738900391074305, "grad_norm": 0.9895784258842468, "learning_rate": 4.595625e-06, "loss": 0.0263, "step": 10300 }, { "epoch": 4.750402576489533, "grad_norm": 5.285503387451172, "learning_rate": 4.5878125e-06, "loss": 0.0262, "step": 10325 }, { "epoch": 4.761904761904762, "grad_norm": 0.3710523843765259, "learning_rate": 4.58e-06, "loss": 0.0414, "step": 10350 }, { "epoch": 4.773406947319991, "grad_norm": 1.5297555923461914, "learning_rate": 4.572187500000001e-06, "loss": 0.0226, "step": 10375 }, { "epoch": 4.78490913273522, "grad_norm": 2.7512269020080566, "learning_rate": 4.564375e-06, "loss": 0.0296, "step": 10400 }, { "epoch": 4.796411318150449, "grad_norm": 7.514578342437744, "learning_rate": 4.5565625000000005e-06, "loss": 0.0252, "step": 10425 }, { "epoch": 4.807913503565677, "grad_norm": 2.6303036212921143, "learning_rate": 4.54875e-06, "loss": 0.0281, "step": 10450 }, { "epoch": 4.819415688980906, "grad_norm": 2.5647971630096436, "learning_rate": 4.5409375e-06, "loss": 0.0236, "step": 10475 }, { "epoch": 4.830917874396135, "grad_norm": 1.5822607278823853, "learning_rate": 4.533125e-06, "loss": 0.03, "step": 10500 }, { "epoch": 4.8424200598113645, "grad_norm": 3.3824303150177, "learning_rate": 4.5253125e-06, "loss": 0.0277, "step": 10525 }, { "epoch": 4.8539222452265935, "grad_norm": 2.4250597953796387, "learning_rate": 4.5175e-06, "loss": 0.0288, "step": 10550 }, { "epoch": 4.865424430641822, "grad_norm": 5.316204071044922, "learning_rate": 4.5096875e-06, "loss": 0.0248, "step": 10575 }, { "epoch": 4.876926616057051, "grad_norm": 2.7238681316375732, "learning_rate": 4.501875000000001e-06, "loss": 0.0332, "step": 10600 }, { "epoch": 4.88842880147228, "grad_norm": 3.1399307250976562, "learning_rate": 4.4940625e-06, "loss": 0.0222, "step": 10625 }, { "epoch": 4.899930986887509, "grad_norm": 2.6083264350891113, "learning_rate": 4.4862500000000005e-06, "loss": 0.0325, "step": 10650 }, { "epoch": 4.911433172302738, "grad_norm": 1.1990541219711304, "learning_rate": 4.4784375e-06, "loss": 0.0218, "step": 10675 }, { "epoch": 4.922935357717966, "grad_norm": 2.0461394786834717, "learning_rate": 4.470625000000001e-06, "loss": 0.0312, "step": 10700 }, { "epoch": 4.934437543133195, "grad_norm": 0.7773350477218628, "learning_rate": 4.4628125e-06, "loss": 0.0256, "step": 10725 }, { "epoch": 4.945939728548424, "grad_norm": 2.645052671432495, "learning_rate": 4.4550000000000005e-06, "loss": 0.0334, "step": 10750 }, { "epoch": 4.957441913963653, "grad_norm": 3.0270519256591797, "learning_rate": 4.4471875e-06, "loss": 0.0221, "step": 10775 }, { "epoch": 4.9689440993788825, "grad_norm": 2.553375720977783, "learning_rate": 4.439375e-06, "loss": 0.0337, "step": 10800 }, { "epoch": 4.980446284794111, "grad_norm": 5.496345520019531, "learning_rate": 4.4315625e-06, "loss": 0.0197, "step": 10825 }, { "epoch": 4.99194847020934, "grad_norm": 2.0996806621551514, "learning_rate": 4.4240625000000005e-06, "loss": 0.0389, "step": 10850 }, { "epoch": 5.003450655624569, "grad_norm": 1.9347798824310303, "learning_rate": 4.41625e-06, "loss": 0.0229, "step": 10875 }, { "epoch": 5.014952841039798, "grad_norm": 3.568023443222046, "learning_rate": 4.4084375e-06, "loss": 0.0135, "step": 10900 }, { "epoch": 5.026455026455026, "grad_norm": 0.6959520578384399, "learning_rate": 4.400625e-06, "loss": 0.0216, "step": 10925 }, { "epoch": 5.037957211870255, "grad_norm": 0.2912887930870056, "learning_rate": 4.3928125e-06, "loss": 0.0157, "step": 10950 }, { "epoch": 5.049459397285484, "grad_norm": 2.004884719848633, "learning_rate": 4.385e-06, "loss": 0.0208, "step": 10975 }, { "epoch": 5.060961582700713, "grad_norm": 4.345211505889893, "learning_rate": 4.3771875e-06, "loss": 0.0163, "step": 11000 }, { "epoch": 5.072463768115942, "grad_norm": 1.2300424575805664, "learning_rate": 4.369375000000001e-06, "loss": 0.0261, "step": 11025 }, { "epoch": 5.0839659535311705, "grad_norm": 2.4417171478271484, "learning_rate": 4.3615625e-06, "loss": 0.0177, "step": 11050 }, { "epoch": 5.0954681389464, "grad_norm": 0.4104786217212677, "learning_rate": 4.3537500000000005e-06, "loss": 0.0221, "step": 11075 }, { "epoch": 5.106970324361629, "grad_norm": 1.5912240743637085, "learning_rate": 4.3459375e-06, "loss": 0.0108, "step": 11100 }, { "epoch": 5.118472509776858, "grad_norm": 2.6581289768218994, "learning_rate": 4.338125000000001e-06, "loss": 0.0196, "step": 11125 }, { "epoch": 5.129974695192087, "grad_norm": 0.41683322191238403, "learning_rate": 4.3303125e-06, "loss": 0.012, "step": 11150 }, { "epoch": 5.141476880607315, "grad_norm": 0.7937358021736145, "learning_rate": 4.3225000000000005e-06, "loss": 0.0198, "step": 11175 }, { "epoch": 5.152979066022544, "grad_norm": 2.042461633682251, "learning_rate": 4.3146875e-06, "loss": 0.0157, "step": 11200 }, { "epoch": 5.164481251437773, "grad_norm": 1.6334706544876099, "learning_rate": 4.306875e-06, "loss": 0.0185, "step": 11225 }, { "epoch": 5.175983436853002, "grad_norm": 2.2739877700805664, "learning_rate": 4.2990625e-06, "loss": 0.0178, "step": 11250 }, { "epoch": 5.187485622268231, "grad_norm": 1.6258018016815186, "learning_rate": 4.29125e-06, "loss": 0.0173, "step": 11275 }, { "epoch": 5.1989878076834595, "grad_norm": 4.973881244659424, "learning_rate": 4.2834375000000004e-06, "loss": 0.0129, "step": 11300 }, { "epoch": 5.2104899930986885, "grad_norm": 0.9437686204910278, "learning_rate": 4.275625e-06, "loss": 0.0237, "step": 11325 }, { "epoch": 5.221992178513918, "grad_norm": 3.4761786460876465, "learning_rate": 4.2678125e-06, "loss": 0.0147, "step": 11350 }, { "epoch": 5.233494363929147, "grad_norm": 2.692365884780884, "learning_rate": 4.26e-06, "loss": 0.0189, "step": 11375 }, { "epoch": 5.244996549344376, "grad_norm": 2.9584712982177734, "learning_rate": 4.2521875000000005e-06, "loss": 0.0174, "step": 11400 }, { "epoch": 5.256498734759604, "grad_norm": 9.468619346618652, "learning_rate": 4.244375e-06, "loss": 0.0176, "step": 11425 }, { "epoch": 5.268000920174833, "grad_norm": 0.9750581383705139, "learning_rate": 4.2365625e-06, "loss": 0.021, "step": 11450 }, { "epoch": 5.279503105590062, "grad_norm": 3.0626797676086426, "learning_rate": 4.22875e-06, "loss": 0.0298, "step": 11475 }, { "epoch": 5.291005291005291, "grad_norm": 2.2305479049682617, "learning_rate": 4.220937500000001e-06, "loss": 0.0161, "step": 11500 }, { "epoch": 5.30250747642052, "grad_norm": 0.5742190480232239, "learning_rate": 4.2131250000000004e-06, "loss": 0.0216, "step": 11525 }, { "epoch": 5.314009661835748, "grad_norm": 2.7238235473632812, "learning_rate": 4.2053125e-06, "loss": 0.0178, "step": 11550 }, { "epoch": 5.3255118472509775, "grad_norm": 1.7739872932434082, "learning_rate": 4.1975e-06, "loss": 0.0203, "step": 11575 }, { "epoch": 5.3370140326662066, "grad_norm": 0.9364586472511292, "learning_rate": 4.1896875e-06, "loss": 0.0185, "step": 11600 }, { "epoch": 5.348516218081436, "grad_norm": 3.462063789367676, "learning_rate": 4.1818750000000005e-06, "loss": 0.027, "step": 11625 }, { "epoch": 5.360018403496665, "grad_norm": 4.4536051750183105, "learning_rate": 4.1740625e-06, "loss": 0.0137, "step": 11650 }, { "epoch": 5.371520588911893, "grad_norm": 1.4285831451416016, "learning_rate": 4.16625e-06, "loss": 0.022, "step": 11675 }, { "epoch": 5.383022774327122, "grad_norm": 0.1919490098953247, "learning_rate": 4.1584375e-06, "loss": 0.0127, "step": 11700 }, { "epoch": 5.394524959742351, "grad_norm": 1.8607268333435059, "learning_rate": 4.150625000000001e-06, "loss": 0.0221, "step": 11725 }, { "epoch": 5.40602714515758, "grad_norm": 2.7783443927764893, "learning_rate": 4.1428125e-06, "loss": 0.0114, "step": 11750 }, { "epoch": 5.417529330572809, "grad_norm": 0.5502150058746338, "learning_rate": 4.135e-06, "loss": 0.0283, "step": 11775 }, { "epoch": 5.429031515988037, "grad_norm": 4.172156810760498, "learning_rate": 4.1271875e-06, "loss": 0.0148, "step": 11800 }, { "epoch": 5.440533701403266, "grad_norm": 4.028002738952637, "learning_rate": 4.119375000000001e-06, "loss": 0.0248, "step": 11825 }, { "epoch": 5.4520358868184955, "grad_norm": 2.3138980865478516, "learning_rate": 4.1115625000000005e-06, "loss": 0.0132, "step": 11850 }, { "epoch": 5.463538072233725, "grad_norm": 1.1849713325500488, "learning_rate": 4.10375e-06, "loss": 0.0226, "step": 11875 }, { "epoch": 5.475040257648954, "grad_norm": 0.587064266204834, "learning_rate": 4.0959375e-06, "loss": 0.019, "step": 11900 }, { "epoch": 5.486542443064182, "grad_norm": 2.445380449295044, "learning_rate": 4.088125e-06, "loss": 0.0294, "step": 11925 }, { "epoch": 5.498044628479411, "grad_norm": 0.9706999659538269, "learning_rate": 4.080312500000001e-06, "loss": 0.0243, "step": 11950 }, { "epoch": 5.50954681389464, "grad_norm": 3.1857030391693115, "learning_rate": 4.0724999999999995e-06, "loss": 0.0285, "step": 11975 }, { "epoch": 5.521048999309869, "grad_norm": 2.901566982269287, "learning_rate": 4.0646875e-06, "loss": 0.0144, "step": 12000 }, { "epoch": 5.532551184725098, "grad_norm": 1.5645906925201416, "learning_rate": 4.056875e-06, "loss": 0.0209, "step": 12025 }, { "epoch": 5.544053370140326, "grad_norm": 0.3500732481479645, "learning_rate": 4.049062500000001e-06, "loss": 0.0154, "step": 12050 }, { "epoch": 5.555555555555555, "grad_norm": 1.8092036247253418, "learning_rate": 4.04125e-06, "loss": 0.0244, "step": 12075 }, { "epoch": 5.567057740970784, "grad_norm": 0.872996985912323, "learning_rate": 4.0334375e-06, "loss": 0.0128, "step": 12100 }, { "epoch": 5.5785599263860135, "grad_norm": 1.7378208637237549, "learning_rate": 4.025625e-06, "loss": 0.0173, "step": 12125 }, { "epoch": 5.590062111801243, "grad_norm": 2.589615821838379, "learning_rate": 4.017812500000001e-06, "loss": 0.019, "step": 12150 }, { "epoch": 5.601564297216471, "grad_norm": 1.2749011516571045, "learning_rate": 4.01e-06, "loss": 0.0219, "step": 12175 }, { "epoch": 5.6130664826317, "grad_norm": 3.331284761428833, "learning_rate": 4.0021875e-06, "loss": 0.0187, "step": 12200 }, { "epoch": 5.624568668046929, "grad_norm": 3.3452606201171875, "learning_rate": 3.994375e-06, "loss": 0.0251, "step": 12225 }, { "epoch": 5.636070853462158, "grad_norm": 3.171482563018799, "learning_rate": 3.9865625e-06, "loss": 0.0111, "step": 12250 }, { "epoch": 5.647573038877387, "grad_norm": 1.3494371175765991, "learning_rate": 3.978750000000001e-06, "loss": 0.0252, "step": 12275 }, { "epoch": 5.659075224292615, "grad_norm": 5.425341606140137, "learning_rate": 3.9709375e-06, "loss": 0.0154, "step": 12300 }, { "epoch": 5.670577409707844, "grad_norm": 3.904033899307251, "learning_rate": 3.963125e-06, "loss": 0.0257, "step": 12325 }, { "epoch": 5.682079595123073, "grad_norm": 6.220149993896484, "learning_rate": 3.9553125e-06, "loss": 0.0197, "step": 12350 }, { "epoch": 5.693581780538302, "grad_norm": 2.4043924808502197, "learning_rate": 3.947500000000001e-06, "loss": 0.0206, "step": 12375 }, { "epoch": 5.7050839659535315, "grad_norm": 1.716847538948059, "learning_rate": 3.9396875e-06, "loss": 0.0128, "step": 12400 }, { "epoch": 5.71658615136876, "grad_norm": 2.778402805328369, "learning_rate": 3.931875e-06, "loss": 0.025, "step": 12425 }, { "epoch": 5.728088336783989, "grad_norm": 1.2575827836990356, "learning_rate": 3.9240625e-06, "loss": 0.0165, "step": 12450 }, { "epoch": 5.739590522199218, "grad_norm": 1.911150574684143, "learning_rate": 3.916250000000001e-06, "loss": 0.0274, "step": 12475 }, { "epoch": 5.751092707614447, "grad_norm": 4.2152099609375, "learning_rate": 3.9084375e-06, "loss": 0.0205, "step": 12500 }, { "epoch": 5.762594893029676, "grad_norm": 2.5663771629333496, "learning_rate": 3.9006250000000005e-06, "loss": 0.0233, "step": 12525 }, { "epoch": 5.774097078444904, "grad_norm": 2.787692070007324, "learning_rate": 3.8928125e-06, "loss": 0.0168, "step": 12550 }, { "epoch": 5.785599263860133, "grad_norm": 1.5613856315612793, "learning_rate": 3.885e-06, "loss": 0.0208, "step": 12575 }, { "epoch": 5.797101449275362, "grad_norm": 4.623484134674072, "learning_rate": 3.8771875e-06, "loss": 0.0154, "step": 12600 }, { "epoch": 5.808603634690591, "grad_norm": 1.0770273208618164, "learning_rate": 3.869375e-06, "loss": 0.0251, "step": 12625 }, { "epoch": 5.8201058201058204, "grad_norm": 1.7411080598831177, "learning_rate": 3.8615625e-06, "loss": 0.0187, "step": 12650 }, { "epoch": 5.831608005521049, "grad_norm": 1.9490394592285156, "learning_rate": 3.85375e-06, "loss": 0.0255, "step": 12675 }, { "epoch": 5.843110190936278, "grad_norm": 3.750220537185669, "learning_rate": 3.8459375e-06, "loss": 0.015, "step": 12700 }, { "epoch": 5.854612376351507, "grad_norm": 4.523986339569092, "learning_rate": 3.838125e-06, "loss": 0.0251, "step": 12725 }, { "epoch": 5.866114561766736, "grad_norm": 0.7033637762069702, "learning_rate": 3.8303125000000004e-06, "loss": 0.0109, "step": 12750 }, { "epoch": 5.877616747181965, "grad_norm": 1.5173693895339966, "learning_rate": 3.8225e-06, "loss": 0.026, "step": 12775 }, { "epoch": 5.889118932597193, "grad_norm": 1.4995800256729126, "learning_rate": 3.8146875e-06, "loss": 0.0194, "step": 12800 }, { "epoch": 5.900621118012422, "grad_norm": 2.203517436981201, "learning_rate": 3.806875e-06, "loss": 0.0292, "step": 12825 }, { "epoch": 5.912123303427651, "grad_norm": 2.84173321723938, "learning_rate": 3.7990625e-06, "loss": 0.0159, "step": 12850 }, { "epoch": 5.92362548884288, "grad_norm": 0.5612061619758606, "learning_rate": 3.7912500000000003e-06, "loss": 0.0288, "step": 12875 }, { "epoch": 5.935127674258109, "grad_norm": 0.5369181632995605, "learning_rate": 3.7834375000000006e-06, "loss": 0.015, "step": 12900 }, { "epoch": 5.946629859673338, "grad_norm": 2.2607927322387695, "learning_rate": 3.775625e-06, "loss": 0.0286, "step": 12925 }, { "epoch": 5.958132045088567, "grad_norm": 2.4500582218170166, "learning_rate": 3.7678125e-06, "loss": 0.0204, "step": 12950 }, { "epoch": 5.969634230503796, "grad_norm": 1.60666024684906, "learning_rate": 3.7600000000000004e-06, "loss": 0.0199, "step": 12975 }, { "epoch": 5.981136415919025, "grad_norm": 0.704494059085846, "learning_rate": 3.7521875000000007e-06, "loss": 0.0117, "step": 13000 }, { "epoch": 5.992638601334254, "grad_norm": 0.9249849319458008, "learning_rate": 3.744375e-06, "loss": 0.0235, "step": 13025 }, { "epoch": 6.004140786749482, "grad_norm": 3.008746862411499, "learning_rate": 3.7365625000000003e-06, "loss": 0.0129, "step": 13050 }, { "epoch": 6.015642972164711, "grad_norm": 1.7184109687805176, "learning_rate": 3.7287500000000005e-06, "loss": 0.0127, "step": 13075 }, { "epoch": 6.02714515757994, "grad_norm": 0.9730533957481384, "learning_rate": 3.7209375000000003e-06, "loss": 0.0175, "step": 13100 }, { "epoch": 6.038647342995169, "grad_norm": 1.0653347969055176, "learning_rate": 3.713125e-06, "loss": 0.0088, "step": 13125 }, { "epoch": 6.050149528410398, "grad_norm": 2.9114506244659424, "learning_rate": 3.7053125e-06, "loss": 0.0137, "step": 13150 }, { "epoch": 6.0616517138256265, "grad_norm": 0.32335150241851807, "learning_rate": 3.6975e-06, "loss": 0.0077, "step": 13175 }, { "epoch": 6.073153899240856, "grad_norm": 0.48962631821632385, "learning_rate": 3.6896875000000004e-06, "loss": 0.0198, "step": 13200 }, { "epoch": 6.084656084656085, "grad_norm": 0.669650137424469, "learning_rate": 3.681875e-06, "loss": 0.0121, "step": 13225 }, { "epoch": 6.096158270071314, "grad_norm": 3.413156270980835, "learning_rate": 3.6740625e-06, "loss": 0.0128, "step": 13250 }, { "epoch": 6.107660455486543, "grad_norm": 2.7622175216674805, "learning_rate": 3.6662500000000003e-06, "loss": 0.0132, "step": 13275 }, { "epoch": 6.119162640901771, "grad_norm": 2.454317808151245, "learning_rate": 3.6584375000000005e-06, "loss": 0.0189, "step": 13300 }, { "epoch": 6.130664826317, "grad_norm": 1.0187458992004395, "learning_rate": 3.650625e-06, "loss": 0.0099, "step": 13325 }, { "epoch": 6.142167011732229, "grad_norm": 1.0523874759674072, "learning_rate": 3.6428125e-06, "loss": 0.0186, "step": 13350 }, { "epoch": 6.153669197147458, "grad_norm": 2.4889376163482666, "learning_rate": 3.6350000000000003e-06, "loss": 0.0102, "step": 13375 }, { "epoch": 6.165171382562687, "grad_norm": 1.4157731533050537, "learning_rate": 3.6275000000000004e-06, "loss": 0.0232, "step": 13400 }, { "epoch": 6.1766735679779154, "grad_norm": 2.285750389099121, "learning_rate": 3.6196875000000007e-06, "loss": 0.0112, "step": 13425 }, { "epoch": 6.1881757533931445, "grad_norm": 3.0357449054718018, "learning_rate": 3.611875e-06, "loss": 0.0151, "step": 13450 }, { "epoch": 6.199677938808374, "grad_norm": 1.9853347539901733, "learning_rate": 3.6040625000000003e-06, "loss": 0.0119, "step": 13475 }, { "epoch": 6.211180124223603, "grad_norm": 3.489882469177246, "learning_rate": 3.5962500000000005e-06, "loss": 0.0156, "step": 13500 }, { "epoch": 6.222682309638832, "grad_norm": 7.6776299476623535, "learning_rate": 3.5884375000000003e-06, "loss": 0.0107, "step": 13525 }, { "epoch": 6.23418449505406, "grad_norm": 1.0730276107788086, "learning_rate": 3.580625e-06, "loss": 0.0194, "step": 13550 }, { "epoch": 6.245686680469289, "grad_norm": 41.845558166503906, "learning_rate": 3.5728125e-06, "loss": 0.0115, "step": 13575 }, { "epoch": 6.257188865884518, "grad_norm": 2.866692543029785, "learning_rate": 3.565e-06, "loss": 0.0159, "step": 13600 }, { "epoch": 6.268691051299747, "grad_norm": 3.9908344745635986, "learning_rate": 3.5571875000000004e-06, "loss": 0.0099, "step": 13625 }, { "epoch": 6.280193236714976, "grad_norm": 2.3821098804473877, "learning_rate": 3.549375e-06, "loss": 0.0191, "step": 13650 }, { "epoch": 6.291695422130204, "grad_norm": 0.6831459403038025, "learning_rate": 3.5415625e-06, "loss": 0.0123, "step": 13675 }, { "epoch": 6.3031976075454335, "grad_norm": 1.3033053874969482, "learning_rate": 3.5337500000000003e-06, "loss": 0.0183, "step": 13700 }, { "epoch": 6.3146997929606625, "grad_norm": 0.3791348934173584, "learning_rate": 3.5259375000000005e-06, "loss": 0.0078, "step": 13725 }, { "epoch": 6.326201978375892, "grad_norm": 0.43763110041618347, "learning_rate": 3.518125e-06, "loss": 0.0131, "step": 13750 }, { "epoch": 6.337704163791121, "grad_norm": 2.630368232727051, "learning_rate": 3.5103125e-06, "loss": 0.0071, "step": 13775 }, { "epoch": 6.349206349206349, "grad_norm": 0.5663381814956665, "learning_rate": 3.5025000000000003e-06, "loss": 0.0212, "step": 13800 }, { "epoch": 6.360708534621578, "grad_norm": 2.189028739929199, "learning_rate": 3.4946875000000006e-06, "loss": 0.0137, "step": 13825 }, { "epoch": 6.372210720036807, "grad_norm": 1.2615488767623901, "learning_rate": 3.486875e-06, "loss": 0.0231, "step": 13850 }, { "epoch": 6.383712905452036, "grad_norm": 1.6485449075698853, "learning_rate": 3.4790625e-06, "loss": 0.0116, "step": 13875 }, { "epoch": 6.395215090867265, "grad_norm": 3.103294610977173, "learning_rate": 3.47125e-06, "loss": 0.0198, "step": 13900 }, { "epoch": 6.406717276282493, "grad_norm": 6.216635704040527, "learning_rate": 3.4634375000000002e-06, "loss": 0.0137, "step": 13925 }, { "epoch": 6.418219461697722, "grad_norm": 3.772575855255127, "learning_rate": 3.4556249999999996e-06, "loss": 0.0207, "step": 13950 }, { "epoch": 6.4297216471129515, "grad_norm": 0.9796826243400574, "learning_rate": 3.4478125e-06, "loss": 0.0094, "step": 13975 }, { "epoch": 6.4412238325281805, "grad_norm": 4.275996208190918, "learning_rate": 3.44e-06, "loss": 0.019, "step": 14000 }, { "epoch": 6.45272601794341, "grad_norm": 5.775335311889648, "learning_rate": 3.4321875000000003e-06, "loss": 0.0099, "step": 14025 }, { "epoch": 6.464228203358638, "grad_norm": 2.5531623363494873, "learning_rate": 3.4243750000000006e-06, "loss": 0.0213, "step": 14050 }, { "epoch": 6.475730388773867, "grad_norm": 0.7260667085647583, "learning_rate": 3.4165625e-06, "loss": 0.009, "step": 14075 }, { "epoch": 6.487232574189096, "grad_norm": 1.1557809114456177, "learning_rate": 3.40875e-06, "loss": 0.0218, "step": 14100 }, { "epoch": 6.498734759604325, "grad_norm": 1.5365861654281616, "learning_rate": 3.4009375000000004e-06, "loss": 0.0049, "step": 14125 }, { "epoch": 6.510236945019554, "grad_norm": 4.833078861236572, "learning_rate": 3.3931250000000007e-06, "loss": 0.0196, "step": 14150 }, { "epoch": 6.521739130434782, "grad_norm": 3.0063588619232178, "learning_rate": 3.3853125e-06, "loss": 0.0102, "step": 14175 }, { "epoch": 6.533241315850011, "grad_norm": 3.6960723400115967, "learning_rate": 3.3775000000000003e-06, "loss": 0.0182, "step": 14200 }, { "epoch": 6.54474350126524, "grad_norm": 0.6360275149345398, "learning_rate": 3.3696875e-06, "loss": 0.0093, "step": 14225 }, { "epoch": 6.5562456866804695, "grad_norm": 2.3226568698883057, "learning_rate": 3.3618750000000003e-06, "loss": 0.0211, "step": 14250 }, { "epoch": 6.5677478720956985, "grad_norm": 2.793957471847534, "learning_rate": 3.3540624999999997e-06, "loss": 0.0118, "step": 14275 }, { "epoch": 6.579250057510927, "grad_norm": 0.3393898904323578, "learning_rate": 3.34625e-06, "loss": 0.0115, "step": 14300 }, { "epoch": 6.590752242926156, "grad_norm": 0.8301447033882141, "learning_rate": 3.3384375e-06, "loss": 0.0069, "step": 14325 }, { "epoch": 6.602254428341385, "grad_norm": 0.6139010190963745, "learning_rate": 3.3306250000000004e-06, "loss": 0.0112, "step": 14350 }, { "epoch": 6.613756613756614, "grad_norm": 0.24870969355106354, "learning_rate": 3.3228125e-06, "loss": 0.0116, "step": 14375 }, { "epoch": 6.625258799171843, "grad_norm": 1.3729124069213867, "learning_rate": 3.315e-06, "loss": 0.0115, "step": 14400 }, { "epoch": 6.636760984587071, "grad_norm": 2.537313938140869, "learning_rate": 3.3071875000000003e-06, "loss": 0.0081, "step": 14425 }, { "epoch": 6.6482631700023, "grad_norm": 2.8953866958618164, "learning_rate": 3.2993750000000005e-06, "loss": 0.0152, "step": 14450 }, { "epoch": 6.659765355417529, "grad_norm": 0.47888821363449097, "learning_rate": 3.2915625e-06, "loss": 0.0098, "step": 14475 }, { "epoch": 6.671267540832758, "grad_norm": 0.4152248799800873, "learning_rate": 3.28375e-06, "loss": 0.0285, "step": 14500 }, { "epoch": 6.6827697262479875, "grad_norm": 0.7531673312187195, "learning_rate": 3.2759375000000003e-06, "loss": 0.0063, "step": 14525 }, { "epoch": 6.694271911663216, "grad_norm": 2.4068517684936523, "learning_rate": 3.268125e-06, "loss": 0.0213, "step": 14550 }, { "epoch": 6.705774097078445, "grad_norm": 2.0701165199279785, "learning_rate": 3.2603125e-06, "loss": 0.0096, "step": 14575 }, { "epoch": 6.717276282493674, "grad_norm": 3.544454336166382, "learning_rate": 3.2525e-06, "loss": 0.0234, "step": 14600 }, { "epoch": 6.728778467908903, "grad_norm": 0.8146782517433167, "learning_rate": 3.2446875e-06, "loss": 0.0066, "step": 14625 }, { "epoch": 6.740280653324132, "grad_norm": 2.016157627105713, "learning_rate": 3.2368750000000002e-06, "loss": 0.0139, "step": 14650 }, { "epoch": 6.75178283873936, "grad_norm": 3.558530807495117, "learning_rate": 3.2290625000000005e-06, "loss": 0.016, "step": 14675 }, { "epoch": 6.763285024154589, "grad_norm": 2.2227861881256104, "learning_rate": 3.22125e-06, "loss": 0.0197, "step": 14700 }, { "epoch": 6.774787209569818, "grad_norm": 1.3837552070617676, "learning_rate": 3.2134375e-06, "loss": 0.0096, "step": 14725 }, { "epoch": 6.786289394985047, "grad_norm": 7.47805643081665, "learning_rate": 3.2056250000000003e-06, "loss": 0.0136, "step": 14750 }, { "epoch": 6.797791580400276, "grad_norm": 2.1846354007720947, "learning_rate": 3.1978125000000006e-06, "loss": 0.0142, "step": 14775 }, { "epoch": 6.809293765815505, "grad_norm": 0.4281105101108551, "learning_rate": 3.19e-06, "loss": 0.0168, "step": 14800 }, { "epoch": 6.820795951230734, "grad_norm": 3.293972969055176, "learning_rate": 3.1821875e-06, "loss": 0.0125, "step": 14825 }, { "epoch": 6.832298136645963, "grad_norm": 1.7543948888778687, "learning_rate": 3.1743750000000004e-06, "loss": 0.0125, "step": 14850 }, { "epoch": 6.843800322061192, "grad_norm": 0.2645922005176544, "learning_rate": 3.1665625000000002e-06, "loss": 0.0094, "step": 14875 }, { "epoch": 6.855302507476421, "grad_norm": 0.2636635899543762, "learning_rate": 3.15875e-06, "loss": 0.012, "step": 14900 }, { "epoch": 6.866804692891649, "grad_norm": 7.451193332672119, "learning_rate": 3.1509375000000003e-06, "loss": 0.0078, "step": 14925 }, { "epoch": 6.878306878306878, "grad_norm": 3.3548383712768555, "learning_rate": 3.143125e-06, "loss": 0.0155, "step": 14950 }, { "epoch": 6.889809063722107, "grad_norm": 4.6555023193359375, "learning_rate": 3.1353125000000003e-06, "loss": 0.0108, "step": 14975 }, { "epoch": 6.901311249137336, "grad_norm": 1.744326114654541, "learning_rate": 3.1274999999999997e-06, "loss": 0.0246, "step": 15000 }, { "epoch": 6.901311249137336, "eval_loss": 0.20269618928432465, "eval_runtime": 5635.5955, "eval_samples_per_second": 1.685, "eval_steps_per_second": 0.211, "eval_wer": 0.09967969250480462, "step": 15000 }, { "epoch": 6.912813434552565, "grad_norm": 0.898334801197052, "learning_rate": 3.1196875e-06, "loss": 0.0087, "step": 15025 }, { "epoch": 6.9243156199677935, "grad_norm": 1.3468067646026611, "learning_rate": 3.111875e-06, "loss": 0.0212, "step": 15050 }, { "epoch": 6.935817805383023, "grad_norm": 0.397684782743454, "learning_rate": 3.1040625e-06, "loss": 0.0127, "step": 15075 }, { "epoch": 6.947319990798252, "grad_norm": 0.8051169514656067, "learning_rate": 3.0962500000000002e-06, "loss": 0.0186, "step": 15100 }, { "epoch": 6.958822176213481, "grad_norm": 0.3554774224758148, "learning_rate": 3.0884375e-06, "loss": 0.0083, "step": 15125 }, { "epoch": 6.97032436162871, "grad_norm": 2.511303424835205, "learning_rate": 3.0806250000000003e-06, "loss": 0.0144, "step": 15150 }, { "epoch": 6.981826547043938, "grad_norm": 0.17072099447250366, "learning_rate": 3.0728125e-06, "loss": 0.0097, "step": 15175 }, { "epoch": 6.993328732459167, "grad_norm": 0.16506649553775787, "learning_rate": 3.0650000000000003e-06, "loss": 0.0163, "step": 15200 }, { "epoch": 7.004830917874396, "grad_norm": 0.12069711089134216, "learning_rate": 3.0571875e-06, "loss": 0.0079, "step": 15225 }, { "epoch": 7.016333103289625, "grad_norm": 0.12159192562103271, "learning_rate": 3.0493750000000003e-06, "loss": 0.007, "step": 15250 }, { "epoch": 7.027835288704854, "grad_norm": 0.15364721417427063, "learning_rate": 3.0415625e-06, "loss": 0.0155, "step": 15275 }, { "epoch": 7.0393374741200825, "grad_norm": 0.516640841960907, "learning_rate": 3.03375e-06, "loss": 0.0092, "step": 15300 }, { "epoch": 7.0508396595353116, "grad_norm": 0.5874799489974976, "learning_rate": 3.0259375e-06, "loss": 0.0079, "step": 15325 }, { "epoch": 7.062341844950541, "grad_norm": 1.2351374626159668, "learning_rate": 3.018125e-06, "loss": 0.0148, "step": 15350 }, { "epoch": 7.07384403036577, "grad_norm": 0.7071799635887146, "learning_rate": 3.0103125000000002e-06, "loss": 0.015, "step": 15375 }, { "epoch": 7.085346215780999, "grad_norm": 7.619936943054199, "learning_rate": 3.0025e-06, "loss": 0.0063, "step": 15400 }, { "epoch": 7.096848401196227, "grad_norm": 0.20653395354747772, "learning_rate": 2.9946875000000003e-06, "loss": 0.0088, "step": 15425 }, { "epoch": 7.108350586611456, "grad_norm": 0.3233853280544281, "learning_rate": 2.986875e-06, "loss": 0.0058, "step": 15450 }, { "epoch": 7.119852772026685, "grad_norm": 1.1703747510910034, "learning_rate": 2.9790625000000003e-06, "loss": 0.0149, "step": 15475 }, { "epoch": 7.131354957441914, "grad_norm": 0.07214687764644623, "learning_rate": 2.97125e-06, "loss": 0.0041, "step": 15500 }, { "epoch": 7.142857142857143, "grad_norm": 1.454972505569458, "learning_rate": 2.9634375000000004e-06, "loss": 0.0121, "step": 15525 }, { "epoch": 7.154359328272371, "grad_norm": 0.7644615769386292, "learning_rate": 2.955625e-06, "loss": 0.0086, "step": 15550 }, { "epoch": 7.1658615136876005, "grad_norm": 3.402597188949585, "learning_rate": 2.9478125000000004e-06, "loss": 0.0095, "step": 15575 }, { "epoch": 7.17736369910283, "grad_norm": 3.2097392082214355, "learning_rate": 2.9400000000000002e-06, "loss": 0.0104, "step": 15600 }, { "epoch": 7.188865884518059, "grad_norm": 1.831574559211731, "learning_rate": 2.9325000000000003e-06, "loss": 0.0181, "step": 15625 }, { "epoch": 7.200368069933288, "grad_norm": 2.661018133163452, "learning_rate": 2.9246875e-06, "loss": 0.0048, "step": 15650 }, { "epoch": 7.211870255348516, "grad_norm": 1.9798295497894287, "learning_rate": 2.9168750000000003e-06, "loss": 0.0109, "step": 15675 }, { "epoch": 7.223372440763745, "grad_norm": 1.1260945796966553, "learning_rate": 2.9090625e-06, "loss": 0.0076, "step": 15700 }, { "epoch": 7.234874626178974, "grad_norm": 0.178892120718956, "learning_rate": 2.90125e-06, "loss": 0.0118, "step": 15725 }, { "epoch": 7.246376811594203, "grad_norm": 2.5324740409851074, "learning_rate": 2.8934374999999998e-06, "loss": 0.0068, "step": 15750 }, { "epoch": 7.257878997009432, "grad_norm": 0.655042290687561, "learning_rate": 2.885625e-06, "loss": 0.0075, "step": 15775 }, { "epoch": 7.26938118242466, "grad_norm": 5.1071624755859375, "learning_rate": 2.8778125000000002e-06, "loss": 0.0065, "step": 15800 }, { "epoch": 7.280883367839889, "grad_norm": 0.35025542974472046, "learning_rate": 2.87e-06, "loss": 0.0171, "step": 15825 }, { "epoch": 7.2923855532551185, "grad_norm": 0.16820687055587769, "learning_rate": 2.8621875000000003e-06, "loss": 0.0077, "step": 15850 }, { "epoch": 7.303887738670348, "grad_norm": 1.0210137367248535, "learning_rate": 2.854375e-06, "loss": 0.0088, "step": 15875 }, { "epoch": 7.315389924085577, "grad_norm": 0.3880836069583893, "learning_rate": 2.8465625000000003e-06, "loss": 0.0155, "step": 15900 }, { "epoch": 7.326892109500805, "grad_norm": 0.7286210060119629, "learning_rate": 2.83875e-06, "loss": 0.0153, "step": 15925 }, { "epoch": 7.338394294916034, "grad_norm": 1.0173991918563843, "learning_rate": 2.8309375000000004e-06, "loss": 0.0082, "step": 15950 }, { "epoch": 7.349896480331263, "grad_norm": 0.4859057664871216, "learning_rate": 2.823125e-06, "loss": 0.0104, "step": 15975 }, { "epoch": 7.361398665746492, "grad_norm": 0.1217811331152916, "learning_rate": 2.8153125000000004e-06, "loss": 0.0075, "step": 16000 }, { "epoch": 7.372900851161721, "grad_norm": 1.074440360069275, "learning_rate": 2.8075000000000002e-06, "loss": 0.0123, "step": 16025 }, { "epoch": 7.384403036576949, "grad_norm": 5.824402332305908, "learning_rate": 2.7996875e-06, "loss": 0.0057, "step": 16050 }, { "epoch": 7.395905221992178, "grad_norm": 2.5496740341186523, "learning_rate": 2.791875e-06, "loss": 0.0098, "step": 16075 }, { "epoch": 7.407407407407407, "grad_norm": 5.917174339294434, "learning_rate": 2.7840625e-06, "loss": 0.0057, "step": 16100 }, { "epoch": 7.4189095928226365, "grad_norm": 0.3634544909000397, "learning_rate": 2.77625e-06, "loss": 0.0128, "step": 16125 }, { "epoch": 7.430411778237866, "grad_norm": 3.414944887161255, "learning_rate": 2.7684375e-06, "loss": 0.0086, "step": 16150 }, { "epoch": 7.441913963653094, "grad_norm": 4.660429000854492, "learning_rate": 2.760625e-06, "loss": 0.0131, "step": 16175 }, { "epoch": 7.453416149068323, "grad_norm": 2.309429407119751, "learning_rate": 2.7528125e-06, "loss": 0.0058, "step": 16200 }, { "epoch": 7.464918334483552, "grad_norm": 5.828795909881592, "learning_rate": 2.745e-06, "loss": 0.0101, "step": 16225 }, { "epoch": 7.476420519898781, "grad_norm": 0.5273516774177551, "learning_rate": 2.7371875e-06, "loss": 0.0059, "step": 16250 }, { "epoch": 7.48792270531401, "grad_norm": 1.4716130495071411, "learning_rate": 2.729375e-06, "loss": 0.014, "step": 16275 }, { "epoch": 7.499424890729238, "grad_norm": 1.8553239107131958, "learning_rate": 2.7215625000000003e-06, "loss": 0.0068, "step": 16300 }, { "epoch": 7.510927076144467, "grad_norm": 2.6758711338043213, "learning_rate": 2.71375e-06, "loss": 0.0112, "step": 16325 }, { "epoch": 7.522429261559696, "grad_norm": 0.19957537949085236, "learning_rate": 2.7059375000000003e-06, "loss": 0.0095, "step": 16350 }, { "epoch": 7.5339314469749254, "grad_norm": 5.1007161140441895, "learning_rate": 2.698125e-06, "loss": 0.0109, "step": 16375 }, { "epoch": 7.545433632390154, "grad_norm": 8.458159446716309, "learning_rate": 2.6903125e-06, "loss": 0.0071, "step": 16400 }, { "epoch": 7.556935817805383, "grad_norm": 0.6693940758705139, "learning_rate": 2.6825e-06, "loss": 0.0187, "step": 16425 }, { "epoch": 7.568438003220612, "grad_norm": 1.4511332511901855, "learning_rate": 2.6746875e-06, "loss": 0.0079, "step": 16450 }, { "epoch": 7.579940188635841, "grad_norm": 3.054114818572998, "learning_rate": 2.666875e-06, "loss": 0.0185, "step": 16475 }, { "epoch": 7.59144237405107, "grad_norm": 6.097264766693115, "learning_rate": 2.6590625e-06, "loss": 0.01, "step": 16500 }, { "epoch": 7.602944559466298, "grad_norm": 1.9860618114471436, "learning_rate": 2.6512500000000002e-06, "loss": 0.0188, "step": 16525 }, { "epoch": 7.614446744881527, "grad_norm": 0.1648847460746765, "learning_rate": 2.6434375e-06, "loss": 0.0114, "step": 16550 }, { "epoch": 7.625948930296756, "grad_norm": 2.5525155067443848, "learning_rate": 2.6356250000000003e-06, "loss": 0.0158, "step": 16575 }, { "epoch": 7.637451115711985, "grad_norm": 2.294396162033081, "learning_rate": 2.6278125e-06, "loss": 0.0093, "step": 16600 }, { "epoch": 7.648953301127214, "grad_norm": 3.0494441986083984, "learning_rate": 2.6200000000000003e-06, "loss": 0.0233, "step": 16625 }, { "epoch": 7.660455486542443, "grad_norm": 0.1415322870016098, "learning_rate": 2.6121875e-06, "loss": 0.0087, "step": 16650 }, { "epoch": 7.671957671957672, "grad_norm": 2.2960498332977295, "learning_rate": 2.6043750000000004e-06, "loss": 0.0139, "step": 16675 }, { "epoch": 7.683459857372901, "grad_norm": 0.5837172269821167, "learning_rate": 2.5965625e-06, "loss": 0.0055, "step": 16700 }, { "epoch": 7.69496204278813, "grad_norm": 1.8971115350723267, "learning_rate": 2.5887500000000004e-06, "loss": 0.0115, "step": 16725 }, { "epoch": 7.706464228203359, "grad_norm": 4.95072603225708, "learning_rate": 2.5809375000000002e-06, "loss": 0.0077, "step": 16750 }, { "epoch": 7.717966413618587, "grad_norm": 2.0752921104431152, "learning_rate": 2.573125e-06, "loss": 0.0132, "step": 16775 }, { "epoch": 7.729468599033816, "grad_norm": 0.07725714892148972, "learning_rate": 2.5653125e-06, "loss": 0.0089, "step": 16800 }, { "epoch": 7.740970784449045, "grad_norm": 0.4947813153266907, "learning_rate": 2.5575e-06, "loss": 0.0089, "step": 16825 }, { "epoch": 7.752472969864274, "grad_norm": 3.427701234817505, "learning_rate": 2.5496875e-06, "loss": 0.0087, "step": 16850 }, { "epoch": 7.763975155279503, "grad_norm": 2.016444683074951, "learning_rate": 2.541875e-06, "loss": 0.0099, "step": 16875 }, { "epoch": 7.7754773406947315, "grad_norm": 0.25170424580574036, "learning_rate": 2.5340625e-06, "loss": 0.007, "step": 16900 }, { "epoch": 7.786979526109961, "grad_norm": 0.6936759948730469, "learning_rate": 2.52625e-06, "loss": 0.0069, "step": 16925 }, { "epoch": 7.79848171152519, "grad_norm": 1.1857188940048218, "learning_rate": 2.5184375e-06, "loss": 0.008, "step": 16950 }, { "epoch": 7.809983896940419, "grad_norm": 3.1954314708709717, "learning_rate": 2.510625e-06, "loss": 0.0147, "step": 16975 }, { "epoch": 7.821486082355648, "grad_norm": 1.4031758308410645, "learning_rate": 2.5028125e-06, "loss": 0.0081, "step": 17000 }, { "epoch": 7.832988267770876, "grad_norm": 0.52959805727005, "learning_rate": 2.4950000000000003e-06, "loss": 0.0123, "step": 17025 }, { "epoch": 7.844490453186105, "grad_norm": 4.232771396636963, "learning_rate": 2.4871875000000005e-06, "loss": 0.0054, "step": 17050 }, { "epoch": 7.855992638601334, "grad_norm": 0.2118764966726303, "learning_rate": 2.4793750000000003e-06, "loss": 0.0149, "step": 17075 }, { "epoch": 7.867494824016563, "grad_norm": 0.4932823181152344, "learning_rate": 2.4715625e-06, "loss": 0.0098, "step": 17100 }, { "epoch": 7.878997009431792, "grad_norm": 3.5741822719573975, "learning_rate": 2.46375e-06, "loss": 0.0128, "step": 17125 }, { "epoch": 7.8904991948470204, "grad_norm": 1.6678423881530762, "learning_rate": 2.4559375e-06, "loss": 0.0071, "step": 17150 }, { "epoch": 7.9020013802622495, "grad_norm": 3.0652177333831787, "learning_rate": 2.448125e-06, "loss": 0.0136, "step": 17175 }, { "epoch": 7.913503565677479, "grad_norm": 1.4408318996429443, "learning_rate": 2.4403125e-06, "loss": 0.0045, "step": 17200 }, { "epoch": 7.925005751092708, "grad_norm": 0.2646098732948303, "learning_rate": 2.4325e-06, "loss": 0.0155, "step": 17225 }, { "epoch": 7.936507936507937, "grad_norm": 2.7373244762420654, "learning_rate": 2.4246875000000002e-06, "loss": 0.0067, "step": 17250 }, { "epoch": 7.948010121923165, "grad_norm": 6.320342540740967, "learning_rate": 2.416875e-06, "loss": 0.0095, "step": 17275 }, { "epoch": 7.959512307338394, "grad_norm": 0.8445401191711426, "learning_rate": 2.4090625000000003e-06, "loss": 0.0061, "step": 17300 }, { "epoch": 7.971014492753623, "grad_norm": 1.2060354948043823, "learning_rate": 2.40125e-06, "loss": 0.0203, "step": 17325 }, { "epoch": 7.982516678168852, "grad_norm": 1.851037621498108, "learning_rate": 2.3934375000000003e-06, "loss": 0.007, "step": 17350 }, { "epoch": 7.994018863584081, "grad_norm": 0.901386022567749, "learning_rate": 2.385625e-06, "loss": 0.0088, "step": 17375 }, { "epoch": 8.00552104899931, "grad_norm": 0.07420093566179276, "learning_rate": 2.3778125000000004e-06, "loss": 0.013, "step": 17400 }, { "epoch": 8.01702323441454, "grad_norm": 0.05671022832393646, "learning_rate": 2.37e-06, "loss": 0.007, "step": 17425 }, { "epoch": 8.028525419829768, "grad_norm": 0.5261373519897461, "learning_rate": 2.3621875e-06, "loss": 0.0071, "step": 17450 }, { "epoch": 8.040027605244996, "grad_norm": 0.4346860349178314, "learning_rate": 2.354375e-06, "loss": 0.0067, "step": 17475 }, { "epoch": 8.051529790660226, "grad_norm": 0.08124396204948425, "learning_rate": 2.3465625e-06, "loss": 0.0113, "step": 17500 }, { "epoch": 8.063031976075454, "grad_norm": 2.8863022327423096, "learning_rate": 2.33875e-06, "loss": 0.0096, "step": 17525 }, { "epoch": 8.074534161490684, "grad_norm": 0.4603404700756073, "learning_rate": 2.3309375e-06, "loss": 0.0111, "step": 17550 }, { "epoch": 8.086036346905912, "grad_norm": 2.8856966495513916, "learning_rate": 2.323125e-06, "loss": 0.0059, "step": 17575 }, { "epoch": 8.09753853232114, "grad_norm": 0.10746220499277115, "learning_rate": 2.3153125e-06, "loss": 0.0095, "step": 17600 }, { "epoch": 8.10904071773637, "grad_norm": 0.5317927002906799, "learning_rate": 2.3075e-06, "loss": 0.006, "step": 17625 }, { "epoch": 8.120542903151598, "grad_norm": 0.14040638506412506, "learning_rate": 2.2996875e-06, "loss": 0.0177, "step": 17650 }, { "epoch": 8.132045088566828, "grad_norm": 0.5447073578834534, "learning_rate": 2.2918750000000004e-06, "loss": 0.0088, "step": 17675 }, { "epoch": 8.143547273982056, "grad_norm": 2.9328360557556152, "learning_rate": 2.2840625e-06, "loss": 0.018, "step": 17700 }, { "epoch": 8.155049459397285, "grad_norm": 0.19200760126113892, "learning_rate": 2.2762500000000004e-06, "loss": 0.0087, "step": 17725 }, { "epoch": 8.166551644812515, "grad_norm": 0.07997579872608185, "learning_rate": 2.2684375000000003e-06, "loss": 0.0087, "step": 17750 }, { "epoch": 8.178053830227743, "grad_norm": 2.795677900314331, "learning_rate": 2.260625e-06, "loss": 0.0095, "step": 17775 }, { "epoch": 8.189556015642973, "grad_norm": 0.10675506293773651, "learning_rate": 2.2528125e-06, "loss": 0.0103, "step": 17800 }, { "epoch": 8.201058201058201, "grad_norm": 1.484521746635437, "learning_rate": 2.245e-06, "loss": 0.0077, "step": 17825 }, { "epoch": 8.21256038647343, "grad_norm": 1.530125379562378, "learning_rate": 2.2371875e-06, "loss": 0.0136, "step": 17850 }, { "epoch": 8.224062571888659, "grad_norm": 8.444382667541504, "learning_rate": 2.229375e-06, "loss": 0.0061, "step": 17875 }, { "epoch": 8.235564757303887, "grad_norm": 0.18373289704322815, "learning_rate": 2.2215625e-06, "loss": 0.0103, "step": 17900 }, { "epoch": 8.247066942719117, "grad_norm": 8.391464233398438, "learning_rate": 2.21375e-06, "loss": 0.007, "step": 17925 }, { "epoch": 8.258569128134345, "grad_norm": 0.1739552766084671, "learning_rate": 2.2059375e-06, "loss": 0.0041, "step": 17950 }, { "epoch": 8.270071313549574, "grad_norm": 2.3926053047180176, "learning_rate": 2.1981250000000002e-06, "loss": 0.0049, "step": 17975 }, { "epoch": 8.281573498964804, "grad_norm": 0.11882667243480682, "learning_rate": 2.1903125e-06, "loss": 0.0137, "step": 18000 }, { "epoch": 8.293075684380032, "grad_norm": 0.7215703129768372, "learning_rate": 2.1825000000000003e-06, "loss": 0.0125, "step": 18025 }, { "epoch": 8.304577869795262, "grad_norm": 0.07454714179039001, "learning_rate": 2.1746875e-06, "loss": 0.0048, "step": 18050 }, { "epoch": 8.31608005521049, "grad_norm": 0.11876709014177322, "learning_rate": 2.1668750000000003e-06, "loss": 0.0083, "step": 18075 }, { "epoch": 8.327582240625718, "grad_norm": 2.6528899669647217, "learning_rate": 2.1590625e-06, "loss": 0.0211, "step": 18100 }, { "epoch": 8.339084426040948, "grad_norm": 0.046709995716810226, "learning_rate": 2.15125e-06, "loss": 0.0039, "step": 18125 }, { "epoch": 8.350586611456176, "grad_norm": 0.5029109716415405, "learning_rate": 2.1434374999999998e-06, "loss": 0.0137, "step": 18150 }, { "epoch": 8.362088796871406, "grad_norm": 8.609116554260254, "learning_rate": 2.135625e-06, "loss": 0.0081, "step": 18175 }, { "epoch": 8.373590982286634, "grad_norm": 0.24618186056613922, "learning_rate": 2.1278125e-06, "loss": 0.0127, "step": 18200 }, { "epoch": 8.385093167701863, "grad_norm": 0.26581060886383057, "learning_rate": 2.12e-06, "loss": 0.0058, "step": 18225 }, { "epoch": 8.396595353117092, "grad_norm": 1.805894374847412, "learning_rate": 2.1125e-06, "loss": 0.0067, "step": 18250 }, { "epoch": 8.40809753853232, "grad_norm": 0.11837717145681381, "learning_rate": 2.1046875e-06, "loss": 0.0085, "step": 18275 }, { "epoch": 8.41959972394755, "grad_norm": 1.3716498613357544, "learning_rate": 2.096875e-06, "loss": 0.01, "step": 18300 }, { "epoch": 8.431101909362779, "grad_norm": 0.13939572870731354, "learning_rate": 2.0890625e-06, "loss": 0.0042, "step": 18325 }, { "epoch": 8.442604094778007, "grad_norm": 0.18078701198101044, "learning_rate": 2.08125e-06, "loss": 0.0062, "step": 18350 }, { "epoch": 8.454106280193237, "grad_norm": 0.35175594687461853, "learning_rate": 2.0734375e-06, "loss": 0.0032, "step": 18375 }, { "epoch": 8.465608465608465, "grad_norm": 1.0184096097946167, "learning_rate": 2.0656250000000002e-06, "loss": 0.0135, "step": 18400 }, { "epoch": 8.477110651023695, "grad_norm": 0.35224124789237976, "learning_rate": 2.0578125e-06, "loss": 0.0061, "step": 18425 }, { "epoch": 8.488612836438923, "grad_norm": 4.74711275100708, "learning_rate": 2.0500000000000003e-06, "loss": 0.0144, "step": 18450 }, { "epoch": 8.500115021854151, "grad_norm": 0.630029022693634, "learning_rate": 2.0421875e-06, "loss": 0.0049, "step": 18475 }, { "epoch": 8.511617207269381, "grad_norm": 3.0329389572143555, "learning_rate": 2.0343750000000003e-06, "loss": 0.0107, "step": 18500 }, { "epoch": 8.52311939268461, "grad_norm": 1.9780049324035645, "learning_rate": 2.0265625e-06, "loss": 0.0048, "step": 18525 }, { "epoch": 8.53462157809984, "grad_norm": 2.6098361015319824, "learning_rate": 2.0187500000000004e-06, "loss": 0.0155, "step": 18550 }, { "epoch": 8.546123763515068, "grad_norm": 0.2777227461338043, "learning_rate": 2.0109375e-06, "loss": 0.0056, "step": 18575 }, { "epoch": 8.557625948930296, "grad_norm": 2.4276092052459717, "learning_rate": 2.003125e-06, "loss": 0.0126, "step": 18600 }, { "epoch": 8.569128134345526, "grad_norm": 0.2835908532142639, "learning_rate": 1.9953125e-06, "loss": 0.0059, "step": 18625 }, { "epoch": 8.580630319760754, "grad_norm": 2.6612679958343506, "learning_rate": 1.9875e-06, "loss": 0.0107, "step": 18650 }, { "epoch": 8.592132505175984, "grad_norm": 1.2350229024887085, "learning_rate": 1.9796875e-06, "loss": 0.0093, "step": 18675 }, { "epoch": 8.603634690591212, "grad_norm": 0.15607894957065582, "learning_rate": 1.971875e-06, "loss": 0.0107, "step": 18700 }, { "epoch": 8.61513687600644, "grad_norm": 0.07845500856637955, "learning_rate": 1.9640625e-06, "loss": 0.0073, "step": 18725 }, { "epoch": 8.62663906142167, "grad_norm": 1.0883415937423706, "learning_rate": 1.95625e-06, "loss": 0.0118, "step": 18750 }, { "epoch": 8.638141246836899, "grad_norm": 0.17504793405532837, "learning_rate": 1.9484375000000004e-06, "loss": 0.0067, "step": 18775 }, { "epoch": 8.649643432252129, "grad_norm": 1.5559245347976685, "learning_rate": 1.940625e-06, "loss": 0.0167, "step": 18800 }, { "epoch": 8.661145617667357, "grad_norm": 0.06407686322927475, "learning_rate": 1.9328125000000004e-06, "loss": 0.0036, "step": 18825 }, { "epoch": 8.672647803082585, "grad_norm": 1.3386480808258057, "learning_rate": 1.925e-06, "loss": 0.011, "step": 18850 }, { "epoch": 8.684149988497815, "grad_norm": 0.0743364468216896, "learning_rate": 1.9171875000000004e-06, "loss": 0.0081, "step": 18875 }, { "epoch": 8.695652173913043, "grad_norm": 2.949956178665161, "learning_rate": 1.9093750000000002e-06, "loss": 0.0072, "step": 18900 }, { "epoch": 8.707154359328273, "grad_norm": 0.7283833026885986, "learning_rate": 1.9015625000000003e-06, "loss": 0.0049, "step": 18925 }, { "epoch": 8.718656544743501, "grad_norm": 0.5794746279716492, "learning_rate": 1.89375e-06, "loss": 0.0101, "step": 18950 }, { "epoch": 8.73015873015873, "grad_norm": 1.550223708152771, "learning_rate": 1.8859375e-06, "loss": 0.0068, "step": 18975 }, { "epoch": 8.74166091557396, "grad_norm": 0.12086187303066254, "learning_rate": 1.8781250000000001e-06, "loss": 0.0096, "step": 19000 }, { "epoch": 8.753163100989187, "grad_norm": 2.682513475418091, "learning_rate": 1.8703125000000001e-06, "loss": 0.0048, "step": 19025 }, { "epoch": 8.764665286404417, "grad_norm": 0.8820897340774536, "learning_rate": 1.8625e-06, "loss": 0.0067, "step": 19050 }, { "epoch": 8.776167471819646, "grad_norm": 1.6323509216308594, "learning_rate": 1.8546875000000002e-06, "loss": 0.0066, "step": 19075 }, { "epoch": 8.787669657234874, "grad_norm": 0.3018206059932709, "learning_rate": 1.846875e-06, "loss": 0.0123, "step": 19100 }, { "epoch": 8.799171842650104, "grad_norm": 0.10852475464344025, "learning_rate": 1.8390625000000002e-06, "loss": 0.0059, "step": 19125 }, { "epoch": 8.810674028065332, "grad_norm": 3.6190342903137207, "learning_rate": 1.83125e-06, "loss": 0.01, "step": 19150 }, { "epoch": 8.822176213480562, "grad_norm": 4.896969795227051, "learning_rate": 1.8234375e-06, "loss": 0.007, "step": 19175 }, { "epoch": 8.83367839889579, "grad_norm": 0.1353834569454193, "learning_rate": 1.8156249999999999e-06, "loss": 0.0082, "step": 19200 }, { "epoch": 8.845180584311018, "grad_norm": 0.18897338211536407, "learning_rate": 1.8078125000000001e-06, "loss": 0.0034, "step": 19225 }, { "epoch": 8.856682769726248, "grad_norm": 1.5457974672317505, "learning_rate": 1.8e-06, "loss": 0.0066, "step": 19250 }, { "epoch": 8.868184955141476, "grad_norm": 0.9404191374778748, "learning_rate": 1.7921875000000002e-06, "loss": 0.0096, "step": 19275 }, { "epoch": 8.879687140556706, "grad_norm": 0.1098804920911789, "learning_rate": 1.784375e-06, "loss": 0.0073, "step": 19300 }, { "epoch": 8.891189325971935, "grad_norm": 1.549627661705017, "learning_rate": 1.7765625000000002e-06, "loss": 0.0043, "step": 19325 }, { "epoch": 8.902691511387163, "grad_norm": 0.5851069092750549, "learning_rate": 1.76875e-06, "loss": 0.007, "step": 19350 }, { "epoch": 8.914193696802393, "grad_norm": 0.2933562994003296, "learning_rate": 1.7609375e-06, "loss": 0.0059, "step": 19375 }, { "epoch": 8.925695882217621, "grad_norm": 0.9020884037017822, "learning_rate": 1.7531250000000003e-06, "loss": 0.0108, "step": 19400 }, { "epoch": 8.93719806763285, "grad_norm": 0.11604174226522446, "learning_rate": 1.7453125e-06, "loss": 0.0078, "step": 19425 }, { "epoch": 8.948700253048079, "grad_norm": 0.2992897033691406, "learning_rate": 1.7375000000000003e-06, "loss": 0.0159, "step": 19450 }, { "epoch": 8.960202438463307, "grad_norm": 6.1458306312561035, "learning_rate": 1.7296875000000001e-06, "loss": 0.0083, "step": 19475 }, { "epoch": 8.971704623878537, "grad_norm": 0.6744798421859741, "learning_rate": 1.7218750000000001e-06, "loss": 0.0126, "step": 19500 }, { "epoch": 8.983206809293765, "grad_norm": 0.13921253383159637, "learning_rate": 1.7140625e-06, "loss": 0.007, "step": 19525 }, { "epoch": 8.994708994708995, "grad_norm": 0.5519439578056335, "learning_rate": 1.7062500000000002e-06, "loss": 0.0081, "step": 19550 }, { "epoch": 9.006211180124224, "grad_norm": 0.20477654039859772, "learning_rate": 1.6984375e-06, "loss": 0.0056, "step": 19575 }, { "epoch": 9.017713365539452, "grad_norm": 0.7848691940307617, "learning_rate": 1.6906250000000002e-06, "loss": 0.0097, "step": 19600 }, { "epoch": 9.029215550954682, "grad_norm": 0.11725778132677078, "learning_rate": 1.6828125e-06, "loss": 0.0098, "step": 19625 }, { "epoch": 9.04071773636991, "grad_norm": 0.863402247428894, "learning_rate": 1.6750000000000003e-06, "loss": 0.0066, "step": 19650 }, { "epoch": 9.05221992178514, "grad_norm": 0.6256903409957886, "learning_rate": 1.6671875e-06, "loss": 0.0051, "step": 19675 }, { "epoch": 9.063722107200368, "grad_norm": 0.27045947313308716, "learning_rate": 1.659375e-06, "loss": 0.0062, "step": 19700 }, { "epoch": 9.075224292615596, "grad_norm": 1.267508864402771, "learning_rate": 1.6515625e-06, "loss": 0.0096, "step": 19725 }, { "epoch": 9.086726478030826, "grad_norm": 0.2194228619337082, "learning_rate": 1.6437500000000001e-06, "loss": 0.0073, "step": 19750 }, { "epoch": 9.098228663446054, "grad_norm": 0.7281507849693298, "learning_rate": 1.6359375e-06, "loss": 0.0109, "step": 19775 }, { "epoch": 9.109730848861284, "grad_norm": 0.041582778096199036, "learning_rate": 1.6281250000000002e-06, "loss": 0.0053, "step": 19800 }, { "epoch": 9.121233034276512, "grad_norm": 1.166810393333435, "learning_rate": 1.6203125e-06, "loss": 0.005, "step": 19825 }, { "epoch": 9.13273521969174, "grad_norm": 1.6084290742874146, "learning_rate": 1.6125e-06, "loss": 0.0039, "step": 19850 }, { "epoch": 9.14423740510697, "grad_norm": 0.07307042181491852, "learning_rate": 1.6046875e-06, "loss": 0.0078, "step": 19875 }, { "epoch": 9.155739590522199, "grad_norm": 0.04875970631837845, "learning_rate": 1.596875e-06, "loss": 0.0052, "step": 19900 }, { "epoch": 9.167241775937429, "grad_norm": 0.23526020348072052, "learning_rate": 1.5890624999999999e-06, "loss": 0.0129, "step": 19925 }, { "epoch": 9.178743961352657, "grad_norm": 0.24947020411491394, "learning_rate": 1.5812500000000001e-06, "loss": 0.0061, "step": 19950 }, { "epoch": 9.190246146767885, "grad_norm": 0.8459863066673279, "learning_rate": 1.5734375e-06, "loss": 0.008, "step": 19975 }, { "epoch": 9.201748332183115, "grad_norm": 0.057964421808719635, "learning_rate": 1.5656250000000002e-06, "loss": 0.0072, "step": 20000 }, { "epoch": 9.201748332183115, "eval_loss": 0.21524043381214142, "eval_runtime": 5582.0879, "eval_samples_per_second": 1.701, "eval_steps_per_second": 0.213, "eval_wer": 0.0967008327994875, "step": 20000 }, { "epoch": 9.213250517598343, "grad_norm": 0.24494586884975433, "learning_rate": 1.5578125000000002e-06, "loss": 0.0123, "step": 20025 }, { "epoch": 9.224752703013573, "grad_norm": 0.16685990989208221, "learning_rate": 1.55e-06, "loss": 0.0032, "step": 20050 }, { "epoch": 9.236254888428801, "grad_norm": 1.2150516510009766, "learning_rate": 1.5421875e-06, "loss": 0.0076, "step": 20075 }, { "epoch": 9.24775707384403, "grad_norm": 3.1026265621185303, "learning_rate": 1.534375e-06, "loss": 0.003, "step": 20100 }, { "epoch": 9.25925925925926, "grad_norm": 1.430039882659912, "learning_rate": 1.5265625e-06, "loss": 0.0064, "step": 20125 }, { "epoch": 9.270761444674488, "grad_norm": 0.9484136700630188, "learning_rate": 1.51875e-06, "loss": 0.0067, "step": 20150 }, { "epoch": 9.282263630089718, "grad_norm": 2.5765624046325684, "learning_rate": 1.5109375e-06, "loss": 0.0079, "step": 20175 }, { "epoch": 9.293765815504946, "grad_norm": 0.821998119354248, "learning_rate": 1.5031250000000001e-06, "loss": 0.0047, "step": 20200 }, { "epoch": 9.305268000920174, "grad_norm": 0.3511562645435333, "learning_rate": 1.4953125e-06, "loss": 0.0103, "step": 20225 }, { "epoch": 9.316770186335404, "grad_norm": 0.04969533905386925, "learning_rate": 1.4875e-06, "loss": 0.0034, "step": 20250 }, { "epoch": 9.328272371750632, "grad_norm": 0.08082418143749237, "learning_rate": 1.4796875e-06, "loss": 0.0072, "step": 20275 }, { "epoch": 9.339774557165862, "grad_norm": 0.06818880885839462, "learning_rate": 1.471875e-06, "loss": 0.0067, "step": 20300 }, { "epoch": 9.35127674258109, "grad_norm": 1.4060240983963013, "learning_rate": 1.4640625000000002e-06, "loss": 0.0071, "step": 20325 }, { "epoch": 9.362778927996319, "grad_norm": 0.20024247467517853, "learning_rate": 1.4562500000000002e-06, "loss": 0.0044, "step": 20350 }, { "epoch": 9.374281113411548, "grad_norm": 1.157453179359436, "learning_rate": 1.4484375e-06, "loss": 0.0094, "step": 20375 }, { "epoch": 9.385783298826777, "grad_norm": 3.1247355937957764, "learning_rate": 1.440625e-06, "loss": 0.0088, "step": 20400 }, { "epoch": 9.397285484242007, "grad_norm": 0.12344136089086533, "learning_rate": 1.4328125e-06, "loss": 0.0114, "step": 20425 }, { "epoch": 9.408787669657235, "grad_norm": 0.16506707668304443, "learning_rate": 1.4250000000000001e-06, "loss": 0.007, "step": 20450 }, { "epoch": 9.420289855072463, "grad_norm": 0.12009504437446594, "learning_rate": 1.4171875000000001e-06, "loss": 0.0062, "step": 20475 }, { "epoch": 9.431792040487693, "grad_norm": 0.33941954374313354, "learning_rate": 1.4093750000000002e-06, "loss": 0.0049, "step": 20500 }, { "epoch": 9.443294225902921, "grad_norm": 2.774742603302002, "learning_rate": 1.4015625000000002e-06, "loss": 0.015, "step": 20525 }, { "epoch": 9.454796411318151, "grad_norm": 2.181804656982422, "learning_rate": 1.39375e-06, "loss": 0.0068, "step": 20550 }, { "epoch": 9.46629859673338, "grad_norm": 0.1634143590927124, "learning_rate": 1.3859375e-06, "loss": 0.0131, "step": 20575 }, { "epoch": 9.477800782148607, "grad_norm": 0.28276145458221436, "learning_rate": 1.378125e-06, "loss": 0.0033, "step": 20600 }, { "epoch": 9.489302967563837, "grad_norm": 0.23474666476249695, "learning_rate": 1.3703125e-06, "loss": 0.0076, "step": 20625 }, { "epoch": 9.500805152979066, "grad_norm": 0.23035980761051178, "learning_rate": 1.3625e-06, "loss": 0.0081, "step": 20650 }, { "epoch": 9.512307338394296, "grad_norm": 0.15259630978107452, "learning_rate": 1.3546875e-06, "loss": 0.0078, "step": 20675 }, { "epoch": 9.523809523809524, "grad_norm": 1.3978540897369385, "learning_rate": 1.3468750000000001e-06, "loss": 0.0039, "step": 20700 }, { "epoch": 9.535311709224752, "grad_norm": 1.555802345275879, "learning_rate": 1.3390625e-06, "loss": 0.007, "step": 20725 }, { "epoch": 9.546813894639982, "grad_norm": 0.11839170753955841, "learning_rate": 1.33125e-06, "loss": 0.0045, "step": 20750 }, { "epoch": 9.55831608005521, "grad_norm": 1.8819234371185303, "learning_rate": 1.3234375e-06, "loss": 0.0079, "step": 20775 }, { "epoch": 9.56981826547044, "grad_norm": 2.1347029209136963, "learning_rate": 1.315625e-06, "loss": 0.005, "step": 20800 }, { "epoch": 9.581320450885668, "grad_norm": 0.05001327767968178, "learning_rate": 1.3078125e-06, "loss": 0.0094, "step": 20825 }, { "epoch": 9.592822636300896, "grad_norm": 4.695704460144043, "learning_rate": 1.3e-06, "loss": 0.0041, "step": 20850 }, { "epoch": 9.604324821716126, "grad_norm": 0.27146437764167786, "learning_rate": 1.2921875e-06, "loss": 0.005, "step": 20875 }, { "epoch": 9.615827007131355, "grad_norm": 2.277533769607544, "learning_rate": 1.284375e-06, "loss": 0.0064, "step": 20900 }, { "epoch": 9.627329192546584, "grad_norm": 0.10806547850370407, "learning_rate": 1.2765625e-06, "loss": 0.0063, "step": 20925 }, { "epoch": 9.638831377961813, "grad_norm": 1.3636436462402344, "learning_rate": 1.2690625000000002e-06, "loss": 0.0103, "step": 20950 }, { "epoch": 9.65033356337704, "grad_norm": 2.6159441471099854, "learning_rate": 1.2612500000000002e-06, "loss": 0.0052, "step": 20975 }, { "epoch": 9.66183574879227, "grad_norm": 0.5515117645263672, "learning_rate": 1.2534375e-06, "loss": 0.0077, "step": 21000 }, { "epoch": 9.673337934207499, "grad_norm": 0.8103013038635254, "learning_rate": 1.245625e-06, "loss": 0.0091, "step": 21025 }, { "epoch": 9.684840119622729, "grad_norm": 2.336944103240967, "learning_rate": 1.2378125e-06, "loss": 0.0048, "step": 21050 }, { "epoch": 9.696342305037957, "grad_norm": 3.5888826847076416, "learning_rate": 1.23e-06, "loss": 0.0059, "step": 21075 }, { "epoch": 9.707844490453185, "grad_norm": 2.42950439453125, "learning_rate": 1.2221875e-06, "loss": 0.0071, "step": 21100 }, { "epoch": 9.719346675868415, "grad_norm": 1.5883959531784058, "learning_rate": 1.2143750000000001e-06, "loss": 0.0103, "step": 21125 }, { "epoch": 9.730848861283643, "grad_norm": 0.9261289238929749, "learning_rate": 1.2065625000000001e-06, "loss": 0.0101, "step": 21150 }, { "epoch": 9.742351046698873, "grad_norm": 2.5712246894836426, "learning_rate": 1.19875e-06, "loss": 0.0071, "step": 21175 }, { "epoch": 9.753853232114102, "grad_norm": 6.325390815734863, "learning_rate": 1.1909375e-06, "loss": 0.0042, "step": 21200 }, { "epoch": 9.76535541752933, "grad_norm": 0.3912803828716278, "learning_rate": 1.183125e-06, "loss": 0.0064, "step": 21225 }, { "epoch": 9.77685760294456, "grad_norm": 4.111480712890625, "learning_rate": 1.1753125e-06, "loss": 0.0056, "step": 21250 }, { "epoch": 9.788359788359788, "grad_norm": 1.7652671337127686, "learning_rate": 1.1675e-06, "loss": 0.0061, "step": 21275 }, { "epoch": 9.799861973775018, "grad_norm": 1.497182011604309, "learning_rate": 1.1596875e-06, "loss": 0.0045, "step": 21300 }, { "epoch": 9.811364159190246, "grad_norm": 0.23965400457382202, "learning_rate": 1.151875e-06, "loss": 0.0117, "step": 21325 }, { "epoch": 9.822866344605474, "grad_norm": 1.4831360578536987, "learning_rate": 1.1440625e-06, "loss": 0.0067, "step": 21350 }, { "epoch": 9.834368530020704, "grad_norm": 0.11675738543272018, "learning_rate": 1.13625e-06, "loss": 0.0047, "step": 21375 }, { "epoch": 9.845870715435932, "grad_norm": 0.06517274677753448, "learning_rate": 1.1284375e-06, "loss": 0.0076, "step": 21400 }, { "epoch": 9.857372900851162, "grad_norm": 0.10633418709039688, "learning_rate": 1.120625e-06, "loss": 0.0061, "step": 21425 }, { "epoch": 9.86887508626639, "grad_norm": 0.047500479966402054, "learning_rate": 1.1128125000000002e-06, "loss": 0.0058, "step": 21450 }, { "epoch": 9.880377271681619, "grad_norm": 1.3911513090133667, "learning_rate": 1.1050000000000002e-06, "loss": 0.0097, "step": 21475 }, { "epoch": 9.891879457096849, "grad_norm": 0.05551273375749588, "learning_rate": 1.0971875e-06, "loss": 0.0071, "step": 21500 }, { "epoch": 9.903381642512077, "grad_norm": 0.4141998291015625, "learning_rate": 1.089375e-06, "loss": 0.0068, "step": 21525 }, { "epoch": 9.914883827927307, "grad_norm": 0.11964666098356247, "learning_rate": 1.0815625e-06, "loss": 0.0058, "step": 21550 }, { "epoch": 9.926386013342535, "grad_norm": 0.20705200731754303, "learning_rate": 1.07375e-06, "loss": 0.0082, "step": 21575 }, { "epoch": 9.937888198757763, "grad_norm": 0.06934256851673126, "learning_rate": 1.0659375000000001e-06, "loss": 0.006, "step": 21600 }, { "epoch": 9.949390384172993, "grad_norm": 0.9905650615692139, "learning_rate": 1.0581250000000001e-06, "loss": 0.0082, "step": 21625 }, { "epoch": 9.960892569588221, "grad_norm": 6.720580101013184, "learning_rate": 1.0503125000000002e-06, "loss": 0.0087, "step": 21650 }, { "epoch": 9.972394755003451, "grad_norm": 2.596421480178833, "learning_rate": 1.0425e-06, "loss": 0.0103, "step": 21675 }, { "epoch": 9.98389694041868, "grad_norm": 0.18345965445041656, "learning_rate": 1.0346875e-06, "loss": 0.006, "step": 21700 }, { "epoch": 9.995399125833908, "grad_norm": 0.20510388910770416, "learning_rate": 1.026875e-06, "loss": 0.01, "step": 21725 }, { "epoch": 10.006901311249138, "grad_norm": 1.5538175106048584, "learning_rate": 1.0190625e-06, "loss": 0.0051, "step": 21750 }, { "epoch": 10.018403496664366, "grad_norm": 4.992893695831299, "learning_rate": 1.01125e-06, "loss": 0.007, "step": 21775 }, { "epoch": 10.029905682079596, "grad_norm": 2.5293116569519043, "learning_rate": 1.0034375e-06, "loss": 0.0097, "step": 21800 }, { "epoch": 10.041407867494824, "grad_norm": 2.987748861312866, "learning_rate": 9.95625e-07, "loss": 0.0039, "step": 21825 }, { "epoch": 10.052910052910052, "grad_norm": 0.8729166984558105, "learning_rate": 9.878125000000001e-07, "loss": 0.0102, "step": 21850 }, { "epoch": 10.064412238325282, "grad_norm": 4.021528244018555, "learning_rate": 9.8e-07, "loss": 0.0032, "step": 21875 }, { "epoch": 10.07591442374051, "grad_norm": 0.037765491753816605, "learning_rate": 9.721875e-07, "loss": 0.0061, "step": 21900 }, { "epoch": 10.08741660915574, "grad_norm": 0.4817400574684143, "learning_rate": 9.64375e-07, "loss": 0.0052, "step": 21925 }, { "epoch": 10.098918794570968, "grad_norm": 0.18971405923366547, "learning_rate": 9.565625e-07, "loss": 0.0065, "step": 21950 }, { "epoch": 10.110420979986197, "grad_norm": 2.408292770385742, "learning_rate": 9.4875e-07, "loss": 0.0049, "step": 21975 }, { "epoch": 10.121923165401427, "grad_norm": 1.3557521104812622, "learning_rate": 9.409374999999999e-07, "loss": 0.0069, "step": 22000 }, { "epoch": 10.133425350816655, "grad_norm": 4.988788604736328, "learning_rate": 9.33125e-07, "loss": 0.0037, "step": 22025 }, { "epoch": 10.144927536231885, "grad_norm": 1.5118516683578491, "learning_rate": 9.253125e-07, "loss": 0.0041, "step": 22050 }, { "epoch": 10.156429721647113, "grad_norm": 0.13832196593284607, "learning_rate": 9.175000000000001e-07, "loss": 0.0058, "step": 22075 }, { "epoch": 10.167931907062341, "grad_norm": 2.6763222217559814, "learning_rate": 9.096875000000001e-07, "loss": 0.0073, "step": 22100 }, { "epoch": 10.179434092477571, "grad_norm": 0.1248018741607666, "learning_rate": 9.018750000000002e-07, "loss": 0.01, "step": 22125 }, { "epoch": 10.1909362778928, "grad_norm": 0.22059573233127594, "learning_rate": 8.940625000000001e-07, "loss": 0.0099, "step": 22150 }, { "epoch": 10.20243846330803, "grad_norm": 0.35290199518203735, "learning_rate": 8.862500000000001e-07, "loss": 0.0043, "step": 22175 }, { "epoch": 10.213940648723257, "grad_norm": 0.038650188595056534, "learning_rate": 8.784375000000001e-07, "loss": 0.0073, "step": 22200 }, { "epoch": 10.225442834138486, "grad_norm": 1.4717687368392944, "learning_rate": 8.706250000000001e-07, "loss": 0.0084, "step": 22225 }, { "epoch": 10.236945019553715, "grad_norm": 0.5546708703041077, "learning_rate": 8.628125e-07, "loss": 0.0067, "step": 22250 }, { "epoch": 10.248447204968944, "grad_norm": 0.07813633978366852, "learning_rate": 8.550000000000001e-07, "loss": 0.0034, "step": 22275 }, { "epoch": 10.259949390384174, "grad_norm": 0.40856441855430603, "learning_rate": 8.471875000000001e-07, "loss": 0.0091, "step": 22300 }, { "epoch": 10.271451575799402, "grad_norm": 0.05716840550303459, "learning_rate": 8.39375e-07, "loss": 0.0069, "step": 22325 }, { "epoch": 10.28295376121463, "grad_norm": 0.07827286422252655, "learning_rate": 8.315625e-07, "loss": 0.0066, "step": 22350 }, { "epoch": 10.29445594662986, "grad_norm": 8.593811988830566, "learning_rate": 8.237500000000001e-07, "loss": 0.0088, "step": 22375 }, { "epoch": 10.305958132045088, "grad_norm": 1.3823190927505493, "learning_rate": 8.159375000000001e-07, "loss": 0.0085, "step": 22400 }, { "epoch": 10.317460317460318, "grad_norm": 5.355988025665283, "learning_rate": 8.08125e-07, "loss": 0.0031, "step": 22425 }, { "epoch": 10.328962502875546, "grad_norm": 0.39019063115119934, "learning_rate": 8.003125e-07, "loss": 0.006, "step": 22450 }, { "epoch": 10.340464688290774, "grad_norm": 5.189722537994385, "learning_rate": 7.925e-07, "loss": 0.0064, "step": 22475 }, { "epoch": 10.351966873706004, "grad_norm": 1.724393606185913, "learning_rate": 7.846875000000001e-07, "loss": 0.0067, "step": 22500 }, { "epoch": 10.363469059121233, "grad_norm": 5.20131778717041, "learning_rate": 7.76875e-07, "loss": 0.0077, "step": 22525 }, { "epoch": 10.374971244536463, "grad_norm": 0.0890192911028862, "learning_rate": 7.690625000000001e-07, "loss": 0.0036, "step": 22550 }, { "epoch": 10.38647342995169, "grad_norm": 0.6779229640960693, "learning_rate": 7.612500000000001e-07, "loss": 0.0048, "step": 22575 }, { "epoch": 10.397975615366919, "grad_norm": 0.616266667842865, "learning_rate": 7.534375e-07, "loss": 0.0097, "step": 22600 }, { "epoch": 10.409477800782149, "grad_norm": 0.03939608484506607, "learning_rate": 7.456250000000001e-07, "loss": 0.0047, "step": 22625 }, { "epoch": 10.420979986197377, "grad_norm": 0.14820168912410736, "learning_rate": 7.378125000000001e-07, "loss": 0.0096, "step": 22650 }, { "epoch": 10.432482171612607, "grad_norm": 0.12704187631607056, "learning_rate": 7.3e-07, "loss": 0.0066, "step": 22675 }, { "epoch": 10.443984357027835, "grad_norm": 0.037611182779073715, "learning_rate": 7.221875e-07, "loss": 0.0061, "step": 22700 }, { "epoch": 10.455486542443063, "grad_norm": 4.043584823608398, "learning_rate": 7.14375e-07, "loss": 0.0085, "step": 22725 }, { "epoch": 10.466988727858293, "grad_norm": 2.1938695907592773, "learning_rate": 7.065625000000001e-07, "loss": 0.0123, "step": 22750 }, { "epoch": 10.478490913273522, "grad_norm": 0.1777833104133606, "learning_rate": 6.9875e-07, "loss": 0.0041, "step": 22775 }, { "epoch": 10.489993098688752, "grad_norm": 0.21180225908756256, "learning_rate": 6.909375e-07, "loss": 0.0089, "step": 22800 }, { "epoch": 10.50149528410398, "grad_norm": 0.3625020682811737, "learning_rate": 6.83125e-07, "loss": 0.0052, "step": 22825 }, { "epoch": 10.512997469519208, "grad_norm": 1.3957737684249878, "learning_rate": 6.753124999999999e-07, "loss": 0.0105, "step": 22850 }, { "epoch": 10.524499654934438, "grad_norm": 3.243558168411255, "learning_rate": 6.675000000000001e-07, "loss": 0.005, "step": 22875 }, { "epoch": 10.536001840349666, "grad_norm": 2.227569341659546, "learning_rate": 6.596875000000001e-07, "loss": 0.0081, "step": 22900 }, { "epoch": 10.547504025764896, "grad_norm": 2.815009593963623, "learning_rate": 6.51875e-07, "loss": 0.0078, "step": 22925 }, { "epoch": 10.559006211180124, "grad_norm": 0.16958042979240417, "learning_rate": 6.440625e-07, "loss": 0.0127, "step": 22950 }, { "epoch": 10.570508396595352, "grad_norm": 4.25639009475708, "learning_rate": 6.3625e-07, "loss": 0.0088, "step": 22975 }, { "epoch": 10.582010582010582, "grad_norm": 0.4477657675743103, "learning_rate": 6.284375000000001e-07, "loss": 0.0043, "step": 23000 }, { "epoch": 10.59351276742581, "grad_norm": 2.4832139015197754, "learning_rate": 6.20625e-07, "loss": 0.0045, "step": 23025 }, { "epoch": 10.60501495284104, "grad_norm": 0.2865842282772064, "learning_rate": 6.128125e-07, "loss": 0.007, "step": 23050 }, { "epoch": 10.616517138256269, "grad_norm": 0.3463385999202728, "learning_rate": 6.05e-07, "loss": 0.0068, "step": 23075 }, { "epoch": 10.628019323671497, "grad_norm": 0.08812834322452545, "learning_rate": 5.971875e-07, "loss": 0.005, "step": 23100 }, { "epoch": 10.639521509086727, "grad_norm": 0.11067871749401093, "learning_rate": 5.89375e-07, "loss": 0.0047, "step": 23125 }, { "epoch": 10.651023694501955, "grad_norm": 1.1273283958435059, "learning_rate": 5.815625e-07, "loss": 0.0049, "step": 23150 }, { "epoch": 10.662525879917185, "grad_norm": 0.0541173480451107, "learning_rate": 5.737500000000001e-07, "loss": 0.0042, "step": 23175 }, { "epoch": 10.674028065332413, "grad_norm": 0.04831864312291145, "learning_rate": 5.659375e-07, "loss": 0.0075, "step": 23200 }, { "epoch": 10.685530250747641, "grad_norm": 0.07917584478855133, "learning_rate": 5.581250000000001e-07, "loss": 0.0044, "step": 23225 }, { "epoch": 10.697032436162871, "grad_norm": 2.2876992225646973, "learning_rate": 5.503125000000001e-07, "loss": 0.0073, "step": 23250 }, { "epoch": 10.7085346215781, "grad_norm": 0.03435774892568588, "learning_rate": 5.425e-07, "loss": 0.0031, "step": 23275 }, { "epoch": 10.72003680699333, "grad_norm": 1.8223545551300049, "learning_rate": 5.346875e-07, "loss": 0.0072, "step": 23300 }, { "epoch": 10.731538992408558, "grad_norm": 12.397570610046387, "learning_rate": 5.26875e-07, "loss": 0.0035, "step": 23325 }, { "epoch": 10.743041177823786, "grad_norm": 0.5610597133636475, "learning_rate": 5.19375e-07, "loss": 0.0088, "step": 23350 }, { "epoch": 10.754543363239016, "grad_norm": 0.4081664979457855, "learning_rate": 5.115625e-07, "loss": 0.0051, "step": 23375 }, { "epoch": 10.766045548654244, "grad_norm": 0.123787522315979, "learning_rate": 5.0375e-07, "loss": 0.0069, "step": 23400 }, { "epoch": 10.777547734069474, "grad_norm": 2.293886423110962, "learning_rate": 4.959375000000001e-07, "loss": 0.0057, "step": 23425 }, { "epoch": 10.789049919484702, "grad_norm": 6.182718753814697, "learning_rate": 4.881250000000001e-07, "loss": 0.0082, "step": 23450 }, { "epoch": 10.80055210489993, "grad_norm": 0.033033497631549835, "learning_rate": 4.803125e-07, "loss": 0.0064, "step": 23475 }, { "epoch": 10.81205429031516, "grad_norm": 0.5063899755477905, "learning_rate": 4.7250000000000003e-07, "loss": 0.0107, "step": 23500 }, { "epoch": 10.823556475730388, "grad_norm": 1.6246389150619507, "learning_rate": 4.646875e-07, "loss": 0.0066, "step": 23525 }, { "epoch": 10.835058661145618, "grad_norm": 0.16620787978172302, "learning_rate": 4.56875e-07, "loss": 0.0059, "step": 23550 }, { "epoch": 10.846560846560847, "grad_norm": 0.7030823826789856, "learning_rate": 4.490625e-07, "loss": 0.0022, "step": 23575 }, { "epoch": 10.858063031976075, "grad_norm": 0.8228742480278015, "learning_rate": 4.4125e-07, "loss": 0.0125, "step": 23600 }, { "epoch": 10.869565217391305, "grad_norm": 4.570178031921387, "learning_rate": 4.334375e-07, "loss": 0.0052, "step": 23625 }, { "epoch": 10.881067402806533, "grad_norm": 2.8716719150543213, "learning_rate": 4.25625e-07, "loss": 0.005, "step": 23650 }, { "epoch": 10.892569588221763, "grad_norm": 5.523809909820557, "learning_rate": 4.178125000000001e-07, "loss": 0.0063, "step": 23675 }, { "epoch": 10.904071773636991, "grad_norm": 1.16761314868927, "learning_rate": 4.1000000000000004e-07, "loss": 0.0095, "step": 23700 }, { "epoch": 10.91557395905222, "grad_norm": 0.024232987314462662, "learning_rate": 4.0218750000000007e-07, "loss": 0.0036, "step": 23725 }, { "epoch": 10.92707614446745, "grad_norm": 4.76410436630249, "learning_rate": 3.9437500000000004e-07, "loss": 0.0097, "step": 23750 }, { "epoch": 10.938578329882677, "grad_norm": 5.289037704467773, "learning_rate": 3.8656250000000006e-07, "loss": 0.006, "step": 23775 }, { "epoch": 10.950080515297907, "grad_norm": 0.05810718610882759, "learning_rate": 3.7875000000000003e-07, "loss": 0.0063, "step": 23800 }, { "epoch": 10.961582700713135, "grad_norm": 2.577594757080078, "learning_rate": 3.709375e-07, "loss": 0.0041, "step": 23825 }, { "epoch": 10.973084886128364, "grad_norm": 0.039299797266721725, "learning_rate": 3.63125e-07, "loss": 0.0062, "step": 23850 }, { "epoch": 10.984587071543594, "grad_norm": 1.8657886981964111, "learning_rate": 3.553125e-07, "loss": 0.0029, "step": 23875 }, { "epoch": 10.996089256958822, "grad_norm": 0.037355002015829086, "learning_rate": 3.475e-07, "loss": 0.0068, "step": 23900 }, { "epoch": 11.007591442374052, "grad_norm": 0.03586237132549286, "learning_rate": 3.3968750000000003e-07, "loss": 0.0055, "step": 23925 }, { "epoch": 11.01909362778928, "grad_norm": 1.9938002824783325, "learning_rate": 3.31875e-07, "loss": 0.0054, "step": 23950 }, { "epoch": 11.030595813204508, "grad_norm": 0.05722161382436752, "learning_rate": 3.240625e-07, "loss": 0.0022, "step": 23975 }, { "epoch": 11.042097998619738, "grad_norm": 0.16819870471954346, "learning_rate": 3.1625e-07, "loss": 0.0077, "step": 24000 }, { "epoch": 11.053600184034966, "grad_norm": 1.404698371887207, "learning_rate": 3.084375e-07, "loss": 0.0078, "step": 24025 }, { "epoch": 11.065102369450196, "grad_norm": 1.955828309059143, "learning_rate": 3.00625e-07, "loss": 0.0056, "step": 24050 }, { "epoch": 11.076604554865424, "grad_norm": 0.1421104073524475, "learning_rate": 2.9281250000000006e-07, "loss": 0.0044, "step": 24075 }, { "epoch": 11.088106740280653, "grad_norm": 0.2362927496433258, "learning_rate": 2.85e-07, "loss": 0.0046, "step": 24100 }, { "epoch": 11.099608925695883, "grad_norm": 0.3808918297290802, "learning_rate": 2.771875e-07, "loss": 0.0041, "step": 24125 }, { "epoch": 11.11111111111111, "grad_norm": 0.596932590007782, "learning_rate": 2.69375e-07, "loss": 0.0036, "step": 24150 }, { "epoch": 11.12261329652634, "grad_norm": 2.1200876235961914, "learning_rate": 2.615625e-07, "loss": 0.0055, "step": 24175 }, { "epoch": 11.134115481941569, "grad_norm": 0.17938555777072906, "learning_rate": 2.5375e-07, "loss": 0.0035, "step": 24200 }, { "epoch": 11.145617667356797, "grad_norm": 0.06144664064049721, "learning_rate": 2.4593750000000003e-07, "loss": 0.0066, "step": 24225 }, { "epoch": 11.157119852772027, "grad_norm": 0.05789117515087128, "learning_rate": 2.3812500000000002e-07, "loss": 0.0033, "step": 24250 }, { "epoch": 11.168622038187255, "grad_norm": 1.0644292831420898, "learning_rate": 2.3031250000000002e-07, "loss": 0.0055, "step": 24275 }, { "epoch": 11.180124223602485, "grad_norm": 0.05568142980337143, "learning_rate": 2.2250000000000001e-07, "loss": 0.0096, "step": 24300 }, { "epoch": 11.191626409017713, "grad_norm": 3.572922468185425, "learning_rate": 2.146875e-07, "loss": 0.0058, "step": 24325 }, { "epoch": 11.203128594432942, "grad_norm": 0.3286091089248657, "learning_rate": 2.06875e-07, "loss": 0.0035, "step": 24350 }, { "epoch": 11.214630779848171, "grad_norm": 3.0645318031311035, "learning_rate": 1.9906250000000003e-07, "loss": 0.007, "step": 24375 }, { "epoch": 11.2261329652634, "grad_norm": 9.076870918273926, "learning_rate": 1.9125e-07, "loss": 0.0038, "step": 24400 }, { "epoch": 11.23763515067863, "grad_norm": 0.054851166903972626, "learning_rate": 1.8343750000000002e-07, "loss": 0.0049, "step": 24425 }, { "epoch": 11.249137336093858, "grad_norm": 0.7788383364677429, "learning_rate": 1.75625e-07, "loss": 0.0044, "step": 24450 }, { "epoch": 11.260639521509086, "grad_norm": 0.06403613835573196, "learning_rate": 1.678125e-07, "loss": 0.0055, "step": 24475 }, { "epoch": 11.272141706924316, "grad_norm": 0.1481279581785202, "learning_rate": 1.6e-07, "loss": 0.0043, "step": 24500 }, { "epoch": 11.283643892339544, "grad_norm": 1.9446523189544678, "learning_rate": 1.521875e-07, "loss": 0.0065, "step": 24525 }, { "epoch": 11.295146077754774, "grad_norm": 0.01817043498158455, "learning_rate": 1.44375e-07, "loss": 0.0043, "step": 24550 }, { "epoch": 11.306648263170002, "grad_norm": 2.417325258255005, "learning_rate": 1.3656250000000002e-07, "loss": 0.0037, "step": 24575 }, { "epoch": 11.31815044858523, "grad_norm": 0.08347784727811813, "learning_rate": 1.2875e-07, "loss": 0.0025, "step": 24600 }, { "epoch": 11.32965263400046, "grad_norm": 3.372894763946533, "learning_rate": 1.209375e-07, "loss": 0.0099, "step": 24625 }, { "epoch": 11.341154819415689, "grad_norm": 0.43756258487701416, "learning_rate": 1.1312500000000002e-07, "loss": 0.0058, "step": 24650 }, { "epoch": 11.352657004830919, "grad_norm": 0.4029462933540344, "learning_rate": 1.0531250000000001e-07, "loss": 0.0061, "step": 24675 }, { "epoch": 11.364159190246147, "grad_norm": 4.701152324676514, "learning_rate": 9.75e-08, "loss": 0.0051, "step": 24700 }, { "epoch": 11.375661375661375, "grad_norm": 0.5707642436027527, "learning_rate": 8.96875e-08, "loss": 0.0054, "step": 24725 }, { "epoch": 11.387163561076605, "grad_norm": 0.0709519162774086, "learning_rate": 8.187500000000001e-08, "loss": 0.0031, "step": 24750 }, { "epoch": 11.398665746491833, "grad_norm": 1.804284930229187, "learning_rate": 7.40625e-08, "loss": 0.0052, "step": 24775 }, { "epoch": 11.410167931907063, "grad_norm": 0.04206651449203491, "learning_rate": 6.625e-08, "loss": 0.0038, "step": 24800 }, { "epoch": 11.421670117322291, "grad_norm": 0.5502020716667175, "learning_rate": 5.843750000000001e-08, "loss": 0.0087, "step": 24825 }, { "epoch": 11.43317230273752, "grad_norm": 0.21491751074790955, "learning_rate": 5.0625e-08, "loss": 0.003, "step": 24850 }, { "epoch": 11.44467448815275, "grad_norm": 2.5923168659210205, "learning_rate": 4.28125e-08, "loss": 0.0059, "step": 24875 }, { "epoch": 11.456176673567978, "grad_norm": 0.21680384874343872, "learning_rate": 3.5e-08, "loss": 0.0063, "step": 24900 }, { "epoch": 11.467678858983207, "grad_norm": 0.0767444297671318, "learning_rate": 2.7187499999999998e-08, "loss": 0.0054, "step": 24925 }, { "epoch": 11.479181044398436, "grad_norm": 0.42297929525375366, "learning_rate": 1.9375e-08, "loss": 0.0025, "step": 24950 }, { "epoch": 11.490683229813664, "grad_norm": 0.13791832327842712, "learning_rate": 1.1562500000000002e-08, "loss": 0.0088, "step": 24975 }, { "epoch": 11.502185415228894, "grad_norm": 5.369208812713623, "learning_rate": 3.75e-09, "loss": 0.0074, "step": 25000 }, { "epoch": 11.502185415228894, "eval_loss": 0.2163197249174118, "eval_runtime": 5391.0097, "eval_samples_per_second": 1.761, "eval_steps_per_second": 0.22, "eval_wer": 0.09290518898142217, "step": 25000 }, { "epoch": 11.502185415228894, "step": 25000, "total_flos": 4.082307363540173e+20, "train_loss": 0.05244532932087779, "train_runtime": 190521.5004, "train_samples_per_second": 2.1, "train_steps_per_second": 0.131 } ], "logging_steps": 25, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 12, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.082307363540173e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }