{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.2431108463904117, "eval_steps": 2000, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 29.186527252197266, "learning_rate": 3.6666666666666664e-09, "loss": 0.9853, "step": 25 }, { "epoch": 0.0, "grad_norm": 50.51877975463867, "learning_rate": 7.666666666666666e-09, "loss": 2.3272, "step": 50 }, { "epoch": 0.0, "grad_norm": 19.928226470947266, "learning_rate": 1.1833333333333332e-08, "loss": 0.8806, "step": 75 }, { "epoch": 0.0, "grad_norm": 51.900150299072266, "learning_rate": 1.6e-08, "loss": 2.3406, "step": 100 }, { "epoch": 0.0, "grad_norm": 28.492319107055664, "learning_rate": 2.0166666666666665e-08, "loss": 0.9825, "step": 125 }, { "epoch": 0.0, "grad_norm": 52.842262268066406, "learning_rate": 2.433333333333333e-08, "loss": 2.175, "step": 150 }, { "epoch": 0.0, "grad_norm": 21.88471031188965, "learning_rate": 2.85e-08, "loss": 1.0408, "step": 175 }, { "epoch": 0.0, "grad_norm": 46.56641387939453, "learning_rate": 3.266666666666666e-08, "loss": 2.2269, "step": 200 }, { "epoch": 0.0, "grad_norm": 19.953123092651367, "learning_rate": 3.683333333333333e-08, "loss": 0.981, "step": 225 }, { "epoch": 0.0, "grad_norm": 54.310176849365234, "learning_rate": 4.1e-08, "loss": 2.3529, "step": 250 }, { "epoch": 0.0, "grad_norm": 27.19457244873047, "learning_rate": 4.516666666666667e-08, "loss": 0.9664, "step": 275 }, { "epoch": 0.0, "grad_norm": 46.24717712402344, "learning_rate": 4.933333333333333e-08, "loss": 2.4138, "step": 300 }, { "epoch": 0.0, "grad_norm": 33.52537536621094, "learning_rate": 5.3499999999999996e-08, "loss": 0.8663, "step": 325 }, { "epoch": 0.0, "grad_norm": 52.76784896850586, "learning_rate": 5.766666666666666e-08, "loss": 2.2765, "step": 350 }, { "epoch": 0.0, "grad_norm": 22.990055084228516, "learning_rate": 6.183333333333332e-08, "loss": 0.9368, "step": 375 }, { "epoch": 0.0, "grad_norm": 45.44700241088867, "learning_rate": 6.6e-08, "loss": 2.2677, "step": 400 }, { "epoch": 0.01, "grad_norm": 17.737760543823242, "learning_rate": 7.016666666666667e-08, "loss": 0.8554, "step": 425 }, { "epoch": 0.01, "grad_norm": 42.23236846923828, "learning_rate": 7.433333333333334e-08, "loss": 2.082, "step": 450 }, { "epoch": 0.01, "grad_norm": 23.066301345825195, "learning_rate": 7.85e-08, "loss": 0.9648, "step": 475 }, { "epoch": 0.01, "grad_norm": 40.47060775756836, "learning_rate": 8.266666666666667e-08, "loss": 2.0705, "step": 500 }, { "epoch": 0.01, "grad_norm": 22.501338958740234, "learning_rate": 8.683333333333332e-08, "loss": 0.8634, "step": 525 }, { "epoch": 0.01, "grad_norm": 47.01912307739258, "learning_rate": 9.1e-08, "loss": 1.9144, "step": 550 }, { "epoch": 0.01, "grad_norm": 20.637834548950195, "learning_rate": 9.516666666666667e-08, "loss": 0.8638, "step": 575 }, { "epoch": 0.01, "grad_norm": 42.16878890991211, "learning_rate": 9.933333333333332e-08, "loss": 1.9055, "step": 600 }, { "epoch": 0.01, "grad_norm": 22.79128646850586, "learning_rate": 1.035e-07, "loss": 0.8053, "step": 625 }, { "epoch": 0.01, "grad_norm": 42.09727096557617, "learning_rate": 1.0766666666666665e-07, "loss": 1.7969, "step": 650 }, { "epoch": 0.01, "grad_norm": 25.856449127197266, "learning_rate": 1.1183333333333334e-07, "loss": 0.7391, "step": 675 }, { "epoch": 0.01, "grad_norm": 44.90492248535156, "learning_rate": 1.16e-07, "loss": 1.6681, "step": 700 }, { "epoch": 0.01, "grad_norm": 19.00392723083496, "learning_rate": 1.2016666666666668e-07, "loss": 0.6716, "step": 725 }, { "epoch": 0.01, "grad_norm": 38.95526123046875, "learning_rate": 1.2433333333333332e-07, "loss": 1.358, "step": 750 }, { "epoch": 0.01, "grad_norm": 23.28983497619629, "learning_rate": 1.285e-07, "loss": 0.5689, "step": 775 }, { "epoch": 0.01, "grad_norm": 43.42043685913086, "learning_rate": 1.3266666666666664e-07, "loss": 1.2778, "step": 800 }, { "epoch": 0.01, "grad_norm": 19.808530807495117, "learning_rate": 1.3683333333333333e-07, "loss": 0.5527, "step": 825 }, { "epoch": 0.01, "grad_norm": 35.28702926635742, "learning_rate": 1.4099999999999998e-07, "loss": 1.184, "step": 850 }, { "epoch": 0.01, "grad_norm": 19.137269973754883, "learning_rate": 1.4516666666666665e-07, "loss": 0.5565, "step": 875 }, { "epoch": 0.01, "grad_norm": 41.621971130371094, "learning_rate": 1.4933333333333335e-07, "loss": 1.147, "step": 900 }, { "epoch": 0.01, "grad_norm": 18.03120994567871, "learning_rate": 1.535e-07, "loss": 0.5302, "step": 925 }, { "epoch": 0.01, "grad_norm": 39.525482177734375, "learning_rate": 1.5766666666666666e-07, "loss": 1.1378, "step": 950 }, { "epoch": 0.01, "grad_norm": 25.292404174804688, "learning_rate": 1.6183333333333333e-07, "loss": 0.4831, "step": 975 }, { "epoch": 0.01, "grad_norm": 35.80854797363281, "learning_rate": 1.66e-07, "loss": 1.0397, "step": 1000 }, { "epoch": 0.01, "grad_norm": 18.648828506469727, "learning_rate": 1.7016666666666665e-07, "loss": 0.4555, "step": 1025 }, { "epoch": 0.01, "grad_norm": 39.72472381591797, "learning_rate": 1.7433333333333332e-07, "loss": 1.1202, "step": 1050 }, { "epoch": 0.01, "grad_norm": 16.013608932495117, "learning_rate": 1.785e-07, "loss": 0.4914, "step": 1075 }, { "epoch": 0.01, "grad_norm": 36.78615188598633, "learning_rate": 1.8266666666666666e-07, "loss": 1.0005, "step": 1100 }, { "epoch": 0.01, "grad_norm": 17.770790100097656, "learning_rate": 1.868333333333333e-07, "loss": 0.4111, "step": 1125 }, { "epoch": 0.01, "grad_norm": 31.849889755249023, "learning_rate": 1.91e-07, "loss": 0.927, "step": 1150 }, { "epoch": 0.01, "grad_norm": 18.490398406982422, "learning_rate": 1.9516666666666665e-07, "loss": 0.4016, "step": 1175 }, { "epoch": 0.01, "grad_norm": 31.49574851989746, "learning_rate": 1.9933333333333332e-07, "loss": 0.9365, "step": 1200 }, { "epoch": 0.01, "grad_norm": 17.154340744018555, "learning_rate": 2.035e-07, "loss": 0.4247, "step": 1225 }, { "epoch": 0.02, "grad_norm": 25.48064422607422, "learning_rate": 2.0766666666666666e-07, "loss": 0.9058, "step": 1250 }, { "epoch": 0.02, "grad_norm": 19.94418716430664, "learning_rate": 2.1183333333333333e-07, "loss": 0.449, "step": 1275 }, { "epoch": 0.02, "grad_norm": 29.873273849487305, "learning_rate": 2.1599999999999998e-07, "loss": 0.8359, "step": 1300 }, { "epoch": 0.02, "grad_norm": 15.53857135772705, "learning_rate": 2.2016666666666667e-07, "loss": 0.4133, "step": 1325 }, { "epoch": 0.02, "grad_norm": 28.56229019165039, "learning_rate": 2.2433333333333332e-07, "loss": 0.8403, "step": 1350 }, { "epoch": 0.02, "grad_norm": 8.155872344970703, "learning_rate": 2.285e-07, "loss": 0.3881, "step": 1375 }, { "epoch": 0.02, "grad_norm": 27.127405166625977, "learning_rate": 2.3266666666666666e-07, "loss": 0.822, "step": 1400 }, { "epoch": 0.02, "grad_norm": 11.950821876525879, "learning_rate": 2.3683333333333333e-07, "loss": 0.4201, "step": 1425 }, { "epoch": 0.02, "grad_norm": 31.142114639282227, "learning_rate": 2.41e-07, "loss": 0.8028, "step": 1450 }, { "epoch": 0.02, "grad_norm": 15.139409065246582, "learning_rate": 2.4516666666666665e-07, "loss": 0.3845, "step": 1475 }, { "epoch": 0.02, "grad_norm": 27.636058807373047, "learning_rate": 2.493333333333333e-07, "loss": 0.8359, "step": 1500 }, { "epoch": 0.02, "grad_norm": 25.11219024658203, "learning_rate": 2.535e-07, "loss": 0.4223, "step": 1525 }, { "epoch": 0.02, "grad_norm": 31.15846061706543, "learning_rate": 2.5766666666666663e-07, "loss": 0.875, "step": 1550 }, { "epoch": 0.02, "grad_norm": 19.472412109375, "learning_rate": 2.618333333333333e-07, "loss": 0.3604, "step": 1575 }, { "epoch": 0.02, "grad_norm": 22.84535026550293, "learning_rate": 2.66e-07, "loss": 0.7952, "step": 1600 }, { "epoch": 0.02, "grad_norm": 16.574451446533203, "learning_rate": 2.7016666666666667e-07, "loss": 0.3795, "step": 1625 }, { "epoch": 0.02, "grad_norm": 39.41230392456055, "learning_rate": 2.743333333333333e-07, "loss": 0.8251, "step": 1650 }, { "epoch": 0.02, "grad_norm": 13.901805877685547, "learning_rate": 2.785e-07, "loss": 0.39, "step": 1675 }, { "epoch": 0.02, "grad_norm": 24.57431983947754, "learning_rate": 2.8266666666666666e-07, "loss": 0.8535, "step": 1700 }, { "epoch": 0.02, "grad_norm": 15.14571475982666, "learning_rate": 2.868333333333333e-07, "loss": 0.3597, "step": 1725 }, { "epoch": 0.02, "grad_norm": 23.563344955444336, "learning_rate": 2.9099999999999995e-07, "loss": 0.7706, "step": 1750 }, { "epoch": 0.02, "grad_norm": 17.370380401611328, "learning_rate": 2.951666666666667e-07, "loss": 0.3564, "step": 1775 }, { "epoch": 0.02, "grad_norm": 32.44514083862305, "learning_rate": 2.9933333333333334e-07, "loss": 0.7507, "step": 1800 }, { "epoch": 0.02, "grad_norm": 12.74821949005127, "learning_rate": 3.035e-07, "loss": 0.3674, "step": 1825 }, { "epoch": 0.02, "grad_norm": 22.746973037719727, "learning_rate": 3.0766666666666663e-07, "loss": 0.7485, "step": 1850 }, { "epoch": 0.02, "grad_norm": 16.455074310302734, "learning_rate": 3.118333333333333e-07, "loss": 0.3533, "step": 1875 }, { "epoch": 0.02, "grad_norm": 32.016258239746094, "learning_rate": 3.1599999999999997e-07, "loss": 0.7237, "step": 1900 }, { "epoch": 0.02, "grad_norm": 22.601268768310547, "learning_rate": 3.201666666666666e-07, "loss": 0.4137, "step": 1925 }, { "epoch": 0.02, "grad_norm": 21.461885452270508, "learning_rate": 3.243333333333333e-07, "loss": 0.6908, "step": 1950 }, { "epoch": 0.02, "grad_norm": 16.42440414428711, "learning_rate": 3.285e-07, "loss": 0.3973, "step": 1975 }, { "epoch": 0.02, "grad_norm": 34.0020637512207, "learning_rate": 3.3266666666666665e-07, "loss": 0.748, "step": 2000 }, { "epoch": 0.02, "eval_loss": 0.3752075731754303, "eval_runtime": 5729.5086, "eval_samples_per_second": 1.643, "eval_steps_per_second": 0.205, "eval_wer": 0.21564669788470855, "step": 2000 }, { "epoch": 0.02, "grad_norm": 15.8253755569458, "learning_rate": 3.368333333333333e-07, "loss": 0.328, "step": 2025 }, { "epoch": 0.02, "grad_norm": 27.537580490112305, "learning_rate": 3.41e-07, "loss": 0.7222, "step": 2050 }, { "epoch": 0.03, "grad_norm": 20.178504943847656, "learning_rate": 3.4516666666666664e-07, "loss": 0.4064, "step": 2075 }, { "epoch": 0.03, "grad_norm": 27.599384307861328, "learning_rate": 3.4933333333333334e-07, "loss": 0.8175, "step": 2100 }, { "epoch": 0.03, "grad_norm": 14.29587459564209, "learning_rate": 3.535e-07, "loss": 0.3303, "step": 2125 }, { "epoch": 0.03, "grad_norm": 28.619686126708984, "learning_rate": 3.576666666666667e-07, "loss": 0.632, "step": 2150 }, { "epoch": 0.03, "grad_norm": 20.577468872070312, "learning_rate": 3.618333333333333e-07, "loss": 0.3908, "step": 2175 }, { "epoch": 0.03, "grad_norm": 24.521394729614258, "learning_rate": 3.6599999999999997e-07, "loss": 0.7307, "step": 2200 }, { "epoch": 0.03, "grad_norm": 18.374794006347656, "learning_rate": 3.701666666666666e-07, "loss": 0.4117, "step": 2225 }, { "epoch": 0.03, "grad_norm": 32.217586517333984, "learning_rate": 3.743333333333333e-07, "loss": 0.7281, "step": 2250 }, { "epoch": 0.03, "grad_norm": 14.12385368347168, "learning_rate": 3.785e-07, "loss": 0.3208, "step": 2275 }, { "epoch": 0.03, "grad_norm": 27.859783172607422, "learning_rate": 3.8266666666666665e-07, "loss": 0.7194, "step": 2300 }, { "epoch": 0.03, "grad_norm": 16.94115447998047, "learning_rate": 3.868333333333333e-07, "loss": 0.3764, "step": 2325 }, { "epoch": 0.03, "grad_norm": 28.36104393005371, "learning_rate": 3.91e-07, "loss": 0.7183, "step": 2350 }, { "epoch": 0.03, "grad_norm": 10.514703750610352, "learning_rate": 3.9516666666666664e-07, "loss": 0.3764, "step": 2375 }, { "epoch": 0.03, "grad_norm": 33.07015609741211, "learning_rate": 3.993333333333333e-07, "loss": 0.7127, "step": 2400 }, { "epoch": 0.03, "grad_norm": 25.2418212890625, "learning_rate": 4.0350000000000003e-07, "loss": 0.3723, "step": 2425 }, { "epoch": 0.03, "grad_norm": 25.15533447265625, "learning_rate": 4.076666666666667e-07, "loss": 0.7695, "step": 2450 }, { "epoch": 0.03, "grad_norm": 12.2613525390625, "learning_rate": 4.118333333333333e-07, "loss": 0.3878, "step": 2475 }, { "epoch": 0.03, "grad_norm": 27.38979148864746, "learning_rate": 4.1599999999999997e-07, "loss": 0.7286, "step": 2500 }, { "epoch": 0.03, "grad_norm": 12.769454956054688, "learning_rate": 4.2016666666666666e-07, "loss": 0.3283, "step": 2525 }, { "epoch": 0.03, "grad_norm": 25.558361053466797, "learning_rate": 4.243333333333333e-07, "loss": 0.655, "step": 2550 }, { "epoch": 0.03, "grad_norm": 19.812528610229492, "learning_rate": 4.2849999999999995e-07, "loss": 0.3285, "step": 2575 }, { "epoch": 0.03, "grad_norm": 23.480504989624023, "learning_rate": 4.3266666666666665e-07, "loss": 0.7011, "step": 2600 }, { "epoch": 0.03, "grad_norm": 27.310148239135742, "learning_rate": 4.3683333333333335e-07, "loss": 0.3668, "step": 2625 }, { "epoch": 0.03, "grad_norm": 21.11886978149414, "learning_rate": 4.41e-07, "loss": 0.7359, "step": 2650 }, { "epoch": 0.03, "grad_norm": 13.02730655670166, "learning_rate": 4.4516666666666664e-07, "loss": 0.3218, "step": 2675 }, { "epoch": 0.03, "grad_norm": 23.938663482666016, "learning_rate": 4.493333333333333e-07, "loss": 0.7084, "step": 2700 }, { "epoch": 0.03, "grad_norm": 13.4992094039917, "learning_rate": 4.535e-07, "loss": 0.3586, "step": 2725 }, { "epoch": 0.03, "grad_norm": 24.50682258605957, "learning_rate": 4.576666666666666e-07, "loss": 0.6918, "step": 2750 }, { "epoch": 0.03, "grad_norm": 15.688295364379883, "learning_rate": 4.618333333333333e-07, "loss": 0.3183, "step": 2775 }, { "epoch": 0.03, "grad_norm": 27.072919845581055, "learning_rate": 4.66e-07, "loss": 0.8227, "step": 2800 }, { "epoch": 0.03, "grad_norm": 16.015159606933594, "learning_rate": 4.7016666666666666e-07, "loss": 0.3831, "step": 2825 }, { "epoch": 0.03, "grad_norm": 31.845600128173828, "learning_rate": 4.743333333333333e-07, "loss": 0.6515, "step": 2850 }, { "epoch": 0.03, "grad_norm": 12.855831146240234, "learning_rate": 4.785e-07, "loss": 0.345, "step": 2875 }, { "epoch": 0.04, "grad_norm": 30.276899337768555, "learning_rate": 4.826666666666666e-07, "loss": 0.7189, "step": 2900 }, { "epoch": 0.04, "grad_norm": 13.716350555419922, "learning_rate": 4.868333333333333e-07, "loss": 0.3857, "step": 2925 }, { "epoch": 0.04, "grad_norm": 24.70086097717285, "learning_rate": 4.909999999999999e-07, "loss": 0.6669, "step": 2950 }, { "epoch": 0.04, "grad_norm": 13.5408353805542, "learning_rate": 4.951666666666666e-07, "loss": 0.3805, "step": 2975 }, { "epoch": 0.04, "grad_norm": 27.32679557800293, "learning_rate": 4.993333333333333e-07, "loss": 0.6949, "step": 3000 }, { "epoch": 0.04, "grad_norm": 14.190648078918457, "learning_rate": 5.034999999999999e-07, "loss": 0.3463, "step": 3025 }, { "epoch": 0.04, "grad_norm": 40.79596710205078, "learning_rate": 5.076666666666667e-07, "loss": 0.7188, "step": 3050 }, { "epoch": 0.04, "grad_norm": 16.573617935180664, "learning_rate": 5.118333333333333e-07, "loss": 0.2875, "step": 3075 }, { "epoch": 0.04, "grad_norm": 24.329591751098633, "learning_rate": 5.16e-07, "loss": 0.5879, "step": 3100 }, { "epoch": 0.04, "grad_norm": 14.05192756652832, "learning_rate": 5.201666666666666e-07, "loss": 0.3572, "step": 3125 }, { "epoch": 0.04, "grad_norm": 22.151996612548828, "learning_rate": 5.243333333333333e-07, "loss": 0.6409, "step": 3150 }, { "epoch": 0.04, "grad_norm": 14.75150203704834, "learning_rate": 5.284999999999999e-07, "loss": 0.3657, "step": 3175 }, { "epoch": 0.04, "grad_norm": 31.801061630249023, "learning_rate": 5.326666666666666e-07, "loss": 0.7341, "step": 3200 }, { "epoch": 0.04, "grad_norm": 10.417610168457031, "learning_rate": 5.368333333333334e-07, "loss": 0.3016, "step": 3225 }, { "epoch": 0.04, "grad_norm": 26.536266326904297, "learning_rate": 5.41e-07, "loss": 0.7012, "step": 3250 }, { "epoch": 0.04, "grad_norm": 16.091211318969727, "learning_rate": 5.451666666666667e-07, "loss": 0.3128, "step": 3275 }, { "epoch": 0.04, "grad_norm": 33.78011703491211, "learning_rate": 5.493333333333333e-07, "loss": 0.6206, "step": 3300 }, { "epoch": 0.04, "grad_norm": 15.934347152709961, "learning_rate": 5.535e-07, "loss": 0.3373, "step": 3325 }, { "epoch": 0.04, "grad_norm": 25.661874771118164, "learning_rate": 5.576666666666667e-07, "loss": 0.6734, "step": 3350 }, { "epoch": 0.04, "grad_norm": 11.325174331665039, "learning_rate": 5.618333333333333e-07, "loss": 0.3077, "step": 3375 }, { "epoch": 0.04, "grad_norm": 21.682788848876953, "learning_rate": 5.66e-07, "loss": 0.6616, "step": 3400 }, { "epoch": 0.04, "grad_norm": 16.362476348876953, "learning_rate": 5.701666666666667e-07, "loss": 0.3484, "step": 3425 }, { "epoch": 0.04, "grad_norm": 30.024934768676758, "learning_rate": 5.743333333333334e-07, "loss": 0.6997, "step": 3450 }, { "epoch": 0.04, "grad_norm": 13.67060661315918, "learning_rate": 5.784999999999999e-07, "loss": 0.2889, "step": 3475 }, { "epoch": 0.04, "grad_norm": Infinity, "learning_rate": 5.825e-07, "loss": 0.6621, "step": 3500 }, { "epoch": 0.04, "grad_norm": 22.19521141052246, "learning_rate": 5.866666666666666e-07, "loss": 0.2992, "step": 3525 }, { "epoch": 0.04, "grad_norm": 23.638166427612305, "learning_rate": 5.908333333333333e-07, "loss": 0.633, "step": 3550 }, { "epoch": 0.04, "grad_norm": 13.818583488464355, "learning_rate": 5.949999999999999e-07, "loss": 0.3736, "step": 3575 }, { "epoch": 0.04, "grad_norm": 33.79849624633789, "learning_rate": 5.991666666666666e-07, "loss": 0.665, "step": 3600 }, { "epoch": 0.04, "grad_norm": 13.363014221191406, "learning_rate": 6.033333333333333e-07, "loss": 0.2737, "step": 3625 }, { "epoch": 0.04, "grad_norm": 31.426158905029297, "learning_rate": 6.075e-07, "loss": 0.7285, "step": 3650 }, { "epoch": 0.04, "grad_norm": 19.794960021972656, "learning_rate": 6.116666666666667e-07, "loss": 0.3175, "step": 3675 }, { "epoch": 0.04, "grad_norm": 26.806880950927734, "learning_rate": 6.158333333333333e-07, "loss": 0.6022, "step": 3700 }, { "epoch": 0.05, "grad_norm": 23.275510787963867, "learning_rate": 6.2e-07, "loss": 0.3038, "step": 3725 }, { "epoch": 0.05, "grad_norm": 22.10231590270996, "learning_rate": 6.241666666666666e-07, "loss": 0.6785, "step": 3750 }, { "epoch": 0.05, "grad_norm": 14.206493377685547, "learning_rate": 6.283333333333333e-07, "loss": 0.3128, "step": 3775 }, { "epoch": 0.05, "grad_norm": 19.696733474731445, "learning_rate": 6.324999999999999e-07, "loss": 0.6817, "step": 3800 }, { "epoch": 0.05, "grad_norm": 16.63739013671875, "learning_rate": 6.366666666666667e-07, "loss": 0.3067, "step": 3825 }, { "epoch": 0.05, "grad_norm": 26.20191192626953, "learning_rate": 6.408333333333334e-07, "loss": 0.6781, "step": 3850 }, { "epoch": 0.05, "grad_norm": 21.292455673217773, "learning_rate": 6.45e-07, "loss": 0.2981, "step": 3875 }, { "epoch": 0.05, "grad_norm": 26.61952781677246, "learning_rate": 6.491666666666667e-07, "loss": 0.6331, "step": 3900 }, { "epoch": 0.05, "grad_norm": 15.374119758605957, "learning_rate": 6.533333333333333e-07, "loss": 0.3302, "step": 3925 }, { "epoch": 0.05, "grad_norm": 26.00304412841797, "learning_rate": 6.575e-07, "loss": 0.6184, "step": 3950 }, { "epoch": 0.05, "grad_norm": 20.321468353271484, "learning_rate": 6.616666666666665e-07, "loss": 0.321, "step": 3975 }, { "epoch": 0.05, "grad_norm": 22.67734146118164, "learning_rate": 6.658333333333333e-07, "loss": 0.6051, "step": 4000 }, { "epoch": 0.05, "eval_loss": 0.4198240339756012, "eval_runtime": 5680.6895, "eval_samples_per_second": 1.657, "eval_steps_per_second": 0.207, "eval_wer": 0.1885031487162926, "step": 4000 }, { "epoch": 0.05, "grad_norm": 20.116188049316406, "learning_rate": 6.7e-07, "loss": 0.3214, "step": 4025 }, { "epoch": 0.05, "grad_norm": 21.24591827392578, "learning_rate": 6.741666666666666e-07, "loss": 0.6371, "step": 4050 }, { "epoch": 0.05, "grad_norm": 16.673837661743164, "learning_rate": 6.783333333333333e-07, "loss": 0.3389, "step": 4075 }, { "epoch": 0.05, "grad_norm": 27.16354751586914, "learning_rate": 6.824999999999999e-07, "loss": 0.6304, "step": 4100 }, { "epoch": 0.05, "grad_norm": 18.105676651000977, "learning_rate": 6.866666666666666e-07, "loss": 0.3528, "step": 4125 }, { "epoch": 0.05, "grad_norm": 27.059154510498047, "learning_rate": 6.908333333333333e-07, "loss": 0.6902, "step": 4150 }, { "epoch": 0.05, "grad_norm": 22.998558044433594, "learning_rate": 6.949999999999999e-07, "loss": 0.3802, "step": 4175 }, { "epoch": 0.05, "grad_norm": 27.54330062866211, "learning_rate": 6.991666666666667e-07, "loss": 0.6378, "step": 4200 }, { "epoch": 0.05, "grad_norm": 16.366973876953125, "learning_rate": 7.033333333333333e-07, "loss": 0.339, "step": 4225 }, { "epoch": 0.05, "grad_norm": 25.825971603393555, "learning_rate": 7.075e-07, "loss": 0.5944, "step": 4250 }, { "epoch": 0.05, "grad_norm": 26.39034652709961, "learning_rate": 7.116666666666666e-07, "loss": 0.3246, "step": 4275 }, { "epoch": 0.05, "grad_norm": 24.95716094970703, "learning_rate": 7.158333333333333e-07, "loss": 0.5966, "step": 4300 }, { "epoch": 0.05, "grad_norm": 13.741753578186035, "learning_rate": 7.2e-07, "loss": 0.3068, "step": 4325 }, { "epoch": 0.05, "grad_norm": 24.966617584228516, "learning_rate": 7.241666666666666e-07, "loss": 0.6098, "step": 4350 }, { "epoch": 0.05, "grad_norm": 14.291642189025879, "learning_rate": 7.283333333333334e-07, "loss": 0.3096, "step": 4375 }, { "epoch": 0.05, "grad_norm": 22.74665641784668, "learning_rate": 7.325e-07, "loss": 0.7213, "step": 4400 }, { "epoch": 0.05, "grad_norm": 13.496010780334473, "learning_rate": 7.366666666666667e-07, "loss": 0.2867, "step": 4425 }, { "epoch": 0.05, "grad_norm": 22.160560607910156, "learning_rate": 7.408333333333333e-07, "loss": 0.6883, "step": 4450 }, { "epoch": 0.05, "grad_norm": 15.879670143127441, "learning_rate": 7.45e-07, "loss": 0.3274, "step": 4475 }, { "epoch": 0.05, "grad_norm": 25.978158950805664, "learning_rate": 7.491666666666667e-07, "loss": 0.663, "step": 4500 }, { "epoch": 0.06, "grad_norm": 12.220645904541016, "learning_rate": 7.533333333333332e-07, "loss": 0.3133, "step": 4525 }, { "epoch": 0.06, "grad_norm": 22.561466217041016, "learning_rate": 7.575e-07, "loss": 0.6417, "step": 4550 }, { "epoch": 0.06, "grad_norm": 13.648468017578125, "learning_rate": 7.616666666666666e-07, "loss": 0.2709, "step": 4575 }, { "epoch": 0.06, "grad_norm": 24.237659454345703, "learning_rate": 7.658333333333333e-07, "loss": 0.609, "step": 4600 }, { "epoch": 0.06, "grad_norm": 12.015928268432617, "learning_rate": 7.699999999999999e-07, "loss": 0.3517, "step": 4625 }, { "epoch": 0.06, "grad_norm": 10.939224243164062, "learning_rate": 7.741666666666666e-07, "loss": 0.5428, "step": 4650 }, { "epoch": 0.06, "grad_norm": 21.860904693603516, "learning_rate": 7.783333333333333e-07, "loss": 0.3269, "step": 4675 }, { "epoch": 0.06, "grad_norm": 34.1320915222168, "learning_rate": 7.824999999999999e-07, "loss": 0.6189, "step": 4700 }, { "epoch": 0.06, "grad_norm": 16.4717960357666, "learning_rate": 7.866666666666666e-07, "loss": 0.3147, "step": 4725 }, { "epoch": 0.06, "grad_norm": 24.782644271850586, "learning_rate": 7.908333333333333e-07, "loss": 0.5405, "step": 4750 }, { "epoch": 0.06, "grad_norm": 23.22640037536621, "learning_rate": 7.95e-07, "loss": 0.3325, "step": 4775 }, { "epoch": 0.06, "grad_norm": 25.234392166137695, "learning_rate": 7.991666666666666e-07, "loss": 0.6092, "step": 4800 }, { "epoch": 0.06, "grad_norm": 17.815200805664062, "learning_rate": 8.033333333333333e-07, "loss": 0.3421, "step": 4825 }, { "epoch": 0.06, "grad_norm": 31.845441818237305, "learning_rate": 8.075e-07, "loss": 0.6604, "step": 4850 }, { "epoch": 0.06, "grad_norm": 18.663719177246094, "learning_rate": 8.116666666666666e-07, "loss": 0.3373, "step": 4875 }, { "epoch": 0.06, "grad_norm": 18.988887786865234, "learning_rate": 8.158333333333333e-07, "loss": 0.6191, "step": 4900 }, { "epoch": 0.06, "grad_norm": 14.552395820617676, "learning_rate": 8.199999999999999e-07, "loss": 0.3123, "step": 4925 }, { "epoch": 0.06, "grad_norm": 24.135231018066406, "learning_rate": 8.241666666666667e-07, "loss": 0.5547, "step": 4950 }, { "epoch": 0.06, "grad_norm": 13.5560302734375, "learning_rate": 8.283333333333334e-07, "loss": 0.2659, "step": 4975 }, { "epoch": 0.06, "grad_norm": 23.879039764404297, "learning_rate": 8.325e-07, "loss": 0.5899, "step": 5000 }, { "epoch": 0.06, "grad_norm": 10.661543846130371, "learning_rate": 8.366666666666667e-07, "loss": 0.2815, "step": 5025 }, { "epoch": 0.06, "grad_norm": 23.548585891723633, "learning_rate": 8.408333333333333e-07, "loss": 0.5315, "step": 5050 }, { "epoch": 0.06, "grad_norm": 18.99573516845703, "learning_rate": 8.45e-07, "loss": 0.3102, "step": 5075 }, { "epoch": 0.06, "grad_norm": 25.472843170166016, "learning_rate": 8.491666666666665e-07, "loss": 0.5856, "step": 5100 }, { "epoch": 0.06, "grad_norm": 10.183819770812988, "learning_rate": 8.533333333333334e-07, "loss": 0.2289, "step": 5125 }, { "epoch": 0.06, "grad_norm": 21.010845184326172, "learning_rate": 8.575e-07, "loss": 0.5432, "step": 5150 }, { "epoch": 0.06, "grad_norm": 10.91658878326416, "learning_rate": 8.616666666666666e-07, "loss": 0.2439, "step": 5175 }, { "epoch": 0.06, "grad_norm": 27.883943557739258, "learning_rate": 8.658333333333333e-07, "loss": 0.5995, "step": 5200 }, { "epoch": 0.06, "grad_norm": 13.827203750610352, "learning_rate": 8.699999999999999e-07, "loss": 0.3348, "step": 5225 }, { "epoch": 0.06, "grad_norm": 25.215486526489258, "learning_rate": 8.741666666666666e-07, "loss": 0.559, "step": 5250 }, { "epoch": 0.06, "grad_norm": 19.4741153717041, "learning_rate": 8.783333333333332e-07, "loss": 0.3154, "step": 5275 }, { "epoch": 0.06, "grad_norm": 21.052459716796875, "learning_rate": 8.824999999999999e-07, "loss": 0.581, "step": 5300 }, { "epoch": 0.06, "grad_norm": 14.411347389221191, "learning_rate": 8.866666666666667e-07, "loss": 0.2701, "step": 5325 }, { "epoch": 0.07, "grad_norm": 15.227518081665039, "learning_rate": 8.908333333333333e-07, "loss": 0.6402, "step": 5350 }, { "epoch": 0.07, "grad_norm": 17.570085525512695, "learning_rate": 8.95e-07, "loss": 0.3374, "step": 5375 }, { "epoch": 0.07, "grad_norm": 16.47588348388672, "learning_rate": 8.991666666666666e-07, "loss": 0.5876, "step": 5400 }, { "epoch": 0.07, "grad_norm": 15.246146202087402, "learning_rate": 9.033333333333333e-07, "loss": 0.2909, "step": 5425 }, { "epoch": 0.07, "grad_norm": 22.148481369018555, "learning_rate": 9.074999999999999e-07, "loss": 0.5704, "step": 5450 }, { "epoch": 0.07, "grad_norm": 11.771788597106934, "learning_rate": 9.116666666666666e-07, "loss": 0.284, "step": 5475 }, { "epoch": 0.07, "grad_norm": 34.46316146850586, "learning_rate": 9.158333333333334e-07, "loss": 0.6357, "step": 5500 }, { "epoch": 0.07, "grad_norm": 17.2254695892334, "learning_rate": 9.2e-07, "loss": 0.3433, "step": 5525 }, { "epoch": 0.07, "grad_norm": 23.89180564880371, "learning_rate": 9.241666666666667e-07, "loss": 0.5568, "step": 5550 }, { "epoch": 0.07, "grad_norm": 16.978792190551758, "learning_rate": 9.283333333333333e-07, "loss": 0.3001, "step": 5575 }, { "epoch": 0.07, "grad_norm": 25.58534049987793, "learning_rate": 9.325e-07, "loss": 0.5594, "step": 5600 }, { "epoch": 0.07, "grad_norm": 6.470766067504883, "learning_rate": 9.366666666666666e-07, "loss": 0.3021, "step": 5625 }, { "epoch": 0.07, "grad_norm": 15.57498836517334, "learning_rate": 9.408333333333333e-07, "loss": 0.548, "step": 5650 }, { "epoch": 0.07, "grad_norm": 8.752182960510254, "learning_rate": 9.45e-07, "loss": 0.3141, "step": 5675 }, { "epoch": 0.07, "grad_norm": Infinity, "learning_rate": 9.489999999999999e-07, "loss": 0.6522, "step": 5700 }, { "epoch": 0.07, "grad_norm": 14.224292755126953, "learning_rate": 9.531666666666666e-07, "loss": 0.2806, "step": 5725 }, { "epoch": 0.07, "grad_norm": 28.10686683654785, "learning_rate": 9.573333333333333e-07, "loss": 0.5524, "step": 5750 }, { "epoch": 0.07, "grad_norm": 14.760727882385254, "learning_rate": 9.615e-07, "loss": 0.3168, "step": 5775 }, { "epoch": 0.07, "grad_norm": 21.50145149230957, "learning_rate": 9.656666666666667e-07, "loss": 0.5542, "step": 5800 }, { "epoch": 0.07, "grad_norm": 19.867286682128906, "learning_rate": 9.698333333333332e-07, "loss": 0.2855, "step": 5825 }, { "epoch": 0.07, "grad_norm": 23.064851760864258, "learning_rate": 9.74e-07, "loss": 0.5558, "step": 5850 }, { "epoch": 0.07, "grad_norm": 21.55337905883789, "learning_rate": 9.781666666666666e-07, "loss": 0.2855, "step": 5875 }, { "epoch": 0.07, "grad_norm": 28.359773635864258, "learning_rate": 9.823333333333333e-07, "loss": 0.5859, "step": 5900 }, { "epoch": 0.07, "grad_norm": 14.382160186767578, "learning_rate": 9.865e-07, "loss": 0.3203, "step": 5925 }, { "epoch": 0.07, "grad_norm": 17.879419326782227, "learning_rate": 9.906666666666667e-07, "loss": 0.5121, "step": 5950 }, { "epoch": 0.07, "grad_norm": 19.03439712524414, "learning_rate": 9.948333333333334e-07, "loss": 0.3244, "step": 5975 }, { "epoch": 0.07, "grad_norm": 20.255910873413086, "learning_rate": 9.989999999999999e-07, "loss": 0.5578, "step": 6000 }, { "epoch": 0.07, "eval_loss": 0.424468457698822, "eval_runtime": 5766.0002, "eval_samples_per_second": 1.633, "eval_steps_per_second": 0.204, "eval_wer": 0.17566607460035524, "step": 6000 }, { "epoch": 0.07, "grad_norm": 10.418182373046875, "learning_rate": 9.955e-07, "loss": 0.2279, "step": 6025 }, { "epoch": 0.07, "grad_norm": 32.746212005615234, "learning_rate": 9.8975e-07, "loss": 0.617, "step": 6050 }, { "epoch": 0.07, "grad_norm": 14.78303050994873, "learning_rate": 9.835e-07, "loss": 0.2966, "step": 6075 }, { "epoch": 0.07, "grad_norm": 24.772193908691406, "learning_rate": 9.772499999999998e-07, "loss": 0.5872, "step": 6100 }, { "epoch": 0.07, "grad_norm": 17.655696868896484, "learning_rate": 9.709999999999999e-07, "loss": 0.3257, "step": 6125 }, { "epoch": 0.07, "grad_norm": 22.594074249267578, "learning_rate": 9.6475e-07, "loss": 0.6294, "step": 6150 }, { "epoch": 0.08, "grad_norm": 15.015108108520508, "learning_rate": 9.585e-07, "loss": 0.292, "step": 6175 }, { "epoch": 0.08, "grad_norm": 27.46025276184082, "learning_rate": 9.5225e-07, "loss": 0.5023, "step": 6200 }, { "epoch": 0.08, "grad_norm": 15.613073348999023, "learning_rate": 9.459999999999999e-07, "loss": 0.2915, "step": 6225 }, { "epoch": 0.08, "grad_norm": 24.478090286254883, "learning_rate": 9.3975e-07, "loss": 0.5793, "step": 6250 }, { "epoch": 0.08, "grad_norm": 15.330205917358398, "learning_rate": 9.334999999999999e-07, "loss": 0.366, "step": 6275 }, { "epoch": 0.08, "grad_norm": 25.022016525268555, "learning_rate": 9.2725e-07, "loss": 0.5548, "step": 6300 }, { "epoch": 0.08, "grad_norm": 13.249412536621094, "learning_rate": 9.21e-07, "loss": 0.3341, "step": 6325 }, { "epoch": 0.08, "grad_norm": 25.130050659179688, "learning_rate": 9.147499999999999e-07, "loss": 0.5431, "step": 6350 }, { "epoch": 0.08, "grad_norm": 15.13709831237793, "learning_rate": 9.085e-07, "loss": 0.2728, "step": 6375 }, { "epoch": 0.08, "grad_norm": 19.128374099731445, "learning_rate": 9.022499999999999e-07, "loss": 0.5788, "step": 6400 }, { "epoch": 0.08, "grad_norm": 13.50425910949707, "learning_rate": 8.96e-07, "loss": 0.3066, "step": 6425 }, { "epoch": 0.08, "grad_norm": 27.552776336669922, "learning_rate": 8.8975e-07, "loss": 0.5665, "step": 6450 }, { "epoch": 0.08, "grad_norm": 16.90278434753418, "learning_rate": 8.834999999999999e-07, "loss": 0.3108, "step": 6475 }, { "epoch": 0.08, "grad_norm": 15.420982360839844, "learning_rate": 8.772499999999999e-07, "loss": 0.5931, "step": 6500 }, { "epoch": 0.08, "grad_norm": 12.858573913574219, "learning_rate": 8.71e-07, "loss": 0.2762, "step": 6525 }, { "epoch": 0.08, "grad_norm": 30.493162155151367, "learning_rate": 8.6475e-07, "loss": 0.548, "step": 6550 }, { "epoch": 0.08, "grad_norm": 21.05153465270996, "learning_rate": 8.585e-07, "loss": 0.2743, "step": 6575 }, { "epoch": 0.08, "grad_norm": 24.79546546936035, "learning_rate": 8.522499999999999e-07, "loss": 0.5789, "step": 6600 }, { "epoch": 0.08, "grad_norm": 5.389392375946045, "learning_rate": 8.459999999999999e-07, "loss": 0.2551, "step": 6625 }, { "epoch": 0.08, "grad_norm": 29.287887573242188, "learning_rate": 8.3975e-07, "loss": 0.5231, "step": 6650 }, { "epoch": 0.08, "grad_norm": 15.32079792022705, "learning_rate": 8.334999999999999e-07, "loss": 0.2667, "step": 6675 }, { "epoch": 0.08, "grad_norm": 21.276037216186523, "learning_rate": 8.2725e-07, "loss": 0.539, "step": 6700 }, { "epoch": 0.08, "grad_norm": 10.917491912841797, "learning_rate": 8.21e-07, "loss": 0.2951, "step": 6725 }, { "epoch": 0.08, "grad_norm": 23.395267486572266, "learning_rate": 8.147499999999999e-07, "loss": 0.5505, "step": 6750 }, { "epoch": 0.08, "grad_norm": 20.620159149169922, "learning_rate": 8.085e-07, "loss": 0.3287, "step": 6775 }, { "epoch": 0.08, "grad_norm": 32.70819091796875, "learning_rate": 8.0225e-07, "loss": 0.6325, "step": 6800 }, { "epoch": 0.08, "grad_norm": 15.693964004516602, "learning_rate": 7.96e-07, "loss": 0.2545, "step": 6825 }, { "epoch": 0.08, "grad_norm": 21.390012741088867, "learning_rate": 7.897499999999999e-07, "loss": 0.5113, "step": 6850 }, { "epoch": 0.08, "grad_norm": 16.327972412109375, "learning_rate": 7.834999999999999e-07, "loss": 0.2523, "step": 6875 }, { "epoch": 0.08, "grad_norm": 18.395044326782227, "learning_rate": 7.7725e-07, "loss": 0.5417, "step": 6900 }, { "epoch": 0.08, "grad_norm": 14.424351692199707, "learning_rate": 7.71e-07, "loss": 0.3177, "step": 6925 }, { "epoch": 0.08, "grad_norm": 28.32687759399414, "learning_rate": 7.6475e-07, "loss": 0.5442, "step": 6950 }, { "epoch": 0.08, "grad_norm": 19.87085723876953, "learning_rate": 7.584999999999999e-07, "loss": 0.2768, "step": 6975 }, { "epoch": 0.09, "grad_norm": 24.301137924194336, "learning_rate": 7.5225e-07, "loss": 0.6135, "step": 7000 }, { "epoch": 0.09, "grad_norm": 22.25948715209961, "learning_rate": 7.459999999999999e-07, "loss": 0.3079, "step": 7025 }, { "epoch": 0.09, "grad_norm": 27.84773063659668, "learning_rate": 7.3975e-07, "loss": 0.6219, "step": 7050 }, { "epoch": 0.09, "grad_norm": 14.436989784240723, "learning_rate": 7.335e-07, "loss": 0.3135, "step": 7075 }, { "epoch": 0.09, "grad_norm": 18.467741012573242, "learning_rate": 7.272499999999999e-07, "loss": 0.5659, "step": 7100 }, { "epoch": 0.09, "grad_norm": 10.770480155944824, "learning_rate": 7.21e-07, "loss": 0.2582, "step": 7125 }, { "epoch": 0.09, "grad_norm": 13.723494529724121, "learning_rate": 7.147499999999999e-07, "loss": 0.5401, "step": 7150 }, { "epoch": 0.09, "grad_norm": 16.69199562072754, "learning_rate": 7.085e-07, "loss": 0.2815, "step": 7175 }, { "epoch": 0.09, "grad_norm": 17.908796310424805, "learning_rate": 7.0225e-07, "loss": 0.6891, "step": 7200 }, { "epoch": 0.09, "grad_norm": 21.230213165283203, "learning_rate": 6.959999999999999e-07, "loss": 0.31, "step": 7225 }, { "epoch": 0.09, "grad_norm": 23.39426612854004, "learning_rate": 6.897499999999999e-07, "loss": 0.5378, "step": 7250 }, { "epoch": 0.09, "grad_norm": 11.744900703430176, "learning_rate": 6.835e-07, "loss": 0.3132, "step": 7275 }, { "epoch": 0.09, "grad_norm": 20.57970428466797, "learning_rate": 6.7725e-07, "loss": 0.4722, "step": 7300 }, { "epoch": 0.09, "grad_norm": 17.392757415771484, "learning_rate": 6.71e-07, "loss": 0.336, "step": 7325 }, { "epoch": 0.09, "grad_norm": 25.665088653564453, "learning_rate": 6.6475e-07, "loss": 0.6683, "step": 7350 }, { "epoch": 0.09, "grad_norm": 16.384979248046875, "learning_rate": 6.584999999999999e-07, "loss": 0.2941, "step": 7375 }, { "epoch": 0.09, "grad_norm": 16.39459800720215, "learning_rate": 6.5225e-07, "loss": 0.5657, "step": 7400 }, { "epoch": 0.09, "grad_norm": 11.207764625549316, "learning_rate": 6.46e-07, "loss": 0.2659, "step": 7425 }, { "epoch": 0.09, "grad_norm": 30.415802001953125, "learning_rate": 6.3975e-07, "loss": 0.5069, "step": 7450 }, { "epoch": 0.09, "grad_norm": 14.5722074508667, "learning_rate": 6.335e-07, "loss": 0.3173, "step": 7475 }, { "epoch": 0.09, "grad_norm": 19.074825286865234, "learning_rate": 6.272499999999999e-07, "loss": 0.542, "step": 7500 }, { "epoch": 0.09, "grad_norm": 15.403111457824707, "learning_rate": 6.21e-07, "loss": 0.2693, "step": 7525 }, { "epoch": 0.09, "grad_norm": 28.799901962280273, "learning_rate": 6.1475e-07, "loss": 0.5837, "step": 7550 }, { "epoch": 0.09, "grad_norm": 11.103520393371582, "learning_rate": 6.085e-07, "loss": 0.2405, "step": 7575 }, { "epoch": 0.09, "grad_norm": 27.544315338134766, "learning_rate": 6.022499999999999e-07, "loss": 0.5857, "step": 7600 }, { "epoch": 0.09, "grad_norm": 17.446556091308594, "learning_rate": 5.96e-07, "loss": 0.317, "step": 7625 }, { "epoch": 0.09, "grad_norm": 26.29592514038086, "learning_rate": 5.897499999999999e-07, "loss": 0.5691, "step": 7650 }, { "epoch": 0.09, "grad_norm": 16.815860748291016, "learning_rate": 5.835e-07, "loss": 0.3095, "step": 7675 }, { "epoch": 0.09, "grad_norm": 25.93883514404297, "learning_rate": 5.772500000000001e-07, "loss": 0.6384, "step": 7700 }, { "epoch": 0.09, "grad_norm": 19.090412139892578, "learning_rate": 5.709999999999999e-07, "loss": 0.2832, "step": 7725 }, { "epoch": 0.09, "grad_norm": 26.75637435913086, "learning_rate": 5.6475e-07, "loss": 0.5051, "step": 7750 }, { "epoch": 0.09, "grad_norm": 15.7750883102417, "learning_rate": 5.584999999999999e-07, "loss": 0.2899, "step": 7775 }, { "epoch": 0.09, "grad_norm": 24.66639518737793, "learning_rate": 5.5225e-07, "loss": 0.5858, "step": 7800 }, { "epoch": 0.1, "grad_norm": 12.055851936340332, "learning_rate": 5.46e-07, "loss": 0.2841, "step": 7825 }, { "epoch": 0.1, "grad_norm": 24.859352111816406, "learning_rate": 5.397499999999999e-07, "loss": 0.5694, "step": 7850 }, { "epoch": 0.1, "grad_norm": 10.433359146118164, "learning_rate": 5.335e-07, "loss": 0.2717, "step": 7875 }, { "epoch": 0.1, "grad_norm": 22.402997970581055, "learning_rate": 5.2725e-07, "loss": 0.553, "step": 7900 }, { "epoch": 0.1, "grad_norm": 19.177494049072266, "learning_rate": 5.21e-07, "loss": 0.2599, "step": 7925 }, { "epoch": 0.1, "grad_norm": 31.914413452148438, "learning_rate": 5.1475e-07, "loss": 0.5566, "step": 7950 }, { "epoch": 0.1, "grad_norm": 14.158437728881836, "learning_rate": 5.085e-07, "loss": 0.2437, "step": 7975 }, { "epoch": 0.1, "grad_norm": 21.340044021606445, "learning_rate": 5.022499999999999e-07, "loss": 0.4632, "step": 8000 }, { "epoch": 0.1, "eval_loss": 0.43929019570350647, "eval_runtime": 5740.2852, "eval_samples_per_second": 1.64, "eval_steps_per_second": 0.205, "eval_wer": 0.17359922493137414, "step": 8000 }, { "epoch": 0.1, "grad_norm": 15.289112091064453, "learning_rate": 4.96e-07, "loss": 0.29, "step": 8025 }, { "epoch": 0.1, "grad_norm": 22.75160789489746, "learning_rate": 4.9e-07, "loss": 0.5262, "step": 8050 }, { "epoch": 0.1, "grad_norm": 30.43035125732422, "learning_rate": 4.8375e-07, "loss": 0.3062, "step": 8075 }, { "epoch": 0.1, "grad_norm": 31.55746841430664, "learning_rate": 4.775e-07, "loss": 0.5132, "step": 8100 }, { "epoch": 0.1, "grad_norm": 17.411041259765625, "learning_rate": 4.7125e-07, "loss": 0.2732, "step": 8125 }, { "epoch": 0.1, "grad_norm": 11.30186653137207, "learning_rate": 4.65e-07, "loss": 0.5143, "step": 8150 }, { "epoch": 0.1, "grad_norm": 15.796743392944336, "learning_rate": 4.5874999999999995e-07, "loss": 0.302, "step": 8175 }, { "epoch": 0.1, "grad_norm": 23.819459915161133, "learning_rate": 4.525e-07, "loss": 0.5925, "step": 8200 }, { "epoch": 0.1, "grad_norm": 18.643949508666992, "learning_rate": 4.4624999999999996e-07, "loss": 0.2842, "step": 8225 }, { "epoch": 0.1, "grad_norm": 25.308483123779297, "learning_rate": 4.3999999999999997e-07, "loss": 0.5643, "step": 8250 }, { "epoch": 0.1, "grad_norm": 11.329629898071289, "learning_rate": 4.3375000000000003e-07, "loss": 0.3107, "step": 8275 }, { "epoch": 0.1, "grad_norm": 20.567607879638672, "learning_rate": 4.275e-07, "loss": 0.5414, "step": 8300 }, { "epoch": 0.1, "grad_norm": 12.207862854003906, "learning_rate": 4.2125e-07, "loss": 0.2623, "step": 8325 }, { "epoch": 0.1, "grad_norm": 18.38344955444336, "learning_rate": 4.1499999999999994e-07, "loss": 0.5246, "step": 8350 }, { "epoch": 0.1, "grad_norm": 9.61295223236084, "learning_rate": 4.0875e-07, "loss": 0.3014, "step": 8375 }, { "epoch": 0.1, "grad_norm": 24.633577346801758, "learning_rate": 4.025e-07, "loss": 0.5996, "step": 8400 }, { "epoch": 0.1, "grad_norm": 8.710371971130371, "learning_rate": 3.9624999999999996e-07, "loss": 0.2369, "step": 8425 }, { "epoch": 0.1, "grad_norm": 20.97711753845215, "learning_rate": 3.8999999999999997e-07, "loss": 0.5599, "step": 8450 }, { "epoch": 0.1, "grad_norm": 8.884387016296387, "learning_rate": 3.8375e-07, "loss": 0.2391, "step": 8475 }, { "epoch": 0.1, "grad_norm": 22.268924713134766, "learning_rate": 3.775e-07, "loss": 0.5217, "step": 8500 }, { "epoch": 0.1, "grad_norm": 14.277334213256836, "learning_rate": 3.7125e-07, "loss": 0.3079, "step": 8525 }, { "epoch": 0.1, "grad_norm": 30.062461853027344, "learning_rate": 3.65e-07, "loss": 0.5072, "step": 8550 }, { "epoch": 0.1, "grad_norm": 18.741954803466797, "learning_rate": 3.5875e-07, "loss": 0.2697, "step": 8575 }, { "epoch": 0.1, "grad_norm": 27.5913028717041, "learning_rate": 3.5249999999999996e-07, "loss": 0.5521, "step": 8600 }, { "epoch": 0.1, "grad_norm": 10.070502281188965, "learning_rate": 3.4624999999999997e-07, "loss": 0.2877, "step": 8625 }, { "epoch": 0.11, "grad_norm": 18.297412872314453, "learning_rate": 3.4000000000000003e-07, "loss": 0.5527, "step": 8650 }, { "epoch": 0.11, "grad_norm": 11.771409034729004, "learning_rate": 3.3375e-07, "loss": 0.2221, "step": 8675 }, { "epoch": 0.11, "grad_norm": 22.42823600769043, "learning_rate": 3.275e-07, "loss": 0.472, "step": 8700 }, { "epoch": 0.11, "grad_norm": 17.16645050048828, "learning_rate": 3.2124999999999994e-07, "loss": 0.2754, "step": 8725 }, { "epoch": 0.11, "grad_norm": 26.470958709716797, "learning_rate": 3.15e-07, "loss": 0.6017, "step": 8750 }, { "epoch": 0.11, "grad_norm": 14.21453857421875, "learning_rate": 3.0875e-07, "loss": 0.2395, "step": 8775 }, { "epoch": 0.11, "grad_norm": 13.669867515563965, "learning_rate": 3.0249999999999996e-07, "loss": 0.4772, "step": 8800 }, { "epoch": 0.11, "grad_norm": 13.404635429382324, "learning_rate": 2.9625e-07, "loss": 0.2681, "step": 8825 }, { "epoch": 0.11, "grad_norm": 32.027488708496094, "learning_rate": 2.9e-07, "loss": 0.591, "step": 8850 }, { "epoch": 0.11, "grad_norm": 12.78139591217041, "learning_rate": 2.8375e-07, "loss": 0.2708, "step": 8875 }, { "epoch": 0.11, "grad_norm": 19.808069229125977, "learning_rate": 2.775e-07, "loss": 0.5718, "step": 8900 }, { "epoch": 0.11, "grad_norm": 8.049223899841309, "learning_rate": 2.7125e-07, "loss": 0.3105, "step": 8925 }, { "epoch": 0.11, "grad_norm": 28.265857696533203, "learning_rate": 2.65e-07, "loss": 0.5694, "step": 8950 }, { "epoch": 0.11, "grad_norm": 11.352721214294434, "learning_rate": 2.5874999999999996e-07, "loss": 0.2624, "step": 8975 }, { "epoch": 0.11, "grad_norm": 26.757156372070312, "learning_rate": 2.5249999999999996e-07, "loss": 0.5656, "step": 9000 }, { "epoch": 0.11, "grad_norm": 18.198179244995117, "learning_rate": 2.4624999999999997e-07, "loss": 0.288, "step": 9025 }, { "epoch": 0.11, "grad_norm": 23.22308349609375, "learning_rate": 2.4e-07, "loss": 0.6548, "step": 9050 }, { "epoch": 0.11, "grad_norm": 11.637288093566895, "learning_rate": 2.3375e-07, "loss": 0.2776, "step": 9075 }, { "epoch": 0.11, "grad_norm": 29.29697036743164, "learning_rate": 2.275e-07, "loss": 0.5056, "step": 9100 }, { "epoch": 0.11, "grad_norm": 17.896018981933594, "learning_rate": 2.2125e-07, "loss": 0.2556, "step": 9125 }, { "epoch": 0.11, "grad_norm": 22.04491424560547, "learning_rate": 2.1499999999999998e-07, "loss": 0.5296, "step": 9150 }, { "epoch": 0.11, "grad_norm": 15.423519134521484, "learning_rate": 2.0874999999999999e-07, "loss": 0.2784, "step": 9175 }, { "epoch": 0.11, "grad_norm": 21.817243576049805, "learning_rate": 2.025e-07, "loss": 0.5813, "step": 9200 }, { "epoch": 0.11, "grad_norm": 14.63697624206543, "learning_rate": 1.9625e-07, "loss": 0.2917, "step": 9225 }, { "epoch": 0.11, "grad_norm": 22.647859573364258, "learning_rate": 1.8999999999999998e-07, "loss": 0.5685, "step": 9250 }, { "epoch": 0.11, "grad_norm": 13.825846672058105, "learning_rate": 1.8375e-07, "loss": 0.2498, "step": 9275 }, { "epoch": 0.11, "grad_norm": 32.008758544921875, "learning_rate": 1.775e-07, "loss": 0.5868, "step": 9300 }, { "epoch": 0.11, "grad_norm": 16.272075653076172, "learning_rate": 1.7125e-07, "loss": 0.269, "step": 9325 }, { "epoch": 0.11, "grad_norm": 18.543703079223633, "learning_rate": 1.65e-07, "loss": 0.5475, "step": 9350 }, { "epoch": 0.11, "grad_norm": 16.70926284790039, "learning_rate": 1.5875e-07, "loss": 0.2535, "step": 9375 }, { "epoch": 0.11, "grad_norm": 20.96632194519043, "learning_rate": 1.525e-07, "loss": 0.5715, "step": 9400 }, { "epoch": 0.11, "grad_norm": 14.177762985229492, "learning_rate": 1.4624999999999998e-07, "loss": 0.2631, "step": 9425 }, { "epoch": 0.11, "grad_norm": 22.189653396606445, "learning_rate": 1.4e-07, "loss": 0.5152, "step": 9450 }, { "epoch": 0.12, "grad_norm": 16.282991409301758, "learning_rate": 1.3375e-07, "loss": 0.2671, "step": 9475 }, { "epoch": 0.12, "grad_norm": 28.795602798461914, "learning_rate": 1.275e-07, "loss": 0.598, "step": 9500 }, { "epoch": 0.12, "grad_norm": 19.492183685302734, "learning_rate": 1.2125e-07, "loss": 0.2908, "step": 9525 }, { "epoch": 0.12, "grad_norm": 23.78717613220215, "learning_rate": 1.15e-07, "loss": 0.4983, "step": 9550 }, { "epoch": 0.12, "grad_norm": 11.744542121887207, "learning_rate": 1.0874999999999999e-07, "loss": 0.2812, "step": 9575 }, { "epoch": 0.12, "grad_norm": 28.317659378051758, "learning_rate": 1.0249999999999998e-07, "loss": 0.6472, "step": 9600 }, { "epoch": 0.12, "grad_norm": 17.57823944091797, "learning_rate": 9.624999999999999e-08, "loss": 0.254, "step": 9625 }, { "epoch": 0.12, "grad_norm": 24.610986709594727, "learning_rate": 9e-08, "loss": 0.5404, "step": 9650 }, { "epoch": 0.12, "grad_norm": 15.964032173156738, "learning_rate": 8.375e-08, "loss": 0.2462, "step": 9675 }, { "epoch": 0.12, "grad_norm": 21.19764518737793, "learning_rate": 7.75e-08, "loss": 0.6009, "step": 9700 }, { "epoch": 0.12, "grad_norm": 15.994060516357422, "learning_rate": 7.124999999999999e-08, "loss": 0.2784, "step": 9725 }, { "epoch": 0.12, "grad_norm": 22.544214248657227, "learning_rate": 6.5e-08, "loss": 0.5595, "step": 9750 }, { "epoch": 0.12, "grad_norm": 15.244584083557129, "learning_rate": 5.8749999999999993e-08, "loss": 0.257, "step": 9775 }, { "epoch": 0.12, "grad_norm": 19.461477279663086, "learning_rate": 5.2499999999999994e-08, "loss": 0.5941, "step": 9800 }, { "epoch": 0.12, "grad_norm": 9.137640953063965, "learning_rate": 4.625e-08, "loss": 0.2462, "step": 9825 }, { "epoch": 0.12, "grad_norm": 16.050039291381836, "learning_rate": 4e-08, "loss": 0.5579, "step": 9850 }, { "epoch": 0.12, "grad_norm": 12.063644409179688, "learning_rate": 3.375e-08, "loss": 0.2443, "step": 9875 }, { "epoch": 0.12, "grad_norm": 19.16944122314453, "learning_rate": 2.7499999999999998e-08, "loss": 0.4084, "step": 9900 }, { "epoch": 0.12, "grad_norm": 13.46724796295166, "learning_rate": 2.1250000000000002e-08, "loss": 0.2948, "step": 9925 }, { "epoch": 0.12, "grad_norm": 23.261869430541992, "learning_rate": 1.5e-08, "loss": 0.5243, "step": 9950 }, { "epoch": 0.12, "grad_norm": 17.960269927978516, "learning_rate": 8.75e-09, "loss": 0.227, "step": 9975 }, { "epoch": 0.12, "grad_norm": 25.144319534301758, "learning_rate": 2.5e-09, "loss": 0.5151, "step": 10000 }, { "epoch": 0.12, "eval_loss": 0.44570982456207275, "eval_runtime": 5594.932, "eval_samples_per_second": 1.683, "eval_steps_per_second": 0.21, "eval_wer": 0.1700952688519296, "step": 10000 }, { "epoch": 0.12, "grad_norm": Infinity, "learning_rate": 7.133571428571429e-07, "loss": 0.2487, "step": 10025 }, { "epoch": 0.12, "grad_norm": Infinity, "learning_rate": 7.116428571428571e-07, "loss": 0.4951, "step": 10050 }, { "epoch": 0.12, "grad_norm": 18.395633697509766, "learning_rate": 7.098571428571429e-07, "loss": 0.2762, "step": 10075 }, { "epoch": 0.12, "grad_norm": 14.118087768554688, "learning_rate": 7.080714285714285e-07, "loss": 0.5376, "step": 10100 }, { "epoch": 0.12, "grad_norm": 15.895506858825684, "learning_rate": 7.062857142857142e-07, "loss": 0.2819, "step": 10125 }, { "epoch": 0.12, "grad_norm": 26.209333419799805, "learning_rate": 7.045e-07, "loss": 0.5038, "step": 10150 }, { "epoch": 0.12, "grad_norm": 9.421802520751953, "learning_rate": 7.027142857142857e-07, "loss": 0.2401, "step": 10175 }, { "epoch": 0.12, "grad_norm": 35.024810791015625, "learning_rate": 7.009285714285714e-07, "loss": 0.5469, "step": 10200 }, { "epoch": 0.12, "grad_norm": 8.198253631591797, "learning_rate": 6.991428571428571e-07, "loss": 0.3045, "step": 10225 }, { "epoch": 0.12, "grad_norm": 23.263959884643555, "learning_rate": 6.973571428571429e-07, "loss": 0.5404, "step": 10250 }, { "epoch": 0.12, "grad_norm": 24.623031616210938, "learning_rate": 6.955714285714286e-07, "loss": 0.2843, "step": 10275 }, { "epoch": 0.13, "grad_norm": 16.614179611206055, "learning_rate": 6.937857142857143e-07, "loss": 0.5677, "step": 10300 }, { "epoch": 0.13, "grad_norm": 13.380021095275879, "learning_rate": 6.919999999999999e-07, "loss": 0.2898, "step": 10325 }, { "epoch": 0.13, "grad_norm": 20.24687957763672, "learning_rate": 6.902142857142856e-07, "loss": 0.5123, "step": 10350 }, { "epoch": 0.13, "grad_norm": 15.209970474243164, "learning_rate": 6.884285714285714e-07, "loss": 0.2592, "step": 10375 }, { "epoch": 0.13, "grad_norm": 25.526763916015625, "learning_rate": 6.86642857142857e-07, "loss": 0.4767, "step": 10400 }, { "epoch": 0.13, "grad_norm": 13.861222267150879, "learning_rate": 6.848571428571428e-07, "loss": 0.3133, "step": 10425 }, { "epoch": 0.13, "grad_norm": 21.006370544433594, "learning_rate": 6.830714285714285e-07, "loss": 0.4938, "step": 10450 }, { "epoch": 0.13, "grad_norm": 19.099258422851562, "learning_rate": 6.812857142857143e-07, "loss": 0.2615, "step": 10475 }, { "epoch": 0.13, "grad_norm": 23.394371032714844, "learning_rate": 6.794999999999999e-07, "loss": 0.5587, "step": 10500 }, { "epoch": 0.13, "grad_norm": 17.32550048828125, "learning_rate": 6.777142857142857e-07, "loss": 0.2864, "step": 10525 }, { "epoch": 0.13, "grad_norm": 21.120290756225586, "learning_rate": 6.759285714285714e-07, "loss": 0.5864, "step": 10550 }, { "epoch": 0.13, "grad_norm": 13.446245193481445, "learning_rate": 6.741428571428572e-07, "loss": 0.2657, "step": 10575 }, { "epoch": 0.13, "grad_norm": 20.756250381469727, "learning_rate": 6.723571428571429e-07, "loss": 0.5236, "step": 10600 }, { "epoch": 0.13, "grad_norm": 19.133296966552734, "learning_rate": 6.705714285714285e-07, "loss": 0.3078, "step": 10625 }, { "epoch": 0.13, "grad_norm": 31.154388427734375, "learning_rate": 6.687857142857143e-07, "loss": 0.5568, "step": 10650 }, { "epoch": 0.13, "grad_norm": 16.301631927490234, "learning_rate": 6.67e-07, "loss": 0.2456, "step": 10675 }, { "epoch": 0.13, "grad_norm": 27.033632278442383, "learning_rate": 6.652142857142858e-07, "loss": 0.5484, "step": 10700 }, { "epoch": 0.13, "grad_norm": 5.143960952758789, "learning_rate": 6.634285714285714e-07, "loss": 0.2738, "step": 10725 }, { "epoch": 0.13, "grad_norm": 24.503541946411133, "learning_rate": 6.616428571428571e-07, "loss": 0.5612, "step": 10750 }, { "epoch": 0.13, "grad_norm": 19.447397232055664, "learning_rate": 6.598571428571428e-07, "loss": 0.2954, "step": 10775 }, { "epoch": 0.13, "grad_norm": 19.933080673217773, "learning_rate": 6.580714285714286e-07, "loss": 0.5517, "step": 10800 }, { "epoch": 0.13, "grad_norm": 18.4548282623291, "learning_rate": 6.562857142857142e-07, "loss": 0.2887, "step": 10825 }, { "epoch": 0.13, "grad_norm": 27.120908737182617, "learning_rate": 6.544999999999999e-07, "loss": 0.5242, "step": 10850 }, { "epoch": 0.13, "grad_norm": 11.658120155334473, "learning_rate": 6.527142857142857e-07, "loss": 0.2301, "step": 10875 }, { "epoch": 0.13, "grad_norm": 21.553525924682617, "learning_rate": 6.509285714285714e-07, "loss": 0.5728, "step": 10900 }, { "epoch": 0.13, "grad_norm": 16.63558006286621, "learning_rate": 6.491428571428571e-07, "loss": 0.2946, "step": 10925 }, { "epoch": 0.13, "grad_norm": 21.972610473632812, "learning_rate": 6.473571428571428e-07, "loss": 0.5271, "step": 10950 }, { "epoch": 0.13, "grad_norm": 12.130209922790527, "learning_rate": 6.455714285714286e-07, "loss": 0.236, "step": 10975 }, { "epoch": 0.13, "grad_norm": 21.220565795898438, "learning_rate": 6.437857142857143e-07, "loss": 0.4897, "step": 11000 }, { "epoch": 0.13, "grad_norm": 16.87418556213379, "learning_rate": 6.42e-07, "loss": 0.3331, "step": 11025 }, { "epoch": 0.13, "grad_norm": 37.632633209228516, "learning_rate": 6.402142857142857e-07, "loss": 0.5686, "step": 11050 }, { "epoch": 0.13, "grad_norm": 22.07012176513672, "learning_rate": 6.384285714285714e-07, "loss": 0.2804, "step": 11075 }, { "epoch": 0.13, "grad_norm": 28.006338119506836, "learning_rate": 6.366428571428572e-07, "loss": 0.6237, "step": 11100 }, { "epoch": 0.14, "grad_norm": 11.953436851501465, "learning_rate": 6.348571428571428e-07, "loss": 0.2531, "step": 11125 }, { "epoch": 0.14, "grad_norm": 18.82573699951172, "learning_rate": 6.330714285714286e-07, "loss": 0.5206, "step": 11150 }, { "epoch": 0.14, "grad_norm": 15.685461044311523, "learning_rate": 6.312857142857143e-07, "loss": 0.2804, "step": 11175 }, { "epoch": 0.14, "grad_norm": 20.35894775390625, "learning_rate": 6.295e-07, "loss": 0.5052, "step": 11200 }, { "epoch": 0.14, "grad_norm": 10.60390567779541, "learning_rate": 6.277142857142856e-07, "loss": 0.2865, "step": 11225 }, { "epoch": 0.14, "grad_norm": 29.205368041992188, "learning_rate": 6.259285714285714e-07, "loss": 0.5974, "step": 11250 }, { "epoch": 0.14, "grad_norm": 17.855728149414062, "learning_rate": 6.241428571428571e-07, "loss": 0.3193, "step": 11275 }, { "epoch": 0.14, "grad_norm": 24.602779388427734, "learning_rate": 6.223571428571428e-07, "loss": 0.4878, "step": 11300 }, { "epoch": 0.14, "grad_norm": 18.369998931884766, "learning_rate": 6.205714285714285e-07, "loss": 0.2899, "step": 11325 }, { "epoch": 0.14, "grad_norm": Infinity, "learning_rate": 6.188571428571429e-07, "loss": 0.474, "step": 11350 }, { "epoch": 0.14, "grad_norm": 12.755717277526855, "learning_rate": 6.170714285714285e-07, "loss": 0.2261, "step": 11375 }, { "epoch": 0.14, "grad_norm": 25.792221069335938, "learning_rate": 6.152857142857143e-07, "loss": 0.5289, "step": 11400 }, { "epoch": 0.14, "grad_norm": 12.499135971069336, "learning_rate": 6.135e-07, "loss": 0.3456, "step": 11425 }, { "epoch": 0.14, "grad_norm": 23.764741897583008, "learning_rate": 6.117142857142858e-07, "loss": 0.4705, "step": 11450 }, { "epoch": 0.14, "grad_norm": 15.811933517456055, "learning_rate": 6.099285714285713e-07, "loss": 0.2818, "step": 11475 }, { "epoch": 0.14, "grad_norm": 27.48798179626465, "learning_rate": 6.081428571428571e-07, "loss": 0.4752, "step": 11500 }, { "epoch": 0.14, "grad_norm": 29.260459899902344, "learning_rate": 6.063571428571428e-07, "loss": 0.3103, "step": 11525 }, { "epoch": 0.14, "grad_norm": 24.313169479370117, "learning_rate": 6.045714285714286e-07, "loss": 0.5345, "step": 11550 }, { "epoch": 0.14, "grad_norm": 14.812594413757324, "learning_rate": 6.027857142857142e-07, "loss": 0.2789, "step": 11575 }, { "epoch": 0.14, "grad_norm": 19.387434005737305, "learning_rate": 6.009999999999999e-07, "loss": 0.5909, "step": 11600 }, { "epoch": 0.14, "grad_norm": 6.080020904541016, "learning_rate": 5.992142857142857e-07, "loss": 0.2259, "step": 11625 }, { "epoch": 0.14, "grad_norm": 23.574949264526367, "learning_rate": 5.974285714285714e-07, "loss": 0.5457, "step": 11650 }, { "epoch": 0.14, "grad_norm": 13.463486671447754, "learning_rate": 5.956428571428571e-07, "loss": 0.2709, "step": 11675 }, { "epoch": 0.14, "grad_norm": 28.031545639038086, "learning_rate": 5.938571428571428e-07, "loss": 0.5709, "step": 11700 }, { "epoch": 0.14, "grad_norm": 16.419267654418945, "learning_rate": 5.920714285714286e-07, "loss": 0.2395, "step": 11725 }, { "epoch": 0.14, "grad_norm": 24.46676254272461, "learning_rate": 5.902857142857143e-07, "loss": 0.4678, "step": 11750 }, { "epoch": 0.14, "grad_norm": 13.469151496887207, "learning_rate": 5.885e-07, "loss": 0.2192, "step": 11775 }, { "epoch": 0.14, "grad_norm": 22.810352325439453, "learning_rate": 5.867142857142857e-07, "loss": 0.5398, "step": 11800 }, { "epoch": 0.14, "grad_norm": 12.745511054992676, "learning_rate": 5.849285714285714e-07, "loss": 0.3098, "step": 11825 }, { "epoch": 0.14, "grad_norm": 18.4826602935791, "learning_rate": 5.831428571428572e-07, "loss": 0.5599, "step": 11850 }, { "epoch": 0.14, "grad_norm": 11.836509704589844, "learning_rate": 5.813571428571428e-07, "loss": 0.2207, "step": 11875 }, { "epoch": 0.14, "grad_norm": 21.229785919189453, "learning_rate": 5.795714285714286e-07, "loss": 0.5776, "step": 11900 }, { "epoch": 0.14, "grad_norm": 11.208019256591797, "learning_rate": 5.777857142857142e-07, "loss": 0.247, "step": 11925 }, { "epoch": 0.15, "grad_norm": 25.267257690429688, "learning_rate": 5.76e-07, "loss": 0.4998, "step": 11950 }, { "epoch": 0.15, "grad_norm": 14.406000137329102, "learning_rate": 5.742142857142856e-07, "loss": 0.2457, "step": 11975 }, { "epoch": 0.15, "grad_norm": 21.350296020507812, "learning_rate": 5.724285714285714e-07, "loss": 0.5158, "step": 12000 }, { "epoch": 0.15, "eval_loss": 0.43139249086380005, "eval_runtime": 6062.945, "eval_samples_per_second": 1.553, "eval_steps_per_second": 0.194, "eval_wer": 0.1670757306636525, "step": 12000 }, { "epoch": 0.15, "grad_norm": 10.665511131286621, "learning_rate": 5.706428571428571e-07, "loss": 0.3042, "step": 12025 }, { "epoch": 0.15, "grad_norm": 20.711769104003906, "learning_rate": 5.688571428571428e-07, "loss": 0.5473, "step": 12050 }, { "epoch": 0.15, "grad_norm": 15.992523193359375, "learning_rate": 5.670714285714285e-07, "loss": 0.2341, "step": 12075 }, { "epoch": 0.15, "grad_norm": 22.357173919677734, "learning_rate": 5.652857142857142e-07, "loss": 0.5332, "step": 12100 }, { "epoch": 0.15, "grad_norm": 12.882672309875488, "learning_rate": 5.635e-07, "loss": 0.295, "step": 12125 }, { "epoch": 0.15, "grad_norm": 19.411699295043945, "learning_rate": 5.617142857142857e-07, "loss": 0.5108, "step": 12150 }, { "epoch": 0.15, "grad_norm": 11.176936149597168, "learning_rate": 5.599285714285714e-07, "loss": 0.2256, "step": 12175 }, { "epoch": 0.15, "grad_norm": 21.744428634643555, "learning_rate": 5.581428571428571e-07, "loss": 0.5773, "step": 12200 }, { "epoch": 0.15, "grad_norm": 14.22724723815918, "learning_rate": 5.563571428571429e-07, "loss": 0.2695, "step": 12225 }, { "epoch": 0.15, "grad_norm": 38.878807067871094, "learning_rate": 5.545714285714286e-07, "loss": 0.5021, "step": 12250 }, { "epoch": 0.15, "grad_norm": 18.826614379882812, "learning_rate": 5.527857142857143e-07, "loss": 0.2635, "step": 12275 }, { "epoch": 0.15, "grad_norm": 14.811022758483887, "learning_rate": 5.51e-07, "loss": 0.543, "step": 12300 }, { "epoch": 0.15, "grad_norm": 9.75754165649414, "learning_rate": 5.492142857142857e-07, "loss": 0.3347, "step": 12325 }, { "epoch": 0.15, "grad_norm": 25.858911514282227, "learning_rate": 5.474285714285714e-07, "loss": 0.5124, "step": 12350 }, { "epoch": 0.15, "grad_norm": 10.652257919311523, "learning_rate": 5.45642857142857e-07, "loss": 0.2577, "step": 12375 }, { "epoch": 0.15, "grad_norm": 29.380006790161133, "learning_rate": 5.438571428571428e-07, "loss": 0.534, "step": 12400 }, { "epoch": 0.15, "grad_norm": 11.318015098571777, "learning_rate": 5.420714285714285e-07, "loss": 0.2509, "step": 12425 }, { "epoch": 0.15, "grad_norm": 25.717960357666016, "learning_rate": 5.402857142857143e-07, "loss": 0.5379, "step": 12450 }, { "epoch": 0.15, "grad_norm": 20.083452224731445, "learning_rate": 5.384999999999999e-07, "loss": 0.2902, "step": 12475 }, { "epoch": 0.15, "grad_norm": 23.3952579498291, "learning_rate": 5.367142857142857e-07, "loss": 0.5801, "step": 12500 }, { "epoch": 0.15, "grad_norm": 9.673083305358887, "learning_rate": 5.349285714285714e-07, "loss": 0.2961, "step": 12525 }, { "epoch": 0.15, "grad_norm": 21.045778274536133, "learning_rate": 5.331428571428571e-07, "loss": 0.5459, "step": 12550 }, { "epoch": 0.15, "grad_norm": 15.67827320098877, "learning_rate": 5.313571428571428e-07, "loss": 0.2613, "step": 12575 }, { "epoch": 0.15, "grad_norm": 29.011518478393555, "learning_rate": 5.295714285714285e-07, "loss": 0.5127, "step": 12600 }, { "epoch": 0.15, "grad_norm": 9.100702285766602, "learning_rate": 5.277857142857143e-07, "loss": 0.2787, "step": 12625 }, { "epoch": 0.15, "grad_norm": 26.210891723632812, "learning_rate": 5.26e-07, "loss": 0.6275, "step": 12650 }, { "epoch": 0.15, "grad_norm": 14.144155502319336, "learning_rate": 5.242142857142858e-07, "loss": 0.2381, "step": 12675 }, { "epoch": 0.15, "grad_norm": 29.32146644592285, "learning_rate": 5.224285714285714e-07, "loss": 0.5048, "step": 12700 }, { "epoch": 0.15, "grad_norm": 15.683405876159668, "learning_rate": 5.206428571428572e-07, "loss": 0.3016, "step": 12725 }, { "epoch": 0.15, "grad_norm": 25.86359214782715, "learning_rate": 5.188571428571429e-07, "loss": 0.5358, "step": 12750 }, { "epoch": 0.16, "grad_norm": 10.712611198425293, "learning_rate": 5.170714285714287e-07, "loss": 0.242, "step": 12775 }, { "epoch": 0.16, "grad_norm": 14.679198265075684, "learning_rate": 5.152857142857142e-07, "loss": 0.5162, "step": 12800 }, { "epoch": 0.16, "grad_norm": 16.717084884643555, "learning_rate": 5.134999999999999e-07, "loss": 0.2672, "step": 12825 }, { "epoch": 0.16, "grad_norm": 18.414783477783203, "learning_rate": 5.117142857142857e-07, "loss": 0.5164, "step": 12850 }, { "epoch": 0.16, "grad_norm": 9.667854309082031, "learning_rate": 5.099285714285714e-07, "loss": 0.2618, "step": 12875 }, { "epoch": 0.16, "grad_norm": 31.132843017578125, "learning_rate": 5.081428571428571e-07, "loss": 0.4941, "step": 12900 }, { "epoch": 0.16, "grad_norm": 14.9796781539917, "learning_rate": 5.063571428571428e-07, "loss": 0.2561, "step": 12925 }, { "epoch": 0.16, "grad_norm": 24.235136032104492, "learning_rate": 5.045714285714286e-07, "loss": 0.541, "step": 12950 }, { "epoch": 0.16, "grad_norm": 11.6466064453125, "learning_rate": 5.027857142857143e-07, "loss": 0.2448, "step": 12975 }, { "epoch": 0.16, "grad_norm": 25.600833892822266, "learning_rate": 5.009999999999999e-07, "loss": 0.535, "step": 13000 }, { "epoch": 0.16, "grad_norm": 19.636394500732422, "learning_rate": 4.992142857142857e-07, "loss": 0.2688, "step": 13025 }, { "epoch": 0.16, "grad_norm": 20.44740867614746, "learning_rate": 4.974285714285714e-07, "loss": 0.4732, "step": 13050 }, { "epoch": 0.16, "grad_norm": 17.387100219726562, "learning_rate": 4.956428571428572e-07, "loss": 0.2469, "step": 13075 }, { "epoch": 0.16, "grad_norm": 24.79920196533203, "learning_rate": 4.938571428571428e-07, "loss": 0.4996, "step": 13100 }, { "epoch": 0.16, "grad_norm": 16.365625381469727, "learning_rate": 4.920714285714286e-07, "loss": 0.2572, "step": 13125 }, { "epoch": 0.16, "grad_norm": 16.618408203125, "learning_rate": 4.902857142857142e-07, "loss": 0.5222, "step": 13150 }, { "epoch": 0.16, "grad_norm": 11.194711685180664, "learning_rate": 4.885e-07, "loss": 0.291, "step": 13175 }, { "epoch": 0.16, "grad_norm": 23.214378356933594, "learning_rate": 4.867142857142857e-07, "loss": 0.4937, "step": 13200 }, { "epoch": 0.16, "grad_norm": 19.738113403320312, "learning_rate": 4.849285714285715e-07, "loss": 0.2994, "step": 13225 }, { "epoch": 0.16, "grad_norm": 23.572124481201172, "learning_rate": 4.831428571428571e-07, "loss": 0.512, "step": 13250 }, { "epoch": 0.16, "grad_norm": 17.797889709472656, "learning_rate": 4.813571428571428e-07, "loss": 0.2503, "step": 13275 }, { "epoch": 0.16, "grad_norm": 19.600574493408203, "learning_rate": 4.795714285714286e-07, "loss": 0.498, "step": 13300 }, { "epoch": 0.16, "grad_norm": 16.852575302124023, "learning_rate": 4.777857142857142e-07, "loss": 0.2647, "step": 13325 }, { "epoch": 0.16, "grad_norm": 22.06913185119629, "learning_rate": 4.76e-07, "loss": 0.5222, "step": 13350 }, { "epoch": 0.16, "grad_norm": 11.366828918457031, "learning_rate": 4.7421428571428567e-07, "loss": 0.2006, "step": 13375 }, { "epoch": 0.16, "grad_norm": 26.567928314208984, "learning_rate": 4.724285714285714e-07, "loss": 0.6113, "step": 13400 }, { "epoch": 0.16, "grad_norm": 17.440595626831055, "learning_rate": 4.706428571428571e-07, "loss": 0.2967, "step": 13425 }, { "epoch": 0.16, "grad_norm": 32.105918884277344, "learning_rate": 4.689285714285714e-07, "loss": 0.4915, "step": 13450 }, { "epoch": 0.16, "grad_norm": 15.742903709411621, "learning_rate": 4.671428571428571e-07, "loss": 0.2735, "step": 13475 }, { "epoch": 0.16, "grad_norm": 16.67759895324707, "learning_rate": 4.6535714285714286e-07, "loss": 0.5659, "step": 13500 }, { "epoch": 0.16, "grad_norm": 11.289206504821777, "learning_rate": 4.6357142857142855e-07, "loss": 0.2511, "step": 13525 }, { "epoch": 0.16, "grad_norm": 24.552431106567383, "learning_rate": 4.617857142857143e-07, "loss": 0.5276, "step": 13550 }, { "epoch": 0.17, "grad_norm": 16.458602905273438, "learning_rate": 4.6e-07, "loss": 0.2565, "step": 13575 }, { "epoch": 0.17, "grad_norm": 20.243223190307617, "learning_rate": 4.5821428571428574e-07, "loss": 0.5313, "step": 13600 }, { "epoch": 0.17, "grad_norm": 9.613998413085938, "learning_rate": 4.564285714285714e-07, "loss": 0.2872, "step": 13625 }, { "epoch": 0.17, "grad_norm": 28.92829132080078, "learning_rate": 4.546428571428571e-07, "loss": 0.5673, "step": 13650 }, { "epoch": 0.17, "grad_norm": 14.831592559814453, "learning_rate": 4.528571428571428e-07, "loss": 0.251, "step": 13675 }, { "epoch": 0.17, "grad_norm": 29.575359344482422, "learning_rate": 4.5107142857142856e-07, "loss": 0.5256, "step": 13700 }, { "epoch": 0.17, "grad_norm": 15.341351509094238, "learning_rate": 4.4928571428571426e-07, "loss": 0.2483, "step": 13725 }, { "epoch": 0.17, "grad_norm": 31.83696174621582, "learning_rate": 4.475e-07, "loss": 0.5649, "step": 13750 }, { "epoch": 0.17, "grad_norm": 15.396077156066895, "learning_rate": 4.457142857142857e-07, "loss": 0.3735, "step": 13775 }, { "epoch": 0.17, "grad_norm": 23.51662826538086, "learning_rate": 4.4392857142857144e-07, "loss": 0.5416, "step": 13800 }, { "epoch": 0.17, "grad_norm": 11.324971199035645, "learning_rate": 4.421428571428571e-07, "loss": 0.2627, "step": 13825 }, { "epoch": 0.17, "grad_norm": 24.304067611694336, "learning_rate": 4.4035714285714283e-07, "loss": 0.5778, "step": 13850 }, { "epoch": 0.17, "grad_norm": 16.256675720214844, "learning_rate": 4.385714285714285e-07, "loss": 0.2809, "step": 13875 }, { "epoch": 0.17, "grad_norm": 20.40346908569336, "learning_rate": 4.3678571428571427e-07, "loss": 0.4767, "step": 13900 }, { "epoch": 0.17, "grad_norm": 15.67408275604248, "learning_rate": 4.3499999999999996e-07, "loss": 0.2654, "step": 13925 }, { "epoch": 0.17, "grad_norm": 23.163921356201172, "learning_rate": 4.332142857142857e-07, "loss": 0.5091, "step": 13950 }, { "epoch": 0.17, "grad_norm": 17.081083297729492, "learning_rate": 4.314285714285714e-07, "loss": 0.2929, "step": 13975 }, { "epoch": 0.17, "grad_norm": 26.59824562072754, "learning_rate": 4.2964285714285715e-07, "loss": 0.55, "step": 14000 }, { "epoch": 0.17, "eval_loss": 0.45657408237457275, "eval_runtime": 6018.009, "eval_samples_per_second": 1.564, "eval_steps_per_second": 0.196, "eval_wer": 0.1674794122396254, "step": 14000 }, { "epoch": 0.17, "grad_norm": 14.647737503051758, "learning_rate": 4.2785714285714284e-07, "loss": 0.2821, "step": 14025 }, { "epoch": 0.17, "grad_norm": 22.898862838745117, "learning_rate": 4.2607142857142854e-07, "loss": 0.4583, "step": 14050 }, { "epoch": 0.17, "grad_norm": 8.966913223266602, "learning_rate": 4.2428571428571423e-07, "loss": 0.283, "step": 14075 }, { "epoch": 0.17, "grad_norm": 16.121633529663086, "learning_rate": 4.225e-07, "loss": 0.5825, "step": 14100 }, { "epoch": 0.17, "grad_norm": 13.079733848571777, "learning_rate": 4.2071428571428567e-07, "loss": 0.2593, "step": 14125 }, { "epoch": 0.17, "grad_norm": 17.657615661621094, "learning_rate": 4.189285714285714e-07, "loss": 0.5296, "step": 14150 }, { "epoch": 0.17, "grad_norm": 16.596885681152344, "learning_rate": 4.171428571428571e-07, "loss": 0.2974, "step": 14175 }, { "epoch": 0.17, "grad_norm": 20.227094650268555, "learning_rate": 4.1535714285714286e-07, "loss": 0.4874, "step": 14200 }, { "epoch": 0.17, "grad_norm": 11.48043441772461, "learning_rate": 4.1357142857142855e-07, "loss": 0.2495, "step": 14225 }, { "epoch": 0.17, "grad_norm": 27.15212631225586, "learning_rate": 4.117857142857143e-07, "loss": 0.4958, "step": 14250 }, { "epoch": 0.17, "grad_norm": 13.044829368591309, "learning_rate": 4.0999999999999994e-07, "loss": 0.319, "step": 14275 }, { "epoch": 0.17, "grad_norm": 25.95546531677246, "learning_rate": 4.082142857142857e-07, "loss": 0.5586, "step": 14300 }, { "epoch": 0.17, "grad_norm": 16.749534606933594, "learning_rate": 4.064285714285714e-07, "loss": 0.2233, "step": 14325 }, { "epoch": 0.17, "grad_norm": 17.384183883666992, "learning_rate": 4.046428571428571e-07, "loss": 0.5221, "step": 14350 }, { "epoch": 0.17, "grad_norm": 23.787689208984375, "learning_rate": 4.028571428571428e-07, "loss": 0.2446, "step": 14375 }, { "epoch": 0.18, "grad_norm": 23.294313430786133, "learning_rate": 4.0107142857142857e-07, "loss": 0.4733, "step": 14400 }, { "epoch": 0.18, "grad_norm": 12.99344253540039, "learning_rate": 3.9928571428571426e-07, "loss": 0.2601, "step": 14425 }, { "epoch": 0.18, "grad_norm": 27.727008819580078, "learning_rate": 3.975e-07, "loss": 0.5488, "step": 14450 }, { "epoch": 0.18, "grad_norm": 13.73043441772461, "learning_rate": 3.957142857142857e-07, "loss": 0.2472, "step": 14475 }, { "epoch": 0.18, "grad_norm": 22.068260192871094, "learning_rate": 3.939285714285714e-07, "loss": 0.4978, "step": 14500 }, { "epoch": 0.18, "grad_norm": 11.672805786132812, "learning_rate": 3.921428571428571e-07, "loss": 0.243, "step": 14525 }, { "epoch": 0.18, "grad_norm": 19.580429077148438, "learning_rate": 3.9035714285714283e-07, "loss": 0.4744, "step": 14550 }, { "epoch": 0.18, "grad_norm": 12.825048446655273, "learning_rate": 3.8857142857142853e-07, "loss": 0.2659, "step": 14575 }, { "epoch": 0.18, "grad_norm": 24.486330032348633, "learning_rate": 3.8678571428571427e-07, "loss": 0.512, "step": 14600 }, { "epoch": 0.18, "grad_norm": 17.93408203125, "learning_rate": 3.8499999999999997e-07, "loss": 0.267, "step": 14625 }, { "epoch": 0.18, "grad_norm": 23.19489288330078, "learning_rate": 3.832142857142857e-07, "loss": 0.4564, "step": 14650 }, { "epoch": 0.18, "grad_norm": 11.471485137939453, "learning_rate": 3.814285714285714e-07, "loss": 0.2921, "step": 14675 }, { "epoch": 0.18, "grad_norm": 21.568504333496094, "learning_rate": 3.796428571428571e-07, "loss": 0.4536, "step": 14700 }, { "epoch": 0.18, "grad_norm": 20.54655647277832, "learning_rate": 3.778571428571428e-07, "loss": 0.2324, "step": 14725 }, { "epoch": 0.18, "grad_norm": 23.148265838623047, "learning_rate": 3.7607142857142854e-07, "loss": 0.5221, "step": 14750 }, { "epoch": 0.18, "grad_norm": 20.88414192199707, "learning_rate": 3.7428571428571423e-07, "loss": 0.3087, "step": 14775 }, { "epoch": 0.18, "grad_norm": 22.698204040527344, "learning_rate": 3.725e-07, "loss": 0.5205, "step": 14800 }, { "epoch": 0.18, "grad_norm": 10.197999000549316, "learning_rate": 3.7071428571428573e-07, "loss": 0.2257, "step": 14825 }, { "epoch": 0.18, "grad_norm": 21.910158157348633, "learning_rate": 3.689285714285714e-07, "loss": 0.4669, "step": 14850 }, { "epoch": 0.18, "grad_norm": 14.414984703063965, "learning_rate": 3.6714285714285717e-07, "loss": 0.2946, "step": 14875 }, { "epoch": 0.18, "grad_norm": 25.156875610351562, "learning_rate": 3.6535714285714286e-07, "loss": 0.4716, "step": 14900 }, { "epoch": 0.18, "grad_norm": 15.973970413208008, "learning_rate": 3.6357142857142855e-07, "loss": 0.2882, "step": 14925 }, { "epoch": 0.18, "grad_norm": 20.180315017700195, "learning_rate": 3.6178571428571425e-07, "loss": 0.5465, "step": 14950 }, { "epoch": 0.18, "grad_norm": 13.591038703918457, "learning_rate": 3.6e-07, "loss": 0.2794, "step": 14975 }, { "epoch": 0.18, "grad_norm": 19.940364837646484, "learning_rate": 3.582142857142857e-07, "loss": 0.4463, "step": 15000 }, { "epoch": 0.18, "grad_norm": 15.9667329788208, "learning_rate": 3.5642857142857143e-07, "loss": 0.2443, "step": 15025 }, { "epoch": 0.18, "grad_norm": 35.515045166015625, "learning_rate": 3.5464285714285713e-07, "loss": 0.5523, "step": 15050 }, { "epoch": 0.18, "grad_norm": 10.865702629089355, "learning_rate": 3.528571428571429e-07, "loss": 0.2733, "step": 15075 }, { "epoch": 0.18, "grad_norm": 19.473037719726562, "learning_rate": 3.5107142857142857e-07, "loss": 0.5079, "step": 15100 }, { "epoch": 0.18, "grad_norm": 31.667394638061523, "learning_rate": 3.492857142857143e-07, "loss": 0.2393, "step": 15125 }, { "epoch": 0.18, "grad_norm": 29.721817016601562, "learning_rate": 3.4749999999999996e-07, "loss": 0.5714, "step": 15150 }, { "epoch": 0.18, "grad_norm": 20.16938018798828, "learning_rate": 3.457142857142857e-07, "loss": 0.3115, "step": 15175 }, { "epoch": 0.18, "grad_norm": 22.576316833496094, "learning_rate": 3.439285714285714e-07, "loss": 0.5191, "step": 15200 }, { "epoch": 0.19, "grad_norm": 16.064035415649414, "learning_rate": 3.4214285714285714e-07, "loss": 0.305, "step": 15225 }, { "epoch": 0.19, "grad_norm": 20.527408599853516, "learning_rate": 3.4035714285714284e-07, "loss": 0.5645, "step": 15250 }, { "epoch": 0.19, "grad_norm": 6.0652337074279785, "learning_rate": 3.385714285714286e-07, "loss": 0.2188, "step": 15275 }, { "epoch": 0.19, "grad_norm": 29.08500862121582, "learning_rate": 3.367857142857143e-07, "loss": 0.5295, "step": 15300 }, { "epoch": 0.19, "grad_norm": 11.278789520263672, "learning_rate": 3.35e-07, "loss": 0.2096, "step": 15325 }, { "epoch": 0.19, "grad_norm": 16.811601638793945, "learning_rate": 3.332142857142857e-07, "loss": 0.5849, "step": 15350 }, { "epoch": 0.19, "grad_norm": 22.24079704284668, "learning_rate": 3.314285714285714e-07, "loss": 0.2348, "step": 15375 }, { "epoch": 0.19, "grad_norm": 22.854068756103516, "learning_rate": 3.296428571428571e-07, "loss": 0.58, "step": 15400 }, { "epoch": 0.19, "grad_norm": 20.740047454833984, "learning_rate": 3.2785714285714285e-07, "loss": 0.2833, "step": 15425 }, { "epoch": 0.19, "grad_norm": 26.679668426513672, "learning_rate": 3.2607142857142854e-07, "loss": 0.5188, "step": 15450 }, { "epoch": 0.19, "grad_norm": 8.891940116882324, "learning_rate": 3.242857142857143e-07, "loss": 0.2301, "step": 15475 }, { "epoch": 0.19, "grad_norm": 28.738801956176758, "learning_rate": 3.225e-07, "loss": 0.6019, "step": 15500 }, { "epoch": 0.19, "grad_norm": 25.064411163330078, "learning_rate": 3.2071428571428573e-07, "loss": 0.2489, "step": 15525 }, { "epoch": 0.19, "grad_norm": 22.51979637145996, "learning_rate": 3.189285714285714e-07, "loss": 0.4956, "step": 15550 }, { "epoch": 0.19, "grad_norm": 21.971162796020508, "learning_rate": 3.171428571428571e-07, "loss": 0.2867, "step": 15575 }, { "epoch": 0.19, "grad_norm": 29.71180534362793, "learning_rate": 3.153571428571428e-07, "loss": 0.5688, "step": 15600 }, { "epoch": 0.19, "grad_norm": 8.722527503967285, "learning_rate": 3.1357142857142856e-07, "loss": 0.2506, "step": 15625 }, { "epoch": 0.19, "grad_norm": 24.56787872314453, "learning_rate": 3.1178571428571425e-07, "loss": 0.4948, "step": 15650 }, { "epoch": 0.19, "grad_norm": 9.346379280090332, "learning_rate": 3.1e-07, "loss": 0.2228, "step": 15675 }, { "epoch": 0.19, "grad_norm": 25.692184448242188, "learning_rate": 3.082142857142857e-07, "loss": 0.459, "step": 15700 }, { "epoch": 0.19, "grad_norm": 29.484182357788086, "learning_rate": 3.0642857142857144e-07, "loss": 0.2462, "step": 15725 }, { "epoch": 0.19, "grad_norm": 17.136838912963867, "learning_rate": 3.0464285714285713e-07, "loss": 0.4357, "step": 15750 }, { "epoch": 0.19, "grad_norm": 17.58600425720215, "learning_rate": 3.028571428571429e-07, "loss": 0.2663, "step": 15775 }, { "epoch": 0.19, "grad_norm": 18.859933853149414, "learning_rate": 3.010714285714285e-07, "loss": 0.5014, "step": 15800 }, { "epoch": 0.19, "grad_norm": 10.988012313842773, "learning_rate": 2.9928571428571426e-07, "loss": 0.2599, "step": 15825 }, { "epoch": 0.19, "grad_norm": 26.970216751098633, "learning_rate": 2.9749999999999996e-07, "loss": 0.527, "step": 15850 }, { "epoch": 0.19, "grad_norm": 16.885482788085938, "learning_rate": 2.957142857142857e-07, "loss": 0.3002, "step": 15875 }, { "epoch": 0.19, "grad_norm": 21.51068115234375, "learning_rate": 2.939285714285714e-07, "loss": 0.5631, "step": 15900 }, { "epoch": 0.19, "grad_norm": 9.604096412658691, "learning_rate": 2.9214285714285714e-07, "loss": 0.2152, "step": 15925 }, { "epoch": 0.19, "grad_norm": 17.939319610595703, "learning_rate": 2.9035714285714284e-07, "loss": 0.5335, "step": 15950 }, { "epoch": 0.19, "grad_norm": 8.930473327636719, "learning_rate": 2.885714285714286e-07, "loss": 0.2041, "step": 15975 }, { "epoch": 0.19, "grad_norm": 29.47776222229004, "learning_rate": 2.867857142857143e-07, "loss": 0.4524, "step": 16000 }, { "epoch": 0.19, "eval_loss": 0.46478670835494995, "eval_runtime": 5851.5448, "eval_samples_per_second": 1.609, "eval_steps_per_second": 0.201, "eval_wer": 0.16631680930082351, "step": 16000 }, { "epoch": 0.19, "grad_norm": 14.176447868347168, "learning_rate": 2.8499999999999997e-07, "loss": 0.2508, "step": 16025 }, { "epoch": 0.2, "grad_norm": 23.6607666015625, "learning_rate": 2.8321428571428566e-07, "loss": 0.4966, "step": 16050 }, { "epoch": 0.2, "grad_norm": 14.520540237426758, "learning_rate": 2.814285714285714e-07, "loss": 0.2649, "step": 16075 }, { "epoch": 0.2, "grad_norm": 22.186479568481445, "learning_rate": 2.796428571428571e-07, "loss": 0.5105, "step": 16100 }, { "epoch": 0.2, "grad_norm": 21.052902221679688, "learning_rate": 2.7785714285714285e-07, "loss": 0.2248, "step": 16125 }, { "epoch": 0.2, "grad_norm": 25.37480354309082, "learning_rate": 2.7607142857142854e-07, "loss": 0.5451, "step": 16150 }, { "epoch": 0.2, "grad_norm": 22.131818771362305, "learning_rate": 2.742857142857143e-07, "loss": 0.2533, "step": 16175 }, { "epoch": 0.2, "grad_norm": 27.51265525817871, "learning_rate": 2.725e-07, "loss": 0.4579, "step": 16200 }, { "epoch": 0.2, "grad_norm": 10.886811256408691, "learning_rate": 2.7071428571428573e-07, "loss": 0.197, "step": 16225 }, { "epoch": 0.2, "grad_norm": 24.875947952270508, "learning_rate": 2.6892857142857137e-07, "loss": 0.46, "step": 16250 }, { "epoch": 0.2, "grad_norm": 9.09632396697998, "learning_rate": 2.671428571428571e-07, "loss": 0.2771, "step": 16275 }, { "epoch": 0.2, "grad_norm": 13.609580993652344, "learning_rate": 2.653571428571428e-07, "loss": 0.4694, "step": 16300 }, { "epoch": 0.2, "grad_norm": 9.03917121887207, "learning_rate": 2.6357142857142856e-07, "loss": 0.2659, "step": 16325 }, { "epoch": 0.2, "grad_norm": 23.09002113342285, "learning_rate": 2.6178571428571425e-07, "loss": 0.5136, "step": 16350 }, { "epoch": 0.2, "grad_norm": 16.277647018432617, "learning_rate": 2.6e-07, "loss": 0.2674, "step": 16375 }, { "epoch": 0.2, "grad_norm": 28.242874145507812, "learning_rate": 2.582142857142857e-07, "loss": 0.5222, "step": 16400 }, { "epoch": 0.2, "grad_norm": 14.258796691894531, "learning_rate": 2.5642857142857144e-07, "loss": 0.2876, "step": 16425 }, { "epoch": 0.2, "grad_norm": 31.952289581298828, "learning_rate": 2.546428571428571e-07, "loss": 0.5867, "step": 16450 }, { "epoch": 0.2, "grad_norm": 13.42287540435791, "learning_rate": 2.528571428571428e-07, "loss": 0.292, "step": 16475 }, { "epoch": 0.2, "grad_norm": 19.588457107543945, "learning_rate": 2.510714285714285e-07, "loss": 0.5209, "step": 16500 }, { "epoch": 0.2, "grad_norm": 9.94702434539795, "learning_rate": 2.4928571428571427e-07, "loss": 0.2441, "step": 16525 }, { "epoch": 0.2, "grad_norm": 21.62166404724121, "learning_rate": 2.475e-07, "loss": 0.4807, "step": 16550 }, { "epoch": 0.2, "grad_norm": 14.849344253540039, "learning_rate": 2.457142857142857e-07, "loss": 0.2493, "step": 16575 }, { "epoch": 0.2, "grad_norm": 27.06203842163086, "learning_rate": 2.4392857142857145e-07, "loss": 0.5525, "step": 16600 }, { "epoch": 0.2, "grad_norm": 10.47172737121582, "learning_rate": 2.4214285714285715e-07, "loss": 0.2425, "step": 16625 }, { "epoch": 0.2, "grad_norm": 26.490150451660156, "learning_rate": 2.4035714285714284e-07, "loss": 0.5732, "step": 16650 }, { "epoch": 0.2, "grad_norm": 9.605720520019531, "learning_rate": 2.385714285714286e-07, "loss": 0.2543, "step": 16675 }, { "epoch": 0.2, "grad_norm": 20.90159034729004, "learning_rate": 2.3678571428571428e-07, "loss": 0.42, "step": 16700 }, { "epoch": 0.2, "grad_norm": 9.984245300292969, "learning_rate": 2.3499999999999997e-07, "loss": 0.2732, "step": 16725 }, { "epoch": 0.2, "grad_norm": 20.062936782836914, "learning_rate": 2.332142857142857e-07, "loss": 0.5472, "step": 16750 }, { "epoch": 0.2, "grad_norm": 14.146800994873047, "learning_rate": 2.3142857142857141e-07, "loss": 0.2751, "step": 16775 }, { "epoch": 0.2, "grad_norm": 23.891094207763672, "learning_rate": 2.2964285714285713e-07, "loss": 0.5239, "step": 16800 }, { "epoch": 0.2, "grad_norm": 8.14566707611084, "learning_rate": 2.2785714285714285e-07, "loss": 0.2631, "step": 16825 }, { "epoch": 0.2, "grad_norm": 19.639022827148438, "learning_rate": 2.2607142857142855e-07, "loss": 0.5249, "step": 16850 }, { "epoch": 0.21, "grad_norm": 9.657629013061523, "learning_rate": 2.2428571428571427e-07, "loss": 0.3039, "step": 16875 }, { "epoch": 0.21, "grad_norm": 17.923763275146484, "learning_rate": 2.225e-07, "loss": 0.4895, "step": 16900 }, { "epoch": 0.21, "grad_norm": 13.95570182800293, "learning_rate": 2.207142857142857e-07, "loss": 0.2654, "step": 16925 }, { "epoch": 0.21, "grad_norm": 15.692480087280273, "learning_rate": 2.189285714285714e-07, "loss": 0.4649, "step": 16950 }, { "epoch": 0.21, "grad_norm": 16.137231826782227, "learning_rate": 2.1714285714285712e-07, "loss": 0.2288, "step": 16975 }, { "epoch": 0.21, "grad_norm": 29.005409240722656, "learning_rate": 2.1535714285714284e-07, "loss": 0.5068, "step": 17000 }, { "epoch": 0.21, "grad_norm": 16.533184051513672, "learning_rate": 2.1357142857142856e-07, "loss": 0.2713, "step": 17025 }, { "epoch": 0.21, "grad_norm": 26.245988845825195, "learning_rate": 2.1178571428571428e-07, "loss": 0.51, "step": 17050 }, { "epoch": 0.21, "grad_norm": 16.645214080810547, "learning_rate": 2.0999999999999997e-07, "loss": 0.2328, "step": 17075 }, { "epoch": 0.21, "grad_norm": 20.229503631591797, "learning_rate": 2.082142857142857e-07, "loss": 0.5205, "step": 17100 }, { "epoch": 0.21, "grad_norm": 16.3220157623291, "learning_rate": 2.0642857142857141e-07, "loss": 0.2531, "step": 17125 }, { "epoch": 0.21, "grad_norm": 25.806249618530273, "learning_rate": 2.0464285714285713e-07, "loss": 0.5184, "step": 17150 }, { "epoch": 0.21, "grad_norm": 12.498074531555176, "learning_rate": 2.0285714285714283e-07, "loss": 0.2544, "step": 17175 }, { "epoch": 0.21, "grad_norm": 24.79607582092285, "learning_rate": 2.0107142857142855e-07, "loss": 0.5435, "step": 17200 }, { "epoch": 0.21, "grad_norm": 24.32138442993164, "learning_rate": 1.9928571428571427e-07, "loss": 0.2794, "step": 17225 }, { "epoch": 0.21, "grad_norm": 22.922056198120117, "learning_rate": 1.975e-07, "loss": 0.4487, "step": 17250 }, { "epoch": 0.21, "grad_norm": 16.88331413269043, "learning_rate": 1.9571428571428568e-07, "loss": 0.262, "step": 17275 }, { "epoch": 0.21, "grad_norm": 27.17171287536621, "learning_rate": 1.939285714285714e-07, "loss": 0.4624, "step": 17300 }, { "epoch": 0.21, "grad_norm": 13.903470039367676, "learning_rate": 1.9214285714285712e-07, "loss": 0.27, "step": 17325 }, { "epoch": 0.21, "grad_norm": 9.872733116149902, "learning_rate": 1.9035714285714284e-07, "loss": 0.5501, "step": 17350 }, { "epoch": 0.21, "grad_norm": 20.017364501953125, "learning_rate": 1.885714285714286e-07, "loss": 0.2589, "step": 17375 }, { "epoch": 0.21, "grad_norm": 24.02932357788086, "learning_rate": 1.8678571428571426e-07, "loss": 0.5821, "step": 17400 }, { "epoch": 0.21, "grad_norm": 19.6367244720459, "learning_rate": 1.85e-07, "loss": 0.3638, "step": 17425 }, { "epoch": 0.21, "grad_norm": Infinity, "learning_rate": 1.8328571428571426e-07, "loss": 0.5555, "step": 17450 }, { "epoch": 0.21, "grad_norm": 13.749106407165527, "learning_rate": 1.8149999999999998e-07, "loss": 0.3132, "step": 17475 }, { "epoch": 0.21, "grad_norm": 15.566059112548828, "learning_rate": 1.797142857142857e-07, "loss": 0.5162, "step": 17500 }, { "epoch": 0.21, "grad_norm": 21.73044204711914, "learning_rate": 1.7792857142857142e-07, "loss": 0.24, "step": 17525 }, { "epoch": 0.21, "grad_norm": 29.611074447631836, "learning_rate": 1.7614285714285714e-07, "loss": 0.5108, "step": 17550 }, { "epoch": 0.21, "grad_norm": 16.056873321533203, "learning_rate": 1.7435714285714283e-07, "loss": 0.2247, "step": 17575 }, { "epoch": 0.21, "grad_norm": 26.387723922729492, "learning_rate": 1.7257142857142855e-07, "loss": 0.5551, "step": 17600 }, { "epoch": 0.21, "grad_norm": 15.484909057617188, "learning_rate": 1.7078571428571427e-07, "loss": 0.2685, "step": 17625 }, { "epoch": 0.21, "grad_norm": 12.681870460510254, "learning_rate": 1.69e-07, "loss": 0.5002, "step": 17650 }, { "epoch": 0.21, "grad_norm": 20.047056198120117, "learning_rate": 1.6721428571428568e-07, "loss": 0.305, "step": 17675 }, { "epoch": 0.22, "grad_norm": 18.66116714477539, "learning_rate": 1.654285714285714e-07, "loss": 0.5289, "step": 17700 }, { "epoch": 0.22, "grad_norm": 20.76094627380371, "learning_rate": 1.6364285714285712e-07, "loss": 0.2464, "step": 17725 }, { "epoch": 0.22, "grad_norm": 18.275331497192383, "learning_rate": 1.6185714285714287e-07, "loss": 0.4905, "step": 17750 }, { "epoch": 0.22, "grad_norm": 13.712215423583984, "learning_rate": 1.6007142857142854e-07, "loss": 0.2137, "step": 17775 }, { "epoch": 0.22, "grad_norm": 22.638090133666992, "learning_rate": 1.5828571428571429e-07, "loss": 0.5996, "step": 17800 }, { "epoch": 0.22, "grad_norm": 22.729068756103516, "learning_rate": 1.565e-07, "loss": 0.2864, "step": 17825 }, { "epoch": 0.22, "grad_norm": 15.553844451904297, "learning_rate": 1.5471428571428573e-07, "loss": 0.4922, "step": 17850 }, { "epoch": 0.22, "grad_norm": 20.399259567260742, "learning_rate": 1.5292857142857145e-07, "loss": 0.276, "step": 17875 }, { "epoch": 0.22, "grad_norm": 20.935850143432617, "learning_rate": 1.5114285714285714e-07, "loss": 0.5087, "step": 17900 }, { "epoch": 0.22, "grad_norm": 18.157344818115234, "learning_rate": 1.4935714285714286e-07, "loss": 0.2693, "step": 17925 }, { "epoch": 0.22, "grad_norm": 14.853775024414062, "learning_rate": 1.4757142857142858e-07, "loss": 0.5126, "step": 17950 }, { "epoch": 0.22, "grad_norm": 15.481501579284668, "learning_rate": 1.457857142857143e-07, "loss": 0.2645, "step": 17975 }, { "epoch": 0.22, "grad_norm": 25.41742515563965, "learning_rate": 1.44e-07, "loss": 0.4667, "step": 18000 }, { "epoch": 0.22, "eval_loss": 0.46059784293174744, "eval_runtime": 5876.4376, "eval_samples_per_second": 1.602, "eval_steps_per_second": 0.2, "eval_wer": 0.16560632972711126, "step": 18000 }, { "epoch": 0.22, "grad_norm": 13.322805404663086, "learning_rate": 1.422142857142857e-07, "loss": 0.2463, "step": 18025 }, { "epoch": 0.22, "grad_norm": 20.623361587524414, "learning_rate": 1.4042857142857143e-07, "loss": 0.4702, "step": 18050 }, { "epoch": 0.22, "grad_norm": 11.25727653503418, "learning_rate": 1.3864285714285715e-07, "loss": 0.2297, "step": 18075 }, { "epoch": 0.22, "grad_norm": 24.11102867126465, "learning_rate": 1.3685714285714285e-07, "loss": 0.5188, "step": 18100 }, { "epoch": 0.22, "grad_norm": 14.874909400939941, "learning_rate": 1.3507142857142857e-07, "loss": 0.2141, "step": 18125 }, { "epoch": 0.22, "grad_norm": 24.302215576171875, "learning_rate": 1.3328571428571429e-07, "loss": 0.4678, "step": 18150 }, { "epoch": 0.22, "grad_norm": 15.379274368286133, "learning_rate": 1.315e-07, "loss": 0.2166, "step": 18175 }, { "epoch": 0.22, "grad_norm": 24.384815216064453, "learning_rate": 1.2971428571428573e-07, "loss": 0.4773, "step": 18200 }, { "epoch": 0.22, "grad_norm": 14.261998176574707, "learning_rate": 1.2792857142857142e-07, "loss": 0.2611, "step": 18225 }, { "epoch": 0.22, "grad_norm": 26.193994522094727, "learning_rate": 1.2614285714285714e-07, "loss": 0.4536, "step": 18250 }, { "epoch": 0.22, "grad_norm": 15.70848560333252, "learning_rate": 1.2435714285714286e-07, "loss": 0.2462, "step": 18275 }, { "epoch": 0.22, "grad_norm": 21.586435317993164, "learning_rate": 1.2257142857142855e-07, "loss": 0.5351, "step": 18300 }, { "epoch": 0.22, "grad_norm": 13.686724662780762, "learning_rate": 1.2078571428571427e-07, "loss": 0.2181, "step": 18325 }, { "epoch": 0.22, "grad_norm": 24.10140609741211, "learning_rate": 1.19e-07, "loss": 0.5414, "step": 18350 }, { "epoch": 0.22, "grad_norm": 16.506885528564453, "learning_rate": 1.1721428571428571e-07, "loss": 0.248, "step": 18375 }, { "epoch": 0.22, "grad_norm": 19.96807289123535, "learning_rate": 1.1542857142857142e-07, "loss": 0.4668, "step": 18400 }, { "epoch": 0.22, "grad_norm": 7.820561408996582, "learning_rate": 1.1364285714285714e-07, "loss": 0.2736, "step": 18425 }, { "epoch": 0.22, "grad_norm": 25.755311965942383, "learning_rate": 1.1185714285714286e-07, "loss": 0.5258, "step": 18450 }, { "epoch": 0.22, "grad_norm": 12.378037452697754, "learning_rate": 1.1007142857142857e-07, "loss": 0.2869, "step": 18475 }, { "epoch": 0.22, "grad_norm": 15.160594940185547, "learning_rate": 1.0828571428571429e-07, "loss": 0.4904, "step": 18500 }, { "epoch": 0.23, "grad_norm": 20.206640243530273, "learning_rate": 1.065e-07, "loss": 0.2504, "step": 18525 }, { "epoch": 0.23, "grad_norm": 22.9788875579834, "learning_rate": 1.0471428571428571e-07, "loss": 0.5201, "step": 18550 }, { "epoch": 0.23, "grad_norm": 15.308506965637207, "learning_rate": 1.0292857142857142e-07, "loss": 0.258, "step": 18575 }, { "epoch": 0.23, "grad_norm": 14.326108932495117, "learning_rate": 1.0114285714285714e-07, "loss": 0.4509, "step": 18600 }, { "epoch": 0.23, "grad_norm": 26.459646224975586, "learning_rate": 9.935714285714285e-08, "loss": 0.2744, "step": 18625 }, { "epoch": 0.23, "grad_norm": 19.211641311645508, "learning_rate": 9.757142857142857e-08, "loss": 0.5296, "step": 18650 }, { "epoch": 0.23, "grad_norm": 15.568469047546387, "learning_rate": 9.578571428571427e-08, "loss": 0.2472, "step": 18675 }, { "epoch": 0.23, "grad_norm": 29.26220703125, "learning_rate": 9.4e-08, "loss": 0.4798, "step": 18700 }, { "epoch": 0.23, "grad_norm": 25.08895492553711, "learning_rate": 9.221428571428571e-08, "loss": 0.2771, "step": 18725 }, { "epoch": 0.23, "grad_norm": 19.168804168701172, "learning_rate": 9.042857142857142e-08, "loss": 0.506, "step": 18750 }, { "epoch": 0.23, "grad_norm": 17.489612579345703, "learning_rate": 8.864285714285714e-08, "loss": 0.2914, "step": 18775 }, { "epoch": 0.23, "grad_norm": 16.952068328857422, "learning_rate": 8.685714285714285e-08, "loss": 0.5445, "step": 18800 }, { "epoch": 0.23, "grad_norm": 13.241303443908691, "learning_rate": 8.507142857142857e-08, "loss": 0.2818, "step": 18825 }, { "epoch": 0.23, "grad_norm": 21.655899047851562, "learning_rate": 8.328571428571428e-08, "loss": 0.5256, "step": 18850 }, { "epoch": 0.23, "grad_norm": 18.215364456176758, "learning_rate": 8.15e-08, "loss": 0.2672, "step": 18875 }, { "epoch": 0.23, "grad_norm": 26.478195190429688, "learning_rate": 7.97142857142857e-08, "loss": 0.4729, "step": 18900 }, { "epoch": 0.23, "grad_norm": 16.756649017333984, "learning_rate": 7.792857142857142e-08, "loss": 0.2469, "step": 18925 }, { "epoch": 0.23, "grad_norm": 25.294132232666016, "learning_rate": 7.614285714285713e-08, "loss": 0.5517, "step": 18950 }, { "epoch": 0.23, "grad_norm": 15.660456657409668, "learning_rate": 7.435714285714285e-08, "loss": 0.2261, "step": 18975 }, { "epoch": 0.23, "grad_norm": 24.570951461791992, "learning_rate": 7.257142857142856e-08, "loss": 0.464, "step": 19000 }, { "epoch": 0.23, "grad_norm": 13.32069206237793, "learning_rate": 7.078571428571428e-08, "loss": 0.2759, "step": 19025 }, { "epoch": 0.23, "grad_norm": 27.2366943359375, "learning_rate": 6.900000000000001e-08, "loss": 0.4982, "step": 19050 }, { "epoch": 0.23, "grad_norm": 10.226125717163086, "learning_rate": 6.721428571428572e-08, "loss": 0.2669, "step": 19075 }, { "epoch": 0.23, "grad_norm": 28.88689613342285, "learning_rate": 6.542857142857144e-08, "loss": 0.5166, "step": 19100 }, { "epoch": 0.23, "grad_norm": 9.242531776428223, "learning_rate": 6.364285714285714e-08, "loss": 0.2363, "step": 19125 }, { "epoch": 0.23, "grad_norm": 26.46955108642578, "learning_rate": 6.185714285714286e-08, "loss": 0.5085, "step": 19150 }, { "epoch": 0.23, "grad_norm": 17.96638298034668, "learning_rate": 6.007142857142857e-08, "loss": 0.2484, "step": 19175 }, { "epoch": 0.23, "grad_norm": 19.205671310424805, "learning_rate": 5.828571428571428e-08, "loss": 0.4366, "step": 19200 }, { "epoch": 0.23, "grad_norm": 19.420578002929688, "learning_rate": 5.6499999999999996e-08, "loss": 0.2316, "step": 19225 }, { "epoch": 0.23, "grad_norm": Infinity, "learning_rate": 5.4785714285714285e-08, "loss": 0.5216, "step": 19250 }, { "epoch": 0.23, "grad_norm": 11.171784400939941, "learning_rate": 5.3e-08, "loss": 0.2496, "step": 19275 }, { "epoch": 0.23, "grad_norm": 18.622215270996094, "learning_rate": 5.121428571428571e-08, "loss": 0.5563, "step": 19300 }, { "epoch": 0.23, "grad_norm": 15.999675750732422, "learning_rate": 4.9428571428571425e-08, "loss": 0.2272, "step": 19325 }, { "epoch": 0.24, "grad_norm": 29.481321334838867, "learning_rate": 4.764285714285714e-08, "loss": 0.5892, "step": 19350 }, { "epoch": 0.24, "grad_norm": 17.218955993652344, "learning_rate": 4.585714285714286e-08, "loss": 0.2746, "step": 19375 }, { "epoch": 0.24, "grad_norm": 17.27052879333496, "learning_rate": 4.407142857142857e-08, "loss": 0.4847, "step": 19400 }, { "epoch": 0.24, "grad_norm": 13.923643112182617, "learning_rate": 4.2285714285714285e-08, "loss": 0.2418, "step": 19425 }, { "epoch": 0.24, "grad_norm": 20.42319679260254, "learning_rate": 4.05e-08, "loss": 0.5244, "step": 19450 }, { "epoch": 0.24, "grad_norm": 21.954940795898438, "learning_rate": 3.871428571428571e-08, "loss": 0.265, "step": 19475 }, { "epoch": 0.24, "grad_norm": 22.70041847229004, "learning_rate": 3.6928571428571426e-08, "loss": 0.4468, "step": 19500 }, { "epoch": 0.24, "grad_norm": 6.995919227600098, "learning_rate": 3.514285714285714e-08, "loss": 0.271, "step": 19525 }, { "epoch": 0.24, "grad_norm": 19.12382698059082, "learning_rate": 3.335714285714285e-08, "loss": 0.5083, "step": 19550 }, { "epoch": 0.24, "grad_norm": 13.486324310302734, "learning_rate": 3.1571428571428566e-08, "loss": 0.2645, "step": 19575 }, { "epoch": 0.24, "grad_norm": 20.960376739501953, "learning_rate": 2.9785714285714286e-08, "loss": 0.4991, "step": 19600 }, { "epoch": 0.24, "grad_norm": 19.863283157348633, "learning_rate": 2.8e-08, "loss": 0.25, "step": 19625 }, { "epoch": 0.24, "grad_norm": 24.58002281188965, "learning_rate": 2.6214285714285713e-08, "loss": 0.5011, "step": 19650 }, { "epoch": 0.24, "grad_norm": 17.584407806396484, "learning_rate": 2.4428571428571426e-08, "loss": 0.3192, "step": 19675 }, { "epoch": 0.24, "grad_norm": 26.56721305847168, "learning_rate": 2.264285714285714e-08, "loss": 0.4995, "step": 19700 }, { "epoch": 0.24, "grad_norm": 13.253254890441895, "learning_rate": 2.0857142857142856e-08, "loss": 0.2682, "step": 19725 }, { "epoch": 0.24, "grad_norm": 22.812877655029297, "learning_rate": 1.9071428571428573e-08, "loss": 0.5177, "step": 19750 }, { "epoch": 0.24, "grad_norm": 10.498454093933105, "learning_rate": 1.7285714285714286e-08, "loss": 0.2192, "step": 19775 }, { "epoch": 0.24, "grad_norm": 31.69744110107422, "learning_rate": 1.55e-08, "loss": 0.5125, "step": 19800 }, { "epoch": 0.24, "grad_norm": 11.616924285888672, "learning_rate": 1.3714285714285713e-08, "loss": 0.2494, "step": 19825 }, { "epoch": 0.24, "grad_norm": 25.574071884155273, "learning_rate": 1.1928571428571428e-08, "loss": 0.4375, "step": 19850 }, { "epoch": 0.24, "grad_norm": 16.213420867919922, "learning_rate": 1.0142857142857142e-08, "loss": 0.2279, "step": 19875 }, { "epoch": 0.24, "grad_norm": 16.850711822509766, "learning_rate": 8.357142857142857e-09, "loss": 0.466, "step": 19900 }, { "epoch": 0.24, "grad_norm": 11.44871997833252, "learning_rate": 6.571428571428572e-09, "loss": 0.2349, "step": 19925 }, { "epoch": 0.24, "grad_norm": 24.55624008178711, "learning_rate": 4.785714285714285e-09, "loss": 0.4876, "step": 19950 }, { "epoch": 0.24, "grad_norm": 11.952068328857422, "learning_rate": 3e-09, "loss": 0.2713, "step": 19975 }, { "epoch": 0.24, "grad_norm": 17.582544326782227, "learning_rate": 1.2142857142857142e-09, "loss": 0.5133, "step": 20000 }, { "epoch": 0.24, "eval_loss": 0.4545673429965973, "eval_runtime": 5956.6112, "eval_samples_per_second": 1.58, "eval_steps_per_second": 0.198, "eval_wer": 0.16517035362506055, "step": 20000 }, { "epoch": 0.24, "step": 20000, "total_flos": 1.632967852032e+20, "train_loss": 0.19572856886386872, "train_runtime": 76264.8528, "train_samples_per_second": 2.098, "train_steps_per_second": 0.262 } ], "logging_steps": 25, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.632967852032e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }