{ "best_metric": null, "best_model_checkpoint": null, "epoch": 432.43243243243245, "eval_steps": 1500, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.3513513513513513, "grad_norm": 0.5760506987571716, "learning_rate": 1.9364864864864865e-06, "loss": 0.0736, "step": 50 }, { "epoch": 2.7027027027027026, "grad_norm": 0.4748266339302063, "learning_rate": 3.2743243243243245e-06, "loss": 0.0682, "step": 100 }, { "epoch": 4.054054054054054, "grad_norm": 0.4784703552722931, "learning_rate": 4.612162162162162e-06, "loss": 0.0661, "step": 150 }, { "epoch": 5.405405405405405, "grad_norm": 0.4363403022289276, "learning_rate": 5.95e-06, "loss": 0.0646, "step": 200 }, { "epoch": 6.756756756756757, "grad_norm": 0.49891752004623413, "learning_rate": 7.287837837837838e-06, "loss": 0.0624, "step": 250 }, { "epoch": 8.108108108108109, "grad_norm": 0.47630417346954346, "learning_rate": 8.625675675675676e-06, "loss": 0.0635, "step": 300 }, { "epoch": 9.45945945945946, "grad_norm": 0.5554671287536621, "learning_rate": 9.963513513513515e-06, "loss": 0.0617, "step": 350 }, { "epoch": 10.81081081081081, "grad_norm": 0.4327123165130615, "learning_rate": 1.1301351351351353e-05, "loss": 0.0595, "step": 400 }, { "epoch": 12.162162162162161, "grad_norm": 0.5002420544624329, "learning_rate": 1.263918918918919e-05, "loss": 0.0585, "step": 450 }, { "epoch": 13.513513513513514, "grad_norm": 0.42524299025535583, "learning_rate": 1.3977027027027028e-05, "loss": 0.0579, "step": 500 }, { "epoch": 14.864864864864865, "grad_norm": 0.44030219316482544, "learning_rate": 1.5314864864864867e-05, "loss": 0.0575, "step": 550 }, { "epoch": 16.216216216216218, "grad_norm": 0.46804261207580566, "learning_rate": 1.6625945945945947e-05, "loss": 0.0579, "step": 600 }, { "epoch": 17.56756756756757, "grad_norm": 0.5248824954032898, "learning_rate": 1.7963783783783787e-05, "loss": 0.0556, "step": 650 }, { "epoch": 18.91891891891892, "grad_norm": 0.43462327122688293, "learning_rate": 1.9301621621621623e-05, "loss": 0.0546, "step": 700 }, { "epoch": 20.27027027027027, "grad_norm": 0.48718762397766113, "learning_rate": 2.0639459459459462e-05, "loss": 0.0529, "step": 750 }, { "epoch": 21.62162162162162, "grad_norm": 0.40828168392181396, "learning_rate": 2.19772972972973e-05, "loss": 0.0523, "step": 800 }, { "epoch": 22.972972972972972, "grad_norm": 0.4686122238636017, "learning_rate": 2.3315135135135137e-05, "loss": 0.0531, "step": 850 }, { "epoch": 24.324324324324323, "grad_norm": 0.4426785707473755, "learning_rate": 2.4652972972972976e-05, "loss": 0.051, "step": 900 }, { "epoch": 25.675675675675677, "grad_norm": 0.4910499155521393, "learning_rate": 2.5990810810810812e-05, "loss": 0.0486, "step": 950 }, { "epoch": 27.027027027027028, "grad_norm": 0.4253314435482025, "learning_rate": 2.7328648648648652e-05, "loss": 0.0491, "step": 1000 }, { "epoch": 28.37837837837838, "grad_norm": 0.3838571012020111, "learning_rate": 2.866648648648649e-05, "loss": 0.049, "step": 1050 }, { "epoch": 29.72972972972973, "grad_norm": 0.39428308606147766, "learning_rate": 3.0004324324324327e-05, "loss": 0.0478, "step": 1100 }, { "epoch": 31.08108108108108, "grad_norm": 0.3973025381565094, "learning_rate": 3.134216216216216e-05, "loss": 0.0473, "step": 1150 }, { "epoch": 32.432432432432435, "grad_norm": 0.47299668192863464, "learning_rate": 3.268e-05, "loss": 0.047, "step": 1200 }, { "epoch": 33.78378378378378, "grad_norm": 0.5077139139175415, "learning_rate": 3.4017837837837835e-05, "loss": 0.0459, "step": 1250 }, { "epoch": 35.13513513513514, "grad_norm": 0.3789336085319519, "learning_rate": 3.535567567567568e-05, "loss": 0.0462, "step": 1300 }, { "epoch": 36.486486486486484, "grad_norm": 0.3798762857913971, "learning_rate": 3.669351351351351e-05, "loss": 0.045, "step": 1350 }, { "epoch": 37.83783783783784, "grad_norm": 0.41409873962402344, "learning_rate": 3.803135135135135e-05, "loss": 0.0438, "step": 1400 }, { "epoch": 39.189189189189186, "grad_norm": 0.4816993176937103, "learning_rate": 3.936918918918919e-05, "loss": 0.0432, "step": 1450 }, { "epoch": 40.54054054054054, "grad_norm": 0.31075775623321533, "learning_rate": 4.070702702702703e-05, "loss": 0.0415, "step": 1500 }, { "epoch": 41.891891891891895, "grad_norm": 0.5353565812110901, "learning_rate": 4.2044864864864864e-05, "loss": 0.0414, "step": 1550 }, { "epoch": 43.24324324324324, "grad_norm": 0.3337886333465576, "learning_rate": 4.3382702702702707e-05, "loss": 0.0406, "step": 1600 }, { "epoch": 44.5945945945946, "grad_norm": 0.3847792446613312, "learning_rate": 4.472054054054054e-05, "loss": 0.0404, "step": 1650 }, { "epoch": 45.945945945945944, "grad_norm": 0.4075019657611847, "learning_rate": 4.605837837837838e-05, "loss": 0.0409, "step": 1700 }, { "epoch": 47.2972972972973, "grad_norm": 0.4205191433429718, "learning_rate": 4.7396216216216214e-05, "loss": 0.0399, "step": 1750 }, { "epoch": 48.648648648648646, "grad_norm": 0.5053867101669312, "learning_rate": 4.873405405405406e-05, "loss": 0.0387, "step": 1800 }, { "epoch": 50.0, "grad_norm": 0.43625885248184204, "learning_rate": 5.007189189189189e-05, "loss": 0.0393, "step": 1850 }, { "epoch": 51.351351351351354, "grad_norm": 0.44896236062049866, "learning_rate": 5.140972972972973e-05, "loss": 0.0376, "step": 1900 }, { "epoch": 52.7027027027027, "grad_norm": 0.42207279801368713, "learning_rate": 5.274756756756757e-05, "loss": 0.0392, "step": 1950 }, { "epoch": 54.054054054054056, "grad_norm": 0.4605530798435211, "learning_rate": 5.408540540540541e-05, "loss": 0.0384, "step": 2000 }, { "epoch": 55.4054054054054, "grad_norm": 0.4523848295211792, "learning_rate": 5.5423243243243243e-05, "loss": 0.0366, "step": 2050 }, { "epoch": 56.75675675675676, "grad_norm": 0.37463346123695374, "learning_rate": 5.6761081081081086e-05, "loss": 0.037, "step": 2100 }, { "epoch": 58.108108108108105, "grad_norm": 0.5655389428138733, "learning_rate": 5.809891891891892e-05, "loss": 0.0383, "step": 2150 }, { "epoch": 59.45945945945946, "grad_norm": 0.441587895154953, "learning_rate": 5.943675675675676e-05, "loss": 0.0392, "step": 2200 }, { "epoch": 60.810810810810814, "grad_norm": 0.44860920310020447, "learning_rate": 6.074783783783784e-05, "loss": 0.0372, "step": 2250 }, { "epoch": 62.16216216216216, "grad_norm": 0.4874947965145111, "learning_rate": 6.208567567567567e-05, "loss": 0.0372, "step": 2300 }, { "epoch": 63.513513513513516, "grad_norm": 0.44674238562583923, "learning_rate": 6.342351351351351e-05, "loss": 0.0368, "step": 2350 }, { "epoch": 64.86486486486487, "grad_norm": 0.535265326499939, "learning_rate": 6.476135135135136e-05, "loss": 0.0372, "step": 2400 }, { "epoch": 66.21621621621621, "grad_norm": 0.38546523451805115, "learning_rate": 6.60991891891892e-05, "loss": 0.0349, "step": 2450 }, { "epoch": 67.56756756756756, "grad_norm": 0.4751232862472534, "learning_rate": 6.743702702702703e-05, "loss": 0.038, "step": 2500 }, { "epoch": 68.91891891891892, "grad_norm": 0.6618958115577698, "learning_rate": 6.877486486486487e-05, "loss": 0.0512, "step": 2550 }, { "epoch": 70.27027027027027, "grad_norm": 0.4467822015285492, "learning_rate": 7.01127027027027e-05, "loss": 0.0517, "step": 2600 }, { "epoch": 71.62162162162163, "grad_norm": 0.5807027816772461, "learning_rate": 7.145054054054054e-05, "loss": 0.0506, "step": 2650 }, { "epoch": 72.97297297297297, "grad_norm": 0.44088873267173767, "learning_rate": 7.278837837837837e-05, "loss": 0.0517, "step": 2700 }, { "epoch": 74.32432432432432, "grad_norm": 0.5117065906524658, "learning_rate": 7.412621621621622e-05, "loss": 0.0497, "step": 2750 }, { "epoch": 75.67567567567568, "grad_norm": 0.5457988381385803, "learning_rate": 7.546405405405406e-05, "loss": 0.0494, "step": 2800 }, { "epoch": 77.02702702702703, "grad_norm": 0.35911738872528076, "learning_rate": 7.68018918918919e-05, "loss": 0.0476, "step": 2850 }, { "epoch": 78.37837837837837, "grad_norm": 0.5895106196403503, "learning_rate": 7.813972972972973e-05, "loss": 0.0479, "step": 2900 }, { "epoch": 79.72972972972973, "grad_norm": 0.39779385924339294, "learning_rate": 7.947756756756757e-05, "loss": 0.0479, "step": 2950 }, { "epoch": 81.08108108108108, "grad_norm": 0.5251230001449585, "learning_rate": 8.08154054054054e-05, "loss": 0.0468, "step": 3000 }, { "epoch": 81.08108108108108, "eval_loss": 0.5344434976577759, "eval_runtime": 19.3274, "eval_samples_per_second": 81.076, "eval_steps_per_second": 0.259, "eval_wer": 0.18456803226491192, "step": 3000 }, { "epoch": 82.43243243243244, "grad_norm": 0.44322407245635986, "learning_rate": 8.215324324324325e-05, "loss": 0.0458, "step": 3050 }, { "epoch": 83.78378378378379, "grad_norm": 0.47398409247398376, "learning_rate": 8.349108108108109e-05, "loss": 0.0461, "step": 3100 }, { "epoch": 85.13513513513513, "grad_norm": 0.452659010887146, "learning_rate": 8.482891891891893e-05, "loss": 0.045, "step": 3150 }, { "epoch": 86.48648648648648, "grad_norm": 0.6125317215919495, "learning_rate": 8.616675675675676e-05, "loss": 0.046, "step": 3200 }, { "epoch": 87.83783783783784, "grad_norm": 0.4655373692512512, "learning_rate": 8.75045945945946e-05, "loss": 0.0462, "step": 3250 }, { "epoch": 89.1891891891892, "grad_norm": 0.5071247220039368, "learning_rate": 8.884243243243243e-05, "loss": 0.0455, "step": 3300 }, { "epoch": 90.54054054054055, "grad_norm": 0.4586324691772461, "learning_rate": 9.018027027027027e-05, "loss": 0.0443, "step": 3350 }, { "epoch": 91.89189189189189, "grad_norm": 0.49810245633125305, "learning_rate": 9.151810810810812e-05, "loss": 0.0444, "step": 3400 }, { "epoch": 93.24324324324324, "grad_norm": 0.4874321222305298, "learning_rate": 9.285594594594595e-05, "loss": 0.0424, "step": 3450 }, { "epoch": 94.5945945945946, "grad_norm": 0.5060502886772156, "learning_rate": 9.419378378378379e-05, "loss": 0.0432, "step": 3500 }, { "epoch": 95.94594594594595, "grad_norm": 0.4356514513492584, "learning_rate": 9.553162162162163e-05, "loss": 0.042, "step": 3550 }, { "epoch": 97.29729729729729, "grad_norm": 0.5080994367599487, "learning_rate": 9.684270270270271e-05, "loss": 0.0424, "step": 3600 }, { "epoch": 98.64864864864865, "grad_norm": 0.6090648174285889, "learning_rate": 9.818054054054055e-05, "loss": 0.0423, "step": 3650 }, { "epoch": 100.0, "grad_norm": 0.6025941967964172, "learning_rate": 9.951837837837838e-05, "loss": 0.0445, "step": 3700 }, { "epoch": 101.35135135135135, "grad_norm": 0.592052161693573, "learning_rate": 0.0001, "loss": 0.0425, "step": 3750 }, { "epoch": 102.70270270270271, "grad_norm": 0.5158424973487854, "learning_rate": 0.0001, "loss": 0.0423, "step": 3800 }, { "epoch": 104.05405405405405, "grad_norm": 0.45459866523742676, "learning_rate": 0.0001, "loss": 0.0411, "step": 3850 }, { "epoch": 105.4054054054054, "grad_norm": 0.4477308392524719, "learning_rate": 0.0001, "loss": 0.0426, "step": 3900 }, { "epoch": 106.75675675675676, "grad_norm": 0.5172644853591919, "learning_rate": 0.0001, "loss": 0.0413, "step": 3950 }, { "epoch": 108.10810810810811, "grad_norm": 0.4363681674003601, "learning_rate": 0.0001, "loss": 0.0418, "step": 4000 }, { "epoch": 109.45945945945945, "grad_norm": 0.3899792730808258, "learning_rate": 0.0001, "loss": 0.0386, "step": 4050 }, { "epoch": 110.8108108108108, "grad_norm": 0.3736754357814789, "learning_rate": 0.0001, "loss": 0.0397, "step": 4100 }, { "epoch": 112.16216216216216, "grad_norm": 0.4031231999397278, "learning_rate": 0.0001, "loss": 0.0372, "step": 4150 }, { "epoch": 113.51351351351352, "grad_norm": 0.3616081774234772, "learning_rate": 0.0001, "loss": 0.0381, "step": 4200 }, { "epoch": 114.86486486486487, "grad_norm": 0.46817055344581604, "learning_rate": 0.0001, "loss": 0.0366, "step": 4250 }, { "epoch": 116.21621621621621, "grad_norm": 0.4078225791454315, "learning_rate": 0.0001, "loss": 0.0383, "step": 4300 }, { "epoch": 117.56756756756756, "grad_norm": 0.3821820020675659, "learning_rate": 0.0001, "loss": 0.0348, "step": 4350 }, { "epoch": 118.91891891891892, "grad_norm": 0.44808605313301086, "learning_rate": 0.0001, "loss": 0.0358, "step": 4400 }, { "epoch": 120.27027027027027, "grad_norm": 0.3535892367362976, "learning_rate": 0.0001, "loss": 0.0337, "step": 4450 }, { "epoch": 121.62162162162163, "grad_norm": 0.32362979650497437, "learning_rate": 0.0001, "loss": 0.0344, "step": 4500 }, { "epoch": 121.62162162162163, "eval_loss": 0.5824956893920898, "eval_runtime": 17.5119, "eval_samples_per_second": 89.482, "eval_steps_per_second": 0.286, "eval_wer": 0.18653152196985778, "step": 4500 }, { "epoch": 122.97297297297297, "grad_norm": 0.5193214416503906, "learning_rate": 0.0001, "loss": 0.0361, "step": 4550 }, { "epoch": 124.32432432432432, "grad_norm": 0.3041287362575531, "learning_rate": 0.0001, "loss": 0.0335, "step": 4600 }, { "epoch": 125.67567567567568, "grad_norm": 0.44249922037124634, "learning_rate": 0.0001, "loss": 0.034, "step": 4650 }, { "epoch": 127.02702702702703, "grad_norm": 0.357164204120636, "learning_rate": 0.0001, "loss": 0.0326, "step": 4700 }, { "epoch": 128.3783783783784, "grad_norm": 0.30578091740608215, "learning_rate": 0.0001, "loss": 0.0307, "step": 4750 }, { "epoch": 129.72972972972974, "grad_norm": 0.4774022102355957, "learning_rate": 0.0001, "loss": 0.0321, "step": 4800 }, { "epoch": 131.0810810810811, "grad_norm": 0.3393169343471527, "learning_rate": 0.0001, "loss": 0.0336, "step": 4850 }, { "epoch": 132.43243243243242, "grad_norm": 0.42481565475463867, "learning_rate": 0.0001, "loss": 0.0317, "step": 4900 }, { "epoch": 133.78378378378378, "grad_norm": 0.45170778036117554, "learning_rate": 0.0001, "loss": 0.0309, "step": 4950 }, { "epoch": 135.13513513513513, "grad_norm": 0.44404086470603943, "learning_rate": 0.0001, "loss": 0.0331, "step": 5000 }, { "epoch": 136.48648648648648, "grad_norm": 0.4285108149051666, "learning_rate": 0.0001, "loss": 0.0304, "step": 5050 }, { "epoch": 137.83783783783784, "grad_norm": 0.3434101343154907, "learning_rate": 0.0001, "loss": 0.0294, "step": 5100 }, { "epoch": 139.1891891891892, "grad_norm": 0.41777992248535156, "learning_rate": 0.0001, "loss": 0.0302, "step": 5150 }, { "epoch": 140.54054054054055, "grad_norm": 0.3897533714771271, "learning_rate": 0.0001, "loss": 0.0303, "step": 5200 }, { "epoch": 141.8918918918919, "grad_norm": 0.3457304537296295, "learning_rate": 0.0001, "loss": 0.0297, "step": 5250 }, { "epoch": 143.24324324324326, "grad_norm": 0.38188374042510986, "learning_rate": 0.0001, "loss": 0.0291, "step": 5300 }, { "epoch": 144.59459459459458, "grad_norm": 0.44426918029785156, "learning_rate": 0.0001, "loss": 0.0308, "step": 5350 }, { "epoch": 145.94594594594594, "grad_norm": 0.46593207120895386, "learning_rate": 0.0001, "loss": 0.0306, "step": 5400 }, { "epoch": 147.2972972972973, "grad_norm": 0.5084848403930664, "learning_rate": 0.0001, "loss": 0.0293, "step": 5450 }, { "epoch": 148.64864864864865, "grad_norm": 0.35385948419570923, "learning_rate": 0.0001, "loss": 0.0307, "step": 5500 }, { "epoch": 150.0, "grad_norm": 0.2549344003200531, "learning_rate": 0.0001, "loss": 0.0274, "step": 5550 }, { "epoch": 151.35135135135135, "grad_norm": 0.40980347990989685, "learning_rate": 0.0001, "loss": 0.0294, "step": 5600 }, { "epoch": 152.7027027027027, "grad_norm": 0.413776695728302, "learning_rate": 0.0001, "loss": 0.0285, "step": 5650 }, { "epoch": 154.05405405405406, "grad_norm": 0.4476383626461029, "learning_rate": 0.0001, "loss": 0.0276, "step": 5700 }, { "epoch": 155.40540540540542, "grad_norm": 0.3967137336730957, "learning_rate": 0.0001, "loss": 0.0286, "step": 5750 }, { "epoch": 156.75675675675674, "grad_norm": 0.3066927492618561, "learning_rate": 0.0001, "loss": 0.0285, "step": 5800 }, { "epoch": 158.1081081081081, "grad_norm": 0.43991604447364807, "learning_rate": 0.0001, "loss": 0.0297, "step": 5850 }, { "epoch": 159.45945945945945, "grad_norm": 0.6042722463607788, "learning_rate": 0.0001, "loss": 0.0291, "step": 5900 }, { "epoch": 160.8108108108108, "grad_norm": 0.3852483034133911, "learning_rate": 0.0001, "loss": 0.0291, "step": 5950 }, { "epoch": 162.16216216216216, "grad_norm": 0.3369120657444, "learning_rate": 0.0001, "loss": 0.0273, "step": 6000 }, { "epoch": 162.16216216216216, "eval_loss": 0.613073468208313, "eval_runtime": 21.311, "eval_samples_per_second": 73.53, "eval_steps_per_second": 0.235, "eval_wer": 0.18998089577584376, "step": 6000 }, { "epoch": 163.51351351351352, "grad_norm": 0.3259856402873993, "learning_rate": 0.0001, "loss": 0.0264, "step": 6050 }, { "epoch": 164.86486486486487, "grad_norm": 0.3508945405483246, "learning_rate": 0.0001, "loss": 0.0274, "step": 6100 }, { "epoch": 166.21621621621622, "grad_norm": 0.3457798957824707, "learning_rate": 0.0001, "loss": 0.0269, "step": 6150 }, { "epoch": 167.56756756756758, "grad_norm": 0.4306040406227112, "learning_rate": 0.0001, "loss": 0.0264, "step": 6200 }, { "epoch": 168.9189189189189, "grad_norm": 1.110560417175293, "learning_rate": 0.0001, "loss": 0.0262, "step": 6250 }, { "epoch": 170.27027027027026, "grad_norm": 0.3896867632865906, "learning_rate": 0.0001, "loss": 0.026, "step": 6300 }, { "epoch": 171.6216216216216, "grad_norm": 0.28667861223220825, "learning_rate": 0.0001, "loss": 0.0264, "step": 6350 }, { "epoch": 172.97297297297297, "grad_norm": 0.3063699007034302, "learning_rate": 0.0001, "loss": 0.0257, "step": 6400 }, { "epoch": 174.32432432432432, "grad_norm": 0.3457682132720947, "learning_rate": 0.0001, "loss": 0.0241, "step": 6450 }, { "epoch": 175.67567567567568, "grad_norm": 0.3746369183063507, "learning_rate": 0.0001, "loss": 0.0246, "step": 6500 }, { "epoch": 177.02702702702703, "grad_norm": 0.3032655715942383, "learning_rate": 0.0001, "loss": 0.0238, "step": 6550 }, { "epoch": 178.3783783783784, "grad_norm": 0.3203031122684479, "learning_rate": 0.0001, "loss": 0.026, "step": 6600 }, { "epoch": 179.72972972972974, "grad_norm": 0.4081636965274811, "learning_rate": 0.0001, "loss": 0.0251, "step": 6650 }, { "epoch": 181.0810810810811, "grad_norm": 0.38508379459381104, "learning_rate": 0.0001, "loss": 0.0232, "step": 6700 }, { "epoch": 182.43243243243242, "grad_norm": 0.43911251425743103, "learning_rate": 0.0001, "loss": 0.0242, "step": 6750 }, { "epoch": 183.78378378378378, "grad_norm": 0.38736340403556824, "learning_rate": 0.0001, "loss": 0.0237, "step": 6800 }, { "epoch": 185.13513513513513, "grad_norm": 0.6214938163757324, "learning_rate": 0.0001, "loss": 0.0235, "step": 6850 }, { "epoch": 186.48648648648648, "grad_norm": 0.3473169505596161, "learning_rate": 0.0001, "loss": 0.0234, "step": 6900 }, { "epoch": 187.83783783783784, "grad_norm": 0.34946322441101074, "learning_rate": 0.0001, "loss": 0.0232, "step": 6950 }, { "epoch": 189.1891891891892, "grad_norm": 0.3939970135688782, "learning_rate": 0.0001, "loss": 0.0244, "step": 7000 }, { "epoch": 190.54054054054055, "grad_norm": 0.37184515595436096, "learning_rate": 0.0001, "loss": 0.0238, "step": 7050 }, { "epoch": 191.8918918918919, "grad_norm": 0.39142072200775146, "learning_rate": 0.0001, "loss": 0.0238, "step": 7100 }, { "epoch": 193.24324324324326, "grad_norm": 0.35670343041419983, "learning_rate": 0.0001, "loss": 0.0234, "step": 7150 }, { "epoch": 194.59459459459458, "grad_norm": 0.5025286674499512, "learning_rate": 0.0001, "loss": 0.0237, "step": 7200 }, { "epoch": 195.94594594594594, "grad_norm": 0.4491577744483948, "learning_rate": 0.0001, "loss": 0.0245, "step": 7250 }, { "epoch": 197.2972972972973, "grad_norm": 0.30056050419807434, "learning_rate": 0.0001, "loss": 0.0222, "step": 7300 }, { "epoch": 198.64864864864865, "grad_norm": 0.332044780254364, "learning_rate": 0.0001, "loss": 0.0219, "step": 7350 }, { "epoch": 200.0, "grad_norm": 0.2828930914402008, "learning_rate": 0.0001, "loss": 0.0232, "step": 7400 }, { "epoch": 201.35135135135135, "grad_norm": 0.3619934022426605, "learning_rate": 0.0001, "loss": 0.0244, "step": 7450 }, { "epoch": 202.7027027027027, "grad_norm": 0.4312371015548706, "learning_rate": 0.0001, "loss": 0.0238, "step": 7500 }, { "epoch": 202.7027027027027, "eval_loss": 0.6532334685325623, "eval_runtime": 29.8357, "eval_samples_per_second": 52.521, "eval_steps_per_second": 0.168, "eval_wer": 0.19167904903417535, "step": 7500 }, { "epoch": 204.05405405405406, "grad_norm": 0.3734581470489502, "learning_rate": 0.0001, "loss": 0.0234, "step": 7550 }, { "epoch": 205.40540540540542, "grad_norm": 0.4595019519329071, "learning_rate": 0.0001, "loss": 0.0229, "step": 7600 }, { "epoch": 206.75675675675674, "grad_norm": 0.2700786888599396, "learning_rate": 0.0001, "loss": 0.0227, "step": 7650 }, { "epoch": 208.1081081081081, "grad_norm": 0.24691906571388245, "learning_rate": 0.0001, "loss": 0.0229, "step": 7700 }, { "epoch": 209.45945945945945, "grad_norm": 0.3328978717327118, "learning_rate": 0.0001, "loss": 0.0217, "step": 7750 }, { "epoch": 210.8108108108108, "grad_norm": 0.286808580160141, "learning_rate": 0.0001, "loss": 0.0205, "step": 7800 }, { "epoch": 212.16216216216216, "grad_norm": 0.27080458402633667, "learning_rate": 0.0001, "loss": 0.0202, "step": 7850 }, { "epoch": 213.51351351351352, "grad_norm": 0.398179292678833, "learning_rate": 0.0001, "loss": 0.0215, "step": 7900 }, { "epoch": 214.86486486486487, "grad_norm": 0.3541491627693176, "learning_rate": 0.0001, "loss": 0.0218, "step": 7950 }, { "epoch": 216.21621621621622, "grad_norm": 0.3138297498226166, "learning_rate": 0.0001, "loss": 0.022, "step": 8000 }, { "epoch": 217.56756756756758, "grad_norm": 0.38513001799583435, "learning_rate": 0.0001, "loss": 0.0215, "step": 8050 }, { "epoch": 218.9189189189189, "grad_norm": 0.400036484003067, "learning_rate": 0.0001, "loss": 0.021, "step": 8100 }, { "epoch": 220.27027027027026, "grad_norm": 0.3203113377094269, "learning_rate": 0.0001, "loss": 0.0207, "step": 8150 }, { "epoch": 221.6216216216216, "grad_norm": 0.3765117824077606, "learning_rate": 0.0001, "loss": 0.0197, "step": 8200 }, { "epoch": 222.97297297297297, "grad_norm": 0.3336365222930908, "learning_rate": 0.0001, "loss": 0.0211, "step": 8250 }, { "epoch": 224.32432432432432, "grad_norm": 0.29828354716300964, "learning_rate": 0.0001, "loss": 0.0188, "step": 8300 }, { "epoch": 225.67567567567568, "grad_norm": 0.34553930163383484, "learning_rate": 0.0001, "loss": 0.0199, "step": 8350 }, { "epoch": 227.02702702702703, "grad_norm": 0.3510328531265259, "learning_rate": 0.0001, "loss": 0.0215, "step": 8400 }, { "epoch": 228.3783783783784, "grad_norm": 0.48810675740242004, "learning_rate": 0.0001, "loss": 0.0217, "step": 8450 }, { "epoch": 229.72972972972974, "grad_norm": 0.34023284912109375, "learning_rate": 0.0001, "loss": 0.0225, "step": 8500 }, { "epoch": 231.0810810810811, "grad_norm": 0.31986966729164124, "learning_rate": 0.0001, "loss": 0.0217, "step": 8550 }, { "epoch": 232.43243243243242, "grad_norm": 0.27697187662124634, "learning_rate": 0.0001, "loss": 0.0205, "step": 8600 }, { "epoch": 233.78378378378378, "grad_norm": 0.3078053593635559, "learning_rate": 0.0001, "loss": 0.0185, "step": 8650 }, { "epoch": 235.13513513513513, "grad_norm": 0.24676857888698578, "learning_rate": 0.0001, "loss": 0.0202, "step": 8700 }, { "epoch": 236.48648648648648, "grad_norm": 0.2980283498764038, "learning_rate": 0.0001, "loss": 0.0202, "step": 8750 }, { "epoch": 237.83783783783784, "grad_norm": 0.34748488664627075, "learning_rate": 0.0001, "loss": 0.0188, "step": 8800 }, { "epoch": 239.1891891891892, "grad_norm": 0.31379759311676025, "learning_rate": 0.0001, "loss": 0.0195, "step": 8850 }, { "epoch": 240.54054054054055, "grad_norm": 0.31512585282325745, "learning_rate": 0.0001, "loss": 0.0197, "step": 8900 }, { "epoch": 241.8918918918919, "grad_norm": 0.28801149129867554, "learning_rate": 0.0001, "loss": 0.0188, "step": 8950 }, { "epoch": 243.24324324324326, "grad_norm": 0.29776033759117126, "learning_rate": 0.0001, "loss": 0.0196, "step": 9000 }, { "epoch": 243.24324324324326, "eval_loss": 0.6647829413414001, "eval_runtime": 22.108, "eval_samples_per_second": 70.879, "eval_steps_per_second": 0.226, "eval_wer": 0.19295266397792402, "step": 9000 }, { "epoch": 244.59459459459458, "grad_norm": 0.2917761206626892, "learning_rate": 0.0001, "loss": 0.0194, "step": 9050 }, { "epoch": 245.94594594594594, "grad_norm": 0.28261467814445496, "learning_rate": 0.0001, "loss": 0.0186, "step": 9100 }, { "epoch": 247.2972972972973, "grad_norm": 0.44025787711143494, "learning_rate": 0.0001, "loss": 0.0191, "step": 9150 }, { "epoch": 248.64864864864865, "grad_norm": 0.26063069701194763, "learning_rate": 0.0001, "loss": 0.0182, "step": 9200 }, { "epoch": 250.0, "grad_norm": 0.3038322329521179, "learning_rate": 0.0001, "loss": 0.0185, "step": 9250 }, { "epoch": 251.35135135135135, "grad_norm": 0.30964452028274536, "learning_rate": 0.0001, "loss": 0.0188, "step": 9300 }, { "epoch": 252.7027027027027, "grad_norm": 0.34113481640815735, "learning_rate": 0.0001, "loss": 0.0189, "step": 9350 }, { "epoch": 254.05405405405406, "grad_norm": 0.28624454140663147, "learning_rate": 0.0001, "loss": 0.0186, "step": 9400 }, { "epoch": 255.40540540540542, "grad_norm": 0.28637397289276123, "learning_rate": 0.0001, "loss": 0.0189, "step": 9450 }, { "epoch": 256.7567567567568, "grad_norm": 0.3362099230289459, "learning_rate": 0.0001, "loss": 0.0194, "step": 9500 }, { "epoch": 258.1081081081081, "grad_norm": 0.30529114603996277, "learning_rate": 0.0001, "loss": 0.0186, "step": 9550 }, { "epoch": 259.4594594594595, "grad_norm": 0.257412314414978, "learning_rate": 0.0001, "loss": 0.018, "step": 9600 }, { "epoch": 260.81081081081084, "grad_norm": 0.34228768944740295, "learning_rate": 0.0001, "loss": 0.019, "step": 9650 }, { "epoch": 262.1621621621622, "grad_norm": 0.43392807245254517, "learning_rate": 0.0001, "loss": 0.0188, "step": 9700 }, { "epoch": 263.5135135135135, "grad_norm": 0.2830718457698822, "learning_rate": 0.0001, "loss": 0.0191, "step": 9750 }, { "epoch": 264.86486486486484, "grad_norm": 0.33202308416366577, "learning_rate": 0.0001, "loss": 0.0179, "step": 9800 }, { "epoch": 266.2162162162162, "grad_norm": 0.4601031243801117, "learning_rate": 0.0001, "loss": 0.0176, "step": 9850 }, { "epoch": 267.56756756756755, "grad_norm": 0.33976060152053833, "learning_rate": 0.0001, "loss": 0.0183, "step": 9900 }, { "epoch": 268.9189189189189, "grad_norm": 0.23951521515846252, "learning_rate": 0.0001, "loss": 0.0171, "step": 9950 }, { "epoch": 270.27027027027026, "grad_norm": 0.7073889970779419, "learning_rate": 0.0001, "loss": 0.0165, "step": 10000 }, { "epoch": 271.6216216216216, "grad_norm": 0.31089919805526733, "learning_rate": 0.0001, "loss": 0.018, "step": 10050 }, { "epoch": 272.97297297297297, "grad_norm": 0.38815826177597046, "learning_rate": 0.0001, "loss": 0.0184, "step": 10100 }, { "epoch": 274.3243243243243, "grad_norm": 0.2964986562728882, "learning_rate": 0.0001, "loss": 0.0172, "step": 10150 }, { "epoch": 275.6756756756757, "grad_norm": 0.2726752460002899, "learning_rate": 0.0001, "loss": 0.018, "step": 10200 }, { "epoch": 277.02702702702703, "grad_norm": 0.29586270451545715, "learning_rate": 0.0001, "loss": 0.0169, "step": 10250 }, { "epoch": 278.3783783783784, "grad_norm": 0.3921571671962738, "learning_rate": 0.0001, "loss": 0.0186, "step": 10300 }, { "epoch": 279.72972972972974, "grad_norm": 0.3076343238353729, "learning_rate": 0.0001, "loss": 0.02, "step": 10350 }, { "epoch": 281.0810810810811, "grad_norm": 0.3205571472644806, "learning_rate": 0.0001, "loss": 0.0198, "step": 10400 }, { "epoch": 282.43243243243245, "grad_norm": 0.43506285548210144, "learning_rate": 0.0001, "loss": 0.0186, "step": 10450 }, { "epoch": 283.7837837837838, "grad_norm": 0.31954890489578247, "learning_rate": 0.0001, "loss": 0.018, "step": 10500 }, { "epoch": 283.7837837837838, "eval_loss": 0.6387069821357727, "eval_runtime": 41.5803, "eval_samples_per_second": 37.686, "eval_steps_per_second": 0.12, "eval_wer": 0.19608363404797283, "step": 10500 }, { "epoch": 285.13513513513516, "grad_norm": 0.32306522130966187, "learning_rate": 0.0001, "loss": 0.0174, "step": 10550 }, { "epoch": 286.4864864864865, "grad_norm": 0.2770741879940033, "learning_rate": 0.0001, "loss": 0.0172, "step": 10600 }, { "epoch": 287.8378378378378, "grad_norm": 0.2585732936859131, "learning_rate": 0.0001, "loss": 0.0162, "step": 10650 }, { "epoch": 289.18918918918916, "grad_norm": 0.2847765386104584, "learning_rate": 0.0001, "loss": 0.0161, "step": 10700 }, { "epoch": 290.5405405405405, "grad_norm": 0.2730112075805664, "learning_rate": 0.0001, "loss": 0.0178, "step": 10750 }, { "epoch": 291.8918918918919, "grad_norm": 0.29540035128593445, "learning_rate": 0.0001, "loss": 0.0177, "step": 10800 }, { "epoch": 293.2432432432432, "grad_norm": 0.38015422224998474, "learning_rate": 0.0001, "loss": 0.0166, "step": 10850 }, { "epoch": 294.5945945945946, "grad_norm": 0.3097437918186188, "learning_rate": 0.0001, "loss": 0.0175, "step": 10900 }, { "epoch": 295.94594594594594, "grad_norm": 0.2496563345193863, "learning_rate": 0.0001, "loss": 0.016, "step": 10950 }, { "epoch": 297.2972972972973, "grad_norm": 0.29481038451194763, "learning_rate": 0.0001, "loss": 0.0168, "step": 11000 }, { "epoch": 298.64864864864865, "grad_norm": 0.30355343222618103, "learning_rate": 0.0001, "loss": 0.017, "step": 11050 }, { "epoch": 300.0, "grad_norm": 0.2778134047985077, "learning_rate": 0.0001, "loss": 0.0165, "step": 11100 }, { "epoch": 301.35135135135135, "grad_norm": 0.29996800422668457, "learning_rate": 0.0001, "loss": 0.0158, "step": 11150 }, { "epoch": 302.7027027027027, "grad_norm": 0.3584728240966797, "learning_rate": 0.0001, "loss": 0.0155, "step": 11200 }, { "epoch": 304.05405405405406, "grad_norm": 0.5590859055519104, "learning_rate": 0.0001, "loss": 0.0162, "step": 11250 }, { "epoch": 305.4054054054054, "grad_norm": 0.22827082872390747, "learning_rate": 0.0001, "loss": 0.0166, "step": 11300 }, { "epoch": 306.7567567567568, "grad_norm": 0.37123405933380127, "learning_rate": 0.0001, "loss": 0.016, "step": 11350 }, { "epoch": 308.1081081081081, "grad_norm": 0.230214461684227, "learning_rate": 0.0001, "loss": 0.0168, "step": 11400 }, { "epoch": 309.4594594594595, "grad_norm": 0.23933345079421997, "learning_rate": 0.0001, "loss": 0.0164, "step": 11450 }, { "epoch": 310.81081081081084, "grad_norm": 0.3098168969154358, "learning_rate": 0.0001, "loss": 0.0152, "step": 11500 }, { "epoch": 312.1621621621622, "grad_norm": 0.3287512958049774, "learning_rate": 0.0001, "loss": 0.014, "step": 11550 }, { "epoch": 313.5135135135135, "grad_norm": 0.46195220947265625, "learning_rate": 0.0001, "loss": 0.0154, "step": 11600 }, { "epoch": 314.86486486486484, "grad_norm": 0.3282325863838196, "learning_rate": 0.0001, "loss": 0.0158, "step": 11650 }, { "epoch": 316.2162162162162, "grad_norm": 0.2945399284362793, "learning_rate": 0.0001, "loss": 0.0151, "step": 11700 }, { "epoch": 317.56756756756755, "grad_norm": 0.27878549695014954, "learning_rate": 0.0001, "loss": 0.0145, "step": 11750 }, { "epoch": 318.9189189189189, "grad_norm": 0.252695232629776, "learning_rate": 0.0001, "loss": 0.0142, "step": 11800 }, { "epoch": 320.27027027027026, "grad_norm": 0.26799729466438293, "learning_rate": 0.0001, "loss": 0.0145, "step": 11850 }, { "epoch": 321.6216216216216, "grad_norm": 0.2436702400445938, "learning_rate": 0.0001, "loss": 0.0147, "step": 11900 }, { "epoch": 322.97297297297297, "grad_norm": 0.40171217918395996, "learning_rate": 0.0001, "loss": 0.0145, "step": 11950 }, { "epoch": 324.3243243243243, "grad_norm": 0.2604866027832031, "learning_rate": 0.0001, "loss": 0.0154, "step": 12000 }, { "epoch": 324.3243243243243, "eval_loss": 0.6963035464286804, "eval_runtime": 17.3801, "eval_samples_per_second": 90.161, "eval_steps_per_second": 0.288, "eval_wer": 0.19910846953937592, "step": 12000 }, { "epoch": 325.6756756756757, "grad_norm": 0.28629496693611145, "learning_rate": 0.0001, "loss": 0.0159, "step": 12050 }, { "epoch": 327.02702702702703, "grad_norm": 0.24716606736183167, "learning_rate": 0.0001, "loss": 0.0152, "step": 12100 }, { "epoch": 328.3783783783784, "grad_norm": 0.2562699019908905, "learning_rate": 0.0001, "loss": 0.0153, "step": 12150 }, { "epoch": 329.72972972972974, "grad_norm": 0.27679792046546936, "learning_rate": 0.0001, "loss": 0.0147, "step": 12200 }, { "epoch": 331.0810810810811, "grad_norm": 0.2266552597284317, "learning_rate": 0.0001, "loss": 0.0149, "step": 12250 }, { "epoch": 332.43243243243245, "grad_norm": 0.2453828752040863, "learning_rate": 0.0001, "loss": 0.0151, "step": 12300 }, { "epoch": 333.7837837837838, "grad_norm": 0.3448384702205658, "learning_rate": 0.0001, "loss": 0.014, "step": 12350 }, { "epoch": 335.13513513513516, "grad_norm": 0.20089378952980042, "learning_rate": 0.0001, "loss": 0.0148, "step": 12400 }, { "epoch": 336.4864864864865, "grad_norm": 0.2895062267780304, "learning_rate": 0.0001, "loss": 0.016, "step": 12450 }, { "epoch": 337.8378378378378, "grad_norm": 0.3491511642932892, "learning_rate": 0.0001, "loss": 0.0153, "step": 12500 }, { "epoch": 339.18918918918916, "grad_norm": 0.2785622179508209, "learning_rate": 0.0001, "loss": 0.0152, "step": 12550 }, { "epoch": 340.5405405405405, "grad_norm": 0.25130748748779297, "learning_rate": 0.0001, "loss": 0.0148, "step": 12600 }, { "epoch": 341.8918918918919, "grad_norm": 0.3330935835838318, "learning_rate": 0.0001, "loss": 0.0147, "step": 12650 }, { "epoch": 343.2432432432432, "grad_norm": 0.2657862901687622, "learning_rate": 0.0001, "loss": 0.0156, "step": 12700 }, { "epoch": 344.5945945945946, "grad_norm": 0.3090120553970337, "learning_rate": 0.0001, "loss": 0.0145, "step": 12750 }, { "epoch": 345.94594594594594, "grad_norm": 0.31333035230636597, "learning_rate": 0.0001, "loss": 0.0137, "step": 12800 }, { "epoch": 347.2972972972973, "grad_norm": 0.33461394906044006, "learning_rate": 0.0001, "loss": 0.0139, "step": 12850 }, { "epoch": 348.64864864864865, "grad_norm": 0.21069389581680298, "learning_rate": 0.0001, "loss": 0.0138, "step": 12900 }, { "epoch": 350.0, "grad_norm": 0.23119139671325684, "learning_rate": 0.0001, "loss": 0.0142, "step": 12950 }, { "epoch": 351.35135135135135, "grad_norm": 0.23619785904884338, "learning_rate": 0.0001, "loss": 0.0131, "step": 13000 }, { "epoch": 352.7027027027027, "grad_norm": 0.4682454466819763, "learning_rate": 0.0001, "loss": 0.0129, "step": 13050 }, { "epoch": 354.05405405405406, "grad_norm": 0.3268776834011078, "learning_rate": 0.0001, "loss": 0.0144, "step": 13100 }, { "epoch": 355.4054054054054, "grad_norm": 0.3269369900226593, "learning_rate": 0.0001, "loss": 0.0142, "step": 13150 }, { "epoch": 356.7567567567568, "grad_norm": 0.34849807620048523, "learning_rate": 0.0001, "loss": 0.0142, "step": 13200 }, { "epoch": 358.1081081081081, "grad_norm": 0.24423350393772125, "learning_rate": 0.0001, "loss": 0.0148, "step": 13250 }, { "epoch": 359.4594594594595, "grad_norm": 0.2239474058151245, "learning_rate": 0.0001, "loss": 0.0146, "step": 13300 }, { "epoch": 360.81081081081084, "grad_norm": 0.2930073142051697, "learning_rate": 0.0001, "loss": 0.0145, "step": 13350 }, { "epoch": 362.1621621621622, "grad_norm": 0.25597310066223145, "learning_rate": 0.0001, "loss": 0.0137, "step": 13400 }, { "epoch": 363.5135135135135, "grad_norm": 0.3600046932697296, "learning_rate": 0.0001, "loss": 0.0142, "step": 13450 }, { "epoch": 364.86486486486484, "grad_norm": 0.37317752838134766, "learning_rate": 0.0001, "loss": 0.0134, "step": 13500 }, { "epoch": 364.86486486486484, "eval_loss": 0.7163126468658447, "eval_runtime": 17.4822, "eval_samples_per_second": 89.634, "eval_steps_per_second": 0.286, "eval_wer": 0.20101889195499895, "step": 13500 }, { "epoch": 366.2162162162162, "grad_norm": 0.2017332762479782, "learning_rate": 0.0001, "loss": 0.0134, "step": 13550 }, { "epoch": 367.56756756756755, "grad_norm": 0.26539239287376404, "learning_rate": 0.0001, "loss": 0.0148, "step": 13600 }, { "epoch": 368.9189189189189, "grad_norm": 0.2736688256263733, "learning_rate": 0.0001, "loss": 0.0148, "step": 13650 }, { "epoch": 370.27027027027026, "grad_norm": 0.28902319073677063, "learning_rate": 0.0001, "loss": 0.0137, "step": 13700 }, { "epoch": 371.6216216216216, "grad_norm": 0.1861814558506012, "learning_rate": 0.0001, "loss": 0.0132, "step": 13750 }, { "epoch": 372.97297297297297, "grad_norm": 0.2393738031387329, "learning_rate": 0.0001, "loss": 0.0125, "step": 13800 }, { "epoch": 374.3243243243243, "grad_norm": 0.3993573486804962, "learning_rate": 0.0001, "loss": 0.0125, "step": 13850 }, { "epoch": 375.6756756756757, "grad_norm": 0.3024432361125946, "learning_rate": 0.0001, "loss": 0.0134, "step": 13900 }, { "epoch": 377.02702702702703, "grad_norm": 0.35330072045326233, "learning_rate": 0.0001, "loss": 0.0143, "step": 13950 }, { "epoch": 378.3783783783784, "grad_norm": 0.21859917044639587, "learning_rate": 0.0001, "loss": 0.014, "step": 14000 }, { "epoch": 379.72972972972974, "grad_norm": 0.31557369232177734, "learning_rate": 0.0001, "loss": 0.0135, "step": 14050 }, { "epoch": 381.0810810810811, "grad_norm": 0.2256789207458496, "learning_rate": 0.0001, "loss": 0.0124, "step": 14100 }, { "epoch": 382.43243243243245, "grad_norm": 0.2742190659046173, "learning_rate": 0.0001, "loss": 0.0131, "step": 14150 }, { "epoch": 383.7837837837838, "grad_norm": 0.21607190370559692, "learning_rate": 0.0001, "loss": 0.0135, "step": 14200 }, { "epoch": 385.13513513513516, "grad_norm": 0.2626590430736542, "learning_rate": 0.0001, "loss": 0.0126, "step": 14250 }, { "epoch": 386.4864864864865, "grad_norm": 0.18108737468719482, "learning_rate": 0.0001, "loss": 0.0133, "step": 14300 }, { "epoch": 387.8378378378378, "grad_norm": 0.27729663252830505, "learning_rate": 0.0001, "loss": 0.0143, "step": 14350 }, { "epoch": 389.18918918918916, "grad_norm": 0.38008466362953186, "learning_rate": 0.0001, "loss": 0.0138, "step": 14400 }, { "epoch": 390.5405405405405, "grad_norm": 0.26434582471847534, "learning_rate": 0.0001, "loss": 0.0138, "step": 14450 }, { "epoch": 391.8918918918919, "grad_norm": 0.2773403823375702, "learning_rate": 0.0001, "loss": 0.0138, "step": 14500 }, { "epoch": 393.2432432432432, "grad_norm": 0.23838981986045837, "learning_rate": 0.0001, "loss": 0.0137, "step": 14550 }, { "epoch": 394.5945945945946, "grad_norm": 0.2961066663265228, "learning_rate": 0.0001, "loss": 0.0136, "step": 14600 }, { "epoch": 395.94594594594594, "grad_norm": 0.24311979115009308, "learning_rate": 0.0001, "loss": 0.0133, "step": 14650 }, { "epoch": 397.2972972972973, "grad_norm": 0.3343033492565155, "learning_rate": 0.0001, "loss": 0.0138, "step": 14700 }, { "epoch": 398.64864864864865, "grad_norm": 0.23256798088550568, "learning_rate": 0.0001, "loss": 0.0133, "step": 14750 }, { "epoch": 400.0, "grad_norm": 0.31679514050483704, "learning_rate": 0.0001, "loss": 0.0131, "step": 14800 }, { "epoch": 401.35135135135135, "grad_norm": 0.24046526849269867, "learning_rate": 0.0001, "loss": 0.0115, "step": 14850 }, { "epoch": 402.7027027027027, "grad_norm": 0.2563251852989197, "learning_rate": 0.0001, "loss": 0.0121, "step": 14900 }, { "epoch": 404.05405405405406, "grad_norm": 0.18860304355621338, "learning_rate": 0.0001, "loss": 0.0118, "step": 14950 }, { "epoch": 405.4054054054054, "grad_norm": 0.27949538826942444, "learning_rate": 0.0001, "loss": 0.0117, "step": 15000 }, { "epoch": 405.4054054054054, "eval_loss": 0.7209838628768921, "eval_runtime": 55.6252, "eval_samples_per_second": 28.171, "eval_steps_per_second": 0.09, "eval_wer": 0.19634897049458713, "step": 15000 }, { "epoch": 406.7567567567568, "grad_norm": 0.25572505593299866, "learning_rate": 0.0001, "loss": 0.0126, "step": 15050 }, { "epoch": 408.1081081081081, "grad_norm": 0.2233952134847641, "learning_rate": 0.0001, "loss": 0.0127, "step": 15100 }, { "epoch": 409.4594594594595, "grad_norm": 0.30835413932800293, "learning_rate": 0.0001, "loss": 0.0113, "step": 15150 }, { "epoch": 410.81081081081084, "grad_norm": 0.24969109892845154, "learning_rate": 0.0001, "loss": 0.0125, "step": 15200 }, { "epoch": 412.1621621621622, "grad_norm": 0.2240106463432312, "learning_rate": 0.0001, "loss": 0.0125, "step": 15250 }, { "epoch": 413.5135135135135, "grad_norm": 0.2874402403831482, "learning_rate": 0.0001, "loss": 0.0118, "step": 15300 }, { "epoch": 414.86486486486484, "grad_norm": 0.3226901590824127, "learning_rate": 0.0001, "loss": 0.012, "step": 15350 }, { "epoch": 416.2162162162162, "grad_norm": 0.2564234733581543, "learning_rate": 0.0001, "loss": 0.012, "step": 15400 }, { "epoch": 417.56756756756755, "grad_norm": 0.34858137369155884, "learning_rate": 0.0001, "loss": 0.0114, "step": 15450 }, { "epoch": 418.9189189189189, "grad_norm": 0.20746035873889923, "learning_rate": 0.0001, "loss": 0.0129, "step": 15500 }, { "epoch": 420.27027027027026, "grad_norm": 0.35932862758636475, "learning_rate": 0.0001, "loss": 0.0133, "step": 15550 }, { "epoch": 421.6216216216216, "grad_norm": 0.20093189179897308, "learning_rate": 0.0001, "loss": 0.0126, "step": 15600 }, { "epoch": 422.97297297297297, "grad_norm": 0.32909420132637024, "learning_rate": 0.0001, "loss": 0.0128, "step": 15650 }, { "epoch": 424.3243243243243, "grad_norm": 0.28278329968452454, "learning_rate": 0.0001, "loss": 0.0117, "step": 15700 }, { "epoch": 425.6756756756757, "grad_norm": 0.1597350388765335, "learning_rate": 0.0001, "loss": 0.0119, "step": 15750 }, { "epoch": 427.02702702702703, "grad_norm": 0.20241086184978485, "learning_rate": 0.0001, "loss": 0.0114, "step": 15800 }, { "epoch": 428.3783783783784, "grad_norm": 0.24632301926612854, "learning_rate": 0.0001, "loss": 0.0115, "step": 15850 }, { "epoch": 429.72972972972974, "grad_norm": 0.36104726791381836, "learning_rate": 0.0001, "loss": 0.0114, "step": 15900 }, { "epoch": 431.0810810810811, "grad_norm": 0.23273630440235138, "learning_rate": 0.0001, "loss": 0.0115, "step": 15950 }, { "epoch": 432.43243243243245, "grad_norm": 0.2528134882450104, "learning_rate": 0.0001, "loss": 0.0113, "step": 16000 } ], "logging_steps": 50, "max_steps": 37000, "num_input_tokens_seen": 0, "num_train_epochs": 1000, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.491310429887309e+20, "train_batch_size": 64, "trial_name": null, "trial_params": null }