|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 432.43243243243245, |
|
"eval_steps": 1500, |
|
"global_step": 16000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.5760506987571716, |
|
"learning_rate": 1.9364864864864865e-06, |
|
"loss": 0.0736, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 0.4748266339302063, |
|
"learning_rate": 3.2743243243243245e-06, |
|
"loss": 0.0682, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.054054054054054, |
|
"grad_norm": 0.4784703552722931, |
|
"learning_rate": 4.612162162162162e-06, |
|
"loss": 0.0661, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.405405405405405, |
|
"grad_norm": 0.4363403022289276, |
|
"learning_rate": 5.95e-06, |
|
"loss": 0.0646, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.756756756756757, |
|
"grad_norm": 0.49891752004623413, |
|
"learning_rate": 7.287837837837838e-06, |
|
"loss": 0.0624, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.108108108108109, |
|
"grad_norm": 0.47630417346954346, |
|
"learning_rate": 8.625675675675676e-06, |
|
"loss": 0.0635, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 9.45945945945946, |
|
"grad_norm": 0.5554671287536621, |
|
"learning_rate": 9.963513513513515e-06, |
|
"loss": 0.0617, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 10.81081081081081, |
|
"grad_norm": 0.4327123165130615, |
|
"learning_rate": 1.1301351351351353e-05, |
|
"loss": 0.0595, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.162162162162161, |
|
"grad_norm": 0.5002420544624329, |
|
"learning_rate": 1.263918918918919e-05, |
|
"loss": 0.0585, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 13.513513513513514, |
|
"grad_norm": 0.42524299025535583, |
|
"learning_rate": 1.3977027027027028e-05, |
|
"loss": 0.0579, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 14.864864864864865, |
|
"grad_norm": 0.44030219316482544, |
|
"learning_rate": 1.5314864864864867e-05, |
|
"loss": 0.0575, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 16.216216216216218, |
|
"grad_norm": 0.46804261207580566, |
|
"learning_rate": 1.6625945945945947e-05, |
|
"loss": 0.0579, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 17.56756756756757, |
|
"grad_norm": 0.5248824954032898, |
|
"learning_rate": 1.7963783783783787e-05, |
|
"loss": 0.0556, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 18.91891891891892, |
|
"grad_norm": 0.43462327122688293, |
|
"learning_rate": 1.9301621621621623e-05, |
|
"loss": 0.0546, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 20.27027027027027, |
|
"grad_norm": 0.48718762397766113, |
|
"learning_rate": 2.0639459459459462e-05, |
|
"loss": 0.0529, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 21.62162162162162, |
|
"grad_norm": 0.40828168392181396, |
|
"learning_rate": 2.19772972972973e-05, |
|
"loss": 0.0523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 22.972972972972972, |
|
"grad_norm": 0.4686122238636017, |
|
"learning_rate": 2.3315135135135137e-05, |
|
"loss": 0.0531, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 24.324324324324323, |
|
"grad_norm": 0.4426785707473755, |
|
"learning_rate": 2.4652972972972976e-05, |
|
"loss": 0.051, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 25.675675675675677, |
|
"grad_norm": 0.4910499155521393, |
|
"learning_rate": 2.5990810810810812e-05, |
|
"loss": 0.0486, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 27.027027027027028, |
|
"grad_norm": 0.4253314435482025, |
|
"learning_rate": 2.7328648648648652e-05, |
|
"loss": 0.0491, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 28.37837837837838, |
|
"grad_norm": 0.3838571012020111, |
|
"learning_rate": 2.866648648648649e-05, |
|
"loss": 0.049, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 29.72972972972973, |
|
"grad_norm": 0.39428308606147766, |
|
"learning_rate": 3.0004324324324327e-05, |
|
"loss": 0.0478, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 31.08108108108108, |
|
"grad_norm": 0.3973025381565094, |
|
"learning_rate": 3.134216216216216e-05, |
|
"loss": 0.0473, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 32.432432432432435, |
|
"grad_norm": 0.47299668192863464, |
|
"learning_rate": 3.268e-05, |
|
"loss": 0.047, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 33.78378378378378, |
|
"grad_norm": 0.5077139139175415, |
|
"learning_rate": 3.4017837837837835e-05, |
|
"loss": 0.0459, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 35.13513513513514, |
|
"grad_norm": 0.3789336085319519, |
|
"learning_rate": 3.535567567567568e-05, |
|
"loss": 0.0462, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 36.486486486486484, |
|
"grad_norm": 0.3798762857913971, |
|
"learning_rate": 3.669351351351351e-05, |
|
"loss": 0.045, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 37.83783783783784, |
|
"grad_norm": 0.41409873962402344, |
|
"learning_rate": 3.803135135135135e-05, |
|
"loss": 0.0438, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 39.189189189189186, |
|
"grad_norm": 0.4816993176937103, |
|
"learning_rate": 3.936918918918919e-05, |
|
"loss": 0.0432, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 40.54054054054054, |
|
"grad_norm": 0.31075775623321533, |
|
"learning_rate": 4.070702702702703e-05, |
|
"loss": 0.0415, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 41.891891891891895, |
|
"grad_norm": 0.5353565812110901, |
|
"learning_rate": 4.2044864864864864e-05, |
|
"loss": 0.0414, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 43.24324324324324, |
|
"grad_norm": 0.3337886333465576, |
|
"learning_rate": 4.3382702702702707e-05, |
|
"loss": 0.0406, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 44.5945945945946, |
|
"grad_norm": 0.3847792446613312, |
|
"learning_rate": 4.472054054054054e-05, |
|
"loss": 0.0404, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 45.945945945945944, |
|
"grad_norm": 0.4075019657611847, |
|
"learning_rate": 4.605837837837838e-05, |
|
"loss": 0.0409, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 47.2972972972973, |
|
"grad_norm": 0.4205191433429718, |
|
"learning_rate": 4.7396216216216214e-05, |
|
"loss": 0.0399, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 48.648648648648646, |
|
"grad_norm": 0.5053867101669312, |
|
"learning_rate": 4.873405405405406e-05, |
|
"loss": 0.0387, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 0.43625885248184204, |
|
"learning_rate": 5.007189189189189e-05, |
|
"loss": 0.0393, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 51.351351351351354, |
|
"grad_norm": 0.44896236062049866, |
|
"learning_rate": 5.140972972972973e-05, |
|
"loss": 0.0376, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 52.7027027027027, |
|
"grad_norm": 0.42207279801368713, |
|
"learning_rate": 5.274756756756757e-05, |
|
"loss": 0.0392, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 54.054054054054056, |
|
"grad_norm": 0.4605530798435211, |
|
"learning_rate": 5.408540540540541e-05, |
|
"loss": 0.0384, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 55.4054054054054, |
|
"grad_norm": 0.4523848295211792, |
|
"learning_rate": 5.5423243243243243e-05, |
|
"loss": 0.0366, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 56.75675675675676, |
|
"grad_norm": 0.37463346123695374, |
|
"learning_rate": 5.6761081081081086e-05, |
|
"loss": 0.037, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 58.108108108108105, |
|
"grad_norm": 0.5655389428138733, |
|
"learning_rate": 5.809891891891892e-05, |
|
"loss": 0.0383, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 59.45945945945946, |
|
"grad_norm": 0.441587895154953, |
|
"learning_rate": 5.943675675675676e-05, |
|
"loss": 0.0392, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 60.810810810810814, |
|
"grad_norm": 0.44860920310020447, |
|
"learning_rate": 6.074783783783784e-05, |
|
"loss": 0.0372, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 62.16216216216216, |
|
"grad_norm": 0.4874947965145111, |
|
"learning_rate": 6.208567567567567e-05, |
|
"loss": 0.0372, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 63.513513513513516, |
|
"grad_norm": 0.44674238562583923, |
|
"learning_rate": 6.342351351351351e-05, |
|
"loss": 0.0368, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 64.86486486486487, |
|
"grad_norm": 0.535265326499939, |
|
"learning_rate": 6.476135135135136e-05, |
|
"loss": 0.0372, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 66.21621621621621, |
|
"grad_norm": 0.38546523451805115, |
|
"learning_rate": 6.60991891891892e-05, |
|
"loss": 0.0349, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 67.56756756756756, |
|
"grad_norm": 0.4751232862472534, |
|
"learning_rate": 6.743702702702703e-05, |
|
"loss": 0.038, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 68.91891891891892, |
|
"grad_norm": 0.6618958115577698, |
|
"learning_rate": 6.877486486486487e-05, |
|
"loss": 0.0512, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 70.27027027027027, |
|
"grad_norm": 0.4467822015285492, |
|
"learning_rate": 7.01127027027027e-05, |
|
"loss": 0.0517, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 71.62162162162163, |
|
"grad_norm": 0.5807027816772461, |
|
"learning_rate": 7.145054054054054e-05, |
|
"loss": 0.0506, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 72.97297297297297, |
|
"grad_norm": 0.44088873267173767, |
|
"learning_rate": 7.278837837837837e-05, |
|
"loss": 0.0517, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 74.32432432432432, |
|
"grad_norm": 0.5117065906524658, |
|
"learning_rate": 7.412621621621622e-05, |
|
"loss": 0.0497, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 75.67567567567568, |
|
"grad_norm": 0.5457988381385803, |
|
"learning_rate": 7.546405405405406e-05, |
|
"loss": 0.0494, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 77.02702702702703, |
|
"grad_norm": 0.35911738872528076, |
|
"learning_rate": 7.68018918918919e-05, |
|
"loss": 0.0476, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 78.37837837837837, |
|
"grad_norm": 0.5895106196403503, |
|
"learning_rate": 7.813972972972973e-05, |
|
"loss": 0.0479, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 79.72972972972973, |
|
"grad_norm": 0.39779385924339294, |
|
"learning_rate": 7.947756756756757e-05, |
|
"loss": 0.0479, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 81.08108108108108, |
|
"grad_norm": 0.5251230001449585, |
|
"learning_rate": 8.08154054054054e-05, |
|
"loss": 0.0468, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 81.08108108108108, |
|
"eval_loss": 0.5344434976577759, |
|
"eval_runtime": 19.3274, |
|
"eval_samples_per_second": 81.076, |
|
"eval_steps_per_second": 0.259, |
|
"eval_wer": 0.18456803226491192, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 82.43243243243244, |
|
"grad_norm": 0.44322407245635986, |
|
"learning_rate": 8.215324324324325e-05, |
|
"loss": 0.0458, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 83.78378378378379, |
|
"grad_norm": 0.47398409247398376, |
|
"learning_rate": 8.349108108108109e-05, |
|
"loss": 0.0461, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 85.13513513513513, |
|
"grad_norm": 0.452659010887146, |
|
"learning_rate": 8.482891891891893e-05, |
|
"loss": 0.045, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 86.48648648648648, |
|
"grad_norm": 0.6125317215919495, |
|
"learning_rate": 8.616675675675676e-05, |
|
"loss": 0.046, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 87.83783783783784, |
|
"grad_norm": 0.4655373692512512, |
|
"learning_rate": 8.75045945945946e-05, |
|
"loss": 0.0462, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 89.1891891891892, |
|
"grad_norm": 0.5071247220039368, |
|
"learning_rate": 8.884243243243243e-05, |
|
"loss": 0.0455, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 90.54054054054055, |
|
"grad_norm": 0.4586324691772461, |
|
"learning_rate": 9.018027027027027e-05, |
|
"loss": 0.0443, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 91.89189189189189, |
|
"grad_norm": 0.49810245633125305, |
|
"learning_rate": 9.151810810810812e-05, |
|
"loss": 0.0444, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 93.24324324324324, |
|
"grad_norm": 0.4874321222305298, |
|
"learning_rate": 9.285594594594595e-05, |
|
"loss": 0.0424, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 94.5945945945946, |
|
"grad_norm": 0.5060502886772156, |
|
"learning_rate": 9.419378378378379e-05, |
|
"loss": 0.0432, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 95.94594594594595, |
|
"grad_norm": 0.4356514513492584, |
|
"learning_rate": 9.553162162162163e-05, |
|
"loss": 0.042, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 97.29729729729729, |
|
"grad_norm": 0.5080994367599487, |
|
"learning_rate": 9.684270270270271e-05, |
|
"loss": 0.0424, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 98.64864864864865, |
|
"grad_norm": 0.6090648174285889, |
|
"learning_rate": 9.818054054054055e-05, |
|
"loss": 0.0423, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 0.6025941967964172, |
|
"learning_rate": 9.951837837837838e-05, |
|
"loss": 0.0445, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 101.35135135135135, |
|
"grad_norm": 0.592052161693573, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0425, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 102.70270270270271, |
|
"grad_norm": 0.5158424973487854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0423, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 104.05405405405405, |
|
"grad_norm": 0.45459866523742676, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0411, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 105.4054054054054, |
|
"grad_norm": 0.4477308392524719, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0426, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 106.75675675675676, |
|
"grad_norm": 0.5172644853591919, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0413, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 108.10810810810811, |
|
"grad_norm": 0.4363681674003601, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0418, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 109.45945945945945, |
|
"grad_norm": 0.3899792730808258, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0386, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 110.8108108108108, |
|
"grad_norm": 0.3736754357814789, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0397, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 112.16216216216216, |
|
"grad_norm": 0.4031231999397278, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0372, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 113.51351351351352, |
|
"grad_norm": 0.3616081774234772, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0381, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 114.86486486486487, |
|
"grad_norm": 0.46817055344581604, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0366, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 116.21621621621621, |
|
"grad_norm": 0.4078225791454315, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0383, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 117.56756756756756, |
|
"grad_norm": 0.3821820020675659, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0348, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 118.91891891891892, |
|
"grad_norm": 0.44808605313301086, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0358, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 120.27027027027027, |
|
"grad_norm": 0.3535892367362976, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0337, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 121.62162162162163, |
|
"grad_norm": 0.32362979650497437, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0344, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 121.62162162162163, |
|
"eval_loss": 0.5824956893920898, |
|
"eval_runtime": 17.5119, |
|
"eval_samples_per_second": 89.482, |
|
"eval_steps_per_second": 0.286, |
|
"eval_wer": 0.18653152196985778, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 122.97297297297297, |
|
"grad_norm": 0.5193214416503906, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0361, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 124.32432432432432, |
|
"grad_norm": 0.3041287362575531, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0335, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 125.67567567567568, |
|
"grad_norm": 0.44249922037124634, |
|
"learning_rate": 0.0001, |
|
"loss": 0.034, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 127.02702702702703, |
|
"grad_norm": 0.357164204120636, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0326, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 128.3783783783784, |
|
"grad_norm": 0.30578091740608215, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0307, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 129.72972972972974, |
|
"grad_norm": 0.4774022102355957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0321, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 131.0810810810811, |
|
"grad_norm": 0.3393169343471527, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0336, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 132.43243243243242, |
|
"grad_norm": 0.42481565475463867, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0317, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 133.78378378378378, |
|
"grad_norm": 0.45170778036117554, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0309, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 135.13513513513513, |
|
"grad_norm": 0.44404086470603943, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0331, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 136.48648648648648, |
|
"grad_norm": 0.4285108149051666, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0304, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 137.83783783783784, |
|
"grad_norm": 0.3434101343154907, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0294, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 139.1891891891892, |
|
"grad_norm": 0.41777992248535156, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0302, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 140.54054054054055, |
|
"grad_norm": 0.3897533714771271, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0303, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 141.8918918918919, |
|
"grad_norm": 0.3457304537296295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0297, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 143.24324324324326, |
|
"grad_norm": 0.38188374042510986, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0291, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 144.59459459459458, |
|
"grad_norm": 0.44426918029785156, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0308, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 145.94594594594594, |
|
"grad_norm": 0.46593207120895386, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0306, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 147.2972972972973, |
|
"grad_norm": 0.5084848403930664, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0293, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 148.64864864864865, |
|
"grad_norm": 0.35385948419570923, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0307, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"grad_norm": 0.2549344003200531, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0274, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 151.35135135135135, |
|
"grad_norm": 0.40980347990989685, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0294, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 152.7027027027027, |
|
"grad_norm": 0.413776695728302, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0285, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 154.05405405405406, |
|
"grad_norm": 0.4476383626461029, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0276, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 155.40540540540542, |
|
"grad_norm": 0.3967137336730957, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0286, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 156.75675675675674, |
|
"grad_norm": 0.3066927492618561, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0285, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 158.1081081081081, |
|
"grad_norm": 0.43991604447364807, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0297, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 159.45945945945945, |
|
"grad_norm": 0.6042722463607788, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0291, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 160.8108108108108, |
|
"grad_norm": 0.3852483034133911, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0291, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 162.16216216216216, |
|
"grad_norm": 0.3369120657444, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0273, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 162.16216216216216, |
|
"eval_loss": 0.613073468208313, |
|
"eval_runtime": 21.311, |
|
"eval_samples_per_second": 73.53, |
|
"eval_steps_per_second": 0.235, |
|
"eval_wer": 0.18998089577584376, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 163.51351351351352, |
|
"grad_norm": 0.3259856402873993, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0264, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 164.86486486486487, |
|
"grad_norm": 0.3508945405483246, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0274, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 166.21621621621622, |
|
"grad_norm": 0.3457798957824707, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0269, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 167.56756756756758, |
|
"grad_norm": 0.4306040406227112, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0264, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 168.9189189189189, |
|
"grad_norm": 1.110560417175293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0262, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 170.27027027027026, |
|
"grad_norm": 0.3896867632865906, |
|
"learning_rate": 0.0001, |
|
"loss": 0.026, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 171.6216216216216, |
|
"grad_norm": 0.28667861223220825, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0264, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 172.97297297297297, |
|
"grad_norm": 0.3063699007034302, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0257, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 174.32432432432432, |
|
"grad_norm": 0.3457682132720947, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0241, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 175.67567567567568, |
|
"grad_norm": 0.3746369183063507, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0246, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 177.02702702702703, |
|
"grad_norm": 0.3032655715942383, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0238, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 178.3783783783784, |
|
"grad_norm": 0.3203031122684479, |
|
"learning_rate": 0.0001, |
|
"loss": 0.026, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 179.72972972972974, |
|
"grad_norm": 0.4081636965274811, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0251, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 181.0810810810811, |
|
"grad_norm": 0.38508379459381104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0232, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 182.43243243243242, |
|
"grad_norm": 0.43911251425743103, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0242, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 183.78378378378378, |
|
"grad_norm": 0.38736340403556824, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0237, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 185.13513513513513, |
|
"grad_norm": 0.6214938163757324, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0235, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 186.48648648648648, |
|
"grad_norm": 0.3473169505596161, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0234, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 187.83783783783784, |
|
"grad_norm": 0.34946322441101074, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0232, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 189.1891891891892, |
|
"grad_norm": 0.3939970135688782, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0244, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 190.54054054054055, |
|
"grad_norm": 0.37184515595436096, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0238, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 191.8918918918919, |
|
"grad_norm": 0.39142072200775146, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0238, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 193.24324324324326, |
|
"grad_norm": 0.35670343041419983, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0234, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 194.59459459459458, |
|
"grad_norm": 0.5025286674499512, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0237, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 195.94594594594594, |
|
"grad_norm": 0.4491577744483948, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0245, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 197.2972972972973, |
|
"grad_norm": 0.30056050419807434, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0222, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 198.64864864864865, |
|
"grad_norm": 0.332044780254364, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0219, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 0.2828930914402008, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0232, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 201.35135135135135, |
|
"grad_norm": 0.3619934022426605, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0244, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 202.7027027027027, |
|
"grad_norm": 0.4312371015548706, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0238, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 202.7027027027027, |
|
"eval_loss": 0.6532334685325623, |
|
"eval_runtime": 29.8357, |
|
"eval_samples_per_second": 52.521, |
|
"eval_steps_per_second": 0.168, |
|
"eval_wer": 0.19167904903417535, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 204.05405405405406, |
|
"grad_norm": 0.3734581470489502, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0234, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 205.40540540540542, |
|
"grad_norm": 0.4595019519329071, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0229, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 206.75675675675674, |
|
"grad_norm": 0.2700786888599396, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0227, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 208.1081081081081, |
|
"grad_norm": 0.24691906571388245, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0229, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 209.45945945945945, |
|
"grad_norm": 0.3328978717327118, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0217, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 210.8108108108108, |
|
"grad_norm": 0.286808580160141, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0205, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 212.16216216216216, |
|
"grad_norm": 0.27080458402633667, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0202, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 213.51351351351352, |
|
"grad_norm": 0.398179292678833, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0215, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 214.86486486486487, |
|
"grad_norm": 0.3541491627693176, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0218, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 216.21621621621622, |
|
"grad_norm": 0.3138297498226166, |
|
"learning_rate": 0.0001, |
|
"loss": 0.022, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 217.56756756756758, |
|
"grad_norm": 0.38513001799583435, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0215, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 218.9189189189189, |
|
"grad_norm": 0.400036484003067, |
|
"learning_rate": 0.0001, |
|
"loss": 0.021, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 220.27027027027026, |
|
"grad_norm": 0.3203113377094269, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0207, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 221.6216216216216, |
|
"grad_norm": 0.3765117824077606, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0197, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 222.97297297297297, |
|
"grad_norm": 0.3336365222930908, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0211, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 224.32432432432432, |
|
"grad_norm": 0.29828354716300964, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0188, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 225.67567567567568, |
|
"grad_norm": 0.34553930163383484, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0199, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 227.02702702702703, |
|
"grad_norm": 0.3510328531265259, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0215, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 228.3783783783784, |
|
"grad_norm": 0.48810675740242004, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0217, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 229.72972972972974, |
|
"grad_norm": 0.34023284912109375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0225, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 231.0810810810811, |
|
"grad_norm": 0.31986966729164124, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0217, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 232.43243243243242, |
|
"grad_norm": 0.27697187662124634, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0205, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 233.78378378378378, |
|
"grad_norm": 0.3078053593635559, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0185, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 235.13513513513513, |
|
"grad_norm": 0.24676857888698578, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0202, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 236.48648648648648, |
|
"grad_norm": 0.2980283498764038, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0202, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 237.83783783783784, |
|
"grad_norm": 0.34748488664627075, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0188, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 239.1891891891892, |
|
"grad_norm": 0.31379759311676025, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0195, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 240.54054054054055, |
|
"grad_norm": 0.31512585282325745, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0197, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 241.8918918918919, |
|
"grad_norm": 0.28801149129867554, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0188, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 243.24324324324326, |
|
"grad_norm": 0.29776033759117126, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0196, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 243.24324324324326, |
|
"eval_loss": 0.6647829413414001, |
|
"eval_runtime": 22.108, |
|
"eval_samples_per_second": 70.879, |
|
"eval_steps_per_second": 0.226, |
|
"eval_wer": 0.19295266397792402, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 244.59459459459458, |
|
"grad_norm": 0.2917761206626892, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0194, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 245.94594594594594, |
|
"grad_norm": 0.28261467814445496, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0186, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 247.2972972972973, |
|
"grad_norm": 0.44025787711143494, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0191, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 248.64864864864865, |
|
"grad_norm": 0.26063069701194763, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0182, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"grad_norm": 0.3038322329521179, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0185, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 251.35135135135135, |
|
"grad_norm": 0.30964452028274536, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0188, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 252.7027027027027, |
|
"grad_norm": 0.34113481640815735, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0189, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 254.05405405405406, |
|
"grad_norm": 0.28624454140663147, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0186, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 255.40540540540542, |
|
"grad_norm": 0.28637397289276123, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0189, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 256.7567567567568, |
|
"grad_norm": 0.3362099230289459, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0194, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 258.1081081081081, |
|
"grad_norm": 0.30529114603996277, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0186, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 259.4594594594595, |
|
"grad_norm": 0.257412314414978, |
|
"learning_rate": 0.0001, |
|
"loss": 0.018, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 260.81081081081084, |
|
"grad_norm": 0.34228768944740295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.019, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 262.1621621621622, |
|
"grad_norm": 0.43392807245254517, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0188, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 263.5135135135135, |
|
"grad_norm": 0.2830718457698822, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0191, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 264.86486486486484, |
|
"grad_norm": 0.33202308416366577, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0179, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 266.2162162162162, |
|
"grad_norm": 0.4601031243801117, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0176, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 267.56756756756755, |
|
"grad_norm": 0.33976060152053833, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0183, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 268.9189189189189, |
|
"grad_norm": 0.23951521515846252, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0171, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 270.27027027027026, |
|
"grad_norm": 0.7073889970779419, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0165, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 271.6216216216216, |
|
"grad_norm": 0.31089919805526733, |
|
"learning_rate": 0.0001, |
|
"loss": 0.018, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 272.97297297297297, |
|
"grad_norm": 0.38815826177597046, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0184, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 274.3243243243243, |
|
"grad_norm": 0.2964986562728882, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0172, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 275.6756756756757, |
|
"grad_norm": 0.2726752460002899, |
|
"learning_rate": 0.0001, |
|
"loss": 0.018, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 277.02702702702703, |
|
"grad_norm": 0.29586270451545715, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0169, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 278.3783783783784, |
|
"grad_norm": 0.3921571671962738, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0186, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 279.72972972972974, |
|
"grad_norm": 0.3076343238353729, |
|
"learning_rate": 0.0001, |
|
"loss": 0.02, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 281.0810810810811, |
|
"grad_norm": 0.3205571472644806, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0198, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 282.43243243243245, |
|
"grad_norm": 0.43506285548210144, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0186, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 283.7837837837838, |
|
"grad_norm": 0.31954890489578247, |
|
"learning_rate": 0.0001, |
|
"loss": 0.018, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 283.7837837837838, |
|
"eval_loss": 0.6387069821357727, |
|
"eval_runtime": 41.5803, |
|
"eval_samples_per_second": 37.686, |
|
"eval_steps_per_second": 0.12, |
|
"eval_wer": 0.19608363404797283, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 285.13513513513516, |
|
"grad_norm": 0.32306522130966187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0174, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 286.4864864864865, |
|
"grad_norm": 0.2770741879940033, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0172, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 287.8378378378378, |
|
"grad_norm": 0.2585732936859131, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0162, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 289.18918918918916, |
|
"grad_norm": 0.2847765386104584, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0161, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 290.5405405405405, |
|
"grad_norm": 0.2730112075805664, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0178, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 291.8918918918919, |
|
"grad_norm": 0.29540035128593445, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0177, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 293.2432432432432, |
|
"grad_norm": 0.38015422224998474, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0166, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 294.5945945945946, |
|
"grad_norm": 0.3097437918186188, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0175, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 295.94594594594594, |
|
"grad_norm": 0.2496563345193863, |
|
"learning_rate": 0.0001, |
|
"loss": 0.016, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 297.2972972972973, |
|
"grad_norm": 0.29481038451194763, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0168, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 298.64864864864865, |
|
"grad_norm": 0.30355343222618103, |
|
"learning_rate": 0.0001, |
|
"loss": 0.017, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"grad_norm": 0.2778134047985077, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0165, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 301.35135135135135, |
|
"grad_norm": 0.29996800422668457, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0158, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 302.7027027027027, |
|
"grad_norm": 0.3584728240966797, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0155, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 304.05405405405406, |
|
"grad_norm": 0.5590859055519104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0162, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 305.4054054054054, |
|
"grad_norm": 0.22827082872390747, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0166, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 306.7567567567568, |
|
"grad_norm": 0.37123405933380127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.016, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 308.1081081081081, |
|
"grad_norm": 0.230214461684227, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0168, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 309.4594594594595, |
|
"grad_norm": 0.23933345079421997, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0164, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 310.81081081081084, |
|
"grad_norm": 0.3098168969154358, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0152, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 312.1621621621622, |
|
"grad_norm": 0.3287512958049774, |
|
"learning_rate": 0.0001, |
|
"loss": 0.014, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 313.5135135135135, |
|
"grad_norm": 0.46195220947265625, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0154, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 314.86486486486484, |
|
"grad_norm": 0.3282325863838196, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0158, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 316.2162162162162, |
|
"grad_norm": 0.2945399284362793, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0151, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 317.56756756756755, |
|
"grad_norm": 0.27878549695014954, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 318.9189189189189, |
|
"grad_norm": 0.252695232629776, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0142, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 320.27027027027026, |
|
"grad_norm": 0.26799729466438293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 321.6216216216216, |
|
"grad_norm": 0.2436702400445938, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0147, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 322.97297297297297, |
|
"grad_norm": 0.40171217918395996, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 324.3243243243243, |
|
"grad_norm": 0.2604866027832031, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0154, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 324.3243243243243, |
|
"eval_loss": 0.6963035464286804, |
|
"eval_runtime": 17.3801, |
|
"eval_samples_per_second": 90.161, |
|
"eval_steps_per_second": 0.288, |
|
"eval_wer": 0.19910846953937592, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 325.6756756756757, |
|
"grad_norm": 0.28629496693611145, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0159, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 327.02702702702703, |
|
"grad_norm": 0.24716606736183167, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0152, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 328.3783783783784, |
|
"grad_norm": 0.2562699019908905, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0153, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 329.72972972972974, |
|
"grad_norm": 0.27679792046546936, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0147, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 331.0810810810811, |
|
"grad_norm": 0.2266552597284317, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0149, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 332.43243243243245, |
|
"grad_norm": 0.2453828752040863, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0151, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 333.7837837837838, |
|
"grad_norm": 0.3448384702205658, |
|
"learning_rate": 0.0001, |
|
"loss": 0.014, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 335.13513513513516, |
|
"grad_norm": 0.20089378952980042, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0148, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 336.4864864864865, |
|
"grad_norm": 0.2895062267780304, |
|
"learning_rate": 0.0001, |
|
"loss": 0.016, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 337.8378378378378, |
|
"grad_norm": 0.3491511642932892, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0153, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 339.18918918918916, |
|
"grad_norm": 0.2785622179508209, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0152, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 340.5405405405405, |
|
"grad_norm": 0.25130748748779297, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0148, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 341.8918918918919, |
|
"grad_norm": 0.3330935835838318, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0147, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 343.2432432432432, |
|
"grad_norm": 0.2657862901687622, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0156, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 344.5945945945946, |
|
"grad_norm": 0.3090120553970337, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 345.94594594594594, |
|
"grad_norm": 0.31333035230636597, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 347.2972972972973, |
|
"grad_norm": 0.33461394906044006, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0139, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 348.64864864864865, |
|
"grad_norm": 0.21069389581680298, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0138, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"grad_norm": 0.23119139671325684, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0142, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 351.35135135135135, |
|
"grad_norm": 0.23619785904884338, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0131, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 352.7027027027027, |
|
"grad_norm": 0.4682454466819763, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0129, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 354.05405405405406, |
|
"grad_norm": 0.3268776834011078, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0144, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 355.4054054054054, |
|
"grad_norm": 0.3269369900226593, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0142, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 356.7567567567568, |
|
"grad_norm": 0.34849807620048523, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0142, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 358.1081081081081, |
|
"grad_norm": 0.24423350393772125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0148, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 359.4594594594595, |
|
"grad_norm": 0.2239474058151245, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0146, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 360.81081081081084, |
|
"grad_norm": 0.2930073142051697, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0145, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 362.1621621621622, |
|
"grad_norm": 0.25597310066223145, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 363.5135135135135, |
|
"grad_norm": 0.3600046932697296, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0142, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 364.86486486486484, |
|
"grad_norm": 0.37317752838134766, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0134, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 364.86486486486484, |
|
"eval_loss": 0.7163126468658447, |
|
"eval_runtime": 17.4822, |
|
"eval_samples_per_second": 89.634, |
|
"eval_steps_per_second": 0.286, |
|
"eval_wer": 0.20101889195499895, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 366.2162162162162, |
|
"grad_norm": 0.2017332762479782, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0134, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 367.56756756756755, |
|
"grad_norm": 0.26539239287376404, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0148, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 368.9189189189189, |
|
"grad_norm": 0.2736688256263733, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0148, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 370.27027027027026, |
|
"grad_norm": 0.28902319073677063, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 371.6216216216216, |
|
"grad_norm": 0.1861814558506012, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0132, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 372.97297297297297, |
|
"grad_norm": 0.2393738031387329, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0125, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 374.3243243243243, |
|
"grad_norm": 0.3993573486804962, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0125, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 375.6756756756757, |
|
"grad_norm": 0.3024432361125946, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0134, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 377.02702702702703, |
|
"grad_norm": 0.35330072045326233, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0143, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 378.3783783783784, |
|
"grad_norm": 0.21859917044639587, |
|
"learning_rate": 0.0001, |
|
"loss": 0.014, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 379.72972972972974, |
|
"grad_norm": 0.31557369232177734, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0135, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 381.0810810810811, |
|
"grad_norm": 0.2256789207458496, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0124, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 382.43243243243245, |
|
"grad_norm": 0.2742190659046173, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0131, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 383.7837837837838, |
|
"grad_norm": 0.21607190370559692, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0135, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 385.13513513513516, |
|
"grad_norm": 0.2626590430736542, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0126, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 386.4864864864865, |
|
"grad_norm": 0.18108737468719482, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0133, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 387.8378378378378, |
|
"grad_norm": 0.27729663252830505, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0143, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 389.18918918918916, |
|
"grad_norm": 0.38008466362953186, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0138, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 390.5405405405405, |
|
"grad_norm": 0.26434582471847534, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0138, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 391.8918918918919, |
|
"grad_norm": 0.2773403823375702, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0138, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 393.2432432432432, |
|
"grad_norm": 0.23838981986045837, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0137, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 394.5945945945946, |
|
"grad_norm": 0.2961066663265228, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0136, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 395.94594594594594, |
|
"grad_norm": 0.24311979115009308, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0133, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 397.2972972972973, |
|
"grad_norm": 0.3343033492565155, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0138, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 398.64864864864865, |
|
"grad_norm": 0.23256798088550568, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0133, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"grad_norm": 0.31679514050483704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0131, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 401.35135135135135, |
|
"grad_norm": 0.24046526849269867, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0115, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 402.7027027027027, |
|
"grad_norm": 0.2563251852989197, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0121, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 404.05405405405406, |
|
"grad_norm": 0.18860304355621338, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 405.4054054054054, |
|
"grad_norm": 0.27949538826942444, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0117, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 405.4054054054054, |
|
"eval_loss": 0.7209838628768921, |
|
"eval_runtime": 55.6252, |
|
"eval_samples_per_second": 28.171, |
|
"eval_steps_per_second": 0.09, |
|
"eval_wer": 0.19634897049458713, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 406.7567567567568, |
|
"grad_norm": 0.25572505593299866, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0126, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 408.1081081081081, |
|
"grad_norm": 0.2233952134847641, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0127, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 409.4594594594595, |
|
"grad_norm": 0.30835413932800293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0113, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 410.81081081081084, |
|
"grad_norm": 0.24969109892845154, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0125, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 412.1621621621622, |
|
"grad_norm": 0.2240106463432312, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0125, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 413.5135135135135, |
|
"grad_norm": 0.2874402403831482, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0118, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 414.86486486486484, |
|
"grad_norm": 0.3226901590824127, |
|
"learning_rate": 0.0001, |
|
"loss": 0.012, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 416.2162162162162, |
|
"grad_norm": 0.2564234733581543, |
|
"learning_rate": 0.0001, |
|
"loss": 0.012, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 417.56756756756755, |
|
"grad_norm": 0.34858137369155884, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 418.9189189189189, |
|
"grad_norm": 0.20746035873889923, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0129, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 420.27027027027026, |
|
"grad_norm": 0.35932862758636475, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0133, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 421.6216216216216, |
|
"grad_norm": 0.20093189179897308, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0126, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 422.97297297297297, |
|
"grad_norm": 0.32909420132637024, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0128, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 424.3243243243243, |
|
"grad_norm": 0.28278329968452454, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0117, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 425.6756756756757, |
|
"grad_norm": 0.1597350388765335, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0119, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 427.02702702702703, |
|
"grad_norm": 0.20241086184978485, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 428.3783783783784, |
|
"grad_norm": 0.24632301926612854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0115, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 429.72972972972974, |
|
"grad_norm": 0.36104726791381836, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0114, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 431.0810810810811, |
|
"grad_norm": 0.23273630440235138, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0115, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 432.43243243243245, |
|
"grad_norm": 0.2528134882450104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0113, |
|
"step": 16000 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 37000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1000, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.491310429887309e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|