|
{ |
|
"best_metric": 0.23875188827514648, |
|
"best_model_checkpoint": "./checkpoint-13500", |
|
"epoch": 2.0, |
|
"global_step": 13822, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.533333333333333e-06, |
|
"loss": 17.2403, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.32e-05, |
|
"loss": 10.2311, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 7.834, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.646666666666667e-05, |
|
"loss": 6.0656, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.313333333333333e-05, |
|
"loss": 4.3748, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 3.878422975540161, |
|
"eval_runtime": 285.8223, |
|
"eval_samples_per_second": 20.264, |
|
"eval_steps_per_second": 0.318, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.9800000000000005e-05, |
|
"loss": 3.2923, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.646666666666667e-05, |
|
"loss": 2.9475, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 5.3133333333333335e-05, |
|
"loss": 2.8639, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5.9800000000000003e-05, |
|
"loss": 2.8265, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 6.646666666666667e-05, |
|
"loss": 2.8068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 2.828850746154785, |
|
"eval_runtime": 292.3877, |
|
"eval_samples_per_second": 19.809, |
|
"eval_steps_per_second": 0.311, |
|
"eval_wer": 0.9826485059793412, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 7.306666666666668e-05, |
|
"loss": 2.779, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.973333333333334e-05, |
|
"loss": 2.6402, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.64e-05, |
|
"loss": 2.1119, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.306666666666667e-05, |
|
"loss": 1.7965, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.973333333333334e-05, |
|
"loss": 1.6698, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.881136417388916, |
|
"eval_runtime": 297.1806, |
|
"eval_samples_per_second": 19.49, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.7127472384241911, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.92209056971271e-05, |
|
"loss": 1.5882, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.840934913163448e-05, |
|
"loss": 1.5172, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.759779256614186e-05, |
|
"loss": 1.4579, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.678623600064926e-05, |
|
"loss": 1.3829, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.597467943515663e-05, |
|
"loss": 1.3488, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.516592800617218, |
|
"eval_runtime": 301.2842, |
|
"eval_samples_per_second": 19.224, |
|
"eval_steps_per_second": 0.302, |
|
"eval_wer": 0.5369024731988661, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.516312286966402e-05, |
|
"loss": 1.2981, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.43515663041714e-05, |
|
"loss": 1.2845, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.354000973867879e-05, |
|
"loss": 1.2459, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.272845317318618e-05, |
|
"loss": 1.2255, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.191689660769356e-05, |
|
"loss": 1.2239, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.4104757010936737, |
|
"eval_runtime": 299.1395, |
|
"eval_samples_per_second": 19.362, |
|
"eval_steps_per_second": 0.304, |
|
"eval_wer": 0.474111245071524, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.110534004220094e-05, |
|
"loss": 1.2024, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.030189904236326e-05, |
|
"loss": 1.1851, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.949034247687063e-05, |
|
"loss": 1.1768, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.867878591137803e-05, |
|
"loss": 1.1641, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.786722934588541e-05, |
|
"loss": 1.1537, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.35850802063941956, |
|
"eval_runtime": 299.662, |
|
"eval_samples_per_second": 19.328, |
|
"eval_steps_per_second": 0.304, |
|
"eval_wer": 0.4448499462348073, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.70556727803928e-05, |
|
"loss": 1.1449, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.624411621490018e-05, |
|
"loss": 1.1379, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.543255964940758e-05, |
|
"loss": 1.1331, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.462100308391495e-05, |
|
"loss": 1.1205, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.380944651842234e-05, |
|
"loss": 1.1184, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.333638072013855, |
|
"eval_runtime": 297.0402, |
|
"eval_samples_per_second": 19.499, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.42922545537489004, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.299788995292971e-05, |
|
"loss": 1.1014, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.218633338743711e-05, |
|
"loss": 1.1114, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.13747768219445e-05, |
|
"loss": 1.117, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.056322025645188e-05, |
|
"loss": 1.102, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.975166369095926e-05, |
|
"loss": 1.0968, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.31949570775032043, |
|
"eval_runtime": 296.0172, |
|
"eval_samples_per_second": 19.566, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 0.4180162273127179, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.894822269112158e-05, |
|
"loss": 1.0942, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.813666612562897e-05, |
|
"loss": 1.0859, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.732510956013635e-05, |
|
"loss": 1.0767, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.652166856029866e-05, |
|
"loss": 1.0766, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.571011199480604e-05, |
|
"loss": 1.0737, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.30754634737968445, |
|
"eval_runtime": 296.2378, |
|
"eval_samples_per_second": 19.552, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 0.41408973899442797, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.489855542931342e-05, |
|
"loss": 1.0807, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.40869988638208e-05, |
|
"loss": 1.071, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.32754422983282e-05, |
|
"loss": 1.0613, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.246388573283557e-05, |
|
"loss": 1.0635, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.165232916734297e-05, |
|
"loss": 1.0677, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.30150118470191956, |
|
"eval_runtime": 297.4742, |
|
"eval_samples_per_second": 19.471, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.4089250219948516, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.084077260185034e-05, |
|
"loss": 1.0707, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.002921603635774e-05, |
|
"loss": 1.0617, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.921765947086512e-05, |
|
"loss": 1.0566, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.84061029053725e-05, |
|
"loss": 1.0518, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.760266190553481e-05, |
|
"loss": 1.0462, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.297052800655365, |
|
"eval_runtime": 296.1592, |
|
"eval_samples_per_second": 19.557, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 0.4077193782788621, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.679110534004221e-05, |
|
"loss": 1.0514, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.597954877454959e-05, |
|
"loss": 1.0446, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.516799220905698e-05, |
|
"loss": 1.0358, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.435643564356436e-05, |
|
"loss": 1.0364, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.354487907807174e-05, |
|
"loss": 1.0392, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.2870033383369446, |
|
"eval_runtime": 295.9814, |
|
"eval_samples_per_second": 19.569, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 0.3997034768157972, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.273332251257913e-05, |
|
"loss": 1.0375, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.192176594708652e-05, |
|
"loss": 1.0408, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.11102093815939e-05, |
|
"loss": 1.0382, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.0298652816101284e-05, |
|
"loss": 1.0335, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.948709625060867e-05, |
|
"loss": 1.0178, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 0.28046590089797974, |
|
"eval_runtime": 297.8045, |
|
"eval_samples_per_second": 19.449, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.39629834794225943, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.867553968511605e-05, |
|
"loss": 1.0312, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.786398311962344e-05, |
|
"loss": 1.033, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.7052426554130825e-05, |
|
"loss": 1.0289, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.624086998863821e-05, |
|
"loss": 1.027, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.54293134231456e-05, |
|
"loss": 0.992, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.2747785747051239, |
|
"eval_runtime": 298.4841, |
|
"eval_samples_per_second": 19.405, |
|
"eval_steps_per_second": 0.305, |
|
"eval_wer": 0.39352862589201343, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.461775685765298e-05, |
|
"loss": 1.0025, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.3806200292160366e-05, |
|
"loss": 1.0122, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.299464372666775e-05, |
|
"loss": 1.018, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.218308716117514e-05, |
|
"loss": 0.9936, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.137153059568252e-05, |
|
"loss": 1.0197, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 0.26907604932785034, |
|
"eval_runtime": 298.796, |
|
"eval_samples_per_second": 19.384, |
|
"eval_steps_per_second": 0.305, |
|
"eval_wer": 0.3884453713056796, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.055997403018991e-05, |
|
"loss": 1.008, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.97484174646973e-05, |
|
"loss": 1.0028, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.893686089920468e-05, |
|
"loss": 0.9929, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.8125304333712064e-05, |
|
"loss": 0.995, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.731374776821945e-05, |
|
"loss": 1.0056, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 0.26817116141319275, |
|
"eval_runtime": 298.1504, |
|
"eval_samples_per_second": 19.426, |
|
"eval_steps_per_second": 0.305, |
|
"eval_wer": 0.3888526833718922, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.6510306768381754e-05, |
|
"loss": 0.9971, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.5698750202889145e-05, |
|
"loss": 0.9976, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.488719363739653e-05, |
|
"loss": 1.0014, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.407563707190391e-05, |
|
"loss": 0.9835, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.3272196072066225e-05, |
|
"loss": 0.9826, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.26473307609558105, |
|
"eval_runtime": 299.0161, |
|
"eval_samples_per_second": 19.37, |
|
"eval_steps_per_second": 0.304, |
|
"eval_wer": 0.38675095311023494, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.2460639506573615e-05, |
|
"loss": 0.9838, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.1649082941081e-05, |
|
"loss": 0.9836, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.0845641941243305e-05, |
|
"loss": 0.9824, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.003408537575069e-05, |
|
"loss": 0.9715, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.922252881025807e-05, |
|
"loss": 0.9815, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 0.26034271717071533, |
|
"eval_runtime": 299.6782, |
|
"eval_samples_per_second": 19.327, |
|
"eval_steps_per_second": 0.304, |
|
"eval_wer": 0.3831828994102121, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.841097224476546e-05, |
|
"loss": 0.9757, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.7599415679272846e-05, |
|
"loss": 0.9689, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.678785911378023e-05, |
|
"loss": 0.9778, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.5976302548287614e-05, |
|
"loss": 0.9794, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.5164745982795e-05, |
|
"loss": 0.9717, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.25609487295150757, |
|
"eval_runtime": 299.6976, |
|
"eval_samples_per_second": 19.326, |
|
"eval_steps_per_second": 0.304, |
|
"eval_wer": 0.3807064420476392, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.435318941730239e-05, |
|
"loss": 0.9752, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.354163285180977e-05, |
|
"loss": 0.965, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.2730076286317155e-05, |
|
"loss": 0.9522, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.191851972082454e-05, |
|
"loss": 0.9718, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.110696315533193e-05, |
|
"loss": 0.9605, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 0.25231894850730896, |
|
"eval_runtime": 297.5796, |
|
"eval_samples_per_second": 19.464, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.3782951546156603, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.0295406589839315e-05, |
|
"loss": 0.9635, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.94838500243467e-05, |
|
"loss": 0.9632, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.8672293458854082e-05, |
|
"loss": 0.9548, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7868852459016392e-05, |
|
"loss": 0.9554, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.7057295893523783e-05, |
|
"loss": 0.96, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.24939315021038055, |
|
"eval_runtime": 300.036, |
|
"eval_samples_per_second": 19.304, |
|
"eval_steps_per_second": 0.303, |
|
"eval_wer": 0.3787513441298185, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.6245739328031166e-05, |
|
"loss": 0.9611, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.543418276253855e-05, |
|
"loss": 0.9594, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4622626197045937e-05, |
|
"loss": 0.9589, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.381106963155332e-05, |
|
"loss": 0.9441, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.2999513066060704e-05, |
|
"loss": 0.9442, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.24783751368522644, |
|
"eval_runtime": 297.7741, |
|
"eval_samples_per_second": 19.451, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.3760142070448695, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.218795650056809e-05, |
|
"loss": 0.9496, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.1376399935075474e-05, |
|
"loss": 0.9486, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.056484336958286e-05, |
|
"loss": 0.9558, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.9753286804090245e-05, |
|
"loss": 0.9486, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.8941730238597632e-05, |
|
"loss": 0.9564, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.2454409897327423, |
|
"eval_runtime": 296.0265, |
|
"eval_samples_per_second": 19.566, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 0.3733096549252175, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.8130173673105015e-05, |
|
"loss": 0.9427, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.73186171076124e-05, |
|
"loss": 0.9423, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.6507060542119786e-05, |
|
"loss": 0.9503, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.5695503976627173e-05, |
|
"loss": 0.9383, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.4883947411134558e-05, |
|
"loss": 0.9436, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 0.24390804767608643, |
|
"eval_runtime": 295.4584, |
|
"eval_samples_per_second": 19.603, |
|
"eval_steps_per_second": 0.308, |
|
"eval_wer": 0.37467822346769203, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.4072390845641942e-05, |
|
"loss": 0.9491, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.3260834280149325e-05, |
|
"loss": 0.9419, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.2449277714656712e-05, |
|
"loss": 0.9517, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.1637721149164097e-05, |
|
"loss": 0.9367, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.0826164583671483e-05, |
|
"loss": 0.938, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 0.24111612141132355, |
|
"eval_runtime": 296.9314, |
|
"eval_samples_per_second": 19.506, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.37159894424712436, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.0014608018178868e-05, |
|
"loss": 0.9337, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.203051452686253e-06, |
|
"loss": 0.9284, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.391494887193638e-06, |
|
"loss": 0.938, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.579938321701023e-06, |
|
"loss": 0.9365, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 6.768381756208409e-06, |
|
"loss": 0.9353, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.23965783417224884, |
|
"eval_runtime": 296.6078, |
|
"eval_samples_per_second": 19.527, |
|
"eval_steps_per_second": 0.307, |
|
"eval_wer": 0.3697904786731402, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 5.956825190715793e-06, |
|
"loss": 0.9413, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 5.153384190878104e-06, |
|
"loss": 0.9356, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.34182762538549e-06, |
|
"loss": 0.9209, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.530271059892875e-06, |
|
"loss": 0.9362, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.71871449440026e-06, |
|
"loss": 0.9271, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.23875188827514648, |
|
"eval_runtime": 296.9414, |
|
"eval_samples_per_second": 19.506, |
|
"eval_steps_per_second": 0.306, |
|
"eval_wer": 0.3680797679950471, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.907157928907645e-06, |
|
"loss": 0.9288, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.0956013634150302e-06, |
|
"loss": 0.9345, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.840447979224152e-07, |
|
"loss": 0.9326, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 13822, |
|
"total_flos": 1.2600843645735263e+20, |
|
"train_loss": 1.442369053426242, |
|
"train_runtime": 53680.5392, |
|
"train_samples_per_second": 16.478, |
|
"train_steps_per_second": 0.257 |
|
} |
|
], |
|
"max_steps": 13822, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.2600843645735263e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|