|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.99453551912568, |
|
"global_step": 9100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.384615384615385e-06, |
|
"loss": 14.944, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.076923076923077e-05, |
|
"loss": 14.3202, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.6263736263736265e-05, |
|
"loss": 8.0014, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.175824175824176e-05, |
|
"loss": 3.9265, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.7252747252747255e-05, |
|
"loss": 3.2842, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.274725274725275e-05, |
|
"loss": 3.1869, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 3.824175824175824e-05, |
|
"loss": 3.108, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 4.3736263736263734e-05, |
|
"loss": 3.1175, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 4.923076923076923e-05, |
|
"loss": 3.0525, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 5.472527472527473e-05, |
|
"loss": 3.0418, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 3.0175631046295166, |
|
"eval_runtime": 180.0397, |
|
"eval_samples_per_second": 26.9, |
|
"eval_steps_per_second": 0.844, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 6.021978021978022e-05, |
|
"loss": 2.9976, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 6.571428571428571e-05, |
|
"loss": 2.9326, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 7.120879120879122e-05, |
|
"loss": 2.9006, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 7.67032967032967e-05, |
|
"loss": 2.5847, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 8.219780219780219e-05, |
|
"loss": 1.9329, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 8.76923076923077e-05, |
|
"loss": 1.5005, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 9.318681318681319e-05, |
|
"loss": 1.3603, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 9.868131868131869e-05, |
|
"loss": 1.2882, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 0.00010417582417582417, |
|
"loss": 1.2259, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.00010967032967032966, |
|
"loss": 1.1819, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"eval_loss": 0.2561783790588379, |
|
"eval_runtime": 183.3784, |
|
"eval_samples_per_second": 26.41, |
|
"eval_steps_per_second": 0.829, |
|
"eval_wer": 0.21680386034629576, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 0.00011516483516483517, |
|
"loss": 1.1526, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 0.00012065934065934066, |
|
"loss": 1.1248, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 0.00012615384615384615, |
|
"loss": 1.0837, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.00013164835164835166, |
|
"loss": 1.0713, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.00013714285714285716, |
|
"loss": 1.0511, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 0.00014263736263736264, |
|
"loss": 1.0454, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 0.00014813186813186812, |
|
"loss": 1.0157, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 0.00015362637362637362, |
|
"loss": 1.0207, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 0.00015912087912087913, |
|
"loss": 0.9859, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"learning_rate": 0.0001646153846153846, |
|
"loss": 1.0032, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.48, |
|
"eval_loss": 0.1746312528848648, |
|
"eval_runtime": 184.5587, |
|
"eval_samples_per_second": 26.241, |
|
"eval_steps_per_second": 0.824, |
|
"eval_wer": 0.15461254612546124, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 0.0001701098901098901, |
|
"loss": 0.974, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"learning_rate": 0.00017560439560439562, |
|
"loss": 0.9528, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 0.0001810989010989011, |
|
"loss": 0.9774, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.68, |
|
"learning_rate": 0.0001865934065934066, |
|
"loss": 0.9495, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 0.0001920879120879121, |
|
"loss": 0.9525, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.00019758241758241759, |
|
"loss": 0.9385, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 0.00020307692307692306, |
|
"loss": 0.9422, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 0.00020857142857142857, |
|
"loss": 0.9028, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 0.00021406593406593407, |
|
"loss": 0.9091, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"learning_rate": 0.00021956043956043955, |
|
"loss": 0.9077, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"eval_loss": 0.15996481478214264, |
|
"eval_runtime": 182.6325, |
|
"eval_samples_per_second": 26.518, |
|
"eval_steps_per_second": 0.832, |
|
"eval_wer": 0.13391995458416123, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"learning_rate": 0.00022505494505494506, |
|
"loss": 0.9073, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 0.00023054945054945056, |
|
"loss": 0.9046, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 23.62, |
|
"learning_rate": 0.00023604395604395604, |
|
"loss": 0.8864, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"learning_rate": 0.00024153846153846155, |
|
"loss": 0.8888, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 24.72, |
|
"learning_rate": 0.00024703296703296705, |
|
"loss": 0.8757, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 25.27, |
|
"learning_rate": 0.00024915750915750914, |
|
"loss": 0.8819, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 25.82, |
|
"learning_rate": 0.00024732600732600734, |
|
"loss": 0.8722, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 26.37, |
|
"learning_rate": 0.0002454945054945055, |
|
"loss": 0.8757, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 0.00024366300366300369, |
|
"loss": 0.8544, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"learning_rate": 0.00024183150183150186, |
|
"loss": 0.8687, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 27.47, |
|
"eval_loss": 0.1647317260503769, |
|
"eval_runtime": 181.9124, |
|
"eval_samples_per_second": 26.623, |
|
"eval_steps_per_second": 0.836, |
|
"eval_wer": 0.1378370706783991, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 28.02, |
|
"learning_rate": 0.00024, |
|
"loss": 0.8661, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 28.57, |
|
"learning_rate": 0.00023816849816849818, |
|
"loss": 0.8187, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 29.12, |
|
"learning_rate": 0.00023633699633699635, |
|
"loss": 0.8425, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 0.00023450549450549453, |
|
"loss": 0.8455, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 30.22, |
|
"learning_rate": 0.00023267399267399267, |
|
"loss": 0.8369, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 0.00023084249084249085, |
|
"loss": 0.8253, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 31.32, |
|
"learning_rate": 0.00022901098901098902, |
|
"loss": 0.8213, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 31.86, |
|
"learning_rate": 0.0002271794871794872, |
|
"loss": 0.808, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 32.42, |
|
"learning_rate": 0.00022534798534798534, |
|
"loss": 0.8352, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"learning_rate": 0.00022351648351648352, |
|
"loss": 0.8081, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"eval_loss": 0.16083544492721558, |
|
"eval_runtime": 184.6287, |
|
"eval_samples_per_second": 26.231, |
|
"eval_steps_per_second": 0.823, |
|
"eval_wer": 0.135310814646608, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 33.51, |
|
"learning_rate": 0.0002216849816849817, |
|
"loss": 0.8157, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 34.07, |
|
"learning_rate": 0.00021985347985347986, |
|
"loss": 0.8035, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 34.61, |
|
"learning_rate": 0.00021802197802197804, |
|
"loss": 0.7844, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 35.16, |
|
"learning_rate": 0.00021619047619047619, |
|
"loss": 0.7835, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 35.71, |
|
"learning_rate": 0.00021435897435897436, |
|
"loss": 0.7854, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 36.26, |
|
"learning_rate": 0.00021252747252747253, |
|
"loss": 0.7845, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 36.81, |
|
"learning_rate": 0.0002106959706959707, |
|
"loss": 0.7677, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 37.36, |
|
"learning_rate": 0.00020886446886446885, |
|
"loss": 0.7933, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 37.91, |
|
"learning_rate": 0.00020703296703296703, |
|
"loss": 0.7781, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 0.0002052014652014652, |
|
"loss": 0.7923, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_loss": 0.15337252616882324, |
|
"eval_runtime": 182.8057, |
|
"eval_samples_per_second": 26.493, |
|
"eval_steps_per_second": 0.831, |
|
"eval_wer": 0.12773204655123474, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 0.00020336996336996338, |
|
"loss": 0.7861, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 39.56, |
|
"learning_rate": 0.00020157509157509157, |
|
"loss": 0.767, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 40.11, |
|
"learning_rate": 0.00019974358974358974, |
|
"loss": 0.7688, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 40.66, |
|
"learning_rate": 0.0001979120879120879, |
|
"loss": 0.7515, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 41.21, |
|
"learning_rate": 0.00019608058608058606, |
|
"loss": 0.7602, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 41.75, |
|
"learning_rate": 0.00019424908424908423, |
|
"loss": 0.7565, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 42.31, |
|
"learning_rate": 0.0001924175824175824, |
|
"loss": 0.7646, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 42.85, |
|
"learning_rate": 0.0001905860805860806, |
|
"loss": 0.7547, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 0.00018875457875457878, |
|
"loss": 0.7474, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 43.95, |
|
"learning_rate": 0.00018692307692307693, |
|
"loss": 0.7349, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 43.95, |
|
"eval_loss": 0.15459321439266205, |
|
"eval_runtime": 182.8482, |
|
"eval_samples_per_second": 26.486, |
|
"eval_steps_per_second": 0.831, |
|
"eval_wer": 0.13034345728072666, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 44.5, |
|
"learning_rate": 0.0001850915750915751, |
|
"loss": 0.7447, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 45.05, |
|
"learning_rate": 0.00018326007326007328, |
|
"loss": 0.7442, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"learning_rate": 0.00018142857142857145, |
|
"loss": 0.7311, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 0.0001795970695970696, |
|
"loss": 0.7432, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 46.7, |
|
"learning_rate": 0.00017776556776556777, |
|
"loss": 0.7361, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 47.25, |
|
"learning_rate": 0.00017593406593406595, |
|
"loss": 0.7358, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 47.8, |
|
"learning_rate": 0.00017410256410256412, |
|
"loss": 0.718, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"learning_rate": 0.00017227106227106227, |
|
"loss": 0.7327, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 48.9, |
|
"learning_rate": 0.00017043956043956044, |
|
"loss": 0.7129, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 49.45, |
|
"learning_rate": 0.00016860805860805861, |
|
"loss": 0.7199, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 49.45, |
|
"eval_loss": 0.16171683371067047, |
|
"eval_runtime": 181.6699, |
|
"eval_samples_per_second": 26.658, |
|
"eval_steps_per_second": 0.837, |
|
"eval_wer": 0.12770366165200114, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 49.99, |
|
"learning_rate": 0.0001667765567765568, |
|
"loss": 0.7156, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 50.55, |
|
"learning_rate": 0.00016494505494505496, |
|
"loss": 0.7265, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 51.1, |
|
"learning_rate": 0.0001631135531135531, |
|
"loss": 0.7169, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 51.64, |
|
"learning_rate": 0.00016128205128205128, |
|
"loss": 0.7022, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 52.2, |
|
"learning_rate": 0.00015945054945054946, |
|
"loss": 0.7182, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 52.74, |
|
"learning_rate": 0.00015761904761904763, |
|
"loss": 0.7239, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 53.3, |
|
"learning_rate": 0.00015578754578754578, |
|
"loss": 0.702, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 53.84, |
|
"learning_rate": 0.00015395604395604395, |
|
"loss": 0.6951, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 54.39, |
|
"learning_rate": 0.00015212454212454213, |
|
"loss": 0.6972, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 54.94, |
|
"learning_rate": 0.0001502930402930403, |
|
"loss": 0.7028, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 54.94, |
|
"eval_loss": 0.15723808109760284, |
|
"eval_runtime": 179.3818, |
|
"eval_samples_per_second": 26.998, |
|
"eval_steps_per_second": 0.847, |
|
"eval_wer": 0.1286687482259438, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 55.49, |
|
"learning_rate": 0.00014846153846153845, |
|
"loss": 0.6829, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 56.04, |
|
"learning_rate": 0.00014663003663003662, |
|
"loss": 0.6864, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 56.59, |
|
"learning_rate": 0.0001447985347985348, |
|
"loss": 0.6854, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 57.14, |
|
"learning_rate": 0.00014296703296703297, |
|
"loss": 0.6759, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 57.69, |
|
"learning_rate": 0.00014113553113553112, |
|
"loss": 0.6775, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 58.24, |
|
"learning_rate": 0.0001393040293040293, |
|
"loss": 0.7114, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 58.79, |
|
"learning_rate": 0.00013747252747252746, |
|
"loss": 0.6791, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 59.34, |
|
"learning_rate": 0.00013564102564102566, |
|
"loss": 0.6862, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 59.89, |
|
"learning_rate": 0.00013380952380952384, |
|
"loss": 0.6716, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 60.44, |
|
"learning_rate": 0.00013197802197802198, |
|
"loss": 0.6912, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 60.44, |
|
"eval_loss": 0.15595602989196777, |
|
"eval_runtime": 177.0801, |
|
"eval_samples_per_second": 27.349, |
|
"eval_steps_per_second": 0.858, |
|
"eval_wer": 0.12489355662787398, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 60.98, |
|
"learning_rate": 0.00013014652014652016, |
|
"loss": 0.6743, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"learning_rate": 0.00012831501831501833, |
|
"loss": 0.6683, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 62.09, |
|
"learning_rate": 0.0001264835164835165, |
|
"loss": 0.6654, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 62.63, |
|
"learning_rate": 0.00012465201465201465, |
|
"loss": 0.6583, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 63.19, |
|
"learning_rate": 0.00012282051282051283, |
|
"loss": 0.6634, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 63.73, |
|
"learning_rate": 0.000120989010989011, |
|
"loss": 0.6566, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 64.28, |
|
"learning_rate": 0.00011915750915750916, |
|
"loss": 0.67, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 64.83, |
|
"learning_rate": 0.00011732600732600734, |
|
"loss": 0.6524, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 65.38, |
|
"learning_rate": 0.0001154945054945055, |
|
"loss": 0.6673, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 65.93, |
|
"learning_rate": 0.00011366300366300367, |
|
"loss": 0.6492, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 65.93, |
|
"eval_loss": 0.15421651303768158, |
|
"eval_runtime": 182.1682, |
|
"eval_samples_per_second": 26.585, |
|
"eval_steps_per_second": 0.834, |
|
"eval_wer": 0.12600056769798468, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 66.48, |
|
"learning_rate": 0.00011183150183150183, |
|
"loss": 0.6548, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 67.03, |
|
"learning_rate": 0.00011, |
|
"loss": 0.6542, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 67.58, |
|
"learning_rate": 0.00010816849816849816, |
|
"loss": 0.6351, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 68.13, |
|
"learning_rate": 0.00010633699633699634, |
|
"loss": 0.6513, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 68.68, |
|
"learning_rate": 0.00010450549450549451, |
|
"loss": 0.6328, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 69.23, |
|
"learning_rate": 0.00010267399267399267, |
|
"loss": 0.6507, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 69.78, |
|
"learning_rate": 0.00010084249084249085, |
|
"loss": 0.6389, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 70.33, |
|
"learning_rate": 9.901098901098901e-05, |
|
"loss": 0.6525, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 70.87, |
|
"learning_rate": 9.717948717948718e-05, |
|
"loss": 0.6436, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"learning_rate": 9.534798534798534e-05, |
|
"loss": 0.6407, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 71.43, |
|
"eval_loss": 0.16047754883766174, |
|
"eval_runtime": 178.7321, |
|
"eval_samples_per_second": 27.096, |
|
"eval_steps_per_second": 0.85, |
|
"eval_wer": 0.12398523985239852, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 71.97, |
|
"learning_rate": 9.351648351648353e-05, |
|
"loss": 0.6274, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 72.52, |
|
"learning_rate": 9.168498168498169e-05, |
|
"loss": 0.6338, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 73.08, |
|
"learning_rate": 8.989010989010989e-05, |
|
"loss": 0.6296, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 73.62, |
|
"learning_rate": 8.805860805860807e-05, |
|
"loss": 0.6202, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 74.17, |
|
"learning_rate": 8.622710622710623e-05, |
|
"loss": 0.6332, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 74.72, |
|
"learning_rate": 8.43956043956044e-05, |
|
"loss": 0.623, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 75.27, |
|
"learning_rate": 8.256410256410257e-05, |
|
"loss": 0.6406, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 75.82, |
|
"learning_rate": 8.073260073260073e-05, |
|
"loss": 0.6136, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 76.37, |
|
"learning_rate": 7.890109890109891e-05, |
|
"loss": 0.6313, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 7.706959706959707e-05, |
|
"loss": 0.6222, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_loss": 0.15765224397182465, |
|
"eval_runtime": 176.2542, |
|
"eval_samples_per_second": 27.477, |
|
"eval_steps_per_second": 0.862, |
|
"eval_wer": 0.12185637240987794, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 77.47, |
|
"learning_rate": 7.523809523809524e-05, |
|
"loss": 0.6149, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 78.02, |
|
"learning_rate": 7.34065934065934e-05, |
|
"loss": 0.6338, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 78.57, |
|
"learning_rate": 7.157509157509158e-05, |
|
"loss": 0.6231, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 79.12, |
|
"learning_rate": 6.974358974358974e-05, |
|
"loss": 0.6245, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 79.67, |
|
"learning_rate": 6.791208791208791e-05, |
|
"loss": 0.6022, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 80.22, |
|
"learning_rate": 6.608058608058607e-05, |
|
"loss": 0.6138, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 80.77, |
|
"learning_rate": 6.424908424908426e-05, |
|
"loss": 0.6163, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 81.32, |
|
"learning_rate": 6.241758241758242e-05, |
|
"loss": 0.6275, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 81.86, |
|
"learning_rate": 6.0586080586080586e-05, |
|
"loss": 0.6085, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 82.42, |
|
"learning_rate": 5.8754578754578754e-05, |
|
"loss": 0.6039, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 82.42, |
|
"eval_loss": 0.16446340084075928, |
|
"eval_runtime": 178.7505, |
|
"eval_samples_per_second": 27.094, |
|
"eval_steps_per_second": 0.85, |
|
"eval_wer": 0.12489355662787398, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 82.96, |
|
"learning_rate": 5.692307692307693e-05, |
|
"loss": 0.6063, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 83.51, |
|
"learning_rate": 5.5091575091575095e-05, |
|
"loss": 0.6039, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 84.07, |
|
"learning_rate": 5.326007326007326e-05, |
|
"loss": 0.6204, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 84.61, |
|
"learning_rate": 5.142857142857143e-05, |
|
"loss": 0.5922, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 85.16, |
|
"learning_rate": 4.9597069597069596e-05, |
|
"loss": 0.6031, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 85.71, |
|
"learning_rate": 4.776556776556776e-05, |
|
"loss": 0.5984, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 86.26, |
|
"learning_rate": 4.593406593406593e-05, |
|
"loss": 0.6213, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 86.81, |
|
"learning_rate": 4.41025641025641e-05, |
|
"loss": 0.5927, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 87.36, |
|
"learning_rate": 4.227106227106227e-05, |
|
"loss": 0.598, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 87.91, |
|
"learning_rate": 4.0439560439560445e-05, |
|
"loss": 0.5928, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 87.91, |
|
"eval_loss": 0.15899540483951569, |
|
"eval_runtime": 178.7907, |
|
"eval_samples_per_second": 27.088, |
|
"eval_steps_per_second": 0.85, |
|
"eval_wer": 0.12140221402214021, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 88.46, |
|
"learning_rate": 3.860805860805861e-05, |
|
"loss": 0.6021, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 89.01, |
|
"learning_rate": 3.677655677655678e-05, |
|
"loss": 0.5962, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 89.56, |
|
"learning_rate": 3.494505494505495e-05, |
|
"loss": 0.5798, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 90.11, |
|
"learning_rate": 3.3113553113553114e-05, |
|
"loss": 0.6024, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 90.66, |
|
"learning_rate": 3.128205128205128e-05, |
|
"loss": 0.5831, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 91.21, |
|
"learning_rate": 2.945054945054945e-05, |
|
"loss": 0.6001, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 91.75, |
|
"learning_rate": 2.761904761904762e-05, |
|
"loss": 0.5901, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"learning_rate": 2.578754578754579e-05, |
|
"loss": 0.6078, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 92.85, |
|
"learning_rate": 2.3956043956043956e-05, |
|
"loss": 0.5853, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"learning_rate": 2.2124542124542124e-05, |
|
"loss": 0.6022, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"eval_loss": 0.1596660017967224, |
|
"eval_runtime": 178.6421, |
|
"eval_samples_per_second": 27.11, |
|
"eval_steps_per_second": 0.851, |
|
"eval_wer": 0.1213170593244394, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 93.95, |
|
"learning_rate": 2.029304029304029e-05, |
|
"loss": 0.5721, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 94.5, |
|
"learning_rate": 1.8461538461538465e-05, |
|
"loss": 0.5839, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 95.05, |
|
"learning_rate": 1.6630036630036632e-05, |
|
"loss": 0.587, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 95.6, |
|
"learning_rate": 1.4835164835164835e-05, |
|
"loss": 0.5817, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 96.15, |
|
"learning_rate": 1.3003663003663005e-05, |
|
"loss": 0.5819, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 96.7, |
|
"learning_rate": 1.1172161172161172e-05, |
|
"loss": 0.5853, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 97.25, |
|
"learning_rate": 9.340659340659341e-06, |
|
"loss": 0.5778, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 97.8, |
|
"learning_rate": 7.509157509157509e-06, |
|
"loss": 0.6038, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 98.35, |
|
"learning_rate": 5.677655677655678e-06, |
|
"loss": 0.5756, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 98.9, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 0.5814, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 98.9, |
|
"eval_loss": 0.1598692536354065, |
|
"eval_runtime": 178.176, |
|
"eval_samples_per_second": 27.181, |
|
"eval_steps_per_second": 0.853, |
|
"eval_wer": 0.11989781436275901, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 99.45, |
|
"learning_rate": 2.0146520146520148e-06, |
|
"loss": 0.5807, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 1.8315018315018315e-07, |
|
"loss": 0.5798, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"step": 9100, |
|
"total_flos": 1.4065789113067918e+20, |
|
"train_loss": 1.0890738963032818, |
|
"train_runtime": 51321.8258, |
|
"train_samples_per_second": 22.778, |
|
"train_steps_per_second": 0.177 |
|
} |
|
], |
|
"max_steps": 9100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.4065789113067918e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|