{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.994708994709,
  "global_step": 9400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.06,
      "learning_rate": 4.950000000000001e-06,
      "loss": 13.5577,
      "step": 100
    },
    {
      "epoch": 2.13,
      "learning_rate": 9.950000000000001e-06,
      "loss": 5.868,
      "step": 200
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.4950000000000001e-05,
      "loss": 4.1355,
      "step": 300
    },
    {
      "epoch": 4.25,
      "learning_rate": 1.995e-05,
      "loss": 3.5988,
      "step": 400
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.495e-05,
      "loss": 3.3036,
      "step": 500
    },
    {
      "epoch": 5.32,
      "eval_loss": 3.262770652770996,
      "eval_runtime": 136.0729,
      "eval_samples_per_second": 20.166,
      "eval_steps_per_second": 2.521,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 6.38,
      "learning_rate": 2.995e-05,
      "loss": 3.2243,
      "step": 600
    },
    {
      "epoch": 7.44,
      "learning_rate": 3.495e-05,
      "loss": 3.1823,
      "step": 700
    },
    {
      "epoch": 8.51,
      "learning_rate": 3.995e-05,
      "loss": 3.1371,
      "step": 800
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.495e-05,
      "loss": 3.0942,
      "step": 900
    },
    {
      "epoch": 10.63,
      "learning_rate": 4.995e-05,
      "loss": 2.9734,
      "step": 1000
    },
    {
      "epoch": 10.63,
      "eval_loss": 2.5676724910736084,
      "eval_runtime": 135.3086,
      "eval_samples_per_second": 20.28,
      "eval_steps_per_second": 2.535,
      "eval_wer": 0.9980321266761246,
      "step": 1000
    },
    {
      "epoch": 11.7,
      "learning_rate": 5.495e-05,
      "loss": 2.2815,
      "step": 1100
    },
    {
      "epoch": 12.76,
      "learning_rate": 5.995000000000001e-05,
      "loss": 1.6698,
      "step": 1200
    },
    {
      "epoch": 13.83,
      "learning_rate": 6.494999999999999e-05,
      "loss": 1.4895,
      "step": 1300
    },
    {
      "epoch": 14.89,
      "learning_rate": 6.995e-05,
      "loss": 1.3959,
      "step": 1400
    },
    {
      "epoch": 15.95,
      "learning_rate": 7.495e-05,
      "loss": 1.3466,
      "step": 1500
    },
    {
      "epoch": 15.95,
      "eval_loss": 0.44553351402282715,
      "eval_runtime": 134.1872,
      "eval_samples_per_second": 20.449,
      "eval_steps_per_second": 2.556,
      "eval_wer": 0.6306347535581895,
      "step": 1500
    },
    {
      "epoch": 17.02,
      "learning_rate": 7.995e-05,
      "loss": 1.3288,
      "step": 1600
    },
    {
      "epoch": 18.08,
      "learning_rate": 8.495e-05,
      "loss": 1.2866,
      "step": 1700
    },
    {
      "epoch": 19.15,
      "learning_rate": 8.995e-05,
      "loss": 1.2619,
      "step": 1800
    },
    {
      "epoch": 20.21,
      "learning_rate": 9.495e-05,
      "loss": 1.2496,
      "step": 1900
    },
    {
      "epoch": 21.28,
      "learning_rate": 9.99e-05,
      "loss": 1.2424,
      "step": 2000
    },
    {
      "epoch": 21.28,
      "eval_loss": 0.3603059649467468,
      "eval_runtime": 134.7251,
      "eval_samples_per_second": 20.367,
      "eval_steps_per_second": 2.546,
      "eval_wer": 0.5301359205528351,
      "step": 2000
    },
    {
      "epoch": 22.34,
      "learning_rate": 9.867567567567569e-05,
      "loss": 1.2253,
      "step": 2100
    },
    {
      "epoch": 23.4,
      "learning_rate": 9.732432432432433e-05,
      "loss": 1.209,
      "step": 2200
    },
    {
      "epoch": 24.47,
      "learning_rate": 9.597297297297298e-05,
      "loss": 1.1984,
      "step": 2300
    },
    {
      "epoch": 25.53,
      "learning_rate": 9.462162162162162e-05,
      "loss": 1.1735,
      "step": 2400
    },
    {
      "epoch": 26.59,
      "learning_rate": 9.327027027027028e-05,
      "loss": 1.1655,
      "step": 2500
    },
    {
      "epoch": 26.59,
      "eval_loss": 0.3164927661418915,
      "eval_runtime": 135.1028,
      "eval_samples_per_second": 20.31,
      "eval_steps_per_second": 2.539,
      "eval_wer": 0.4739828840785319,
      "step": 2500
    },
    {
      "epoch": 27.66,
      "learning_rate": 9.191891891891893e-05,
      "loss": 1.1529,
      "step": 2600
    },
    {
      "epoch": 28.72,
      "learning_rate": 9.056756756756757e-05,
      "loss": 1.1408,
      "step": 2700
    },
    {
      "epoch": 29.78,
      "learning_rate": 8.921621621621622e-05,
      "loss": 1.1201,
      "step": 2800
    },
    {
      "epoch": 30.85,
      "learning_rate": 8.787837837837838e-05,
      "loss": 1.1108,
      "step": 2900
    },
    {
      "epoch": 31.91,
      "learning_rate": 8.652702702702703e-05,
      "loss": 1.1026,
      "step": 3000
    },
    {
      "epoch": 31.91,
      "eval_loss": 0.2930183410644531,
      "eval_runtime": 135.7738,
      "eval_samples_per_second": 20.21,
      "eval_steps_per_second": 2.526,
      "eval_wer": 0.4400256281177063,
      "step": 3000
    },
    {
      "epoch": 32.97,
      "learning_rate": 8.517567567567568e-05,
      "loss": 1.1035,
      "step": 3100
    },
    {
      "epoch": 34.04,
      "learning_rate": 8.382432432432433e-05,
      "loss": 1.0976,
      "step": 3200
    },
    {
      "epoch": 35.11,
      "learning_rate": 8.247297297297298e-05,
      "loss": 1.0717,
      "step": 3300
    },
    {
      "epoch": 36.17,
      "learning_rate": 8.112162162162162e-05,
      "loss": 1.0778,
      "step": 3400
    },
    {
      "epoch": 37.23,
      "learning_rate": 7.977027027027028e-05,
      "loss": 1.0655,
      "step": 3500
    },
    {
      "epoch": 37.23,
      "eval_loss": 0.26754099130630493,
      "eval_runtime": 134.1895,
      "eval_samples_per_second": 20.449,
      "eval_steps_per_second": 2.556,
      "eval_wer": 0.41590773877625736,
      "step": 3500
    },
    {
      "epoch": 38.3,
      "learning_rate": 7.841891891891892e-05,
      "loss": 1.0588,
      "step": 3600
    },
    {
      "epoch": 39.36,
      "learning_rate": 7.706756756756757e-05,
      "loss": 1.0401,
      "step": 3700
    },
    {
      "epoch": 40.42,
      "learning_rate": 7.571621621621621e-05,
      "loss": 1.0296,
      "step": 3800
    },
    {
      "epoch": 41.49,
      "learning_rate": 7.436486486486487e-05,
      "loss": 1.0218,
      "step": 3900
    },
    {
      "epoch": 42.55,
      "learning_rate": 7.301351351351352e-05,
      "loss": 1.0239,
      "step": 4000
    },
    {
      "epoch": 42.55,
      "eval_loss": 0.25800037384033203,
      "eval_runtime": 134.1224,
      "eval_samples_per_second": 20.459,
      "eval_steps_per_second": 2.557,
      "eval_wer": 0.3912864399798636,
      "step": 4000
    },
    {
      "epoch": 43.61,
      "learning_rate": 7.166216216216216e-05,
      "loss": 1.0226,
      "step": 4100
    },
    {
      "epoch": 44.68,
      "learning_rate": 7.031081081081081e-05,
      "loss": 1.0165,
      "step": 4200
    },
    {
      "epoch": 45.74,
      "learning_rate": 6.895945945945947e-05,
      "loss": 1.0083,
      "step": 4300
    },
    {
      "epoch": 46.8,
      "learning_rate": 6.760810810810811e-05,
      "loss": 0.9981,
      "step": 4400
    },
    {
      "epoch": 47.87,
      "learning_rate": 6.625675675675676e-05,
      "loss": 0.9938,
      "step": 4500
    },
    {
      "epoch": 47.87,
      "eval_loss": 0.23732751607894897,
      "eval_runtime": 135.9562,
      "eval_samples_per_second": 20.183,
      "eval_steps_per_second": 2.523,
      "eval_wer": 0.36977712690494713,
      "step": 4500
    },
    {
      "epoch": 48.93,
      "learning_rate": 6.49054054054054e-05,
      "loss": 0.9717,
      "step": 4600
    },
    {
      "epoch": 49.99,
      "learning_rate": 6.355405405405406e-05,
      "loss": 0.9795,
      "step": 4700
    },
    {
      "epoch": 51.06,
      "learning_rate": 6.22027027027027e-05,
      "loss": 0.9804,
      "step": 4800
    },
    {
      "epoch": 52.13,
      "learning_rate": 6.085135135135135e-05,
      "loss": 0.9655,
      "step": 4900
    },
    {
      "epoch": 53.19,
      "learning_rate": 5.95e-05,
      "loss": 0.9655,
      "step": 5000
    },
    {
      "epoch": 53.19,
      "eval_loss": 0.2379022240638733,
      "eval_runtime": 134.3498,
      "eval_samples_per_second": 20.424,
      "eval_steps_per_second": 2.553,
      "eval_wer": 0.3674889021097433,
      "step": 5000
    },
    {
      "epoch": 54.25,
      "learning_rate": 5.8148648648648655e-05,
      "loss": 0.9616,
      "step": 5100
    },
    {
      "epoch": 55.32,
      "learning_rate": 5.67972972972973e-05,
      "loss": 0.9633,
      "step": 5200
    },
    {
      "epoch": 56.38,
      "learning_rate": 5.544594594594595e-05,
      "loss": 0.94,
      "step": 5300
    },
    {
      "epoch": 57.44,
      "learning_rate": 5.40945945945946e-05,
      "loss": 0.9355,
      "step": 5400
    },
    {
      "epoch": 58.51,
      "learning_rate": 5.274324324324325e-05,
      "loss": 0.9374,
      "step": 5500
    },
    {
      "epoch": 58.51,
      "eval_loss": 0.24859154224395752,
      "eval_runtime": 135.3597,
      "eval_samples_per_second": 20.272,
      "eval_steps_per_second": 2.534,
      "eval_wer": 0.3794792000366116,
      "step": 5500
    },
    {
      "epoch": 59.57,
      "learning_rate": 5.1391891891891894e-05,
      "loss": 0.93,
      "step": 5600
    },
    {
      "epoch": 60.63,
      "learning_rate": 5.0040540540540546e-05,
      "loss": 0.9212,
      "step": 5700
    },
    {
      "epoch": 61.7,
      "learning_rate": 4.868918918918919e-05,
      "loss": 0.9233,
      "step": 5800
    },
    {
      "epoch": 62.76,
      "learning_rate": 4.733783783783784e-05,
      "loss": 0.914,
      "step": 5900
    },
    {
      "epoch": 63.83,
      "learning_rate": 4.598648648648649e-05,
      "loss": 0.9065,
      "step": 6000
    },
    {
      "epoch": 63.83,
      "eval_loss": 0.22428132593631744,
      "eval_runtime": 136.639,
      "eval_samples_per_second": 20.082,
      "eval_steps_per_second": 2.51,
      "eval_wer": 0.3405336140222415,
      "step": 6000
    },
    {
      "epoch": 64.89,
      "learning_rate": 4.463513513513514e-05,
      "loss": 0.9023,
      "step": 6100
    },
    {
      "epoch": 65.95,
      "learning_rate": 4.3283783783783785e-05,
      "loss": 0.9105,
      "step": 6200
    },
    {
      "epoch": 67.02,
      "learning_rate": 4.193243243243244e-05,
      "loss": 0.907,
      "step": 6300
    },
    {
      "epoch": 68.08,
      "learning_rate": 4.058108108108108e-05,
      "loss": 0.8939,
      "step": 6400
    },
    {
      "epoch": 69.15,
      "learning_rate": 3.9229729729729734e-05,
      "loss": 0.888,
      "step": 6500
    },
    {
      "epoch": 69.15,
      "eval_loss": 0.21568605303764343,
      "eval_runtime": 139.7942,
      "eval_samples_per_second": 19.629,
      "eval_steps_per_second": 2.454,
      "eval_wer": 0.3277195551690998,
      "step": 6500
    },
    {
      "epoch": 70.21,
      "learning_rate": 3.78918918918919e-05,
      "loss": 0.8866,
      "step": 6600
    },
    {
      "epoch": 71.28,
      "learning_rate": 3.654054054054054e-05,
      "loss": 0.8881,
      "step": 6700
    },
    {
      "epoch": 72.34,
      "learning_rate": 3.5189189189189195e-05,
      "loss": 0.8849,
      "step": 6800
    },
    {
      "epoch": 73.4,
      "learning_rate": 3.383783783783784e-05,
      "loss": 0.8648,
      "step": 6900
    },
    {
      "epoch": 74.47,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.8646,
      "step": 7000
    },
    {
      "epoch": 74.47,
      "eval_loss": 0.21030458807945251,
      "eval_runtime": 135.9762,
      "eval_samples_per_second": 20.18,
      "eval_steps_per_second": 2.522,
      "eval_wer": 0.3287721385748936,
      "step": 7000
    },
    {
      "epoch": 75.53,
      "learning_rate": 3.114864864864865e-05,
      "loss": 0.8767,
      "step": 7100
    },
    {
      "epoch": 76.59,
      "learning_rate": 2.97972972972973e-05,
      "loss": 0.8655,
      "step": 7200
    },
    {
      "epoch": 77.66,
      "learning_rate": 2.8445945945945946e-05,
      "loss": 0.8672,
      "step": 7300
    },
    {
      "epoch": 78.72,
      "learning_rate": 2.7094594594594598e-05,
      "loss": 0.8625,
      "step": 7400
    },
    {
      "epoch": 79.78,
      "learning_rate": 2.5743243243243243e-05,
      "loss": 0.8602,
      "step": 7500
    },
    {
      "epoch": 79.78,
      "eval_loss": 0.20880180597305298,
      "eval_runtime": 135.4639,
      "eval_samples_per_second": 20.256,
      "eval_steps_per_second": 2.532,
      "eval_wer": 0.32378380852134914,
      "step": 7500
    },
    {
      "epoch": 80.85,
      "learning_rate": 2.4391891891891895e-05,
      "loss": 0.8515,
      "step": 7600
    },
    {
      "epoch": 81.91,
      "learning_rate": 2.3040540540540543e-05,
      "loss": 0.8554,
      "step": 7700
    },
    {
      "epoch": 82.97,
      "learning_rate": 2.1689189189189192e-05,
      "loss": 0.8563,
      "step": 7800
    },
    {
      "epoch": 84.04,
      "learning_rate": 2.033783783783784e-05,
      "loss": 0.8655,
      "step": 7900
    },
    {
      "epoch": 85.11,
      "learning_rate": 1.898648648648649e-05,
      "loss": 0.8442,
      "step": 8000
    },
    {
      "epoch": 85.11,
      "eval_loss": 0.2045026570558548,
      "eval_runtime": 138.8412,
      "eval_samples_per_second": 19.764,
      "eval_steps_per_second": 2.47,
      "eval_wer": 0.32657544277149786,
      "step": 8000
    },
    {
      "epoch": 86.17,
      "learning_rate": 1.7635135135135137e-05,
      "loss": 0.8323,
      "step": 8100
    },
    {
      "epoch": 87.23,
      "learning_rate": 1.6283783783783786e-05,
      "loss": 0.8384,
      "step": 8200
    },
    {
      "epoch": 88.3,
      "learning_rate": 1.4932432432432433e-05,
      "loss": 0.8391,
      "step": 8300
    },
    {
      "epoch": 89.36,
      "learning_rate": 1.3581081081081081e-05,
      "loss": 0.8292,
      "step": 8400
    },
    {
      "epoch": 90.42,
      "learning_rate": 1.222972972972973e-05,
      "loss": 0.8335,
      "step": 8500
    },
    {
      "epoch": 90.42,
      "eval_loss": 0.20376762747764587,
      "eval_runtime": 136.8377,
      "eval_samples_per_second": 20.053,
      "eval_steps_per_second": 2.507,
      "eval_wer": 0.3240583954967736,
      "step": 8500
    },
    {
      "epoch": 91.49,
      "learning_rate": 1.0878378378378378e-05,
      "loss": 0.8314,
      "step": 8600
    },
    {
      "epoch": 92.55,
      "learning_rate": 9.527027027027027e-06,
      "loss": 0.8254,
      "step": 8700
    },
    {
      "epoch": 93.61,
      "learning_rate": 8.175675675675675e-06,
      "loss": 0.8231,
      "step": 8800
    },
    {
      "epoch": 94.68,
      "learning_rate": 6.8243243243243244e-06,
      "loss": 0.8164,
      "step": 8900
    },
    {
      "epoch": 95.74,
      "learning_rate": 5.472972972972974e-06,
      "loss": 0.8288,
      "step": 9000
    },
    {
      "epoch": 95.74,
      "eval_loss": 0.20242640376091003,
      "eval_runtime": 135.0305,
      "eval_samples_per_second": 20.321,
      "eval_steps_per_second": 2.54,
      "eval_wer": 0.32799414214452427,
      "step": 9000
    },
    {
      "epoch": 96.8,
      "learning_rate": 4.121621621621622e-06,
      "loss": 0.816,
      "step": 9100
    },
    {
      "epoch": 97.87,
      "learning_rate": 2.7702702702702708e-06,
      "loss": 0.8163,
      "step": 9200
    },
    {
      "epoch": 98.93,
      "learning_rate": 1.418918918918919e-06,
      "loss": 0.8126,
      "step": 9300
    },
    {
      "epoch": 99.99,
      "learning_rate": 6.756756756756757e-08,
      "loss": 0.8084,
      "step": 9400
    },
    {
      "epoch": 99.99,
      "step": 9400,
      "total_flos": 1.0839754269306731e+20,
      "train_loss": 1.3946523244330225,
      "train_runtime": 41836.5581,
      "train_samples_per_second": 14.423,
      "train_steps_per_second": 0.225
    }
  ],
  "max_steps": 9400,
  "num_train_epochs": 100,
  "total_flos": 1.0839754269306731e+20,
  "trial_name": null,
  "trial_params": null
}