|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 18300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.950000000000001e-06, |
|
"loss": 13.5246, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.950000000000001e-06, |
|
"loss": 6.1441, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.4950000000000001e-05, |
|
"loss": 4.2189, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.995e-05, |
|
"loss": 3.6075, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.495e-05, |
|
"loss": 3.3043, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 3.241492748260498, |
|
"eval_runtime": 169.6485, |
|
"eval_samples_per_second": 15.444, |
|
"eval_steps_per_second": 1.933, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.995e-05, |
|
"loss": 3.2087, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.495e-05, |
|
"loss": 3.1835, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 3.995e-05, |
|
"loss": 3.1414, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 4.495e-05, |
|
"loss": 3.0995, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 4.995e-05, |
|
"loss": 3.0482, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.9591026306152344, |
|
"eval_runtime": 128.3332, |
|
"eval_samples_per_second": 20.416, |
|
"eval_steps_per_second": 2.556, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 5.495e-05, |
|
"loss": 2.7368, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 5.995000000000001e-05, |
|
"loss": 2.0079, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 6.494999999999999e-05, |
|
"loss": 1.6588, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 6.995e-05, |
|
"loss": 1.5644, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 7.495e-05, |
|
"loss": 1.4767, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 0.47794264554977417, |
|
"eval_runtime": 128.1544, |
|
"eval_samples_per_second": 20.444, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.5776572037593256, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 7.995e-05, |
|
"loss": 1.4421, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 8.495e-05, |
|
"loss": 1.4009, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 8.995e-05, |
|
"loss": 1.3682, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 9.495e-05, |
|
"loss": 1.3377, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 9.995e-05, |
|
"loss": 1.3152, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"eval_loss": 0.36967846751213074, |
|
"eval_runtime": 127.926, |
|
"eval_samples_per_second": 20.481, |
|
"eval_steps_per_second": 2.564, |
|
"eval_wer": 0.49384749539773276, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 9.939877300613497e-05, |
|
"loss": 1.3009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 9.878527607361964e-05, |
|
"loss": 1.2799, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 9.81717791411043e-05, |
|
"loss": 1.2559, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 9.756441717791411e-05, |
|
"loss": 1.2379, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 9.695092024539878e-05, |
|
"loss": 1.2246, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"eval_loss": 0.3084094822406769, |
|
"eval_runtime": 127.6781, |
|
"eval_samples_per_second": 20.52, |
|
"eval_steps_per_second": 2.569, |
|
"eval_wer": 0.44593547136905337, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 9.633742331288344e-05, |
|
"loss": 1.2209, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.75, |
|
"learning_rate": 9.57239263803681e-05, |
|
"loss": 1.1972, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 9.511042944785277e-05, |
|
"loss": 1.1885, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 9.449693251533743e-05, |
|
"loss": 1.1807, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 9.388343558282209e-05, |
|
"loss": 1.1781, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"eval_loss": 0.2842142581939697, |
|
"eval_runtime": 128.3875, |
|
"eval_samples_per_second": 20.407, |
|
"eval_steps_per_second": 2.555, |
|
"eval_wer": 0.4154151729483577, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 9.326993865030675e-05, |
|
"loss": 1.1757, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 9.265644171779141e-05, |
|
"loss": 1.155, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 9.204294478527608e-05, |
|
"loss": 1.1455, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 9.142944785276074e-05, |
|
"loss": 1.1376, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 9.081595092024541e-05, |
|
"loss": 1.1351, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 0.26151829957962036, |
|
"eval_runtime": 127.9176, |
|
"eval_samples_per_second": 20.482, |
|
"eval_steps_per_second": 2.564, |
|
"eval_wer": 0.3929367309369247, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 9.020245398773006e-05, |
|
"loss": 1.1262, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"learning_rate": 8.958895705521472e-05, |
|
"loss": 1.1265, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 8.897546012269939e-05, |
|
"loss": 1.1033, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 21.31, |
|
"learning_rate": 8.836196319018405e-05, |
|
"loss": 1.1016, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 8.774846625766872e-05, |
|
"loss": 1.1052, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"eval_loss": 0.24618586897850037, |
|
"eval_runtime": 128.2681, |
|
"eval_samples_per_second": 20.426, |
|
"eval_steps_per_second": 2.557, |
|
"eval_wer": 0.3746729968026354, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 8.714110429447854e-05, |
|
"loss": 1.0964, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 8.652760736196319e-05, |
|
"loss": 1.0848, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"learning_rate": 8.591411042944786e-05, |
|
"loss": 1.0714, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 24.04, |
|
"learning_rate": 8.530061349693252e-05, |
|
"loss": 1.0696, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 8.468711656441717e-05, |
|
"loss": 1.0711, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"eval_loss": 0.23661433160305023, |
|
"eval_runtime": 128.1823, |
|
"eval_samples_per_second": 20.44, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.36522623776765817, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.14, |
|
"learning_rate": 8.407361963190185e-05, |
|
"loss": 1.0519, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 25.68, |
|
"learning_rate": 8.346012269938652e-05, |
|
"loss": 1.0673, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 26.23, |
|
"learning_rate": 8.284662576687117e-05, |
|
"loss": 1.0606, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 26.78, |
|
"learning_rate": 8.223312883435583e-05, |
|
"loss": 1.0711, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 27.32, |
|
"learning_rate": 8.16196319018405e-05, |
|
"loss": 1.035, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 27.32, |
|
"eval_loss": 0.22680768370628357, |
|
"eval_runtime": 127.7363, |
|
"eval_samples_per_second": 20.511, |
|
"eval_steps_per_second": 2.568, |
|
"eval_wer": 0.3556825888964248, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 27.87, |
|
"learning_rate": 8.100613496932515e-05, |
|
"loss": 1.0428, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"learning_rate": 8.039877300613497e-05, |
|
"loss": 1.0322, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 7.978527607361964e-05, |
|
"loss": 1.0291, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 29.51, |
|
"learning_rate": 7.91717791411043e-05, |
|
"loss": 1.0258, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 30.05, |
|
"learning_rate": 7.855828220858897e-05, |
|
"loss": 1.0277, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 30.05, |
|
"eval_loss": 0.22434431314468384, |
|
"eval_runtime": 127.9511, |
|
"eval_samples_per_second": 20.477, |
|
"eval_steps_per_second": 2.563, |
|
"eval_wer": 0.3449762619901172, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"learning_rate": 7.794478527607363e-05, |
|
"loss": 1.0208, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 31.15, |
|
"learning_rate": 7.733128834355828e-05, |
|
"loss": 1.0252, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 31.69, |
|
"learning_rate": 7.671779141104295e-05, |
|
"loss": 1.0157, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"learning_rate": 7.610429447852761e-05, |
|
"loss": 1.0094, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 32.79, |
|
"learning_rate": 7.549079754601228e-05, |
|
"loss": 1.002, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.79, |
|
"eval_loss": 0.22044695913791656, |
|
"eval_runtime": 127.6026, |
|
"eval_samples_per_second": 20.532, |
|
"eval_steps_per_second": 2.57, |
|
"eval_wer": 0.3388722023059781, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 7.487730061349694e-05, |
|
"loss": 0.996, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 33.88, |
|
"learning_rate": 7.42638036809816e-05, |
|
"loss": 1.002, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 34.43, |
|
"learning_rate": 7.365030674846626e-05, |
|
"loss": 0.9845, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"learning_rate": 7.303680981595092e-05, |
|
"loss": 0.9958, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"learning_rate": 7.242331288343559e-05, |
|
"loss": 0.9837, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 35.52, |
|
"eval_loss": 0.2156379520893097, |
|
"eval_runtime": 127.7813, |
|
"eval_samples_per_second": 20.504, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.33489971901947485, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 36.07, |
|
"learning_rate": 7.180981595092025e-05, |
|
"loss": 1.0028, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 36.61, |
|
"learning_rate": 7.119631901840491e-05, |
|
"loss": 0.9722, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 7.058282208588958e-05, |
|
"loss": 0.9784, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 37.7, |
|
"learning_rate": 6.996932515337423e-05, |
|
"loss": 0.9822, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 38.25, |
|
"learning_rate": 6.93558282208589e-05, |
|
"loss": 0.9773, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 38.25, |
|
"eval_loss": 0.21265123784542084, |
|
"eval_runtime": 128.3357, |
|
"eval_samples_per_second": 20.415, |
|
"eval_steps_per_second": 2.556, |
|
"eval_wer": 0.3288925491715919, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"learning_rate": 6.874233128834356e-05, |
|
"loss": 0.9649, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 39.34, |
|
"learning_rate": 6.812883435582822e-05, |
|
"loss": 0.9728, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 39.89, |
|
"learning_rate": 6.751533742331289e-05, |
|
"loss": 0.9663, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 40.44, |
|
"learning_rate": 6.690184049079755e-05, |
|
"loss": 0.9762, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"learning_rate": 6.629447852760736e-05, |
|
"loss": 0.9807, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"eval_loss": 0.21417580544948578, |
|
"eval_runtime": 128.3216, |
|
"eval_samples_per_second": 20.417, |
|
"eval_steps_per_second": 2.556, |
|
"eval_wer": 0.32743920162774925, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 41.53, |
|
"learning_rate": 6.568098159509203e-05, |
|
"loss": 0.9647, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 42.08, |
|
"learning_rate": 6.506748466257669e-05, |
|
"loss": 0.9748, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 42.62, |
|
"learning_rate": 6.445398773006134e-05, |
|
"loss": 0.9484, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 43.17, |
|
"learning_rate": 6.384049079754602e-05, |
|
"loss": 0.9558, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 43.72, |
|
"learning_rate": 6.322699386503069e-05, |
|
"loss": 0.9582, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 43.72, |
|
"eval_loss": 0.20038354396820068, |
|
"eval_runtime": 127.7709, |
|
"eval_samples_per_second": 20.505, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.314165294060653, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 44.26, |
|
"learning_rate": 6.261349693251534e-05, |
|
"loss": 0.949, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 44.81, |
|
"learning_rate": 6.2e-05, |
|
"loss": 0.9491, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 45.36, |
|
"learning_rate": 6.138650306748467e-05, |
|
"loss": 0.9447, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 45.9, |
|
"learning_rate": 6.0773006134969325e-05, |
|
"loss": 0.9368, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 6.015950920245399e-05, |
|
"loss": 0.9548, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"eval_loss": 0.20219053328037262, |
|
"eval_runtime": 127.6869, |
|
"eval_samples_per_second": 20.519, |
|
"eval_steps_per_second": 2.569, |
|
"eval_wer": 0.30500920453444436, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"learning_rate": 5.9546012269938655e-05, |
|
"loss": 0.9407, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 47.54, |
|
"learning_rate": 5.893251533742331e-05, |
|
"loss": 0.9312, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 48.09, |
|
"learning_rate": 5.831901840490798e-05, |
|
"loss": 0.9446, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 48.63, |
|
"learning_rate": 5.770552147239264e-05, |
|
"loss": 0.9225, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"learning_rate": 5.70920245398773e-05, |
|
"loss": 0.9251, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"eval_loss": 0.2018980085849762, |
|
"eval_runtime": 130.6299, |
|
"eval_samples_per_second": 20.057, |
|
"eval_steps_per_second": 2.511, |
|
"eval_wer": 0.3035074120724736, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 49.73, |
|
"learning_rate": 5.6478527607361965e-05, |
|
"loss": 0.9251, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 50.27, |
|
"learning_rate": 5.586503067484663e-05, |
|
"loss": 0.9172, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 50.82, |
|
"learning_rate": 5.5251533742331294e-05, |
|
"loss": 0.9103, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 51.37, |
|
"learning_rate": 5.463803680981595e-05, |
|
"loss": 0.9133, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 51.91, |
|
"learning_rate": 5.402453987730062e-05, |
|
"loss": 0.9103, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 51.91, |
|
"eval_loss": 0.1963759958744049, |
|
"eval_runtime": 127.8377, |
|
"eval_samples_per_second": 20.495, |
|
"eval_steps_per_second": 2.566, |
|
"eval_wer": 0.30210250944675904, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 52.46, |
|
"learning_rate": 5.341104294478528e-05, |
|
"loss": 0.9109, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 5.279754601226994e-05, |
|
"loss": 0.9153, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 53.55, |
|
"learning_rate": 5.2184049079754604e-05, |
|
"loss": 0.9113, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 54.1, |
|
"learning_rate": 5.157055214723927e-05, |
|
"loss": 0.9181, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 54.64, |
|
"learning_rate": 5.095705521472393e-05, |
|
"loss": 0.915, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 54.64, |
|
"eval_loss": 0.19702854752540588, |
|
"eval_runtime": 128.314, |
|
"eval_samples_per_second": 20.419, |
|
"eval_steps_per_second": 2.556, |
|
"eval_wer": 0.30316829764557696, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 55.19, |
|
"learning_rate": 5.034969325153375e-05, |
|
"loss": 0.8971, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 55.74, |
|
"learning_rate": 4.973619631901841e-05, |
|
"loss": 0.9066, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 56.28, |
|
"learning_rate": 4.9122699386503065e-05, |
|
"loss": 0.8993, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 56.83, |
|
"learning_rate": 4.850920245398774e-05, |
|
"loss": 0.8933, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 57.38, |
|
"learning_rate": 4.7895705521472395e-05, |
|
"loss": 0.8962, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 57.38, |
|
"eval_loss": 0.2006961703300476, |
|
"eval_runtime": 128.3801, |
|
"eval_samples_per_second": 20.408, |
|
"eval_steps_per_second": 2.555, |
|
"eval_wer": 0.30462164518941964, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 57.92, |
|
"learning_rate": 4.728220858895705e-05, |
|
"loss": 0.8829, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 58.47, |
|
"learning_rate": 4.6668711656441724e-05, |
|
"loss": 0.8932, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 59.02, |
|
"learning_rate": 4.605521472392638e-05, |
|
"loss": 0.8986, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 59.56, |
|
"learning_rate": 4.544171779141104e-05, |
|
"loss": 0.8892, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 60.11, |
|
"learning_rate": 4.482822085889571e-05, |
|
"loss": 0.8729, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 60.11, |
|
"eval_loss": 0.19668185710906982, |
|
"eval_runtime": 128.5612, |
|
"eval_samples_per_second": 20.379, |
|
"eval_steps_per_second": 2.551, |
|
"eval_wer": 0.2942059877918806, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 60.66, |
|
"learning_rate": 4.421472392638037e-05, |
|
"loss": 0.876, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"learning_rate": 4.3601226993865034e-05, |
|
"loss": 0.8759, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 61.75, |
|
"learning_rate": 4.29877300613497e-05, |
|
"loss": 0.8813, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 62.3, |
|
"learning_rate": 4.237423312883436e-05, |
|
"loss": 0.8684, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 62.84, |
|
"learning_rate": 4.176073619631902e-05, |
|
"loss": 0.8744, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 62.84, |
|
"eval_loss": 0.19520752131938934, |
|
"eval_runtime": 127.705, |
|
"eval_samples_per_second": 20.516, |
|
"eval_steps_per_second": 2.568, |
|
"eval_wer": 0.2885379323708943, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 63.39, |
|
"learning_rate": 4.1147239263803686e-05, |
|
"loss": 0.8665, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 63.93, |
|
"learning_rate": 4.0533742331288344e-05, |
|
"loss": 0.8757, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 64.48, |
|
"learning_rate": 3.992024539877301e-05, |
|
"loss": 0.8694, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 65.03, |
|
"learning_rate": 3.930674846625767e-05, |
|
"loss": 0.8578, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 65.57, |
|
"learning_rate": 3.869938650306748e-05, |
|
"loss": 0.874, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 65.57, |
|
"eval_loss": 0.18939977884292603, |
|
"eval_runtime": 128.0037, |
|
"eval_samples_per_second": 20.468, |
|
"eval_steps_per_second": 2.562, |
|
"eval_wer": 0.28950683073345607, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 66.12, |
|
"learning_rate": 3.808588957055215e-05, |
|
"loss": 0.8628, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 3.747239263803681e-05, |
|
"loss": 0.8564, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 67.21, |
|
"learning_rate": 3.685889570552147e-05, |
|
"loss": 0.8502, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 67.76, |
|
"learning_rate": 3.6245398773006135e-05, |
|
"loss": 0.8521, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 68.31, |
|
"learning_rate": 3.56319018404908e-05, |
|
"loss": 0.8457, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 68.31, |
|
"eval_loss": 0.18946239352226257, |
|
"eval_runtime": 128.0623, |
|
"eval_samples_per_second": 20.459, |
|
"eval_steps_per_second": 2.561, |
|
"eval_wer": 0.28282143203177984, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 68.85, |
|
"learning_rate": 3.501840490797546e-05, |
|
"loss": 0.8624, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"learning_rate": 3.440490797546013e-05, |
|
"loss": 0.8394, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 69.95, |
|
"learning_rate": 3.379141104294479e-05, |
|
"loss": 0.8381, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 70.49, |
|
"learning_rate": 3.3177914110429445e-05, |
|
"loss": 0.8431, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 71.04, |
|
"learning_rate": 3.2564417177914117e-05, |
|
"loss": 0.8519, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 71.04, |
|
"eval_loss": 0.19119836390018463, |
|
"eval_runtime": 127.8936, |
|
"eval_samples_per_second": 20.486, |
|
"eval_steps_per_second": 2.565, |
|
"eval_wer": 0.28747214417207634, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 71.58, |
|
"learning_rate": 3.1950920245398774e-05, |
|
"loss": 0.8463, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 72.13, |
|
"learning_rate": 3.133742331288343e-05, |
|
"loss": 0.8492, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 72.68, |
|
"learning_rate": 3.0723926380368104e-05, |
|
"loss": 0.8354, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 73.22, |
|
"learning_rate": 3.0110429447852762e-05, |
|
"loss": 0.8408, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 73.77, |
|
"learning_rate": 2.9496932515337423e-05, |
|
"loss": 0.8301, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 73.77, |
|
"eval_loss": 0.18781304359436035, |
|
"eval_runtime": 127.7845, |
|
"eval_samples_per_second": 20.503, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.2760391434938475, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 74.32, |
|
"learning_rate": 2.8883435582822088e-05, |
|
"loss": 0.8425, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 74.86, |
|
"learning_rate": 2.826993865030675e-05, |
|
"loss": 0.8416, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 75.41, |
|
"learning_rate": 2.7656441717791414e-05, |
|
"loss": 0.8269, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 75.96, |
|
"learning_rate": 2.7042944785276075e-05, |
|
"loss": 0.8212, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 76.5, |
|
"learning_rate": 2.6429447852760736e-05, |
|
"loss": 0.8226, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 76.5, |
|
"eval_loss": 0.18075355887413025, |
|
"eval_runtime": 127.7671, |
|
"eval_samples_per_second": 20.506, |
|
"eval_steps_per_second": 2.567, |
|
"eval_wer": 0.2701288634822207, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 77.05, |
|
"learning_rate": 2.58159509202454e-05, |
|
"loss": 0.8265, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 77.6, |
|
"learning_rate": 2.5202453987730063e-05, |
|
"loss": 0.8279, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 78.14, |
|
"learning_rate": 2.4588957055214727e-05, |
|
"loss": 0.8273, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 78.69, |
|
"learning_rate": 2.3975460122699385e-05, |
|
"loss": 0.8067, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 79.23, |
|
"learning_rate": 2.33680981595092e-05, |
|
"loss": 0.8071, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 79.23, |
|
"eval_loss": 0.18486912548542023, |
|
"eval_runtime": 127.9551, |
|
"eval_samples_per_second": 20.476, |
|
"eval_steps_per_second": 2.563, |
|
"eval_wer": 0.27410134676872394, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 79.78, |
|
"learning_rate": 2.2754601226993866e-05, |
|
"loss": 0.8231, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 80.33, |
|
"learning_rate": 2.214110429447853e-05, |
|
"loss": 0.808, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 80.87, |
|
"learning_rate": 2.1527607361963192e-05, |
|
"loss": 0.8107, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 81.42, |
|
"learning_rate": 2.0914110429447853e-05, |
|
"loss": 0.7966, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"learning_rate": 2.0300613496932515e-05, |
|
"loss": 0.7999, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"eval_loss": 0.18083913624286652, |
|
"eval_runtime": 127.4398, |
|
"eval_samples_per_second": 20.559, |
|
"eval_steps_per_second": 2.574, |
|
"eval_wer": 0.2717275457804476, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 82.51, |
|
"learning_rate": 1.968711656441718e-05, |
|
"loss": 0.8171, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 83.06, |
|
"learning_rate": 1.907361963190184e-05, |
|
"loss": 0.8034, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 83.61, |
|
"learning_rate": 1.8460122699386502e-05, |
|
"loss": 0.8086, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 84.15, |
|
"learning_rate": 1.7846625766871167e-05, |
|
"loss": 0.8005, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 84.7, |
|
"learning_rate": 1.723312883435583e-05, |
|
"loss": 0.7947, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 84.7, |
|
"eval_loss": 0.1820572018623352, |
|
"eval_runtime": 127.1268, |
|
"eval_samples_per_second": 20.609, |
|
"eval_steps_per_second": 2.58, |
|
"eval_wer": 0.2715822110260634, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 85.25, |
|
"learning_rate": 1.661963190184049e-05, |
|
"loss": 0.7947, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 85.79, |
|
"learning_rate": 1.6006134969325154e-05, |
|
"loss": 0.7892, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 86.34, |
|
"learning_rate": 1.539263803680982e-05, |
|
"loss": 0.7969, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 86.89, |
|
"learning_rate": 1.477914110429448e-05, |
|
"loss": 0.801, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 87.43, |
|
"learning_rate": 1.4165644171779141e-05, |
|
"loss": 0.7783, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 87.43, |
|
"eval_loss": 0.18241995573043823, |
|
"eval_runtime": 127.0573, |
|
"eval_samples_per_second": 20.621, |
|
"eval_steps_per_second": 2.582, |
|
"eval_wer": 0.26610793527758936, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 87.98, |
|
"learning_rate": 1.3552147239263804e-05, |
|
"loss": 0.7915, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 88.52, |
|
"learning_rate": 1.2938650306748467e-05, |
|
"loss": 0.7862, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 89.07, |
|
"learning_rate": 1.232515337423313e-05, |
|
"loss": 0.7816, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 1.1711656441717792e-05, |
|
"loss": 0.7816, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 90.16, |
|
"learning_rate": 1.1098159509202455e-05, |
|
"loss": 0.7729, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 90.16, |
|
"eval_loss": 0.17727895081043243, |
|
"eval_runtime": 128.0807, |
|
"eval_samples_per_second": 20.456, |
|
"eval_steps_per_second": 2.561, |
|
"eval_wer": 0.2638794690436973, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 90.71, |
|
"learning_rate": 1.0484662576687116e-05, |
|
"loss": 0.7778, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 91.26, |
|
"learning_rate": 9.87116564417178e-06, |
|
"loss": 0.7844, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 91.8, |
|
"learning_rate": 9.257668711656442e-06, |
|
"loss": 0.7842, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 92.35, |
|
"learning_rate": 8.644171779141105e-06, |
|
"loss": 0.7755, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 92.9, |
|
"learning_rate": 8.030674846625766e-06, |
|
"loss": 0.7759, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 92.9, |
|
"eval_loss": 0.17666833102703094, |
|
"eval_runtime": 128.2582, |
|
"eval_samples_per_second": 20.428, |
|
"eval_steps_per_second": 2.557, |
|
"eval_wer": 0.26291057068113555, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 93.44, |
|
"learning_rate": 7.417177914110429e-06, |
|
"loss": 0.7672, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"learning_rate": 6.8036809815950924e-06, |
|
"loss": 0.7813, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 94.54, |
|
"learning_rate": 6.1963190184049085e-06, |
|
"loss": 0.7781, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 95.08, |
|
"learning_rate": 5.582822085889571e-06, |
|
"loss": 0.7711, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 95.63, |
|
"learning_rate": 4.969325153374233e-06, |
|
"loss": 0.7713, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 95.63, |
|
"eval_loss": 0.17804710566997528, |
|
"eval_runtime": 127.8273, |
|
"eval_samples_per_second": 20.496, |
|
"eval_steps_per_second": 2.566, |
|
"eval_wer": 0.26208700707295807, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 96.17, |
|
"learning_rate": 4.355828220858896e-06, |
|
"loss": 0.7789, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 96.72, |
|
"learning_rate": 3.7423312883435584e-06, |
|
"loss": 0.7732, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 97.27, |
|
"learning_rate": 3.128834355828221e-06, |
|
"loss": 0.7688, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 97.81, |
|
"learning_rate": 2.5153374233128836e-06, |
|
"loss": 0.7724, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 98.36, |
|
"learning_rate": 1.9018404907975462e-06, |
|
"loss": 0.7628, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 98.36, |
|
"eval_loss": 0.17734766006469727, |
|
"eval_runtime": 128.0092, |
|
"eval_samples_per_second": 20.467, |
|
"eval_steps_per_second": 2.562, |
|
"eval_wer": 0.2594225365759132, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 98.91, |
|
"learning_rate": 1.2883435582822088e-06, |
|
"loss": 0.7634, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 99.45, |
|
"learning_rate": 6.748466257668713e-07, |
|
"loss": 0.7638, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 6.134969325153375e-08, |
|
"loss": 0.7746, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 18300, |
|
"total_flos": 1.0288191185677785e+20, |
|
"train_loss": 1.1684310275218526, |
|
"train_runtime": 46836.9916, |
|
"train_samples_per_second": 12.501, |
|
"train_steps_per_second": 0.391 |
|
} |
|
], |
|
"max_steps": 18300, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.0288191185677785e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|