{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.862593783494106,
  "global_step": 11500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 3.7125e-06,
      "loss": 16.3142,
      "step": 100
    },
    {
      "epoch": 0.17,
      "learning_rate": 7.4625e-06,
      "loss": 9.7017,
      "step": 200
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.1212499999999998e-05,
      "loss": 6.7433,
      "step": 300
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.49625e-05,
      "loss": 5.7032,
      "step": 400
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.8712499999999997e-05,
      "loss": 4.7972,
      "step": 500
    },
    {
      "epoch": 0.43,
      "eval_loss": 5.140112400054932,
      "eval_runtime": 323.0627,
      "eval_samples_per_second": 31.833,
      "eval_steps_per_second": 3.981,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.2462499999999997e-05,
      "loss": 4.1097,
      "step": 600
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.6212499999999997e-05,
      "loss": 3.6756,
      "step": 700
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.99625e-05,
      "loss": 3.5057,
      "step": 800
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.37125e-05,
      "loss": 3.4116,
      "step": 900
    },
    {
      "epoch": 0.86,
      "learning_rate": 3.7462499999999996e-05,
      "loss": 3.3241,
      "step": 1000
    },
    {
      "epoch": 0.86,
      "eval_loss": 3.3219833374023438,
      "eval_runtime": 316.4143,
      "eval_samples_per_second": 32.502,
      "eval_steps_per_second": 4.064,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.12125e-05,
      "loss": 3.2968,
      "step": 1100
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.4962499999999995e-05,
      "loss": 3.2757,
      "step": 1200
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.871249999999999e-05,
      "loss": 3.2343,
      "step": 1300
    },
    {
      "epoch": 1.2,
      "learning_rate": 5.2462499999999994e-05,
      "loss": 3.1872,
      "step": 1400
    },
    {
      "epoch": 1.29,
      "learning_rate": 5.62125e-05,
      "loss": 3.1432,
      "step": 1500
    },
    {
      "epoch": 1.29,
      "eval_loss": 3.080551862716675,
      "eval_runtime": 318.2154,
      "eval_samples_per_second": 32.318,
      "eval_steps_per_second": 4.041,
      "eval_wer": 0.9998891086181085,
      "step": 1500
    },
    {
      "epoch": 1.37,
      "learning_rate": 5.9962499999999994e-05,
      "loss": 3.0996,
      "step": 1600
    },
    {
      "epoch": 1.46,
      "learning_rate": 6.37125e-05,
      "loss": 3.0582,
      "step": 1700
    },
    {
      "epoch": 1.54,
      "learning_rate": 6.746249999999999e-05,
      "loss": 3.0313,
      "step": 1800
    },
    {
      "epoch": 1.63,
      "learning_rate": 7.121249999999999e-05,
      "loss": 2.9824,
      "step": 1900
    },
    {
      "epoch": 1.72,
      "learning_rate": 7.49625e-05,
      "loss": 2.9297,
      "step": 2000
    },
    {
      "epoch": 1.72,
      "eval_loss": 2.567805528640747,
      "eval_runtime": 317.0688,
      "eval_samples_per_second": 32.435,
      "eval_steps_per_second": 4.056,
      "eval_wer": 1.005729388064391,
      "step": 2000
    },
    {
      "epoch": 1.8,
      "learning_rate": 7.423136645962732e-05,
      "loss": 2.8167,
      "step": 2100
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.345496894409937e-05,
      "loss": 2.6315,
      "step": 2200
    },
    {
      "epoch": 1.97,
      "learning_rate": 7.26863354037267e-05,
      "loss": 2.475,
      "step": 2300
    },
    {
      "epoch": 2.06,
      "learning_rate": 7.190993788819875e-05,
      "loss": 2.3571,
      "step": 2400
    },
    {
      "epoch": 2.14,
      "learning_rate": 7.11335403726708e-05,
      "loss": 2.2593,
      "step": 2500
    },
    {
      "epoch": 2.14,
      "eval_loss": 1.1067590713500977,
      "eval_runtime": 319.3729,
      "eval_samples_per_second": 32.201,
      "eval_steps_per_second": 4.027,
      "eval_wer": 0.821834513094424,
      "step": 2500
    },
    {
      "epoch": 2.23,
      "learning_rate": 7.035714285714285e-05,
      "loss": 2.1855,
      "step": 2600
    },
    {
      "epoch": 2.32,
      "learning_rate": 6.95807453416149e-05,
      "loss": 2.1429,
      "step": 2700
    },
    {
      "epoch": 2.4,
      "learning_rate": 6.880434782608696e-05,
      "loss": 2.1143,
      "step": 2800
    },
    {
      "epoch": 2.49,
      "learning_rate": 6.802795031055901e-05,
      "loss": 2.081,
      "step": 2900
    },
    {
      "epoch": 2.57,
      "learning_rate": 6.725155279503105e-05,
      "loss": 2.0504,
      "step": 3000
    },
    {
      "epoch": 2.57,
      "eval_loss": 0.7877965569496155,
      "eval_runtime": 316.6537,
      "eval_samples_per_second": 32.477,
      "eval_steps_per_second": 4.061,
      "eval_wer": 0.7113866967305524,
      "step": 3000
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.64751552795031e-05,
      "loss": 2.014,
      "step": 3100
    },
    {
      "epoch": 2.74,
      "learning_rate": 6.569875776397515e-05,
      "loss": 1.9885,
      "step": 3200
    },
    {
      "epoch": 2.83,
      "learning_rate": 6.49223602484472e-05,
      "loss": 1.9532,
      "step": 3300
    },
    {
      "epoch": 2.92,
      "learning_rate": 6.414596273291925e-05,
      "loss": 1.936,
      "step": 3400
    },
    {
      "epoch": 3.0,
      "learning_rate": 6.33695652173913e-05,
      "loss": 1.937,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.6955012679100037,
      "eval_runtime": 319.8543,
      "eval_samples_per_second": 32.152,
      "eval_steps_per_second": 4.021,
      "eval_wer": 0.6449812408745633,
      "step": 3500
    },
    {
      "epoch": 3.09,
      "learning_rate": 6.259316770186334e-05,
      "loss": 1.9074,
      "step": 3600
    },
    {
      "epoch": 3.17,
      "learning_rate": 6.18167701863354e-05,
      "loss": 1.8975,
      "step": 3700
    },
    {
      "epoch": 3.26,
      "learning_rate": 6.104037267080745e-05,
      "loss": 1.8628,
      "step": 3800
    },
    {
      "epoch": 3.34,
      "learning_rate": 6.02639751552795e-05,
      "loss": 1.8605,
      "step": 3900
    },
    {
      "epoch": 3.43,
      "learning_rate": 5.948757763975155e-05,
      "loss": 1.8491,
      "step": 4000
    },
    {
      "epoch": 3.43,
      "eval_loss": 0.645221471786499,
      "eval_runtime": 318.4935,
      "eval_samples_per_second": 32.29,
      "eval_steps_per_second": 4.038,
      "eval_wer": 0.6303620603618756,
      "step": 4000
    },
    {
      "epoch": 3.52,
      "learning_rate": 5.87111801242236e-05,
      "loss": 1.85,
      "step": 4100
    },
    {
      "epoch": 3.6,
      "learning_rate": 5.7934782608695654e-05,
      "loss": 1.8298,
      "step": 4200
    },
    {
      "epoch": 3.69,
      "learning_rate": 5.715838509316769e-05,
      "loss": 1.8108,
      "step": 4300
    },
    {
      "epoch": 3.77,
      "learning_rate": 5.6381987577639744e-05,
      "loss": 1.8136,
      "step": 4400
    },
    {
      "epoch": 3.86,
      "learning_rate": 5.5605590062111795e-05,
      "loss": 1.803,
      "step": 4500
    },
    {
      "epoch": 3.86,
      "eval_loss": 0.5961059927940369,
      "eval_runtime": 322.5472,
      "eval_samples_per_second": 31.884,
      "eval_steps_per_second": 3.987,
      "eval_wer": 0.6041547304415326,
      "step": 4500
    },
    {
      "epoch": 3.94,
      "learning_rate": 5.482919254658385e-05,
      "loss": 1.7829,
      "step": 4600
    },
    {
      "epoch": 4.03,
      "learning_rate": 5.40527950310559e-05,
      "loss": 1.7755,
      "step": 4700
    },
    {
      "epoch": 4.12,
      "learning_rate": 5.327639751552795e-05,
      "loss": 1.7805,
      "step": 4800
    },
    {
      "epoch": 4.2,
      "learning_rate": 5.2499999999999995e-05,
      "loss": 1.7647,
      "step": 4900
    },
    {
      "epoch": 4.29,
      "learning_rate": 5.173136645962733e-05,
      "loss": 1.7545,
      "step": 5000
    },
    {
      "epoch": 4.29,
      "eval_loss": 0.5550380945205688,
      "eval_runtime": 317.6329,
      "eval_samples_per_second": 32.377,
      "eval_steps_per_second": 4.049,
      "eval_wer": 0.5747500323433197,
      "step": 5000
    },
    {
      "epoch": 4.37,
      "learning_rate": 5.095496894409938e-05,
      "loss": 1.735,
      "step": 5100
    },
    {
      "epoch": 4.46,
      "learning_rate": 5.017857142857142e-05,
      "loss": 1.7365,
      "step": 5200
    },
    {
      "epoch": 4.55,
      "learning_rate": 4.940217391304347e-05,
      "loss": 1.734,
      "step": 5300
    },
    {
      "epoch": 4.63,
      "learning_rate": 4.862577639751552e-05,
      "loss": 1.7254,
      "step": 5400
    },
    {
      "epoch": 4.72,
      "learning_rate": 4.7849378881987574e-05,
      "loss": 1.7045,
      "step": 5500
    },
    {
      "epoch": 4.72,
      "eval_loss": 0.5373523831367493,
      "eval_runtime": 318.8226,
      "eval_samples_per_second": 32.256,
      "eval_steps_per_second": 4.034,
      "eval_wer": 0.5743064668157539,
      "step": 5500
    },
    {
      "epoch": 4.8,
      "learning_rate": 4.7072981366459626e-05,
      "loss": 1.7031,
      "step": 5600
    },
    {
      "epoch": 4.89,
      "learning_rate": 4.629658385093168e-05,
      "loss": 1.7135,
      "step": 5700
    },
    {
      "epoch": 4.97,
      "learning_rate": 4.5520186335403715e-05,
      "loss": 1.7053,
      "step": 5800
    },
    {
      "epoch": 5.06,
      "learning_rate": 4.474378881987577e-05,
      "loss": 1.7078,
      "step": 5900
    },
    {
      "epoch": 5.15,
      "learning_rate": 4.396739130434782e-05,
      "loss": 1.6733,
      "step": 6000
    },
    {
      "epoch": 5.15,
      "eval_loss": 0.5336768627166748,
      "eval_runtime": 321.7447,
      "eval_samples_per_second": 31.963,
      "eval_steps_per_second": 3.997,
      "eval_wer": 0.5403737039569741,
      "step": 6000
    },
    {
      "epoch": 5.23,
      "learning_rate": 4.319099378881987e-05,
      "loss": 1.6737,
      "step": 6100
    },
    {
      "epoch": 5.32,
      "learning_rate": 4.241459627329192e-05,
      "loss": 1.6939,
      "step": 6200
    },
    {
      "epoch": 5.4,
      "learning_rate": 4.1638198757763974e-05,
      "loss": 1.674,
      "step": 6300
    },
    {
      "epoch": 5.49,
      "learning_rate": 4.0861801242236026e-05,
      "loss": 1.6685,
      "step": 6400
    },
    {
      "epoch": 5.57,
      "learning_rate": 4.008540372670807e-05,
      "loss": 1.6761,
      "step": 6500
    },
    {
      "epoch": 5.57,
      "eval_loss": 0.5054484009742737,
      "eval_runtime": 316.5106,
      "eval_samples_per_second": 32.492,
      "eval_steps_per_second": 4.063,
      "eval_wer": 0.5265862088084721,
      "step": 6500
    },
    {
      "epoch": 5.66,
      "learning_rate": 3.930900621118012e-05,
      "loss": 1.6729,
      "step": 6600
    },
    {
      "epoch": 5.75,
      "learning_rate": 3.853260869565217e-05,
      "loss": 1.6521,
      "step": 6700
    },
    {
      "epoch": 5.83,
      "learning_rate": 3.775621118012422e-05,
      "loss": 1.6876,
      "step": 6800
    },
    {
      "epoch": 5.92,
      "learning_rate": 3.697981366459627e-05,
      "loss": 1.6499,
      "step": 6900
    },
    {
      "epoch": 6.0,
      "learning_rate": 3.620341614906832e-05,
      "loss": 1.655,
      "step": 7000
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.492550253868103,
      "eval_runtime": 321.8948,
      "eval_samples_per_second": 31.948,
      "eval_steps_per_second": 3.995,
      "eval_wer": 0.5243314173766795,
      "step": 7000
    },
    {
      "epoch": 6.09,
      "learning_rate": 3.543478260869565e-05,
      "loss": 1.6501,
      "step": 7100
    },
    {
      "epoch": 6.17,
      "learning_rate": 3.4658385093167694e-05,
      "loss": 1.653,
      "step": 7200
    },
    {
      "epoch": 6.26,
      "learning_rate": 3.3881987577639746e-05,
      "loss": 1.6381,
      "step": 7300
    },
    {
      "epoch": 6.35,
      "learning_rate": 3.31055900621118e-05,
      "loss": 1.6231,
      "step": 7400
    },
    {
      "epoch": 6.43,
      "learning_rate": 3.232919254658385e-05,
      "loss": 1.6252,
      "step": 7500
    },
    {
      "epoch": 6.43,
      "eval_loss": 0.49458181858062744,
      "eval_runtime": 317.6079,
      "eval_samples_per_second": 32.38,
      "eval_steps_per_second": 4.049,
      "eval_wer": 0.5182508732696324,
      "step": 7500
    },
    {
      "epoch": 6.52,
      "learning_rate": 3.1552795031055894e-05,
      "loss": 1.6409,
      "step": 7600
    },
    {
      "epoch": 6.6,
      "learning_rate": 3.0776397515527946e-05,
      "loss": 1.6256,
      "step": 7700
    },
    {
      "epoch": 6.69,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 1.6113,
      "step": 7800
    },
    {
      "epoch": 6.78,
      "learning_rate": 2.9223602484472046e-05,
      "loss": 1.6096,
      "step": 7900
    },
    {
      "epoch": 6.86,
      "learning_rate": 2.8447204968944097e-05,
      "loss": 1.6209,
      "step": 8000
    },
    {
      "epoch": 6.86,
      "eval_loss": 0.491542249917984,
      "eval_runtime": 319.2369,
      "eval_samples_per_second": 32.214,
      "eval_steps_per_second": 4.028,
      "eval_wer": 0.5193597870885468,
      "step": 8000
    },
    {
      "epoch": 6.95,
      "learning_rate": 2.767080745341615e-05,
      "loss": 1.5917,
      "step": 8100
    },
    {
      "epoch": 7.03,
      "learning_rate": 2.6894409937888194e-05,
      "loss": 1.6049,
      "step": 8200
    },
    {
      "epoch": 7.12,
      "learning_rate": 2.6118012422360246e-05,
      "loss": 1.5891,
      "step": 8300
    },
    {
      "epoch": 7.2,
      "learning_rate": 2.5341614906832297e-05,
      "loss": 1.5915,
      "step": 8400
    },
    {
      "epoch": 7.29,
      "learning_rate": 2.4565217391304346e-05,
      "loss": 1.5772,
      "step": 8500
    },
    {
      "epoch": 7.29,
      "eval_loss": 0.47245877981185913,
      "eval_runtime": 314.2017,
      "eval_samples_per_second": 32.731,
      "eval_steps_per_second": 4.093,
      "eval_wer": 0.5103775851553404,
      "step": 8500
    },
    {
      "epoch": 7.38,
      "learning_rate": 2.3788819875776394e-05,
      "loss": 1.5932,
      "step": 8600
    },
    {
      "epoch": 7.46,
      "learning_rate": 2.3012422360248446e-05,
      "loss": 1.574,
      "step": 8700
    },
    {
      "epoch": 7.55,
      "learning_rate": 2.2236024844720494e-05,
      "loss": 1.5818,
      "step": 8800
    },
    {
      "epoch": 7.63,
      "learning_rate": 2.1459627329192546e-05,
      "loss": 1.5852,
      "step": 8900
    },
    {
      "epoch": 7.72,
      "learning_rate": 2.0683229813664594e-05,
      "loss": 1.5602,
      "step": 9000
    },
    {
      "epoch": 7.72,
      "eval_loss": 0.47263726592063904,
      "eval_runtime": 323.393,
      "eval_samples_per_second": 31.8,
      "eval_steps_per_second": 3.977,
      "eval_wer": 0.509656791173046,
      "step": 9000
    },
    {
      "epoch": 7.8,
      "learning_rate": 1.9906832298136646e-05,
      "loss": 1.5737,
      "step": 9100
    },
    {
      "epoch": 7.89,
      "learning_rate": 1.9130434782608694e-05,
      "loss": 1.5894,
      "step": 9200
    },
    {
      "epoch": 7.98,
      "learning_rate": 1.8361801242236024e-05,
      "loss": 1.5694,
      "step": 9300
    },
    {
      "epoch": 8.06,
      "learning_rate": 1.7585403726708073e-05,
      "loss": 1.537,
      "step": 9400
    },
    {
      "epoch": 8.15,
      "learning_rate": 1.680900621118012e-05,
      "loss": 1.5783,
      "step": 9500
    },
    {
      "epoch": 8.15,
      "eval_loss": 0.4666772186756134,
      "eval_runtime": 318.3833,
      "eval_samples_per_second": 32.301,
      "eval_steps_per_second": 4.039,
      "eval_wer": 0.49557358567283344,
      "step": 9500
    },
    {
      "epoch": 8.23,
      "learning_rate": 1.604037267080745e-05,
      "loss": 1.5432,
      "step": 9600
    },
    {
      "epoch": 8.32,
      "learning_rate": 1.5263975155279503e-05,
      "loss": 1.5439,
      "step": 9700
    },
    {
      "epoch": 8.4,
      "learning_rate": 1.4487577639751551e-05,
      "loss": 1.5589,
      "step": 9800
    },
    {
      "epoch": 8.49,
      "learning_rate": 1.3711180124223601e-05,
      "loss": 1.5573,
      "step": 9900
    },
    {
      "epoch": 8.58,
      "learning_rate": 1.2934782608695651e-05,
      "loss": 1.5442,
      "step": 10000
    },
    {
      "epoch": 8.58,
      "eval_loss": 0.46852901577949524,
      "eval_runtime": 317.9433,
      "eval_samples_per_second": 32.345,
      "eval_steps_per_second": 4.045,
      "eval_wer": 0.49374387787162477,
      "step": 10000
    },
    {
      "epoch": 8.66,
      "learning_rate": 1.2158385093167701e-05,
      "loss": 1.5679,
      "step": 10100
    },
    {
      "epoch": 8.75,
      "learning_rate": 1.1381987577639751e-05,
      "loss": 1.5604,
      "step": 10200
    },
    {
      "epoch": 8.83,
      "learning_rate": 1.06055900621118e-05,
      "loss": 1.5458,
      "step": 10300
    },
    {
      "epoch": 8.92,
      "learning_rate": 9.829192546583851e-06,
      "loss": 1.5501,
      "step": 10400
    },
    {
      "epoch": 9.01,
      "learning_rate": 9.0527950310559e-06,
      "loss": 1.5597,
      "step": 10500
    },
    {
      "epoch": 9.01,
      "eval_loss": 0.4707895815372467,
      "eval_runtime": 323.6683,
      "eval_samples_per_second": 31.773,
      "eval_steps_per_second": 3.973,
      "eval_wer": 0.495665995157743,
      "step": 10500
    },
    {
      "epoch": 9.09,
      "learning_rate": 8.27639751552795e-06,
      "loss": 1.5189,
      "step": 10600
    },
    {
      "epoch": 9.18,
      "learning_rate": 7.499999999999999e-06,
      "loss": 1.5272,
      "step": 10700
    },
    {
      "epoch": 9.26,
      "learning_rate": 6.7236024844720485e-06,
      "loss": 1.5319,
      "step": 10800
    },
    {
      "epoch": 9.35,
      "learning_rate": 5.9472049689440985e-06,
      "loss": 1.5175,
      "step": 10900
    },
    {
      "epoch": 9.43,
      "learning_rate": 5.1708074534161485e-06,
      "loss": 1.5406,
      "step": 11000
    },
    {
      "epoch": 9.43,
      "eval_loss": 0.45390617847442627,
      "eval_runtime": 320.1014,
      "eval_samples_per_second": 32.127,
      "eval_steps_per_second": 4.017,
      "eval_wer": 0.48104681464505517,
      "step": 11000
    },
    {
      "epoch": 9.52,
      "learning_rate": 4.3944099378881985e-06,
      "loss": 1.5326,
      "step": 11100
    },
    {
      "epoch": 9.61,
      "learning_rate": 3.6180124223602485e-06,
      "loss": 1.5142,
      "step": 11200
    },
    {
      "epoch": 9.69,
      "learning_rate": 2.8416149068322976e-06,
      "loss": 1.5195,
      "step": 11300
    },
    {
      "epoch": 9.78,
      "learning_rate": 2.0652173913043476e-06,
      "loss": 1.5147,
      "step": 11400
    },
    {
      "epoch": 9.86,
      "learning_rate": 1.2888198757763974e-06,
      "loss": 1.5274,
      "step": 11500
    },
    {
      "epoch": 9.86,
      "eval_loss": 0.45016008615493774,
      "eval_runtime": 317.8491,
      "eval_samples_per_second": 32.355,
      "eval_steps_per_second": 4.046,
      "eval_wer": 0.4782560482007873,
      "step": 11500
    },
    {
      "epoch": 9.86,
      "step": 11500,
      "total_flos": 3.154318018894781e+19,
      "train_loss": 0.0,
      "train_runtime": 3.4176,
      "train_samples_per_second": 54596.703,
      "train_steps_per_second": 1705.873
    }
  ],
  "max_steps": 5830,
  "num_train_epochs": 5,
  "total_flos": 3.154318018894781e+19,
  "trial_name": null,
  "trial_params": null
}