xls-r-uyghur-cv7 / trainer_state.json
lucio's picture
End of training
206bed3
raw
history blame
31.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 18300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.55,
"learning_rate": 4.950000000000001e-06,
"loss": 13.5246,
"step": 100
},
{
"epoch": 1.09,
"learning_rate": 9.950000000000001e-06,
"loss": 6.1441,
"step": 200
},
{
"epoch": 1.64,
"learning_rate": 1.4950000000000001e-05,
"loss": 4.2189,
"step": 300
},
{
"epoch": 2.19,
"learning_rate": 1.995e-05,
"loss": 3.6075,
"step": 400
},
{
"epoch": 2.73,
"learning_rate": 2.495e-05,
"loss": 3.3043,
"step": 500
},
{
"epoch": 2.73,
"eval_loss": 3.241492748260498,
"eval_runtime": 169.6485,
"eval_samples_per_second": 15.444,
"eval_steps_per_second": 1.933,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 3.28,
"learning_rate": 2.995e-05,
"loss": 3.2087,
"step": 600
},
{
"epoch": 3.83,
"learning_rate": 3.495e-05,
"loss": 3.1835,
"step": 700
},
{
"epoch": 4.37,
"learning_rate": 3.995e-05,
"loss": 3.1414,
"step": 800
},
{
"epoch": 4.92,
"learning_rate": 4.495e-05,
"loss": 3.0995,
"step": 900
},
{
"epoch": 5.46,
"learning_rate": 4.995e-05,
"loss": 3.0482,
"step": 1000
},
{
"epoch": 5.46,
"eval_loss": 2.9591026306152344,
"eval_runtime": 128.3332,
"eval_samples_per_second": 20.416,
"eval_steps_per_second": 2.556,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 6.01,
"learning_rate": 5.495e-05,
"loss": 2.7368,
"step": 1100
},
{
"epoch": 6.56,
"learning_rate": 5.995000000000001e-05,
"loss": 2.0079,
"step": 1200
},
{
"epoch": 7.1,
"learning_rate": 6.494999999999999e-05,
"loss": 1.6588,
"step": 1300
},
{
"epoch": 7.65,
"learning_rate": 6.995e-05,
"loss": 1.5644,
"step": 1400
},
{
"epoch": 8.2,
"learning_rate": 7.495e-05,
"loss": 1.4767,
"step": 1500
},
{
"epoch": 8.2,
"eval_loss": 0.47794264554977417,
"eval_runtime": 128.1544,
"eval_samples_per_second": 20.444,
"eval_steps_per_second": 2.559,
"eval_wer": 0.5776572037593256,
"step": 1500
},
{
"epoch": 8.74,
"learning_rate": 7.995e-05,
"loss": 1.4421,
"step": 1600
},
{
"epoch": 9.29,
"learning_rate": 8.495e-05,
"loss": 1.4009,
"step": 1700
},
{
"epoch": 9.84,
"learning_rate": 8.995e-05,
"loss": 1.3682,
"step": 1800
},
{
"epoch": 10.38,
"learning_rate": 9.495e-05,
"loss": 1.3377,
"step": 1900
},
{
"epoch": 10.93,
"learning_rate": 9.995e-05,
"loss": 1.3152,
"step": 2000
},
{
"epoch": 10.93,
"eval_loss": 0.36967846751213074,
"eval_runtime": 127.926,
"eval_samples_per_second": 20.481,
"eval_steps_per_second": 2.564,
"eval_wer": 0.49384749539773276,
"step": 2000
},
{
"epoch": 11.48,
"learning_rate": 9.939877300613497e-05,
"loss": 1.3009,
"step": 2100
},
{
"epoch": 12.02,
"learning_rate": 9.878527607361964e-05,
"loss": 1.2799,
"step": 2200
},
{
"epoch": 12.57,
"learning_rate": 9.81717791411043e-05,
"loss": 1.2559,
"step": 2300
},
{
"epoch": 13.11,
"learning_rate": 9.756441717791411e-05,
"loss": 1.2379,
"step": 2400
},
{
"epoch": 13.66,
"learning_rate": 9.695092024539878e-05,
"loss": 1.2246,
"step": 2500
},
{
"epoch": 13.66,
"eval_loss": 0.3084094822406769,
"eval_runtime": 127.6781,
"eval_samples_per_second": 20.52,
"eval_steps_per_second": 2.569,
"eval_wer": 0.44593547136905337,
"step": 2500
},
{
"epoch": 14.21,
"learning_rate": 9.633742331288344e-05,
"loss": 1.2209,
"step": 2600
},
{
"epoch": 14.75,
"learning_rate": 9.57239263803681e-05,
"loss": 1.1972,
"step": 2700
},
{
"epoch": 15.3,
"learning_rate": 9.511042944785277e-05,
"loss": 1.1885,
"step": 2800
},
{
"epoch": 15.85,
"learning_rate": 9.449693251533743e-05,
"loss": 1.1807,
"step": 2900
},
{
"epoch": 16.39,
"learning_rate": 9.388343558282209e-05,
"loss": 1.1781,
"step": 3000
},
{
"epoch": 16.39,
"eval_loss": 0.2842142581939697,
"eval_runtime": 128.3875,
"eval_samples_per_second": 20.407,
"eval_steps_per_second": 2.555,
"eval_wer": 0.4154151729483577,
"step": 3000
},
{
"epoch": 16.94,
"learning_rate": 9.326993865030675e-05,
"loss": 1.1757,
"step": 3100
},
{
"epoch": 17.49,
"learning_rate": 9.265644171779141e-05,
"loss": 1.155,
"step": 3200
},
{
"epoch": 18.03,
"learning_rate": 9.204294478527608e-05,
"loss": 1.1455,
"step": 3300
},
{
"epoch": 18.58,
"learning_rate": 9.142944785276074e-05,
"loss": 1.1376,
"step": 3400
},
{
"epoch": 19.13,
"learning_rate": 9.081595092024541e-05,
"loss": 1.1351,
"step": 3500
},
{
"epoch": 19.13,
"eval_loss": 0.26151829957962036,
"eval_runtime": 127.9176,
"eval_samples_per_second": 20.482,
"eval_steps_per_second": 2.564,
"eval_wer": 0.3929367309369247,
"step": 3500
},
{
"epoch": 19.67,
"learning_rate": 9.020245398773006e-05,
"loss": 1.1262,
"step": 3600
},
{
"epoch": 20.22,
"learning_rate": 8.958895705521472e-05,
"loss": 1.1265,
"step": 3700
},
{
"epoch": 20.77,
"learning_rate": 8.897546012269939e-05,
"loss": 1.1033,
"step": 3800
},
{
"epoch": 21.31,
"learning_rate": 8.836196319018405e-05,
"loss": 1.1016,
"step": 3900
},
{
"epoch": 21.86,
"learning_rate": 8.774846625766872e-05,
"loss": 1.1052,
"step": 4000
},
{
"epoch": 21.86,
"eval_loss": 0.24618586897850037,
"eval_runtime": 128.2681,
"eval_samples_per_second": 20.426,
"eval_steps_per_second": 2.557,
"eval_wer": 0.3746729968026354,
"step": 4000
},
{
"epoch": 22.4,
"learning_rate": 8.714110429447854e-05,
"loss": 1.0964,
"step": 4100
},
{
"epoch": 22.95,
"learning_rate": 8.652760736196319e-05,
"loss": 1.0848,
"step": 4200
},
{
"epoch": 23.5,
"learning_rate": 8.591411042944786e-05,
"loss": 1.0714,
"step": 4300
},
{
"epoch": 24.04,
"learning_rate": 8.530061349693252e-05,
"loss": 1.0696,
"step": 4400
},
{
"epoch": 24.59,
"learning_rate": 8.468711656441717e-05,
"loss": 1.0711,
"step": 4500
},
{
"epoch": 24.59,
"eval_loss": 0.23661433160305023,
"eval_runtime": 128.1823,
"eval_samples_per_second": 20.44,
"eval_steps_per_second": 2.559,
"eval_wer": 0.36522623776765817,
"step": 4500
},
{
"epoch": 25.14,
"learning_rate": 8.407361963190185e-05,
"loss": 1.0519,
"step": 4600
},
{
"epoch": 25.68,
"learning_rate": 8.346012269938652e-05,
"loss": 1.0673,
"step": 4700
},
{
"epoch": 26.23,
"learning_rate": 8.284662576687117e-05,
"loss": 1.0606,
"step": 4800
},
{
"epoch": 26.78,
"learning_rate": 8.223312883435583e-05,
"loss": 1.0711,
"step": 4900
},
{
"epoch": 27.32,
"learning_rate": 8.16196319018405e-05,
"loss": 1.035,
"step": 5000
},
{
"epoch": 27.32,
"eval_loss": 0.22680768370628357,
"eval_runtime": 127.7363,
"eval_samples_per_second": 20.511,
"eval_steps_per_second": 2.568,
"eval_wer": 0.3556825888964248,
"step": 5000
},
{
"epoch": 27.87,
"learning_rate": 8.100613496932515e-05,
"loss": 1.0428,
"step": 5100
},
{
"epoch": 28.42,
"learning_rate": 8.039877300613497e-05,
"loss": 1.0322,
"step": 5200
},
{
"epoch": 28.96,
"learning_rate": 7.978527607361964e-05,
"loss": 1.0291,
"step": 5300
},
{
"epoch": 29.51,
"learning_rate": 7.91717791411043e-05,
"loss": 1.0258,
"step": 5400
},
{
"epoch": 30.05,
"learning_rate": 7.855828220858897e-05,
"loss": 1.0277,
"step": 5500
},
{
"epoch": 30.05,
"eval_loss": 0.22434431314468384,
"eval_runtime": 127.9511,
"eval_samples_per_second": 20.477,
"eval_steps_per_second": 2.563,
"eval_wer": 0.3449762619901172,
"step": 5500
},
{
"epoch": 30.6,
"learning_rate": 7.794478527607363e-05,
"loss": 1.0208,
"step": 5600
},
{
"epoch": 31.15,
"learning_rate": 7.733128834355828e-05,
"loss": 1.0252,
"step": 5700
},
{
"epoch": 31.69,
"learning_rate": 7.671779141104295e-05,
"loss": 1.0157,
"step": 5800
},
{
"epoch": 32.24,
"learning_rate": 7.610429447852761e-05,
"loss": 1.0094,
"step": 5900
},
{
"epoch": 32.79,
"learning_rate": 7.549079754601228e-05,
"loss": 1.002,
"step": 6000
},
{
"epoch": 32.79,
"eval_loss": 0.22044695913791656,
"eval_runtime": 127.6026,
"eval_samples_per_second": 20.532,
"eval_steps_per_second": 2.57,
"eval_wer": 0.3388722023059781,
"step": 6000
},
{
"epoch": 33.33,
"learning_rate": 7.487730061349694e-05,
"loss": 0.996,
"step": 6100
},
{
"epoch": 33.88,
"learning_rate": 7.42638036809816e-05,
"loss": 1.002,
"step": 6200
},
{
"epoch": 34.43,
"learning_rate": 7.365030674846626e-05,
"loss": 0.9845,
"step": 6300
},
{
"epoch": 34.97,
"learning_rate": 7.303680981595092e-05,
"loss": 0.9958,
"step": 6400
},
{
"epoch": 35.52,
"learning_rate": 7.242331288343559e-05,
"loss": 0.9837,
"step": 6500
},
{
"epoch": 35.52,
"eval_loss": 0.2156379520893097,
"eval_runtime": 127.7813,
"eval_samples_per_second": 20.504,
"eval_steps_per_second": 2.567,
"eval_wer": 0.33489971901947485,
"step": 6500
},
{
"epoch": 36.07,
"learning_rate": 7.180981595092025e-05,
"loss": 1.0028,
"step": 6600
},
{
"epoch": 36.61,
"learning_rate": 7.119631901840491e-05,
"loss": 0.9722,
"step": 6700
},
{
"epoch": 37.16,
"learning_rate": 7.058282208588958e-05,
"loss": 0.9784,
"step": 6800
},
{
"epoch": 37.7,
"learning_rate": 6.996932515337423e-05,
"loss": 0.9822,
"step": 6900
},
{
"epoch": 38.25,
"learning_rate": 6.93558282208589e-05,
"loss": 0.9773,
"step": 7000
},
{
"epoch": 38.25,
"eval_loss": 0.21265123784542084,
"eval_runtime": 128.3357,
"eval_samples_per_second": 20.415,
"eval_steps_per_second": 2.556,
"eval_wer": 0.3288925491715919,
"step": 7000
},
{
"epoch": 38.8,
"learning_rate": 6.874233128834356e-05,
"loss": 0.9649,
"step": 7100
},
{
"epoch": 39.34,
"learning_rate": 6.812883435582822e-05,
"loss": 0.9728,
"step": 7200
},
{
"epoch": 39.89,
"learning_rate": 6.751533742331289e-05,
"loss": 0.9663,
"step": 7300
},
{
"epoch": 40.44,
"learning_rate": 6.690184049079755e-05,
"loss": 0.9762,
"step": 7400
},
{
"epoch": 40.98,
"learning_rate": 6.629447852760736e-05,
"loss": 0.9807,
"step": 7500
},
{
"epoch": 40.98,
"eval_loss": 0.21417580544948578,
"eval_runtime": 128.3216,
"eval_samples_per_second": 20.417,
"eval_steps_per_second": 2.556,
"eval_wer": 0.32743920162774925,
"step": 7500
},
{
"epoch": 41.53,
"learning_rate": 6.568098159509203e-05,
"loss": 0.9647,
"step": 7600
},
{
"epoch": 42.08,
"learning_rate": 6.506748466257669e-05,
"loss": 0.9748,
"step": 7700
},
{
"epoch": 42.62,
"learning_rate": 6.445398773006134e-05,
"loss": 0.9484,
"step": 7800
},
{
"epoch": 43.17,
"learning_rate": 6.384049079754602e-05,
"loss": 0.9558,
"step": 7900
},
{
"epoch": 43.72,
"learning_rate": 6.322699386503069e-05,
"loss": 0.9582,
"step": 8000
},
{
"epoch": 43.72,
"eval_loss": 0.20038354396820068,
"eval_runtime": 127.7709,
"eval_samples_per_second": 20.505,
"eval_steps_per_second": 2.567,
"eval_wer": 0.314165294060653,
"step": 8000
},
{
"epoch": 44.26,
"learning_rate": 6.261349693251534e-05,
"loss": 0.949,
"step": 8100
},
{
"epoch": 44.81,
"learning_rate": 6.2e-05,
"loss": 0.9491,
"step": 8200
},
{
"epoch": 45.36,
"learning_rate": 6.138650306748467e-05,
"loss": 0.9447,
"step": 8300
},
{
"epoch": 45.9,
"learning_rate": 6.0773006134969325e-05,
"loss": 0.9368,
"step": 8400
},
{
"epoch": 46.45,
"learning_rate": 6.015950920245399e-05,
"loss": 0.9548,
"step": 8500
},
{
"epoch": 46.45,
"eval_loss": 0.20219053328037262,
"eval_runtime": 127.6869,
"eval_samples_per_second": 20.519,
"eval_steps_per_second": 2.569,
"eval_wer": 0.30500920453444436,
"step": 8500
},
{
"epoch": 46.99,
"learning_rate": 5.9546012269938655e-05,
"loss": 0.9407,
"step": 8600
},
{
"epoch": 47.54,
"learning_rate": 5.893251533742331e-05,
"loss": 0.9312,
"step": 8700
},
{
"epoch": 48.09,
"learning_rate": 5.831901840490798e-05,
"loss": 0.9446,
"step": 8800
},
{
"epoch": 48.63,
"learning_rate": 5.770552147239264e-05,
"loss": 0.9225,
"step": 8900
},
{
"epoch": 49.18,
"learning_rate": 5.70920245398773e-05,
"loss": 0.9251,
"step": 9000
},
{
"epoch": 49.18,
"eval_loss": 0.2018980085849762,
"eval_runtime": 130.6299,
"eval_samples_per_second": 20.057,
"eval_steps_per_second": 2.511,
"eval_wer": 0.3035074120724736,
"step": 9000
},
{
"epoch": 49.73,
"learning_rate": 5.6478527607361965e-05,
"loss": 0.9251,
"step": 9100
},
{
"epoch": 50.27,
"learning_rate": 5.586503067484663e-05,
"loss": 0.9172,
"step": 9200
},
{
"epoch": 50.82,
"learning_rate": 5.5251533742331294e-05,
"loss": 0.9103,
"step": 9300
},
{
"epoch": 51.37,
"learning_rate": 5.463803680981595e-05,
"loss": 0.9133,
"step": 9400
},
{
"epoch": 51.91,
"learning_rate": 5.402453987730062e-05,
"loss": 0.9103,
"step": 9500
},
{
"epoch": 51.91,
"eval_loss": 0.1963759958744049,
"eval_runtime": 127.8377,
"eval_samples_per_second": 20.495,
"eval_steps_per_second": 2.566,
"eval_wer": 0.30210250944675904,
"step": 9500
},
{
"epoch": 52.46,
"learning_rate": 5.341104294478528e-05,
"loss": 0.9109,
"step": 9600
},
{
"epoch": 53.01,
"learning_rate": 5.279754601226994e-05,
"loss": 0.9153,
"step": 9700
},
{
"epoch": 53.55,
"learning_rate": 5.2184049079754604e-05,
"loss": 0.9113,
"step": 9800
},
{
"epoch": 54.1,
"learning_rate": 5.157055214723927e-05,
"loss": 0.9181,
"step": 9900
},
{
"epoch": 54.64,
"learning_rate": 5.095705521472393e-05,
"loss": 0.915,
"step": 10000
},
{
"epoch": 54.64,
"eval_loss": 0.19702854752540588,
"eval_runtime": 128.314,
"eval_samples_per_second": 20.419,
"eval_steps_per_second": 2.556,
"eval_wer": 0.30316829764557696,
"step": 10000
},
{
"epoch": 55.19,
"learning_rate": 5.034969325153375e-05,
"loss": 0.8971,
"step": 10100
},
{
"epoch": 55.74,
"learning_rate": 4.973619631901841e-05,
"loss": 0.9066,
"step": 10200
},
{
"epoch": 56.28,
"learning_rate": 4.9122699386503065e-05,
"loss": 0.8993,
"step": 10300
},
{
"epoch": 56.83,
"learning_rate": 4.850920245398774e-05,
"loss": 0.8933,
"step": 10400
},
{
"epoch": 57.38,
"learning_rate": 4.7895705521472395e-05,
"loss": 0.8962,
"step": 10500
},
{
"epoch": 57.38,
"eval_loss": 0.2006961703300476,
"eval_runtime": 128.3801,
"eval_samples_per_second": 20.408,
"eval_steps_per_second": 2.555,
"eval_wer": 0.30462164518941964,
"step": 10500
},
{
"epoch": 57.92,
"learning_rate": 4.728220858895705e-05,
"loss": 0.8829,
"step": 10600
},
{
"epoch": 58.47,
"learning_rate": 4.6668711656441724e-05,
"loss": 0.8932,
"step": 10700
},
{
"epoch": 59.02,
"learning_rate": 4.605521472392638e-05,
"loss": 0.8986,
"step": 10800
},
{
"epoch": 59.56,
"learning_rate": 4.544171779141104e-05,
"loss": 0.8892,
"step": 10900
},
{
"epoch": 60.11,
"learning_rate": 4.482822085889571e-05,
"loss": 0.8729,
"step": 11000
},
{
"epoch": 60.11,
"eval_loss": 0.19668185710906982,
"eval_runtime": 128.5612,
"eval_samples_per_second": 20.379,
"eval_steps_per_second": 2.551,
"eval_wer": 0.2942059877918806,
"step": 11000
},
{
"epoch": 60.66,
"learning_rate": 4.421472392638037e-05,
"loss": 0.876,
"step": 11100
},
{
"epoch": 61.2,
"learning_rate": 4.3601226993865034e-05,
"loss": 0.8759,
"step": 11200
},
{
"epoch": 61.75,
"learning_rate": 4.29877300613497e-05,
"loss": 0.8813,
"step": 11300
},
{
"epoch": 62.3,
"learning_rate": 4.237423312883436e-05,
"loss": 0.8684,
"step": 11400
},
{
"epoch": 62.84,
"learning_rate": 4.176073619631902e-05,
"loss": 0.8744,
"step": 11500
},
{
"epoch": 62.84,
"eval_loss": 0.19520752131938934,
"eval_runtime": 127.705,
"eval_samples_per_second": 20.516,
"eval_steps_per_second": 2.568,
"eval_wer": 0.2885379323708943,
"step": 11500
},
{
"epoch": 63.39,
"learning_rate": 4.1147239263803686e-05,
"loss": 0.8665,
"step": 11600
},
{
"epoch": 63.93,
"learning_rate": 4.0533742331288344e-05,
"loss": 0.8757,
"step": 11700
},
{
"epoch": 64.48,
"learning_rate": 3.992024539877301e-05,
"loss": 0.8694,
"step": 11800
},
{
"epoch": 65.03,
"learning_rate": 3.930674846625767e-05,
"loss": 0.8578,
"step": 11900
},
{
"epoch": 65.57,
"learning_rate": 3.869938650306748e-05,
"loss": 0.874,
"step": 12000
},
{
"epoch": 65.57,
"eval_loss": 0.18939977884292603,
"eval_runtime": 128.0037,
"eval_samples_per_second": 20.468,
"eval_steps_per_second": 2.562,
"eval_wer": 0.28950683073345607,
"step": 12000
},
{
"epoch": 66.12,
"learning_rate": 3.808588957055215e-05,
"loss": 0.8628,
"step": 12100
},
{
"epoch": 66.67,
"learning_rate": 3.747239263803681e-05,
"loss": 0.8564,
"step": 12200
},
{
"epoch": 67.21,
"learning_rate": 3.685889570552147e-05,
"loss": 0.8502,
"step": 12300
},
{
"epoch": 67.76,
"learning_rate": 3.6245398773006135e-05,
"loss": 0.8521,
"step": 12400
},
{
"epoch": 68.31,
"learning_rate": 3.56319018404908e-05,
"loss": 0.8457,
"step": 12500
},
{
"epoch": 68.31,
"eval_loss": 0.18946239352226257,
"eval_runtime": 128.0623,
"eval_samples_per_second": 20.459,
"eval_steps_per_second": 2.561,
"eval_wer": 0.28282143203177984,
"step": 12500
},
{
"epoch": 68.85,
"learning_rate": 3.501840490797546e-05,
"loss": 0.8624,
"step": 12600
},
{
"epoch": 69.4,
"learning_rate": 3.440490797546013e-05,
"loss": 0.8394,
"step": 12700
},
{
"epoch": 69.95,
"learning_rate": 3.379141104294479e-05,
"loss": 0.8381,
"step": 12800
},
{
"epoch": 70.49,
"learning_rate": 3.3177914110429445e-05,
"loss": 0.8431,
"step": 12900
},
{
"epoch": 71.04,
"learning_rate": 3.2564417177914117e-05,
"loss": 0.8519,
"step": 13000
},
{
"epoch": 71.04,
"eval_loss": 0.19119836390018463,
"eval_runtime": 127.8936,
"eval_samples_per_second": 20.486,
"eval_steps_per_second": 2.565,
"eval_wer": 0.28747214417207634,
"step": 13000
},
{
"epoch": 71.58,
"learning_rate": 3.1950920245398774e-05,
"loss": 0.8463,
"step": 13100
},
{
"epoch": 72.13,
"learning_rate": 3.133742331288343e-05,
"loss": 0.8492,
"step": 13200
},
{
"epoch": 72.68,
"learning_rate": 3.0723926380368104e-05,
"loss": 0.8354,
"step": 13300
},
{
"epoch": 73.22,
"learning_rate": 3.0110429447852762e-05,
"loss": 0.8408,
"step": 13400
},
{
"epoch": 73.77,
"learning_rate": 2.9496932515337423e-05,
"loss": 0.8301,
"step": 13500
},
{
"epoch": 73.77,
"eval_loss": 0.18781304359436035,
"eval_runtime": 127.7845,
"eval_samples_per_second": 20.503,
"eval_steps_per_second": 2.567,
"eval_wer": 0.2760391434938475,
"step": 13500
},
{
"epoch": 74.32,
"learning_rate": 2.8883435582822088e-05,
"loss": 0.8425,
"step": 13600
},
{
"epoch": 74.86,
"learning_rate": 2.826993865030675e-05,
"loss": 0.8416,
"step": 13700
},
{
"epoch": 75.41,
"learning_rate": 2.7656441717791414e-05,
"loss": 0.8269,
"step": 13800
},
{
"epoch": 75.96,
"learning_rate": 2.7042944785276075e-05,
"loss": 0.8212,
"step": 13900
},
{
"epoch": 76.5,
"learning_rate": 2.6429447852760736e-05,
"loss": 0.8226,
"step": 14000
},
{
"epoch": 76.5,
"eval_loss": 0.18075355887413025,
"eval_runtime": 127.7671,
"eval_samples_per_second": 20.506,
"eval_steps_per_second": 2.567,
"eval_wer": 0.2701288634822207,
"step": 14000
},
{
"epoch": 77.05,
"learning_rate": 2.58159509202454e-05,
"loss": 0.8265,
"step": 14100
},
{
"epoch": 77.6,
"learning_rate": 2.5202453987730063e-05,
"loss": 0.8279,
"step": 14200
},
{
"epoch": 78.14,
"learning_rate": 2.4588957055214727e-05,
"loss": 0.8273,
"step": 14300
},
{
"epoch": 78.69,
"learning_rate": 2.3975460122699385e-05,
"loss": 0.8067,
"step": 14400
},
{
"epoch": 79.23,
"learning_rate": 2.33680981595092e-05,
"loss": 0.8071,
"step": 14500
},
{
"epoch": 79.23,
"eval_loss": 0.18486912548542023,
"eval_runtime": 127.9551,
"eval_samples_per_second": 20.476,
"eval_steps_per_second": 2.563,
"eval_wer": 0.27410134676872394,
"step": 14500
},
{
"epoch": 79.78,
"learning_rate": 2.2754601226993866e-05,
"loss": 0.8231,
"step": 14600
},
{
"epoch": 80.33,
"learning_rate": 2.214110429447853e-05,
"loss": 0.808,
"step": 14700
},
{
"epoch": 80.87,
"learning_rate": 2.1527607361963192e-05,
"loss": 0.8107,
"step": 14800
},
{
"epoch": 81.42,
"learning_rate": 2.0914110429447853e-05,
"loss": 0.7966,
"step": 14900
},
{
"epoch": 81.97,
"learning_rate": 2.0300613496932515e-05,
"loss": 0.7999,
"step": 15000
},
{
"epoch": 81.97,
"eval_loss": 0.18083913624286652,
"eval_runtime": 127.4398,
"eval_samples_per_second": 20.559,
"eval_steps_per_second": 2.574,
"eval_wer": 0.2717275457804476,
"step": 15000
},
{
"epoch": 82.51,
"learning_rate": 1.968711656441718e-05,
"loss": 0.8171,
"step": 15100
},
{
"epoch": 83.06,
"learning_rate": 1.907361963190184e-05,
"loss": 0.8034,
"step": 15200
},
{
"epoch": 83.61,
"learning_rate": 1.8460122699386502e-05,
"loss": 0.8086,
"step": 15300
},
{
"epoch": 84.15,
"learning_rate": 1.7846625766871167e-05,
"loss": 0.8005,
"step": 15400
},
{
"epoch": 84.7,
"learning_rate": 1.723312883435583e-05,
"loss": 0.7947,
"step": 15500
},
{
"epoch": 84.7,
"eval_loss": 0.1820572018623352,
"eval_runtime": 127.1268,
"eval_samples_per_second": 20.609,
"eval_steps_per_second": 2.58,
"eval_wer": 0.2715822110260634,
"step": 15500
},
{
"epoch": 85.25,
"learning_rate": 1.661963190184049e-05,
"loss": 0.7947,
"step": 15600
},
{
"epoch": 85.79,
"learning_rate": 1.6006134969325154e-05,
"loss": 0.7892,
"step": 15700
},
{
"epoch": 86.34,
"learning_rate": 1.539263803680982e-05,
"loss": 0.7969,
"step": 15800
},
{
"epoch": 86.89,
"learning_rate": 1.477914110429448e-05,
"loss": 0.801,
"step": 15900
},
{
"epoch": 87.43,
"learning_rate": 1.4165644171779141e-05,
"loss": 0.7783,
"step": 16000
},
{
"epoch": 87.43,
"eval_loss": 0.18241995573043823,
"eval_runtime": 127.0573,
"eval_samples_per_second": 20.621,
"eval_steps_per_second": 2.582,
"eval_wer": 0.26610793527758936,
"step": 16000
},
{
"epoch": 87.98,
"learning_rate": 1.3552147239263804e-05,
"loss": 0.7915,
"step": 16100
},
{
"epoch": 88.52,
"learning_rate": 1.2938650306748467e-05,
"loss": 0.7862,
"step": 16200
},
{
"epoch": 89.07,
"learning_rate": 1.232515337423313e-05,
"loss": 0.7816,
"step": 16300
},
{
"epoch": 89.62,
"learning_rate": 1.1711656441717792e-05,
"loss": 0.7816,
"step": 16400
},
{
"epoch": 90.16,
"learning_rate": 1.1098159509202455e-05,
"loss": 0.7729,
"step": 16500
},
{
"epoch": 90.16,
"eval_loss": 0.17727895081043243,
"eval_runtime": 128.0807,
"eval_samples_per_second": 20.456,
"eval_steps_per_second": 2.561,
"eval_wer": 0.2638794690436973,
"step": 16500
},
{
"epoch": 90.71,
"learning_rate": 1.0484662576687116e-05,
"loss": 0.7778,
"step": 16600
},
{
"epoch": 91.26,
"learning_rate": 9.87116564417178e-06,
"loss": 0.7844,
"step": 16700
},
{
"epoch": 91.8,
"learning_rate": 9.257668711656442e-06,
"loss": 0.7842,
"step": 16800
},
{
"epoch": 92.35,
"learning_rate": 8.644171779141105e-06,
"loss": 0.7755,
"step": 16900
},
{
"epoch": 92.9,
"learning_rate": 8.030674846625766e-06,
"loss": 0.7759,
"step": 17000
},
{
"epoch": 92.9,
"eval_loss": 0.17666833102703094,
"eval_runtime": 128.2582,
"eval_samples_per_second": 20.428,
"eval_steps_per_second": 2.557,
"eval_wer": 0.26291057068113555,
"step": 17000
},
{
"epoch": 93.44,
"learning_rate": 7.417177914110429e-06,
"loss": 0.7672,
"step": 17100
},
{
"epoch": 93.99,
"learning_rate": 6.8036809815950924e-06,
"loss": 0.7813,
"step": 17200
},
{
"epoch": 94.54,
"learning_rate": 6.1963190184049085e-06,
"loss": 0.7781,
"step": 17300
},
{
"epoch": 95.08,
"learning_rate": 5.582822085889571e-06,
"loss": 0.7711,
"step": 17400
},
{
"epoch": 95.63,
"learning_rate": 4.969325153374233e-06,
"loss": 0.7713,
"step": 17500
},
{
"epoch": 95.63,
"eval_loss": 0.17804710566997528,
"eval_runtime": 127.8273,
"eval_samples_per_second": 20.496,
"eval_steps_per_second": 2.566,
"eval_wer": 0.26208700707295807,
"step": 17500
},
{
"epoch": 96.17,
"learning_rate": 4.355828220858896e-06,
"loss": 0.7789,
"step": 17600
},
{
"epoch": 96.72,
"learning_rate": 3.7423312883435584e-06,
"loss": 0.7732,
"step": 17700
},
{
"epoch": 97.27,
"learning_rate": 3.128834355828221e-06,
"loss": 0.7688,
"step": 17800
},
{
"epoch": 97.81,
"learning_rate": 2.5153374233128836e-06,
"loss": 0.7724,
"step": 17900
},
{
"epoch": 98.36,
"learning_rate": 1.9018404907975462e-06,
"loss": 0.7628,
"step": 18000
},
{
"epoch": 98.36,
"eval_loss": 0.17734766006469727,
"eval_runtime": 128.0092,
"eval_samples_per_second": 20.467,
"eval_steps_per_second": 2.562,
"eval_wer": 0.2594225365759132,
"step": 18000
},
{
"epoch": 98.91,
"learning_rate": 1.2883435582822088e-06,
"loss": 0.7634,
"step": 18100
},
{
"epoch": 99.45,
"learning_rate": 6.748466257668713e-07,
"loss": 0.7638,
"step": 18200
},
{
"epoch": 100.0,
"learning_rate": 6.134969325153375e-08,
"loss": 0.7746,
"step": 18300
},
{
"epoch": 100.0,
"step": 18300,
"total_flos": 1.0288191185677785e+20,
"train_loss": 1.1684310275218526,
"train_runtime": 46836.9916,
"train_samples_per_second": 12.501,
"train_steps_per_second": 0.391
}
],
"max_steps": 18300,
"num_train_epochs": 100,
"total_flos": 1.0288191185677785e+20,
"trial_name": null,
"trial_params": null
}