xls-r-300m-ur-cv8-hi / trainer_state.json
HarrisDePerceptron's picture
End of training
7849601
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 5100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.96,
"learning_rate": 5.069866666666666e-05,
"loss": 10.7052,
"step": 100
},
{
"epoch": 1.96,
"eval_loss": 3.4682674407958984,
"eval_runtime": 13.0004,
"eval_samples_per_second": 26.23,
"eval_steps_per_second": 3.308,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 3.92,
"learning_rate": 0.000102432,
"loss": 3.2395,
"step": 200
},
{
"epoch": 3.92,
"eval_loss": 3.14892840385437,
"eval_runtime": 12.7764,
"eval_samples_per_second": 26.69,
"eval_steps_per_second": 3.366,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 5.88,
"learning_rate": 0.00015416533333333332,
"loss": 2.9951,
"step": 300
},
{
"epoch": 5.88,
"eval_loss": 2.982297897338867,
"eval_runtime": 12.7866,
"eval_samples_per_second": 26.668,
"eval_steps_per_second": 3.363,
"eval_wer": 1.0007380073800738,
"step": 300
},
{
"epoch": 7.84,
"learning_rate": 0.00020589866666666665,
"loss": 2.3574,
"step": 400
},
{
"epoch": 7.84,
"eval_loss": 1.2614495754241943,
"eval_runtime": 12.8042,
"eval_samples_per_second": 26.632,
"eval_steps_per_second": 3.358,
"eval_wer": 0.7597785977859779,
"step": 400
},
{
"epoch": 9.8,
"learning_rate": 0.000257632,
"loss": 1.7287,
"step": 500
},
{
"epoch": 9.8,
"eval_loss": 1.1816928386688232,
"eval_runtime": 12.7085,
"eval_samples_per_second": 26.833,
"eval_steps_per_second": 3.384,
"eval_wer": 0.7420664206642067,
"step": 500
},
{
"epoch": 11.76,
"learning_rate": 0.00030936533333333335,
"loss": 1.6144,
"step": 600
},
{
"epoch": 11.76,
"eval_loss": 1.131492257118225,
"eval_runtime": 12.8371,
"eval_samples_per_second": 26.564,
"eval_steps_per_second": 3.35,
"eval_wer": 0.7321033210332103,
"step": 600
},
{
"epoch": 13.73,
"learning_rate": 0.00036109866666666666,
"loss": 1.5598,
"step": 700
},
{
"epoch": 13.73,
"eval_loss": 1.232160210609436,
"eval_runtime": 12.7503,
"eval_samples_per_second": 26.745,
"eval_steps_per_second": 3.372,
"eval_wer": 0.7549815498154981,
"step": 700
},
{
"epoch": 15.69,
"learning_rate": 0.0003837186206896552,
"loss": 1.5418,
"step": 800
},
{
"epoch": 15.69,
"eval_loss": 1.272075891494751,
"eval_runtime": 12.8108,
"eval_samples_per_second": 26.618,
"eval_steps_per_second": 3.357,
"eval_wer": 0.7819188191881918,
"step": 800
},
{
"epoch": 17.65,
"learning_rate": 0.0003747990804597701,
"loss": 1.4578,
"step": 900
},
{
"epoch": 17.65,
"eval_loss": 1.1709508895874023,
"eval_runtime": 12.7848,
"eval_samples_per_second": 26.672,
"eval_steps_per_second": 3.363,
"eval_wer": 0.7531365313653137,
"step": 900
},
{
"epoch": 19.61,
"learning_rate": 0.0003658795402298851,
"loss": 1.4311,
"step": 1000
},
{
"epoch": 19.61,
"eval_loss": 1.2042367458343506,
"eval_runtime": 12.6761,
"eval_samples_per_second": 26.901,
"eval_steps_per_second": 3.392,
"eval_wer": 0.7490774907749077,
"step": 1000
},
{
"epoch": 21.57,
"learning_rate": 0.00035696,
"loss": 1.3483,
"step": 1100
},
{
"epoch": 21.57,
"eval_loss": 1.1702170372009277,
"eval_runtime": 12.5762,
"eval_samples_per_second": 27.115,
"eval_steps_per_second": 3.419,
"eval_wer": 0.7464944649446494,
"step": 1100
},
{
"epoch": 23.53,
"learning_rate": 0.0003480404597701149,
"loss": 1.3078,
"step": 1200
},
{
"epoch": 23.53,
"eval_loss": 1.1963412761688232,
"eval_runtime": 12.6937,
"eval_samples_per_second": 26.864,
"eval_steps_per_second": 3.387,
"eval_wer": 0.7420664206642067,
"step": 1200
},
{
"epoch": 25.49,
"learning_rate": 0.00033912091954022987,
"loss": 1.2576,
"step": 1300
},
{
"epoch": 25.49,
"eval_loss": 1.1501450538635254,
"eval_runtime": 12.6626,
"eval_samples_per_second": 26.93,
"eval_steps_per_second": 3.396,
"eval_wer": 0.7280442804428044,
"step": 1300
},
{
"epoch": 27.45,
"learning_rate": 0.0003302013793103448,
"loss": 1.2173,
"step": 1400
},
{
"epoch": 27.45,
"eval_loss": 1.2525919675827026,
"eval_runtime": 12.6262,
"eval_samples_per_second": 27.007,
"eval_steps_per_second": 3.406,
"eval_wer": 0.7298892988929889,
"step": 1400
},
{
"epoch": 29.41,
"learning_rate": 0.00032128183908045977,
"loss": 1.2217,
"step": 1500
},
{
"epoch": 29.41,
"eval_loss": 1.2478744983673096,
"eval_runtime": 12.8026,
"eval_samples_per_second": 26.635,
"eval_steps_per_second": 3.359,
"eval_wer": 0.7309963099630996,
"step": 1500
},
{
"epoch": 31.37,
"learning_rate": 0.0003123622988505747,
"loss": 1.1536,
"step": 1600
},
{
"epoch": 31.37,
"eval_loss": 1.2567418813705444,
"eval_runtime": 12.6734,
"eval_samples_per_second": 26.907,
"eval_steps_per_second": 3.393,
"eval_wer": 0.7431734317343174,
"step": 1600
},
{
"epoch": 33.33,
"learning_rate": 0.00030344275862068966,
"loss": 1.0939,
"step": 1700
},
{
"epoch": 33.33,
"eval_loss": 1.2800976037979126,
"eval_runtime": 12.5686,
"eval_samples_per_second": 27.131,
"eval_steps_per_second": 3.421,
"eval_wer": 0.7247232472324723,
"step": 1700
},
{
"epoch": 35.29,
"learning_rate": 0.0002945232183908046,
"loss": 1.0745,
"step": 1800
},
{
"epoch": 35.29,
"eval_loss": 1.2340304851531982,
"eval_runtime": 12.6385,
"eval_samples_per_second": 26.981,
"eval_steps_per_second": 3.402,
"eval_wer": 0.7151291512915129,
"step": 1800
},
{
"epoch": 37.25,
"learning_rate": 0.00028560367816091956,
"loss": 1.0454,
"step": 1900
},
{
"epoch": 37.25,
"eval_loss": 1.237194299697876,
"eval_runtime": 12.4912,
"eval_samples_per_second": 27.299,
"eval_steps_per_second": 3.442,
"eval_wer": 0.7151291512915129,
"step": 1900
},
{
"epoch": 39.22,
"learning_rate": 0.00027668413793103446,
"loss": 1.0101,
"step": 2000
},
{
"epoch": 39.22,
"eval_loss": 1.2461133003234863,
"eval_runtime": 12.6855,
"eval_samples_per_second": 26.881,
"eval_steps_per_second": 3.39,
"eval_wer": 0.7376383763837638,
"step": 2000
},
{
"epoch": 41.18,
"learning_rate": 0.0002677645977011494,
"loss": 0.9833,
"step": 2100
},
{
"epoch": 41.18,
"eval_loss": 1.2552708387374878,
"eval_runtime": 12.6273,
"eval_samples_per_second": 27.005,
"eval_steps_per_second": 3.405,
"eval_wer": 0.7269372693726938,
"step": 2100
},
{
"epoch": 43.14,
"learning_rate": 0.00025884505747126435,
"loss": 0.9314,
"step": 2200
},
{
"epoch": 43.14,
"eval_loss": 1.2371633052825928,
"eval_runtime": 12.5794,
"eval_samples_per_second": 27.108,
"eval_steps_per_second": 3.418,
"eval_wer": 0.7014760147601476,
"step": 2200
},
{
"epoch": 45.1,
"learning_rate": 0.0002499255172413793,
"loss": 0.9147,
"step": 2300
},
{
"epoch": 45.1,
"eval_loss": 1.3035242557525635,
"eval_runtime": 12.8232,
"eval_samples_per_second": 26.592,
"eval_steps_per_second": 3.353,
"eval_wer": 0.7357933579335794,
"step": 2300
},
{
"epoch": 47.06,
"learning_rate": 0.00024109517241379313,
"loss": 0.8758,
"step": 2400
},
{
"epoch": 47.06,
"eval_loss": 1.2598013877868652,
"eval_runtime": 12.7812,
"eval_samples_per_second": 26.68,
"eval_steps_per_second": 3.364,
"eval_wer": 0.7092250922509226,
"step": 2400
},
{
"epoch": 49.02,
"learning_rate": 0.00023217563218390802,
"loss": 0.8356,
"step": 2500
},
{
"epoch": 49.02,
"eval_loss": 1.255703091621399,
"eval_runtime": 12.549,
"eval_samples_per_second": 27.173,
"eval_steps_per_second": 3.427,
"eval_wer": 0.7143911439114391,
"step": 2500
},
{
"epoch": 50.98,
"learning_rate": 0.00022325609195402297,
"loss": 0.8105,
"step": 2600
},
{
"epoch": 50.98,
"eval_loss": 1.2618709802627563,
"eval_runtime": 12.4348,
"eval_samples_per_second": 27.423,
"eval_steps_per_second": 3.458,
"eval_wer": 0.7236162361623616,
"step": 2600
},
{
"epoch": 52.94,
"learning_rate": 0.00021433655172413795,
"loss": 0.7947,
"step": 2700
},
{
"epoch": 52.94,
"eval_loss": 1.399444818496704,
"eval_runtime": 12.9361,
"eval_samples_per_second": 26.36,
"eval_steps_per_second": 3.324,
"eval_wer": 0.7490774907749077,
"step": 2700
},
{
"epoch": 54.9,
"learning_rate": 0.0002054170114942529,
"loss": 0.7623,
"step": 2800
},
{
"epoch": 54.9,
"eval_loss": 1.2931541204452515,
"eval_runtime": 12.7092,
"eval_samples_per_second": 26.831,
"eval_steps_per_second": 3.383,
"eval_wer": 0.7132841328413284,
"step": 2800
},
{
"epoch": 56.86,
"learning_rate": 0.0001964974712643678,
"loss": 0.7282,
"step": 2900
},
{
"epoch": 56.86,
"eval_loss": 1.2799276113510132,
"eval_runtime": 12.7788,
"eval_samples_per_second": 26.685,
"eval_steps_per_second": 3.365,
"eval_wer": 0.7088560885608856,
"step": 2900
},
{
"epoch": 58.82,
"learning_rate": 0.00018757793103448274,
"loss": 0.7108,
"step": 3000
},
{
"epoch": 58.82,
"eval_loss": 1.3615078926086426,
"eval_runtime": 12.6509,
"eval_samples_per_second": 26.955,
"eval_steps_per_second": 3.399,
"eval_wer": 0.714760147601476,
"step": 3000
},
{
"epoch": 60.78,
"learning_rate": 0.00017865839080459772,
"loss": 0.6896,
"step": 3100
},
{
"epoch": 60.78,
"eval_loss": 1.312876582145691,
"eval_runtime": 12.6527,
"eval_samples_per_second": 26.951,
"eval_steps_per_second": 3.398,
"eval_wer": 0.7040590405904059,
"step": 3100
},
{
"epoch": 62.75,
"learning_rate": 0.00016973885057471264,
"loss": 0.6496,
"step": 3200
},
{
"epoch": 62.75,
"eval_loss": 1.4050244092941284,
"eval_runtime": 12.6982,
"eval_samples_per_second": 26.854,
"eval_steps_per_second": 3.386,
"eval_wer": 0.6933579335793358,
"step": 3200
},
{
"epoch": 64.71,
"learning_rate": 0.0001608193103448276,
"loss": 0.6075,
"step": 3300
},
{
"epoch": 64.71,
"eval_loss": 1.35708749294281,
"eval_runtime": 12.752,
"eval_samples_per_second": 26.741,
"eval_steps_per_second": 3.372,
"eval_wer": 0.7025830258302583,
"step": 3300
},
{
"epoch": 66.67,
"learning_rate": 0.00015189977011494254,
"loss": 0.6242,
"step": 3400
},
{
"epoch": 66.67,
"eval_loss": 1.3368754386901855,
"eval_runtime": 12.8446,
"eval_samples_per_second": 26.548,
"eval_steps_per_second": 3.348,
"eval_wer": 0.7062730627306273,
"step": 3400
},
{
"epoch": 68.63,
"learning_rate": 0.00014298022988505749,
"loss": 0.5865,
"step": 3500
},
{
"epoch": 68.63,
"eval_loss": 1.4367624521255493,
"eval_runtime": 12.5885,
"eval_samples_per_second": 27.088,
"eval_steps_per_second": 3.416,
"eval_wer": 0.7140221402214022,
"step": 3500
},
{
"epoch": 70.59,
"learning_rate": 0.0001340606896551724,
"loss": 0.5721,
"step": 3600
},
{
"epoch": 70.59,
"eval_loss": 1.4223829507827759,
"eval_runtime": 12.6692,
"eval_samples_per_second": 26.916,
"eval_steps_per_second": 3.394,
"eval_wer": 0.7066420664206642,
"step": 3600
},
{
"epoch": 72.55,
"learning_rate": 0.00012514114942528736,
"loss": 0.5475,
"step": 3700
},
{
"epoch": 72.55,
"eval_loss": 1.4797747135162354,
"eval_runtime": 12.8068,
"eval_samples_per_second": 26.626,
"eval_steps_per_second": 3.358,
"eval_wer": 0.7118081180811808,
"step": 3700
},
{
"epoch": 74.51,
"learning_rate": 0.00011622160919540229,
"loss": 0.5086,
"step": 3800
},
{
"epoch": 74.51,
"eval_loss": 1.510679841041565,
"eval_runtime": 12.607,
"eval_samples_per_second": 27.048,
"eval_steps_per_second": 3.411,
"eval_wer": 0.7232472324723247,
"step": 3800
},
{
"epoch": 76.47,
"learning_rate": 0.00010730206896551725,
"loss": 0.4958,
"step": 3900
},
{
"epoch": 76.47,
"eval_loss": 1.4849300384521484,
"eval_runtime": 12.6845,
"eval_samples_per_second": 26.883,
"eval_steps_per_second": 3.39,
"eval_wer": 0.7088560885608856,
"step": 3900
},
{
"epoch": 78.43,
"learning_rate": 9.838252873563218e-05,
"loss": 0.5046,
"step": 4000
},
{
"epoch": 78.43,
"eval_loss": 1.4450523853302002,
"eval_runtime": 12.6526,
"eval_samples_per_second": 26.951,
"eval_steps_per_second": 3.399,
"eval_wer": 0.7114391143911439,
"step": 4000
},
{
"epoch": 80.39,
"learning_rate": 8.946298850574712e-05,
"loss": 0.4694,
"step": 4100
},
{
"epoch": 80.39,
"eval_loss": 1.4674367904663086,
"eval_runtime": 12.49,
"eval_samples_per_second": 27.302,
"eval_steps_per_second": 3.443,
"eval_wer": 0.7088560885608856,
"step": 4100
},
{
"epoch": 82.35,
"learning_rate": 8.054344827586206e-05,
"loss": 0.4386,
"step": 4200
},
{
"epoch": 82.35,
"eval_loss": 1.524474859237671,
"eval_runtime": 12.6393,
"eval_samples_per_second": 26.979,
"eval_steps_per_second": 3.402,
"eval_wer": 0.7103321033210332,
"step": 4200
},
{
"epoch": 84.31,
"learning_rate": 7.162390804597701e-05,
"loss": 0.4516,
"step": 4300
},
{
"epoch": 84.31,
"eval_loss": 1.5031787157058716,
"eval_runtime": 12.6415,
"eval_samples_per_second": 26.975,
"eval_steps_per_second": 3.401,
"eval_wer": 0.7103321033210332,
"step": 4300
},
{
"epoch": 86.27,
"learning_rate": 6.27935632183908e-05,
"loss": 0.4113,
"step": 4400
},
{
"epoch": 86.27,
"eval_loss": 1.5246329307556152,
"eval_runtime": 12.6508,
"eval_samples_per_second": 26.955,
"eval_steps_per_second": 3.399,
"eval_wer": 0.7195571955719557,
"step": 4400
},
{
"epoch": 88.24,
"learning_rate": 5.3874022988505745e-05,
"loss": 0.3972,
"step": 4500
},
{
"epoch": 88.24,
"eval_loss": 1.5318336486816406,
"eval_runtime": 12.5704,
"eval_samples_per_second": 27.127,
"eval_steps_per_second": 3.421,
"eval_wer": 0.7114391143911439,
"step": 4500
},
{
"epoch": 90.2,
"learning_rate": 4.4954482758620694e-05,
"loss": 0.4006,
"step": 4600
},
{
"epoch": 90.2,
"eval_loss": 1.554287314414978,
"eval_runtime": 12.5754,
"eval_samples_per_second": 27.116,
"eval_steps_per_second": 3.419,
"eval_wer": 0.6981549815498155,
"step": 4600
},
{
"epoch": 92.16,
"learning_rate": 3.603494252873563e-05,
"loss": 0.4014,
"step": 4700
},
{
"epoch": 92.16,
"eval_loss": 1.5442076921463013,
"eval_runtime": 12.6146,
"eval_samples_per_second": 27.032,
"eval_steps_per_second": 3.409,
"eval_wer": 0.7047970479704797,
"step": 4700
},
{
"epoch": 94.12,
"learning_rate": 2.711540229885057e-05,
"loss": 0.3672,
"step": 4800
},
{
"epoch": 94.12,
"eval_loss": 1.5541517734527588,
"eval_runtime": 12.6543,
"eval_samples_per_second": 26.947,
"eval_steps_per_second": 3.398,
"eval_wer": 0.7136531365313653,
"step": 4800
},
{
"epoch": 96.08,
"learning_rate": 1.8195862068965517e-05,
"loss": 0.3666,
"step": 4900
},
{
"epoch": 96.08,
"eval_loss": 1.5414179563522339,
"eval_runtime": 12.5239,
"eval_samples_per_second": 27.228,
"eval_steps_per_second": 3.433,
"eval_wer": 0.7018450184501845,
"step": 4900
},
{
"epoch": 98.04,
"learning_rate": 9.27632183908046e-06,
"loss": 0.3574,
"step": 5000
},
{
"epoch": 98.04,
"eval_loss": 1.5465455055236816,
"eval_runtime": 12.7119,
"eval_samples_per_second": 26.825,
"eval_steps_per_second": 3.383,
"eval_wer": 0.7059040590405904,
"step": 5000
},
{
"epoch": 100.0,
"learning_rate": 3.567816091954023e-07,
"loss": 0.3428,
"step": 5100
},
{
"epoch": 100.0,
"eval_loss": 1.5443140268325806,
"eval_runtime": 12.8582,
"eval_samples_per_second": 26.52,
"eval_steps_per_second": 3.344,
"eval_wer": 0.7029520295202952,
"step": 5100
},
{
"epoch": 100.0,
"step": 5100,
"total_flos": 9.838577578075728e+18,
"train_loss": 1.1548647121354645,
"train_runtime": 5602.3197,
"train_samples_per_second": 14.458,
"train_steps_per_second": 0.91
}
],
"max_steps": 5100,
"num_train_epochs": 100,
"total_flos": 9.838577578075728e+18,
"trial_name": null,
"trial_params": null
}