xls-r-300m-ur / trainer_state.json
HarrisDePerceptron's picture
added tags
5393373
raw
history blame
18.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 5100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.96,
"learning_rate": 7.5e-05,
"loss": 1.2991,
"step": 100
},
{
"epoch": 1.96,
"eval_loss": 0.9768741130828857,
"eval_runtime": 13.5978,
"eval_samples_per_second": 25.078,
"eval_steps_per_second": 3.162,
"eval_wer": 0.6627306273062731,
"step": 100
},
{
"epoch": 3.92,
"learning_rate": 7.35e-05,
"loss": 1.3415,
"step": 200
},
{
"epoch": 3.92,
"eval_loss": 0.9700897932052612,
"eval_runtime": 13.6932,
"eval_samples_per_second": 24.903,
"eval_steps_per_second": 3.14,
"eval_wer": 0.6594095940959409,
"step": 200
},
{
"epoch": 5.88,
"learning_rate": 7.199999999999999e-05,
"loss": 1.2998,
"step": 300
},
{
"epoch": 5.88,
"eval_loss": 0.9677999019622803,
"eval_runtime": 13.5745,
"eval_samples_per_second": 25.121,
"eval_steps_per_second": 3.168,
"eval_wer": 0.6667896678966789,
"step": 300
},
{
"epoch": 7.84,
"learning_rate": 7.049999999999999e-05,
"loss": 1.2881,
"step": 400
},
{
"epoch": 7.84,
"eval_loss": 0.9650289416313171,
"eval_runtime": 13.184,
"eval_samples_per_second": 25.865,
"eval_steps_per_second": 3.262,
"eval_wer": 0.6612546125461255,
"step": 400
},
{
"epoch": 9.8,
"learning_rate": 6.9e-05,
"loss": 1.2369,
"step": 500
},
{
"epoch": 9.8,
"eval_loss": 0.9391834735870361,
"eval_runtime": 13.3537,
"eval_samples_per_second": 25.536,
"eval_steps_per_second": 3.22,
"eval_wer": 0.6501845018450184,
"step": 500
},
{
"epoch": 11.76,
"learning_rate": 6.75e-05,
"loss": 1.2293,
"step": 600
},
{
"epoch": 11.76,
"eval_loss": 0.953644335269928,
"eval_runtime": 13.3175,
"eval_samples_per_second": 25.605,
"eval_steps_per_second": 3.229,
"eval_wer": 0.647970479704797,
"step": 600
},
{
"epoch": 13.73,
"learning_rate": 6.599999999999999e-05,
"loss": 1.1709,
"step": 700
},
{
"epoch": 13.73,
"eval_loss": 0.9265403151512146,
"eval_runtime": 13.1912,
"eval_samples_per_second": 25.851,
"eval_steps_per_second": 3.26,
"eval_wer": 0.6402214022140221,
"step": 700
},
{
"epoch": 15.69,
"learning_rate": 6.45e-05,
"loss": 1.1492,
"step": 800
},
{
"epoch": 15.69,
"eval_loss": 0.9636203646659851,
"eval_runtime": 13.2659,
"eval_samples_per_second": 25.705,
"eval_steps_per_second": 3.241,
"eval_wer": 0.6505535055350553,
"step": 800
},
{
"epoch": 17.65,
"learning_rate": 6.299999999999999e-05,
"loss": 1.1044,
"step": 900
},
{
"epoch": 17.65,
"eval_loss": 0.9304668307304382,
"eval_runtime": 13.2356,
"eval_samples_per_second": 25.764,
"eval_steps_per_second": 3.249,
"eval_wer": 0.6350553505535055,
"step": 900
},
{
"epoch": 19.61,
"learning_rate": 6.149999999999999e-05,
"loss": 1.0704,
"step": 1000
},
{
"epoch": 19.61,
"eval_loss": 0.9329050779342651,
"eval_runtime": 14.4895,
"eval_samples_per_second": 23.534,
"eval_steps_per_second": 2.968,
"eval_wer": 0.6280442804428045,
"step": 1000
},
{
"epoch": 21.57,
"learning_rate": 5.9999999999999995e-05,
"loss": 1.0039,
"step": 1100
},
{
"epoch": 21.57,
"eval_loss": 0.9413442015647888,
"eval_runtime": 13.1291,
"eval_samples_per_second": 25.973,
"eval_steps_per_second": 3.275,
"eval_wer": 0.629520295202952,
"step": 1100
},
{
"epoch": 23.53,
"learning_rate": 5.85e-05,
"loss": 0.9756,
"step": 1200
},
{
"epoch": 23.53,
"eval_loss": 0.9717814326286316,
"eval_runtime": 13.2074,
"eval_samples_per_second": 25.819,
"eval_steps_per_second": 3.256,
"eval_wer": 0.618450184501845,
"step": 1200
},
{
"epoch": 25.49,
"learning_rate": 5.7014999999999995e-05,
"loss": 0.9633,
"step": 1300
},
{
"epoch": 25.49,
"eval_loss": 0.9730961918830872,
"eval_runtime": 14.0128,
"eval_samples_per_second": 24.335,
"eval_steps_per_second": 3.069,
"eval_wer": 0.6132841328413284,
"step": 1300
},
{
"epoch": 27.45,
"learning_rate": 5.551499999999999e-05,
"loss": 0.932,
"step": 1400
},
{
"epoch": 27.45,
"eval_loss": 0.9659466743469238,
"eval_runtime": 13.4417,
"eval_samples_per_second": 25.369,
"eval_steps_per_second": 3.199,
"eval_wer": 0.6199261992619927,
"step": 1400
},
{
"epoch": 29.41,
"learning_rate": 5.401499999999999e-05,
"loss": 0.9252,
"step": 1500
},
{
"epoch": 29.41,
"eval_loss": 0.9766208529472351,
"eval_runtime": 13.236,
"eval_samples_per_second": 25.763,
"eval_steps_per_second": 3.249,
"eval_wer": 0.6195571955719558,
"step": 1500
},
{
"epoch": 31.37,
"learning_rate": 5.2515e-05,
"loss": 0.9172,
"step": 1600
},
{
"epoch": 31.37,
"eval_loss": 1.005155086517334,
"eval_runtime": 13.4612,
"eval_samples_per_second": 25.332,
"eval_steps_per_second": 3.194,
"eval_wer": 0.6199261992619927,
"step": 1600
},
{
"epoch": 33.33,
"learning_rate": 5.1015e-05,
"loss": 0.8733,
"step": 1700
},
{
"epoch": 33.33,
"eval_loss": 0.9955308437347412,
"eval_runtime": 13.2228,
"eval_samples_per_second": 25.789,
"eval_steps_per_second": 3.252,
"eval_wer": 0.6202952029520296,
"step": 1700
},
{
"epoch": 35.29,
"learning_rate": 4.9514999999999996e-05,
"loss": 0.868,
"step": 1800
},
{
"epoch": 35.29,
"eval_loss": 1.0068942308425903,
"eval_runtime": 13.1716,
"eval_samples_per_second": 25.889,
"eval_steps_per_second": 3.265,
"eval_wer": 0.6239852398523985,
"step": 1800
},
{
"epoch": 37.25,
"learning_rate": 4.8014999999999993e-05,
"loss": 0.8547,
"step": 1900
},
{
"epoch": 37.25,
"eval_loss": 0.9782727360725403,
"eval_runtime": 13.4477,
"eval_samples_per_second": 25.357,
"eval_steps_per_second": 3.198,
"eval_wer": 0.625830258302583,
"step": 1900
},
{
"epoch": 39.22,
"learning_rate": 4.651499999999999e-05,
"loss": 0.8451,
"step": 2000
},
{
"epoch": 39.22,
"eval_loss": 0.9844875931739807,
"eval_runtime": 13.5311,
"eval_samples_per_second": 25.201,
"eval_steps_per_second": 3.178,
"eval_wer": 0.6051660516605166,
"step": 2000
},
{
"epoch": 41.18,
"learning_rate": 4.5014999999999995e-05,
"loss": 0.8374,
"step": 2100
},
{
"epoch": 41.18,
"eval_loss": 0.9495627880096436,
"eval_runtime": 13.1208,
"eval_samples_per_second": 25.989,
"eval_steps_per_second": 3.277,
"eval_wer": 0.6136531365313653,
"step": 2100
},
{
"epoch": 43.14,
"learning_rate": 4.353e-05,
"loss": 0.8153,
"step": 2200
},
{
"epoch": 43.14,
"eval_loss": 0.9756118655204773,
"eval_runtime": 13.2792,
"eval_samples_per_second": 25.679,
"eval_steps_per_second": 3.238,
"eval_wer": 0.6121771217712177,
"step": 2200
},
{
"epoch": 45.1,
"learning_rate": 4.2029999999999996e-05,
"loss": 0.8134,
"step": 2300
},
{
"epoch": 45.1,
"eval_loss": 0.9711871147155762,
"eval_runtime": 13.4607,
"eval_samples_per_second": 25.333,
"eval_steps_per_second": 3.194,
"eval_wer": 0.6095940959409594,
"step": 2300
},
{
"epoch": 47.06,
"learning_rate": 4.052999999999999e-05,
"loss": 0.8019,
"step": 2400
},
{
"epoch": 47.06,
"eval_loss": 0.9564995765686035,
"eval_runtime": 13.1971,
"eval_samples_per_second": 25.839,
"eval_steps_per_second": 3.258,
"eval_wer": 0.5970479704797048,
"step": 2400
},
{
"epoch": 49.02,
"learning_rate": 3.903e-05,
"loss": 0.7746,
"step": 2500
},
{
"epoch": 49.02,
"eval_loss": 0.9864395260810852,
"eval_runtime": 13.2769,
"eval_samples_per_second": 25.684,
"eval_steps_per_second": 3.239,
"eval_wer": 0.6095940959409594,
"step": 2500
},
{
"epoch": 50.98,
"learning_rate": 3.7529999999999995e-05,
"loss": 0.7664,
"step": 2600
},
{
"epoch": 50.98,
"eval_loss": 0.9988436698913574,
"eval_runtime": 13.1392,
"eval_samples_per_second": 25.953,
"eval_steps_per_second": 3.273,
"eval_wer": 0.6092250922509225,
"step": 2600
},
{
"epoch": 52.94,
"learning_rate": 3.603e-05,
"loss": 0.7708,
"step": 2700
},
{
"epoch": 52.94,
"eval_loss": 1.0180705785751343,
"eval_runtime": 13.5466,
"eval_samples_per_second": 25.172,
"eval_steps_per_second": 3.174,
"eval_wer": 0.6254612546125461,
"step": 2700
},
{
"epoch": 54.9,
"learning_rate": 3.4529999999999996e-05,
"loss": 0.7468,
"step": 2800
},
{
"epoch": 54.9,
"eval_loss": 0.9917659759521484,
"eval_runtime": 13.328,
"eval_samples_per_second": 25.585,
"eval_steps_per_second": 3.226,
"eval_wer": 0.614760147601476,
"step": 2800
},
{
"epoch": 56.86,
"learning_rate": 3.303e-05,
"loss": 0.7241,
"step": 2900
},
{
"epoch": 56.86,
"eval_loss": 1.015049934387207,
"eval_runtime": 13.1895,
"eval_samples_per_second": 25.854,
"eval_steps_per_second": 3.26,
"eval_wer": 0.6018450184501845,
"step": 2900
},
{
"epoch": 58.82,
"learning_rate": 3.153e-05,
"loss": 0.7165,
"step": 3000
},
{
"epoch": 58.82,
"eval_loss": 1.0438742637634277,
"eval_runtime": 13.2139,
"eval_samples_per_second": 25.806,
"eval_steps_per_second": 3.254,
"eval_wer": 0.6062730627306273,
"step": 3000
},
{
"epoch": 60.78,
"learning_rate": 3.0029999999999995e-05,
"loss": 0.7104,
"step": 3100
},
{
"epoch": 60.78,
"eval_loss": 1.0015809535980225,
"eval_runtime": 13.0489,
"eval_samples_per_second": 26.133,
"eval_steps_per_second": 3.295,
"eval_wer": 0.603690036900369,
"step": 3100
},
{
"epoch": 62.75,
"learning_rate": 2.853e-05,
"loss": 0.6954,
"step": 3200
},
{
"epoch": 62.75,
"eval_loss": 1.0116804838180542,
"eval_runtime": 13.1802,
"eval_samples_per_second": 25.872,
"eval_steps_per_second": 3.262,
"eval_wer": 0.5970479704797048,
"step": 3200
},
{
"epoch": 64.71,
"learning_rate": 2.7029999999999997e-05,
"loss": 0.6753,
"step": 3300
},
{
"epoch": 64.71,
"eval_loss": 1.019060492515564,
"eval_runtime": 13.2987,
"eval_samples_per_second": 25.642,
"eval_steps_per_second": 3.233,
"eval_wer": 0.603690036900369,
"step": 3300
},
{
"epoch": 66.67,
"learning_rate": 2.5529999999999998e-05,
"loss": 0.6803,
"step": 3400
},
{
"epoch": 66.67,
"eval_loss": 1.0190043449401855,
"eval_runtime": 13.4724,
"eval_samples_per_second": 25.311,
"eval_steps_per_second": 3.192,
"eval_wer": 0.6033210332103321,
"step": 3400
},
{
"epoch": 68.63,
"learning_rate": 2.403e-05,
"loss": 0.661,
"step": 3500
},
{
"epoch": 68.63,
"eval_loss": 1.0283905267715454,
"eval_runtime": 13.7543,
"eval_samples_per_second": 24.792,
"eval_steps_per_second": 3.126,
"eval_wer": 0.6007380073800738,
"step": 3500
},
{
"epoch": 70.59,
"learning_rate": 2.253e-05,
"loss": 0.6597,
"step": 3600
},
{
"epoch": 70.59,
"eval_loss": 1.0060473680496216,
"eval_runtime": 13.2986,
"eval_samples_per_second": 25.642,
"eval_steps_per_second": 3.233,
"eval_wer": 0.5966789667896679,
"step": 3600
},
{
"epoch": 72.55,
"learning_rate": 2.1029999999999997e-05,
"loss": 0.6398,
"step": 3700
},
{
"epoch": 72.55,
"eval_loss": 1.0372449159622192,
"eval_runtime": 13.4322,
"eval_samples_per_second": 25.387,
"eval_steps_per_second": 3.201,
"eval_wer": 0.6047970479704797,
"step": 3700
},
{
"epoch": 74.51,
"learning_rate": 1.953e-05,
"loss": 0.6105,
"step": 3800
},
{
"epoch": 74.51,
"eval_loss": 1.0047756433486938,
"eval_runtime": 13.2181,
"eval_samples_per_second": 25.798,
"eval_steps_per_second": 3.253,
"eval_wer": 0.6044280442804428,
"step": 3800
},
{
"epoch": 76.47,
"learning_rate": 1.803e-05,
"loss": 0.6164,
"step": 3900
},
{
"epoch": 76.47,
"eval_loss": 1.0398120880126953,
"eval_runtime": 13.4893,
"eval_samples_per_second": 25.279,
"eval_steps_per_second": 3.188,
"eval_wer": 0.614760147601476,
"step": 3900
},
{
"epoch": 78.43,
"learning_rate": 1.653e-05,
"loss": 0.6354,
"step": 4000
},
{
"epoch": 78.43,
"eval_loss": 1.0271726846694946,
"eval_runtime": 13.2868,
"eval_samples_per_second": 25.665,
"eval_steps_per_second": 3.236,
"eval_wer": 0.6132841328413284,
"step": 4000
},
{
"epoch": 80.39,
"learning_rate": 1.5029999999999998e-05,
"loss": 0.5952,
"step": 4100
},
{
"epoch": 80.39,
"eval_loss": 1.0364222526550293,
"eval_runtime": 13.255,
"eval_samples_per_second": 25.726,
"eval_steps_per_second": 3.244,
"eval_wer": 0.6081180811808118,
"step": 4100
},
{
"epoch": 82.35,
"learning_rate": 1.353e-05,
"loss": 0.5814,
"step": 4200
},
{
"epoch": 82.35,
"eval_loss": 1.0418034791946411,
"eval_runtime": 13.1269,
"eval_samples_per_second": 25.977,
"eval_steps_per_second": 3.276,
"eval_wer": 0.6092250922509225,
"step": 4200
},
{
"epoch": 84.31,
"learning_rate": 1.2029999999999998e-05,
"loss": 0.6079,
"step": 4300
},
{
"epoch": 84.31,
"eval_loss": 1.0277141332626343,
"eval_runtime": 13.3962,
"eval_samples_per_second": 25.455,
"eval_steps_per_second": 3.21,
"eval_wer": 0.5966789667896679,
"step": 4300
},
{
"epoch": 86.27,
"learning_rate": 1.0529999999999999e-05,
"loss": 0.5748,
"step": 4400
},
{
"epoch": 86.27,
"eval_loss": 1.03615140914917,
"eval_runtime": 13.3767,
"eval_samples_per_second": 25.492,
"eval_steps_per_second": 3.215,
"eval_wer": 0.6040590405904059,
"step": 4400
},
{
"epoch": 88.24,
"learning_rate": 9.029999999999998e-06,
"loss": 0.5624,
"step": 4500
},
{
"epoch": 88.24,
"eval_loss": 1.042688250541687,
"eval_runtime": 13.1572,
"eval_samples_per_second": 25.917,
"eval_steps_per_second": 3.268,
"eval_wer": 0.6007380073800738,
"step": 4500
},
{
"epoch": 90.2,
"learning_rate": 7.53e-06,
"loss": 0.5767,
"step": 4600
},
{
"epoch": 90.2,
"eval_loss": 1.037009835243225,
"eval_runtime": 13.3232,
"eval_samples_per_second": 25.594,
"eval_steps_per_second": 3.227,
"eval_wer": 0.5918819188191882,
"step": 4600
},
{
"epoch": 92.16,
"learning_rate": 6.029999999999999e-06,
"loss": 0.5793,
"step": 4700
},
{
"epoch": 92.16,
"eval_loss": 1.0441827774047852,
"eval_runtime": 13.4001,
"eval_samples_per_second": 25.447,
"eval_steps_per_second": 3.209,
"eval_wer": 0.6011070110701107,
"step": 4700
},
{
"epoch": 94.12,
"learning_rate": 4.53e-06,
"loss": 0.547,
"step": 4800
},
{
"epoch": 94.12,
"eval_loss": 1.0516060590744019,
"eval_runtime": 13.1503,
"eval_samples_per_second": 25.931,
"eval_steps_per_second": 3.27,
"eval_wer": 0.5981549815498155,
"step": 4800
},
{
"epoch": 96.08,
"learning_rate": 3.03e-06,
"loss": 0.5513,
"step": 4900
},
{
"epoch": 96.08,
"eval_loss": 1.0460669994354248,
"eval_runtime": 13.2162,
"eval_samples_per_second": 25.802,
"eval_steps_per_second": 3.254,
"eval_wer": 0.5988929889298893,
"step": 4900
},
{
"epoch": 98.04,
"learning_rate": 1.53e-06,
"loss": 0.5429,
"step": 5000
},
{
"epoch": 98.04,
"eval_loss": 1.0503703355789185,
"eval_runtime": 13.1041,
"eval_samples_per_second": 26.022,
"eval_steps_per_second": 3.281,
"eval_wer": 0.5996309963099631,
"step": 5000
},
{
"epoch": 100.0,
"learning_rate": 3e-08,
"loss": 0.5404,
"step": 5100
},
{
"epoch": 100.0,
"eval_loss": 1.0516693592071533,
"eval_runtime": 13.1507,
"eval_samples_per_second": 25.93,
"eval_steps_per_second": 3.27,
"eval_wer": 0.5966789667896679,
"step": 5100
},
{
"epoch": 100.0,
"step": 5100,
"total_flos": 9.838577578075728e+18,
"train_loss": 0.8227130358826881,
"train_runtime": 4841.344,
"train_samples_per_second": 16.731,
"train_steps_per_second": 1.053
}
],
"max_steps": 5100,
"num_train_epochs": 100,
"total_flos": 9.838577578075728e+18,
"trial_name": null,
"trial_params": null
}