xls-r-uyghur-cv8 / trainer_state.json
lucio's picture
End of training
e818b32
raw
history blame
17.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 10300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.97,
"learning_rate": 4.950000000000001e-06,
"loss": 13.4586,
"step": 100
},
{
"epoch": 1.94,
"learning_rate": 9.950000000000001e-06,
"loss": 5.8722,
"step": 200
},
{
"epoch": 2.91,
"learning_rate": 1.4950000000000001e-05,
"loss": 4.0954,
"step": 300
},
{
"epoch": 3.88,
"learning_rate": 1.995e-05,
"loss": 3.572,
"step": 400
},
{
"epoch": 4.85,
"learning_rate": 2.495e-05,
"loss": 3.2914,
"step": 500
},
{
"epoch": 4.85,
"eval_loss": 3.2282841205596924,
"eval_runtime": 135.7815,
"eval_samples_per_second": 20.194,
"eval_steps_per_second": 2.526,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 5.83,
"learning_rate": 2.995e-05,
"loss": 3.2087,
"step": 600
},
{
"epoch": 6.8,
"learning_rate": 3.495e-05,
"loss": 3.1802,
"step": 700
},
{
"epoch": 7.77,
"learning_rate": 3.995e-05,
"loss": 3.1307,
"step": 800
},
{
"epoch": 8.74,
"learning_rate": 4.495e-05,
"loss": 3.0922,
"step": 900
},
{
"epoch": 9.71,
"learning_rate": 4.995e-05,
"loss": 3.0068,
"step": 1000
},
{
"epoch": 9.71,
"eval_loss": 2.7939300537109375,
"eval_runtime": 134.6432,
"eval_samples_per_second": 20.365,
"eval_steps_per_second": 2.547,
"eval_wer": 0.997957609040984,
"step": 1000
},
{
"epoch": 10.68,
"learning_rate": 5.495e-05,
"loss": 2.5073,
"step": 1100
},
{
"epoch": 11.65,
"learning_rate": 5.995000000000001e-05,
"loss": 1.8326,
"step": 1200
},
{
"epoch": 12.62,
"learning_rate": 6.494999999999999e-05,
"loss": 1.598,
"step": 1300
},
{
"epoch": 13.59,
"learning_rate": 6.995e-05,
"loss": 1.5016,
"step": 1400
},
{
"epoch": 14.56,
"learning_rate": 7.495e-05,
"loss": 1.4306,
"step": 1500
},
{
"epoch": 14.56,
"eval_loss": 0.48574715852737427,
"eval_runtime": 133.5239,
"eval_samples_per_second": 20.536,
"eval_steps_per_second": 2.569,
"eval_wer": 0.6313711251304861,
"step": 1500
},
{
"epoch": 15.53,
"learning_rate": 7.995e-05,
"loss": 1.3756,
"step": 1600
},
{
"epoch": 16.5,
"learning_rate": 8.495e-05,
"loss": 1.3583,
"step": 1700
},
{
"epoch": 17.48,
"learning_rate": 8.995e-05,
"loss": 1.3058,
"step": 1800
},
{
"epoch": 18.45,
"learning_rate": 9.495e-05,
"loss": 1.2949,
"step": 1900
},
{
"epoch": 19.42,
"learning_rate": 9.995e-05,
"loss": 1.2831,
"step": 2000
},
{
"epoch": 19.42,
"eval_loss": 0.3678707182407379,
"eval_runtime": 134.1491,
"eval_samples_per_second": 20.44,
"eval_steps_per_second": 2.557,
"eval_wer": 0.6065901148277584,
"step": 2000
},
{
"epoch": 20.39,
"learning_rate": 9.880722891566265e-05,
"loss": 1.2725,
"step": 2100
},
{
"epoch": 21.36,
"learning_rate": 9.76144578313253e-05,
"loss": 1.2436,
"step": 2200
},
{
"epoch": 22.33,
"learning_rate": 9.640963855421687e-05,
"loss": 1.2363,
"step": 2300
},
{
"epoch": 23.3,
"learning_rate": 9.521686746987952e-05,
"loss": 1.2243,
"step": 2400
},
{
"epoch": 24.27,
"learning_rate": 9.402409638554217e-05,
"loss": 1.2065,
"step": 2500
},
{
"epoch": 24.27,
"eval_loss": 0.33028003573417664,
"eval_runtime": 134.2277,
"eval_samples_per_second": 20.428,
"eval_steps_per_second": 2.555,
"eval_wer": 0.5559842055099169,
"step": 2500
},
{
"epoch": 25.24,
"learning_rate": 9.281927710843374e-05,
"loss": 1.192,
"step": 2600
},
{
"epoch": 26.21,
"learning_rate": 9.161445783132531e-05,
"loss": 1.1816,
"step": 2700
},
{
"epoch": 27.18,
"learning_rate": 9.040963855421686e-05,
"loss": 1.1869,
"step": 2800
},
{
"epoch": 28.16,
"learning_rate": 8.920481927710844e-05,
"loss": 1.1728,
"step": 2900
},
{
"epoch": 29.13,
"learning_rate": 8.800000000000001e-05,
"loss": 1.1449,
"step": 3000
},
{
"epoch": 29.13,
"eval_loss": 0.3007894456386566,
"eval_runtime": 133.5503,
"eval_samples_per_second": 20.532,
"eval_steps_per_second": 2.568,
"eval_wer": 0.46902373712159035,
"step": 3000
},
{
"epoch": 30.1,
"learning_rate": 8.679518072289157e-05,
"loss": 1.1408,
"step": 3100
},
{
"epoch": 31.07,
"learning_rate": 8.559036144578315e-05,
"loss": 1.1319,
"step": 3200
},
{
"epoch": 32.04,
"learning_rate": 8.43855421686747e-05,
"loss": 1.1178,
"step": 3300
},
{
"epoch": 33.01,
"learning_rate": 8.318072289156627e-05,
"loss": 1.1122,
"step": 3400
},
{
"epoch": 33.98,
"learning_rate": 8.197590361445784e-05,
"loss": 1.0926,
"step": 3500
},
{
"epoch": 33.98,
"eval_loss": 0.28173714876174927,
"eval_runtime": 132.429,
"eval_samples_per_second": 20.705,
"eval_steps_per_second": 2.59,
"eval_wer": 0.4618980619979122,
"step": 3500
},
{
"epoch": 34.95,
"learning_rate": 8.07710843373494e-05,
"loss": 1.0935,
"step": 3600
},
{
"epoch": 35.92,
"learning_rate": 7.956626506024096e-05,
"loss": 1.0815,
"step": 3700
},
{
"epoch": 36.89,
"learning_rate": 7.836144578313254e-05,
"loss": 1.0856,
"step": 3800
},
{
"epoch": 37.86,
"learning_rate": 7.71566265060241e-05,
"loss": 1.0732,
"step": 3900
},
{
"epoch": 38.83,
"learning_rate": 7.595180722891566e-05,
"loss": 1.0635,
"step": 4000
},
{
"epoch": 38.83,
"eval_loss": 0.2665168046951294,
"eval_runtime": 133.7977,
"eval_samples_per_second": 20.494,
"eval_steps_per_second": 2.564,
"eval_wer": 0.4391140561884446,
"step": 4000
},
{
"epoch": 39.81,
"learning_rate": 7.474698795180723e-05,
"loss": 1.0614,
"step": 4100
},
{
"epoch": 40.78,
"learning_rate": 7.35421686746988e-05,
"loss": 1.0457,
"step": 4200
},
{
"epoch": 41.75,
"learning_rate": 7.233734939759036e-05,
"loss": 1.039,
"step": 4300
},
{
"epoch": 42.72,
"learning_rate": 7.113253012048193e-05,
"loss": 1.0151,
"step": 4400
},
{
"epoch": 43.69,
"learning_rate": 6.99277108433735e-05,
"loss": 1.029,
"step": 4500
},
{
"epoch": 43.69,
"eval_loss": 0.26156488060951233,
"eval_runtime": 133.7699,
"eval_samples_per_second": 20.498,
"eval_steps_per_second": 2.564,
"eval_wer": 0.4175100984886307,
"step": 4500
},
{
"epoch": 44.66,
"learning_rate": 6.873493975903614e-05,
"loss": 1.0254,
"step": 4600
},
{
"epoch": 45.63,
"learning_rate": 6.753012048192771e-05,
"loss": 1.0328,
"step": 4700
},
{
"epoch": 46.6,
"learning_rate": 6.632530120481928e-05,
"loss": 1.022,
"step": 4800
},
{
"epoch": 47.57,
"learning_rate": 6.512048192771085e-05,
"loss": 1.0021,
"step": 4900
},
{
"epoch": 48.54,
"learning_rate": 6.391566265060241e-05,
"loss": 1.0064,
"step": 5000
},
{
"epoch": 48.54,
"eval_loss": 0.24684669077396393,
"eval_runtime": 133.5,
"eval_samples_per_second": 20.539,
"eval_steps_per_second": 2.569,
"eval_wer": 0.4051195933372668,
"step": 5000
},
{
"epoch": 49.51,
"learning_rate": 6.271084337349398e-05,
"loss": 0.9791,
"step": 5100
},
{
"epoch": 50.49,
"learning_rate": 6.150602409638555e-05,
"loss": 0.9722,
"step": 5200
},
{
"epoch": 51.46,
"learning_rate": 6.030120481927711e-05,
"loss": 0.9815,
"step": 5300
},
{
"epoch": 52.43,
"learning_rate": 5.909638554216868e-05,
"loss": 0.9633,
"step": 5400
},
{
"epoch": 53.4,
"learning_rate": 5.789156626506025e-05,
"loss": 0.9659,
"step": 5500
},
{
"epoch": 53.4,
"eval_loss": 0.2394031286239624,
"eval_runtime": 133.1725,
"eval_samples_per_second": 20.59,
"eval_steps_per_second": 2.576,
"eval_wer": 0.38596650478827216,
"step": 5500
},
{
"epoch": 54.37,
"learning_rate": 5.668674698795181e-05,
"loss": 0.9544,
"step": 5600
},
{
"epoch": 55.34,
"learning_rate": 5.5481927710843374e-05,
"loss": 0.9581,
"step": 5700
},
{
"epoch": 56.31,
"learning_rate": 5.427710843373495e-05,
"loss": 0.9437,
"step": 5800
},
{
"epoch": 57.28,
"learning_rate": 5.307228915662651e-05,
"loss": 0.9378,
"step": 5900
},
{
"epoch": 58.25,
"learning_rate": 5.186746987951807e-05,
"loss": 0.9254,
"step": 6000
},
{
"epoch": 58.25,
"eval_loss": 0.2373155653476715,
"eval_runtime": 133.3175,
"eval_samples_per_second": 20.567,
"eval_steps_per_second": 2.573,
"eval_wer": 0.3688558071982935,
"step": 6000
},
{
"epoch": 59.22,
"learning_rate": 5.0662650602409644e-05,
"loss": 0.9321,
"step": 6100
},
{
"epoch": 60.19,
"learning_rate": 4.9457831325301205e-05,
"loss": 0.9122,
"step": 6200
},
{
"epoch": 61.17,
"learning_rate": 4.825301204819277e-05,
"loss": 0.9148,
"step": 6300
},
{
"epoch": 62.14,
"learning_rate": 4.704819277108434e-05,
"loss": 0.9177,
"step": 6400
},
{
"epoch": 63.11,
"learning_rate": 4.584337349397591e-05,
"loss": 0.9209,
"step": 6500
},
{
"epoch": 63.11,
"eval_loss": 0.23466718196868896,
"eval_runtime": 134.3014,
"eval_samples_per_second": 20.417,
"eval_steps_per_second": 2.554,
"eval_wer": 0.367040348568057,
"step": 6500
},
{
"epoch": 64.08,
"learning_rate": 4.4638554216867476e-05,
"loss": 0.8981,
"step": 6600
},
{
"epoch": 65.05,
"learning_rate": 4.344578313253012e-05,
"loss": 0.8927,
"step": 6700
},
{
"epoch": 66.02,
"learning_rate": 4.224096385542169e-05,
"loss": 0.8986,
"step": 6800
},
{
"epoch": 66.99,
"learning_rate": 4.1036144578313255e-05,
"loss": 0.8867,
"step": 6900
},
{
"epoch": 67.96,
"learning_rate": 3.983132530120482e-05,
"loss": 0.889,
"step": 7000
},
{
"epoch": 67.96,
"eval_loss": 0.22911565005779266,
"eval_runtime": 133.5899,
"eval_samples_per_second": 20.526,
"eval_steps_per_second": 2.568,
"eval_wer": 0.36871964780102573,
"step": 7000
},
{
"epoch": 68.93,
"learning_rate": 3.862650602409639e-05,
"loss": 0.885,
"step": 7100
},
{
"epoch": 69.9,
"learning_rate": 3.742168674698796e-05,
"loss": 0.8772,
"step": 7200
},
{
"epoch": 70.87,
"learning_rate": 3.62289156626506e-05,
"loss": 0.8798,
"step": 7300
},
{
"epoch": 71.84,
"learning_rate": 3.502409638554217e-05,
"loss": 0.8808,
"step": 7400
},
{
"epoch": 72.82,
"learning_rate": 3.3819277108433736e-05,
"loss": 0.8859,
"step": 7500
},
{
"epoch": 72.82,
"eval_loss": 0.22717151045799255,
"eval_runtime": 134.7148,
"eval_samples_per_second": 20.354,
"eval_steps_per_second": 2.546,
"eval_wer": 0.3615939726773476,
"step": 7500
},
{
"epoch": 73.79,
"learning_rate": 3.2614457831325304e-05,
"loss": 0.8713,
"step": 7600
},
{
"epoch": 74.76,
"learning_rate": 3.140963855421687e-05,
"loss": 0.8734,
"step": 7700
},
{
"epoch": 75.73,
"learning_rate": 3.0204819277108436e-05,
"loss": 0.8565,
"step": 7800
},
{
"epoch": 76.7,
"learning_rate": 2.9e-05,
"loss": 0.8492,
"step": 7900
},
{
"epoch": 77.67,
"learning_rate": 2.7795180722891568e-05,
"loss": 0.8441,
"step": 8000
},
{
"epoch": 77.67,
"eval_loss": 0.22322185337543488,
"eval_runtime": 134.4634,
"eval_samples_per_second": 20.392,
"eval_steps_per_second": 2.551,
"eval_wer": 0.35383288703308674,
"step": 8000
},
{
"epoch": 78.64,
"learning_rate": 2.6590361445783136e-05,
"loss": 0.8516,
"step": 8100
},
{
"epoch": 79.61,
"learning_rate": 2.5385542168674696e-05,
"loss": 0.8451,
"step": 8200
},
{
"epoch": 80.58,
"learning_rate": 2.4180722891566264e-05,
"loss": 0.8346,
"step": 8300
},
{
"epoch": 81.55,
"learning_rate": 2.2975903614457832e-05,
"loss": 0.8378,
"step": 8400
},
{
"epoch": 82.52,
"learning_rate": 2.17710843373494e-05,
"loss": 0.8284,
"step": 8500
},
{
"epoch": 82.52,
"eval_loss": 0.22235004603862762,
"eval_runtime": 133.9778,
"eval_samples_per_second": 20.466,
"eval_steps_per_second": 2.56,
"eval_wer": 0.33817455634729726,
"step": 8500
},
{
"epoch": 83.5,
"learning_rate": 2.0566265060240967e-05,
"loss": 0.8269,
"step": 8600
},
{
"epoch": 84.47,
"learning_rate": 1.936144578313253e-05,
"loss": 0.8186,
"step": 8700
},
{
"epoch": 85.44,
"learning_rate": 1.8156626506024096e-05,
"loss": 0.8243,
"step": 8800
},
{
"epoch": 86.41,
"learning_rate": 1.6951807228915663e-05,
"loss": 0.8279,
"step": 8900
},
{
"epoch": 87.38,
"learning_rate": 1.574698795180723e-05,
"loss": 0.8142,
"step": 9000
},
{
"epoch": 87.38,
"eval_loss": 0.2192818820476532,
"eval_runtime": 132.2621,
"eval_samples_per_second": 20.732,
"eval_steps_per_second": 2.593,
"eval_wer": 0.33104888122361914,
"step": 9000
},
{
"epoch": 88.35,
"learning_rate": 1.4542168674698795e-05,
"loss": 0.8071,
"step": 9100
},
{
"epoch": 89.32,
"learning_rate": 1.3337349397590363e-05,
"loss": 0.8075,
"step": 9200
},
{
"epoch": 90.29,
"learning_rate": 1.2132530120481929e-05,
"loss": 0.8042,
"step": 9300
},
{
"epoch": 91.26,
"learning_rate": 1.0927710843373493e-05,
"loss": 0.7916,
"step": 9400
},
{
"epoch": 92.23,
"learning_rate": 9.722891566265061e-06,
"loss": 0.8012,
"step": 9500
},
{
"epoch": 92.23,
"eval_loss": 0.21682003140449524,
"eval_runtime": 133.9404,
"eval_samples_per_second": 20.472,
"eval_steps_per_second": 2.561,
"eval_wer": 0.3276448962919257,
"step": 9500
},
{
"epoch": 93.2,
"learning_rate": 8.518072289156627e-06,
"loss": 0.8055,
"step": 9600
},
{
"epoch": 94.17,
"learning_rate": 7.313253012048194e-06,
"loss": 0.7955,
"step": 9700
},
{
"epoch": 95.15,
"learning_rate": 6.108433734939759e-06,
"loss": 0.7961,
"step": 9800
},
{
"epoch": 96.12,
"learning_rate": 4.903614457831326e-06,
"loss": 0.7843,
"step": 9900
},
{
"epoch": 97.09,
"learning_rate": 3.6987951807228917e-06,
"loss": 0.7781,
"step": 10000
},
{
"epoch": 97.09,
"eval_loss": 0.21628263592720032,
"eval_runtime": 133.5255,
"eval_samples_per_second": 20.535,
"eval_steps_per_second": 2.569,
"eval_wer": 0.3240593654972087,
"step": 10000
},
{
"epoch": 98.06,
"learning_rate": 2.493975903614458e-06,
"loss": 0.7842,
"step": 10100
},
{
"epoch": 99.03,
"learning_rate": 1.2891566265060241e-06,
"loss": 0.7821,
"step": 10200
},
{
"epoch": 100.0,
"learning_rate": 9.638554216867469e-08,
"loss": 0.7779,
"step": 10300
},
{
"epoch": 100.0,
"step": 10300,
"total_flos": 5.823193156406256e+19,
"train_loss": 1.3660302423273476,
"train_runtime": 26867.6077,
"train_samples_per_second": 12.253,
"train_steps_per_second": 0.383
}
],
"max_steps": 10300,
"num_train_epochs": 100,
"total_flos": 5.823193156406256e+19,
"trial_name": null,
"trial_params": null
}