wav2vec2-xls-r-1B-german / trainer_state.json
AndrewMcDowell's picture
End of training
47dd099
raw
history blame
50.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.494497432134996,
"global_step": 34000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.675e-06,
"loss": 8.685,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 7.425e-06,
"loss": 3.0395,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.1174999999999999e-05,
"loss": 2.9522,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.4925e-05,
"loss": 2.4455,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.8675e-05,
"loss": 1.7543,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 2.2424999999999996e-05,
"loss": 1.4833,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 2.6174999999999996e-05,
"loss": 1.2163,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 2.9925e-05,
"loss": 1.1407,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 3.3675e-05,
"loss": 1.1119,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 3.7424999999999995e-05,
"loss": 1.0826,
"step": 1000
},
{
"epoch": 0.07,
"eval_loss": 0.46367982029914856,
"eval_runtime": 1042.4054,
"eval_samples_per_second": 15.344,
"eval_steps_per_second": 1.919,
"eval_wer": 0.4653905513746807,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 4.1175e-05,
"loss": 1.0901,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 4.4924999999999994e-05,
"loss": 1.0794,
"step": 1200
},
{
"epoch": 0.1,
"learning_rate": 4.8675e-05,
"loss": 1.0774,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 5.2424999999999994e-05,
"loss": 1.0604,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 5.6175e-05,
"loss": 1.084,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 5.9925e-05,
"loss": 1.0959,
"step": 1600
},
{
"epoch": 0.12,
"learning_rate": 6.367499999999999e-05,
"loss": 1.0873,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 6.7425e-05,
"loss": 1.0938,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 7.1175e-05,
"loss": 1.1218,
"step": 1900
},
{
"epoch": 0.15,
"learning_rate": 7.492499999999999e-05,
"loss": 1.118,
"step": 2000
},
{
"epoch": 0.15,
"eval_loss": 0.25947731733322144,
"eval_runtime": 1033.4206,
"eval_samples_per_second": 15.478,
"eval_steps_per_second": 1.935,
"eval_wer": 0.2686807708592267,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 7.477084957131722e-05,
"loss": 1.1204,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 7.453702260327356e-05,
"loss": 1.1267,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 7.430319563522992e-05,
"loss": 1.1174,
"step": 2300
},
{
"epoch": 0.18,
"learning_rate": 7.406936866718628e-05,
"loss": 1.1229,
"step": 2400
},
{
"epoch": 0.18,
"learning_rate": 7.383554169914263e-05,
"loss": 1.1171,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 7.360171473109897e-05,
"loss": 1.1209,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 7.336788776305533e-05,
"loss": 1.1378,
"step": 2700
},
{
"epoch": 0.21,
"learning_rate": 7.313406079501168e-05,
"loss": 1.1187,
"step": 2800
},
{
"epoch": 0.21,
"learning_rate": 7.290023382696804e-05,
"loss": 1.1289,
"step": 2900
},
{
"epoch": 0.22,
"learning_rate": 7.26664068589244e-05,
"loss": 1.1268,
"step": 3000
},
{
"epoch": 0.22,
"eval_loss": 0.26353907585144043,
"eval_runtime": 1021.5774,
"eval_samples_per_second": 15.657,
"eval_steps_per_second": 1.958,
"eval_wer": 0.26611306117431743,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 7.243257989088074e-05,
"loss": 1.1198,
"step": 3100
},
{
"epoch": 0.23,
"learning_rate": 7.219875292283709e-05,
"loss": 1.1013,
"step": 3200
},
{
"epoch": 0.24,
"learning_rate": 7.196492595479345e-05,
"loss": 1.1215,
"step": 3300
},
{
"epoch": 0.25,
"learning_rate": 7.17310989867498e-05,
"loss": 1.1323,
"step": 3400
},
{
"epoch": 0.26,
"learning_rate": 7.149727201870615e-05,
"loss": 1.1317,
"step": 3500
},
{
"epoch": 0.26,
"learning_rate": 7.126344505066251e-05,
"loss": 1.1058,
"step": 3600
},
{
"epoch": 0.27,
"learning_rate": 7.102961808261886e-05,
"loss": 1.0934,
"step": 3700
},
{
"epoch": 0.28,
"learning_rate": 7.07957911145752e-05,
"loss": 1.1386,
"step": 3800
},
{
"epoch": 0.29,
"learning_rate": 7.056196414653156e-05,
"loss": 1.0968,
"step": 3900
},
{
"epoch": 0.29,
"learning_rate": 7.032813717848791e-05,
"loss": 1.0919,
"step": 4000
},
{
"epoch": 0.29,
"eval_loss": 0.24173684418201447,
"eval_runtime": 1025.0912,
"eval_samples_per_second": 15.603,
"eval_steps_per_second": 1.951,
"eval_wer": 0.256627559173416,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 7.009431021044427e-05,
"loss": 1.1112,
"step": 4100
},
{
"epoch": 0.31,
"learning_rate": 6.986048324240062e-05,
"loss": 1.1122,
"step": 4200
},
{
"epoch": 0.32,
"learning_rate": 6.962665627435696e-05,
"loss": 1.1185,
"step": 4300
},
{
"epoch": 0.32,
"learning_rate": 6.939282930631332e-05,
"loss": 1.1078,
"step": 4400
},
{
"epoch": 0.33,
"learning_rate": 6.915900233826968e-05,
"loss": 1.0842,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 6.892517537022603e-05,
"loss": 1.0929,
"step": 4600
},
{
"epoch": 0.34,
"learning_rate": 6.869368667186282e-05,
"loss": 1.1068,
"step": 4700
},
{
"epoch": 0.35,
"learning_rate": 6.845985970381916e-05,
"loss": 1.1078,
"step": 4800
},
{
"epoch": 0.36,
"learning_rate": 6.822603273577552e-05,
"loss": 1.1208,
"step": 4900
},
{
"epoch": 0.37,
"learning_rate": 6.799220576773187e-05,
"loss": 1.1013,
"step": 5000
},
{
"epoch": 0.37,
"eval_loss": 0.24144533276557922,
"eval_runtime": 1024.2346,
"eval_samples_per_second": 15.617,
"eval_steps_per_second": 1.953,
"eval_wer": 0.2567436524304465,
"step": 5000
},
{
"epoch": 0.37,
"learning_rate": 6.775837879968823e-05,
"loss": 1.0729,
"step": 5100
},
{
"epoch": 0.38,
"learning_rate": 6.752455183164457e-05,
"loss": 1.0884,
"step": 5200
},
{
"epoch": 0.39,
"learning_rate": 6.729072486360093e-05,
"loss": 1.0744,
"step": 5300
},
{
"epoch": 0.4,
"learning_rate": 6.705689789555728e-05,
"loss": 1.0939,
"step": 5400
},
{
"epoch": 0.4,
"learning_rate": 6.682307092751364e-05,
"loss": 1.0989,
"step": 5500
},
{
"epoch": 0.41,
"learning_rate": 6.658924395946998e-05,
"loss": 1.0922,
"step": 5600
},
{
"epoch": 0.42,
"learning_rate": 6.635541699142633e-05,
"loss": 1.0885,
"step": 5700
},
{
"epoch": 0.43,
"learning_rate": 6.612159002338269e-05,
"loss": 1.0909,
"step": 5800
},
{
"epoch": 0.43,
"learning_rate": 6.588776305533905e-05,
"loss": 1.0773,
"step": 5900
},
{
"epoch": 0.44,
"learning_rate": 6.56539360872954e-05,
"loss": 1.0898,
"step": 6000
},
{
"epoch": 0.44,
"eval_loss": 0.25459304451942444,
"eval_runtime": 1029.5775,
"eval_samples_per_second": 15.535,
"eval_steps_per_second": 1.943,
"eval_wer": 0.27307182758102627,
"step": 6000
},
{
"epoch": 0.45,
"learning_rate": 6.542010911925175e-05,
"loss": 1.069,
"step": 6100
},
{
"epoch": 0.45,
"learning_rate": 6.51862821512081e-05,
"loss": 1.0899,
"step": 6200
},
{
"epoch": 0.46,
"learning_rate": 6.495245518316445e-05,
"loss": 1.0907,
"step": 6300
},
{
"epoch": 0.47,
"learning_rate": 6.47186282151208e-05,
"loss": 1.0875,
"step": 6400
},
{
"epoch": 0.48,
"learning_rate": 6.448480124707717e-05,
"loss": 1.0729,
"step": 6500
},
{
"epoch": 0.48,
"learning_rate": 6.425097427903351e-05,
"loss": 1.0823,
"step": 6600
},
{
"epoch": 0.49,
"learning_rate": 6.401714731098986e-05,
"loss": 1.0558,
"step": 6700
},
{
"epoch": 0.5,
"learning_rate": 6.378332034294622e-05,
"loss": 1.0742,
"step": 6800
},
{
"epoch": 0.51,
"learning_rate": 6.354949337490256e-05,
"loss": 1.0748,
"step": 6900
},
{
"epoch": 0.51,
"learning_rate": 6.331566640685892e-05,
"loss": 1.0808,
"step": 7000
},
{
"epoch": 0.51,
"eval_loss": 0.23990128934383392,
"eval_runtime": 1020.4541,
"eval_samples_per_second": 15.674,
"eval_steps_per_second": 1.96,
"eval_wer": 0.25348621221847384,
"step": 7000
},
{
"epoch": 0.52,
"learning_rate": 6.308183943881527e-05,
"loss": 1.0688,
"step": 7100
},
{
"epoch": 0.53,
"learning_rate": 6.284801247077163e-05,
"loss": 1.065,
"step": 7200
},
{
"epoch": 0.54,
"learning_rate": 6.261418550272797e-05,
"loss": 1.0603,
"step": 7300
},
{
"epoch": 0.54,
"learning_rate": 6.238035853468433e-05,
"loss": 1.072,
"step": 7400
},
{
"epoch": 0.55,
"learning_rate": 6.214653156664068e-05,
"loss": 1.0736,
"step": 7500
},
{
"epoch": 0.56,
"learning_rate": 6.191270459859704e-05,
"loss": 1.066,
"step": 7600
},
{
"epoch": 0.56,
"learning_rate": 6.167887763055338e-05,
"loss": 1.0761,
"step": 7700
},
{
"epoch": 0.57,
"learning_rate": 6.144505066250973e-05,
"loss": 1.0824,
"step": 7800
},
{
"epoch": 0.58,
"learning_rate": 6.121122369446609e-05,
"loss": 1.0571,
"step": 7900
},
{
"epoch": 0.59,
"learning_rate": 6.097973499610288e-05,
"loss": 1.0719,
"step": 8000
},
{
"epoch": 0.59,
"eval_loss": 0.23534023761749268,
"eval_runtime": 1024.9025,
"eval_samples_per_second": 15.606,
"eval_steps_per_second": 1.951,
"eval_wer": 0.2527691656309327,
"step": 8000
},
{
"epoch": 0.59,
"learning_rate": 6.074590802805923e-05,
"loss": 1.0739,
"step": 8100
},
{
"epoch": 0.6,
"learning_rate": 6.051208106001558e-05,
"loss": 1.0628,
"step": 8200
},
{
"epoch": 0.61,
"learning_rate": 6.027825409197193e-05,
"loss": 1.0791,
"step": 8300
},
{
"epoch": 0.62,
"learning_rate": 6.004442712392829e-05,
"loss": 1.0769,
"step": 8400
},
{
"epoch": 0.62,
"learning_rate": 5.981060015588464e-05,
"loss": 1.0729,
"step": 8500
},
{
"epoch": 0.63,
"learning_rate": 5.9576773187840996e-05,
"loss": 1.0848,
"step": 8600
},
{
"epoch": 0.64,
"learning_rate": 5.934294621979734e-05,
"loss": 1.0636,
"step": 8700
},
{
"epoch": 0.65,
"learning_rate": 5.9109119251753694e-05,
"loss": 1.0537,
"step": 8800
},
{
"epoch": 0.65,
"learning_rate": 5.887529228371005e-05,
"loss": 1.0623,
"step": 8900
},
{
"epoch": 0.66,
"learning_rate": 5.86414653156664e-05,
"loss": 1.0446,
"step": 9000
},
{
"epoch": 0.66,
"eval_loss": 0.24269308149814606,
"eval_runtime": 1017.8128,
"eval_samples_per_second": 15.715,
"eval_steps_per_second": 1.965,
"eval_wer": 0.25453788054686755,
"step": 9000
},
{
"epoch": 0.67,
"learning_rate": 5.840763834762276e-05,
"loss": 1.0692,
"step": 9100
},
{
"epoch": 0.67,
"learning_rate": 5.817614964925954e-05,
"loss": 1.042,
"step": 9200
},
{
"epoch": 0.68,
"learning_rate": 5.7942322681215896e-05,
"loss": 1.059,
"step": 9300
},
{
"epoch": 0.69,
"learning_rate": 5.770849571317225e-05,
"loss": 1.0443,
"step": 9400
},
{
"epoch": 0.7,
"learning_rate": 5.74746687451286e-05,
"loss": 1.0586,
"step": 9500
},
{
"epoch": 0.7,
"learning_rate": 5.7240841777084954e-05,
"loss": 1.0584,
"step": 9600
},
{
"epoch": 0.71,
"learning_rate": 5.70070148090413e-05,
"loss": 1.0414,
"step": 9700
},
{
"epoch": 0.72,
"learning_rate": 5.677318784099765e-05,
"loss": 1.0428,
"step": 9800
},
{
"epoch": 0.73,
"learning_rate": 5.653936087295401e-05,
"loss": 1.0408,
"step": 9900
},
{
"epoch": 0.73,
"learning_rate": 5.6305533904910365e-05,
"loss": 1.0347,
"step": 10000
},
{
"epoch": 0.73,
"eval_loss": 0.22658555209636688,
"eval_runtime": 1019.9931,
"eval_samples_per_second": 15.681,
"eval_steps_per_second": 1.961,
"eval_wer": 0.24018329076580575,
"step": 10000
},
{
"epoch": 0.74,
"learning_rate": 5.607170693686672e-05,
"loss": 1.0356,
"step": 10100
},
{
"epoch": 0.75,
"learning_rate": 5.583787996882306e-05,
"loss": 1.0553,
"step": 10200
},
{
"epoch": 0.76,
"learning_rate": 5.5604053000779416e-05,
"loss": 1.0464,
"step": 10300
},
{
"epoch": 0.76,
"learning_rate": 5.537022603273577e-05,
"loss": 1.0545,
"step": 10400
},
{
"epoch": 0.77,
"learning_rate": 5.513639906469212e-05,
"loss": 1.0457,
"step": 10500
},
{
"epoch": 0.78,
"learning_rate": 5.490257209664848e-05,
"loss": 1.0446,
"step": 10600
},
{
"epoch": 0.79,
"learning_rate": 5.4668745128604826e-05,
"loss": 1.051,
"step": 10700
},
{
"epoch": 0.79,
"learning_rate": 5.443491816056118e-05,
"loss": 1.0533,
"step": 10800
},
{
"epoch": 0.8,
"learning_rate": 5.420109119251753e-05,
"loss": 1.0414,
"step": 10900
},
{
"epoch": 0.81,
"learning_rate": 5.3967264224473884e-05,
"loss": 1.0457,
"step": 11000
},
{
"epoch": 0.81,
"eval_loss": 0.22899799048900604,
"eval_runtime": 1038.9889,
"eval_samples_per_second": 15.395,
"eval_steps_per_second": 1.925,
"eval_wer": 0.24484067907726348,
"step": 11000
},
{
"epoch": 0.81,
"learning_rate": 5.373343725643024e-05,
"loss": 1.0395,
"step": 11100
},
{
"epoch": 0.82,
"learning_rate": 5.349961028838658e-05,
"loss": 1.0475,
"step": 11200
},
{
"epoch": 0.83,
"learning_rate": 5.326578332034294e-05,
"loss": 1.0339,
"step": 11300
},
{
"epoch": 0.84,
"learning_rate": 5.3031956352299295e-05,
"loss": 1.0444,
"step": 11400
},
{
"epoch": 0.84,
"learning_rate": 5.279812938425565e-05,
"loss": 1.0359,
"step": 11500
},
{
"epoch": 0.85,
"learning_rate": 5.256664068589244e-05,
"loss": 1.0287,
"step": 11600
},
{
"epoch": 0.86,
"learning_rate": 5.2332813717848785e-05,
"loss": 1.0094,
"step": 11700
},
{
"epoch": 0.87,
"learning_rate": 5.209898674980514e-05,
"loss": 1.0234,
"step": 11800
},
{
"epoch": 0.87,
"learning_rate": 5.186515978176149e-05,
"loss": 1.0261,
"step": 11900
},
{
"epoch": 0.88,
"learning_rate": 5.163133281371784e-05,
"loss": 1.0124,
"step": 12000
},
{
"epoch": 0.88,
"eval_loss": 0.22948846220970154,
"eval_runtime": 1027.4606,
"eval_samples_per_second": 15.568,
"eval_steps_per_second": 1.947,
"eval_wer": 0.2447928759714274,
"step": 12000
},
{
"epoch": 0.89,
"learning_rate": 5.13975058456742e-05,
"loss": 1.0341,
"step": 12100
},
{
"epoch": 0.9,
"learning_rate": 5.116367887763055e-05,
"loss": 1.0335,
"step": 12200
},
{
"epoch": 0.9,
"learning_rate": 5.09298519095869e-05,
"loss": 1.0088,
"step": 12300
},
{
"epoch": 0.91,
"learning_rate": 5.069602494154325e-05,
"loss": 1.0151,
"step": 12400
},
{
"epoch": 0.92,
"learning_rate": 5.0462197973499606e-05,
"loss": 1.0037,
"step": 12500
},
{
"epoch": 0.92,
"learning_rate": 5.023070927513639e-05,
"loss": 1.0189,
"step": 12600
},
{
"epoch": 0.93,
"learning_rate": 4.9996882307092743e-05,
"loss": 0.9913,
"step": 12700
},
{
"epoch": 0.94,
"learning_rate": 4.9763055339049096e-05,
"loss": 0.9999,
"step": 12800
},
{
"epoch": 0.95,
"learning_rate": 4.9529228371005455e-05,
"loss": 1.0152,
"step": 12900
},
{
"epoch": 0.95,
"learning_rate": 4.929540140296181e-05,
"loss": 1.025,
"step": 13000
},
{
"epoch": 0.95,
"eval_loss": 0.21379277110099792,
"eval_runtime": 1021.0623,
"eval_samples_per_second": 15.665,
"eval_steps_per_second": 1.959,
"eval_wer": 0.2345493532922682,
"step": 13000
},
{
"epoch": 0.96,
"learning_rate": 4.9061574434918154e-05,
"loss": 0.9976,
"step": 13100
},
{
"epoch": 0.97,
"learning_rate": 4.8827747466874507e-05,
"loss": 1.0144,
"step": 13200
},
{
"epoch": 0.98,
"learning_rate": 4.859392049883086e-05,
"loss": 1.0086,
"step": 13300
},
{
"epoch": 0.98,
"learning_rate": 4.836009353078721e-05,
"loss": 0.9987,
"step": 13400
},
{
"epoch": 0.99,
"learning_rate": 4.812626656274357e-05,
"loss": 1.0206,
"step": 13500
},
{
"epoch": 1.0,
"learning_rate": 4.7892439594699924e-05,
"loss": 1.01,
"step": 13600
},
{
"epoch": 1.01,
"learning_rate": 4.765861262665627e-05,
"loss": 0.9801,
"step": 13700
},
{
"epoch": 1.01,
"learning_rate": 4.742478565861262e-05,
"loss": 1.0058,
"step": 13800
},
{
"epoch": 1.02,
"learning_rate": 4.7190958690568975e-05,
"loss": 0.999,
"step": 13900
},
{
"epoch": 1.03,
"learning_rate": 4.695713172252533e-05,
"loss": 1.0107,
"step": 14000
},
{
"epoch": 1.03,
"eval_loss": 0.21082927286624908,
"eval_runtime": 1032.6325,
"eval_samples_per_second": 15.49,
"eval_steps_per_second": 1.937,
"eval_wer": 0.22944124998292748,
"step": 14000
},
{
"epoch": 1.03,
"learning_rate": 4.672330475448168e-05,
"loss": 0.9762,
"step": 14100
},
{
"epoch": 1.04,
"learning_rate": 4.6489477786438026e-05,
"loss": 1.0014,
"step": 14200
},
{
"epoch": 1.05,
"learning_rate": 4.6255650818394385e-05,
"loss": 1.0038,
"step": 14300
},
{
"epoch": 1.06,
"learning_rate": 4.602182385035074e-05,
"loss": 0.9838,
"step": 14400
},
{
"epoch": 1.06,
"learning_rate": 4.578799688230709e-05,
"loss": 0.9931,
"step": 14500
},
{
"epoch": 1.07,
"learning_rate": 4.555416991426344e-05,
"loss": 0.9834,
"step": 14600
},
{
"epoch": 1.08,
"learning_rate": 4.532034294621979e-05,
"loss": 0.9867,
"step": 14700
},
{
"epoch": 1.09,
"learning_rate": 4.508651597817614e-05,
"loss": 1.0056,
"step": 14800
},
{
"epoch": 1.09,
"learning_rate": 4.4852689010132494e-05,
"loss": 0.9834,
"step": 14900
},
{
"epoch": 1.1,
"learning_rate": 4.4618862042088854e-05,
"loss": 0.9758,
"step": 15000
},
{
"epoch": 1.1,
"eval_loss": 0.20192867517471313,
"eval_runtime": 1027.9664,
"eval_samples_per_second": 15.56,
"eval_steps_per_second": 1.946,
"eval_wer": 0.2203996339647896,
"step": 15000
},
{
"epoch": 1.11,
"learning_rate": 4.4385035074045206e-05,
"loss": 0.9737,
"step": 15100
},
{
"epoch": 1.12,
"learning_rate": 4.415120810600155e-05,
"loss": 0.9755,
"step": 15200
},
{
"epoch": 1.12,
"learning_rate": 4.3917381137957905e-05,
"loss": 0.9793,
"step": 15300
},
{
"epoch": 1.13,
"learning_rate": 4.368355416991426e-05,
"loss": 0.9818,
"step": 15400
},
{
"epoch": 1.14,
"learning_rate": 4.344972720187061e-05,
"loss": 0.9867,
"step": 15500
},
{
"epoch": 1.14,
"learning_rate": 4.321590023382697e-05,
"loss": 0.9802,
"step": 15600
},
{
"epoch": 1.15,
"learning_rate": 4.298207326578332e-05,
"loss": 0.9823,
"step": 15700
},
{
"epoch": 1.16,
"learning_rate": 4.274824629773967e-05,
"loss": 0.9669,
"step": 15800
},
{
"epoch": 1.17,
"learning_rate": 4.251441932969602e-05,
"loss": 0.9626,
"step": 15900
},
{
"epoch": 1.17,
"learning_rate": 4.228059236165237e-05,
"loss": 0.9547,
"step": 16000
},
{
"epoch": 1.17,
"eval_loss": 0.19999034702777863,
"eval_runtime": 1020.7566,
"eval_samples_per_second": 15.67,
"eval_steps_per_second": 1.959,
"eval_wer": 0.2178250952647609,
"step": 16000
},
{
"epoch": 1.18,
"learning_rate": 4.2046765393608726e-05,
"loss": 0.9711,
"step": 16100
},
{
"epoch": 1.19,
"learning_rate": 4.181293842556508e-05,
"loss": 0.9871,
"step": 16200
},
{
"epoch": 1.2,
"learning_rate": 4.1579111457521424e-05,
"loss": 0.9638,
"step": 16300
},
{
"epoch": 1.2,
"learning_rate": 4.1345284489477784e-05,
"loss": 0.9632,
"step": 16400
},
{
"epoch": 1.21,
"learning_rate": 4.1111457521434136e-05,
"loss": 0.9732,
"step": 16500
},
{
"epoch": 1.22,
"learning_rate": 4.087763055339049e-05,
"loss": 0.9506,
"step": 16600
},
{
"epoch": 1.23,
"learning_rate": 4.064380358534684e-05,
"loss": 0.9718,
"step": 16700
},
{
"epoch": 1.23,
"learning_rate": 4.040997661730319e-05,
"loss": 0.9656,
"step": 16800
},
{
"epoch": 1.24,
"learning_rate": 4.017614964925954e-05,
"loss": 0.9722,
"step": 16900
},
{
"epoch": 1.25,
"learning_rate": 3.99423226812159e-05,
"loss": 0.986,
"step": 17000
},
{
"epoch": 1.25,
"eval_loss": 0.20177510380744934,
"eval_runtime": 1018.9329,
"eval_samples_per_second": 15.698,
"eval_steps_per_second": 1.963,
"eval_wer": 0.21997623502738434,
"step": 17000
},
{
"epoch": 1.25,
"learning_rate": 3.970849571317225e-05,
"loss": 0.9662,
"step": 17100
},
{
"epoch": 1.26,
"learning_rate": 3.9474668745128605e-05,
"loss": 0.969,
"step": 17200
},
{
"epoch": 1.27,
"learning_rate": 3.924084177708495e-05,
"loss": 0.9641,
"step": 17300
},
{
"epoch": 1.28,
"learning_rate": 3.90070148090413e-05,
"loss": 0.9647,
"step": 17400
},
{
"epoch": 1.28,
"learning_rate": 3.8773187840997656e-05,
"loss": 0.9658,
"step": 17500
},
{
"epoch": 1.29,
"learning_rate": 3.853936087295401e-05,
"loss": 0.9615,
"step": 17600
},
{
"epoch": 1.3,
"learning_rate": 3.830553390491037e-05,
"loss": 0.9639,
"step": 17700
},
{
"epoch": 1.31,
"learning_rate": 3.807170693686672e-05,
"loss": 0.9543,
"step": 17800
},
{
"epoch": 1.31,
"learning_rate": 3.7837879968823066e-05,
"loss": 0.9446,
"step": 17900
},
{
"epoch": 1.32,
"learning_rate": 3.760405300077942e-05,
"loss": 0.9588,
"step": 18000
},
{
"epoch": 1.32,
"eval_loss": 0.1992081105709076,
"eval_runtime": 1010.377,
"eval_samples_per_second": 15.831,
"eval_steps_per_second": 1.979,
"eval_wer": 0.21378914732917217,
"step": 18000
},
{
"epoch": 1.33,
"learning_rate": 3.737256430241621e-05,
"loss": 0.9683,
"step": 18100
},
{
"epoch": 1.34,
"learning_rate": 3.713873733437256e-05,
"loss": 0.958,
"step": 18200
},
{
"epoch": 1.34,
"learning_rate": 3.6904910366328916e-05,
"loss": 0.961,
"step": 18300
},
{
"epoch": 1.35,
"learning_rate": 3.667108339828526e-05,
"loss": 0.9441,
"step": 18400
},
{
"epoch": 1.36,
"learning_rate": 3.643725643024162e-05,
"loss": 0.9401,
"step": 18500
},
{
"epoch": 1.36,
"learning_rate": 3.6203429462197974e-05,
"loss": 0.939,
"step": 18600
},
{
"epoch": 1.37,
"learning_rate": 3.596960249415432e-05,
"loss": 0.9637,
"step": 18700
},
{
"epoch": 1.38,
"learning_rate": 3.573577552611068e-05,
"loss": 0.9412,
"step": 18800
},
{
"epoch": 1.39,
"learning_rate": 3.5501948558067025e-05,
"loss": 0.9399,
"step": 18900
},
{
"epoch": 1.39,
"learning_rate": 3.526812159002338e-05,
"loss": 0.9413,
"step": 19000
},
{
"epoch": 1.39,
"eval_loss": 0.18979620933532715,
"eval_runtime": 1016.8182,
"eval_samples_per_second": 15.73,
"eval_steps_per_second": 1.967,
"eval_wer": 0.20486362456806478,
"step": 19000
},
{
"epoch": 1.4,
"learning_rate": 3.503429462197973e-05,
"loss": 0.9497,
"step": 19100
},
{
"epoch": 1.41,
"learning_rate": 3.480280592361652e-05,
"loss": 0.9462,
"step": 19200
},
{
"epoch": 1.42,
"learning_rate": 3.4568978955572874e-05,
"loss": 0.9427,
"step": 19300
},
{
"epoch": 1.42,
"learning_rate": 3.433515198752923e-05,
"loss": 0.9466,
"step": 19400
},
{
"epoch": 1.43,
"learning_rate": 3.410132501948558e-05,
"loss": 0.9284,
"step": 19500
},
{
"epoch": 1.44,
"learning_rate": 3.3867498051441925e-05,
"loss": 0.9438,
"step": 19600
},
{
"epoch": 1.45,
"learning_rate": 3.3633671083398285e-05,
"loss": 0.9281,
"step": 19700
},
{
"epoch": 1.45,
"learning_rate": 3.339984411535464e-05,
"loss": 0.9291,
"step": 19800
},
{
"epoch": 1.46,
"learning_rate": 3.316601714731098e-05,
"loss": 0.9467,
"step": 19900
},
{
"epoch": 1.47,
"learning_rate": 3.293219017926734e-05,
"loss": 0.9339,
"step": 20000
},
{
"epoch": 1.47,
"eval_loss": 0.1874116212129593,
"eval_runtime": 1013.6201,
"eval_samples_per_second": 15.78,
"eval_steps_per_second": 1.973,
"eval_wer": 0.2056421322916809,
"step": 20000
},
{
"epoch": 1.47,
"learning_rate": 3.269836321122369e-05,
"loss": 0.9356,
"step": 20100
},
{
"epoch": 1.48,
"learning_rate": 3.246453624318004e-05,
"loss": 0.9206,
"step": 20200
},
{
"epoch": 1.49,
"learning_rate": 3.22307092751364e-05,
"loss": 0.9282,
"step": 20300
},
{
"epoch": 1.5,
"learning_rate": 3.1996882307092746e-05,
"loss": 0.927,
"step": 20400
},
{
"epoch": 1.5,
"learning_rate": 3.17630553390491e-05,
"loss": 0.9318,
"step": 20500
},
{
"epoch": 1.51,
"learning_rate": 3.152922837100545e-05,
"loss": 0.9394,
"step": 20600
},
{
"epoch": 1.52,
"learning_rate": 3.1295401402961804e-05,
"loss": 0.9227,
"step": 20700
},
{
"epoch": 1.53,
"learning_rate": 3.106157443491816e-05,
"loss": 0.9197,
"step": 20800
},
{
"epoch": 1.53,
"learning_rate": 3.082774746687451e-05,
"loss": 0.9176,
"step": 20900
},
{
"epoch": 1.54,
"learning_rate": 3.059392049883086e-05,
"loss": 0.9268,
"step": 21000
},
{
"epoch": 1.54,
"eval_loss": 0.1797132045030594,
"eval_runtime": 1006.5438,
"eval_samples_per_second": 15.891,
"eval_steps_per_second": 1.987,
"eval_wer": 0.19759072346586176,
"step": 21000
},
{
"epoch": 1.55,
"learning_rate": 3.0360093530787215e-05,
"loss": 0.93,
"step": 21100
},
{
"epoch": 1.56,
"learning_rate": 3.0126266562743567e-05,
"loss": 0.9069,
"step": 21200
},
{
"epoch": 1.56,
"learning_rate": 2.989243959469992e-05,
"loss": 0.9359,
"step": 21300
},
{
"epoch": 1.57,
"learning_rate": 2.966095089633671e-05,
"loss": 0.9289,
"step": 21400
},
{
"epoch": 1.58,
"learning_rate": 2.942712392829306e-05,
"loss": 0.9092,
"step": 21500
},
{
"epoch": 1.58,
"learning_rate": 2.919329696024941e-05,
"loss": 0.9282,
"step": 21600
},
{
"epoch": 1.59,
"learning_rate": 2.8959469992205766e-05,
"loss": 0.9108,
"step": 21700
},
{
"epoch": 1.6,
"learning_rate": 2.872564302416212e-05,
"loss": 0.9193,
"step": 21800
},
{
"epoch": 1.61,
"learning_rate": 2.8491816056118468e-05,
"loss": 0.898,
"step": 21900
},
{
"epoch": 1.61,
"learning_rate": 2.8257989088074824e-05,
"loss": 0.9194,
"step": 22000
},
{
"epoch": 1.61,
"eval_loss": 0.17433622479438782,
"eval_runtime": 1020.4225,
"eval_samples_per_second": 15.675,
"eval_steps_per_second": 1.96,
"eval_wer": 0.19052952183236133,
"step": 22000
},
{
"epoch": 1.62,
"learning_rate": 2.8024162120031173e-05,
"loss": 0.9215,
"step": 22100
},
{
"epoch": 1.63,
"learning_rate": 2.7790335151987526e-05,
"loss": 0.915,
"step": 22200
},
{
"epoch": 1.64,
"learning_rate": 2.7556508183943882e-05,
"loss": 0.901,
"step": 22300
},
{
"epoch": 1.64,
"learning_rate": 2.732268121590023e-05,
"loss": 0.913,
"step": 22400
},
{
"epoch": 1.65,
"learning_rate": 2.7088854247856584e-05,
"loss": 0.9155,
"step": 22500
},
{
"epoch": 1.66,
"learning_rate": 2.6855027279812936e-05,
"loss": 0.9,
"step": 22600
},
{
"epoch": 1.67,
"learning_rate": 2.662120031176929e-05,
"loss": 0.8909,
"step": 22700
},
{
"epoch": 1.67,
"learning_rate": 2.638737334372564e-05,
"loss": 0.9014,
"step": 22800
},
{
"epoch": 1.68,
"learning_rate": 2.615354637568199e-05,
"loss": 0.9036,
"step": 22900
},
{
"epoch": 1.69,
"learning_rate": 2.5919719407638347e-05,
"loss": 0.8987,
"step": 23000
},
{
"epoch": 1.69,
"eval_loss": 0.1737690269947052,
"eval_runtime": 1004.9747,
"eval_samples_per_second": 15.916,
"eval_steps_per_second": 1.99,
"eval_wer": 0.19324064083477882,
"step": 23000
},
{
"epoch": 1.69,
"learning_rate": 2.56858924395947e-05,
"loss": 0.9074,
"step": 23100
},
{
"epoch": 1.7,
"learning_rate": 2.545206547155105e-05,
"loss": 0.8877,
"step": 23200
},
{
"epoch": 1.71,
"learning_rate": 2.52182385035074e-05,
"loss": 0.8995,
"step": 23300
},
{
"epoch": 1.72,
"learning_rate": 2.4984411535463757e-05,
"loss": 0.9006,
"step": 23400
},
{
"epoch": 1.72,
"learning_rate": 2.4750584567420107e-05,
"loss": 0.9072,
"step": 23500
},
{
"epoch": 1.73,
"learning_rate": 2.451675759937646e-05,
"loss": 0.891,
"step": 23600
},
{
"epoch": 1.74,
"learning_rate": 2.428293063133281e-05,
"loss": 0.886,
"step": 23700
},
{
"epoch": 1.75,
"learning_rate": 2.4049103663289164e-05,
"loss": 0.9053,
"step": 23800
},
{
"epoch": 1.75,
"learning_rate": 2.3815276695245517e-05,
"loss": 0.9086,
"step": 23900
},
{
"epoch": 1.76,
"learning_rate": 2.3581449727201866e-05,
"loss": 0.8884,
"step": 24000
},
{
"epoch": 1.76,
"eval_loss": 0.17026115953922272,
"eval_runtime": 1004.0446,
"eval_samples_per_second": 15.931,
"eval_steps_per_second": 1.992,
"eval_wer": 0.18726525260526927,
"step": 24000
},
{
"epoch": 1.77,
"learning_rate": 2.3347622759158222e-05,
"loss": 0.8968,
"step": 24100
},
{
"epoch": 1.78,
"learning_rate": 2.311613406079501e-05,
"loss": 0.8838,
"step": 24200
},
{
"epoch": 1.78,
"learning_rate": 2.2882307092751363e-05,
"loss": 0.8787,
"step": 24300
},
{
"epoch": 1.79,
"learning_rate": 2.2648480124707713e-05,
"loss": 0.8904,
"step": 24400
},
{
"epoch": 1.8,
"learning_rate": 2.241465315666407e-05,
"loss": 0.8854,
"step": 24500
},
{
"epoch": 1.8,
"learning_rate": 2.218082618862042e-05,
"loss": 0.8795,
"step": 24600
},
{
"epoch": 1.81,
"learning_rate": 2.194699922057677e-05,
"loss": 0.8897,
"step": 24700
},
{
"epoch": 1.82,
"learning_rate": 2.1713172252533123e-05,
"loss": 0.8888,
"step": 24800
},
{
"epoch": 1.83,
"learning_rate": 2.1479345284489476e-05,
"loss": 0.8817,
"step": 24900
},
{
"epoch": 1.83,
"learning_rate": 2.1245518316445828e-05,
"loss": 0.8939,
"step": 25000
},
{
"epoch": 1.83,
"eval_loss": 0.16330334544181824,
"eval_runtime": 1011.258,
"eval_samples_per_second": 15.817,
"eval_steps_per_second": 1.978,
"eval_wer": 0.18310638239753063,
"step": 25000
},
{
"epoch": 1.84,
"learning_rate": 2.101169134840218e-05,
"loss": 0.8837,
"step": 25100
},
{
"epoch": 1.85,
"learning_rate": 2.077786438035853e-05,
"loss": 0.8832,
"step": 25200
},
{
"epoch": 1.86,
"learning_rate": 2.0544037412314886e-05,
"loss": 0.869,
"step": 25300
},
{
"epoch": 1.86,
"learning_rate": 2.0310210444271235e-05,
"loss": 0.8638,
"step": 25400
},
{
"epoch": 1.87,
"learning_rate": 2.0076383476227588e-05,
"loss": 0.8785,
"step": 25500
},
{
"epoch": 1.88,
"learning_rate": 1.9842556508183944e-05,
"loss": 0.8666,
"step": 25600
},
{
"epoch": 1.89,
"learning_rate": 1.9608729540140293e-05,
"loss": 0.8725,
"step": 25700
},
{
"epoch": 1.89,
"learning_rate": 1.9374902572096646e-05,
"loss": 0.8757,
"step": 25800
},
{
"epoch": 1.9,
"learning_rate": 1.9141075604053002e-05,
"loss": 0.8617,
"step": 25900
},
{
"epoch": 1.91,
"learning_rate": 1.890724863600935e-05,
"loss": 0.8629,
"step": 26000
},
{
"epoch": 1.91,
"eval_loss": 0.154932901263237,
"eval_runtime": 1005.7394,
"eval_samples_per_second": 15.904,
"eval_steps_per_second": 1.989,
"eval_wer": 0.17495936736003934,
"step": 26000
},
{
"epoch": 1.91,
"learning_rate": 1.8673421667965704e-05,
"loss": 0.8748,
"step": 26100
},
{
"epoch": 1.92,
"learning_rate": 1.8439594699922056e-05,
"loss": 0.8659,
"step": 26200
},
{
"epoch": 1.93,
"learning_rate": 1.820576773187841e-05,
"loss": 0.8594,
"step": 26300
},
{
"epoch": 1.94,
"learning_rate": 1.7974279033515197e-05,
"loss": 0.8568,
"step": 26400
},
{
"epoch": 1.94,
"learning_rate": 1.774045206547155e-05,
"loss": 0.8694,
"step": 26500
},
{
"epoch": 1.95,
"learning_rate": 1.7506625097427903e-05,
"loss": 0.8652,
"step": 26600
},
{
"epoch": 1.96,
"learning_rate": 1.7272798129384255e-05,
"loss": 0.8565,
"step": 26700
},
{
"epoch": 1.97,
"learning_rate": 1.7038971161340608e-05,
"loss": 0.8608,
"step": 26800
},
{
"epoch": 1.97,
"learning_rate": 1.680514419329696e-05,
"loss": 0.8554,
"step": 26900
},
{
"epoch": 1.98,
"learning_rate": 1.657131722525331e-05,
"loss": 0.8607,
"step": 27000
},
{
"epoch": 1.98,
"eval_loss": 0.15500280261039734,
"eval_runtime": 1016.9779,
"eval_samples_per_second": 15.728,
"eval_steps_per_second": 1.967,
"eval_wer": 0.17375746069901798,
"step": 27000
},
{
"epoch": 1.99,
"learning_rate": 1.6339828526890098e-05,
"loss": 0.8551,
"step": 27100
},
{
"epoch": 2.0,
"learning_rate": 1.6106001558846454e-05,
"loss": 0.8588,
"step": 27200
},
{
"epoch": 2.0,
"learning_rate": 1.5872174590802803e-05,
"loss": 0.8518,
"step": 27300
},
{
"epoch": 2.01,
"learning_rate": 1.5638347622759156e-05,
"loss": 0.8359,
"step": 27400
},
{
"epoch": 2.02,
"learning_rate": 1.540452065471551e-05,
"loss": 0.8404,
"step": 27500
},
{
"epoch": 2.02,
"learning_rate": 1.5170693686671863e-05,
"loss": 0.8596,
"step": 27600
},
{
"epoch": 2.03,
"learning_rate": 1.4936866718628214e-05,
"loss": 0.8499,
"step": 27700
},
{
"epoch": 2.04,
"learning_rate": 1.4703039750584566e-05,
"loss": 0.8425,
"step": 27800
},
{
"epoch": 2.05,
"learning_rate": 1.4469212782540917e-05,
"loss": 0.8391,
"step": 27900
},
{
"epoch": 2.05,
"learning_rate": 1.4235385814497272e-05,
"loss": 0.8316,
"step": 28000
},
{
"epoch": 2.05,
"eval_loss": 0.15122003853321075,
"eval_runtime": 1007.5325,
"eval_samples_per_second": 15.875,
"eval_steps_per_second": 1.985,
"eval_wer": 0.17086878730349508,
"step": 28000
},
{
"epoch": 2.06,
"learning_rate": 1.4001558846453624e-05,
"loss": 0.8325,
"step": 28100
},
{
"epoch": 2.07,
"learning_rate": 1.3767731878409975e-05,
"loss": 0.8311,
"step": 28200
},
{
"epoch": 2.08,
"learning_rate": 1.3533904910366328e-05,
"loss": 0.8336,
"step": 28300
},
{
"epoch": 2.08,
"learning_rate": 1.330007794232268e-05,
"loss": 0.8248,
"step": 28400
},
{
"epoch": 2.09,
"learning_rate": 1.3066250974279033e-05,
"loss": 0.8316,
"step": 28500
},
{
"epoch": 2.1,
"learning_rate": 1.2832424006235384e-05,
"loss": 0.8313,
"step": 28600
},
{
"epoch": 2.11,
"learning_rate": 1.2598597038191737e-05,
"loss": 0.8284,
"step": 28700
},
{
"epoch": 2.11,
"learning_rate": 1.236477007014809e-05,
"loss": 0.8309,
"step": 28800
},
{
"epoch": 2.12,
"learning_rate": 1.2130943102104442e-05,
"loss": 0.8287,
"step": 28900
},
{
"epoch": 2.13,
"learning_rate": 1.189945440374123e-05,
"loss": 0.8321,
"step": 29000
},
{
"epoch": 2.13,
"eval_loss": 0.14810478687286377,
"eval_runtime": 999.3435,
"eval_samples_per_second": 16.006,
"eval_steps_per_second": 2.001,
"eval_wer": 0.16567873581272108,
"step": 29000
},
{
"epoch": 2.13,
"learning_rate": 1.1665627435697581e-05,
"loss": 0.8264,
"step": 29100
},
{
"epoch": 2.14,
"learning_rate": 1.1431800467653935e-05,
"loss": 0.8186,
"step": 29200
},
{
"epoch": 2.15,
"learning_rate": 1.1197973499610288e-05,
"loss": 0.8264,
"step": 29300
},
{
"epoch": 2.16,
"learning_rate": 1.0964146531566639e-05,
"loss": 0.8285,
"step": 29400
},
{
"epoch": 2.16,
"learning_rate": 1.0730319563522992e-05,
"loss": 0.8321,
"step": 29500
},
{
"epoch": 2.17,
"learning_rate": 1.0496492595479346e-05,
"loss": 0.8261,
"step": 29600
},
{
"epoch": 2.18,
"learning_rate": 1.0262665627435697e-05,
"loss": 0.836,
"step": 29700
},
{
"epoch": 2.19,
"learning_rate": 1.002883865939205e-05,
"loss": 0.8178,
"step": 29800
},
{
"epoch": 2.19,
"learning_rate": 9.7950116913484e-06,
"loss": 0.8258,
"step": 29900
},
{
"epoch": 2.2,
"learning_rate": 9.561184723304755e-06,
"loss": 0.825,
"step": 30000
},
{
"epoch": 2.2,
"eval_loss": 0.14461292326450348,
"eval_runtime": 1002.0806,
"eval_samples_per_second": 15.962,
"eval_steps_per_second": 1.996,
"eval_wer": 0.16271494325088437,
"step": 30000
},
{
"epoch": 2.21,
"learning_rate": 9.327357755261106e-06,
"loss": 0.8247,
"step": 30100
},
{
"epoch": 2.22,
"learning_rate": 9.093530787217458e-06,
"loss": 0.8223,
"step": 30200
},
{
"epoch": 2.22,
"learning_rate": 8.85970381917381e-06,
"loss": 0.8144,
"step": 30300
},
{
"epoch": 2.23,
"learning_rate": 8.625876851130163e-06,
"loss": 0.821,
"step": 30400
},
{
"epoch": 2.24,
"learning_rate": 8.392049883086516e-06,
"loss": 0.8069,
"step": 30500
},
{
"epoch": 2.25,
"learning_rate": 8.158222915042867e-06,
"loss": 0.8122,
"step": 30600
},
{
"epoch": 2.25,
"learning_rate": 7.92439594699922e-06,
"loss": 0.8042,
"step": 30700
},
{
"epoch": 2.26,
"learning_rate": 7.690568978955572e-06,
"loss": 0.8159,
"step": 30800
},
{
"epoch": 2.27,
"learning_rate": 7.456742010911924e-06,
"loss": 0.8125,
"step": 30900
},
{
"epoch": 2.27,
"learning_rate": 7.222915042868277e-06,
"loss": 0.8115,
"step": 31000
},
{
"epoch": 2.27,
"eval_loss": 0.13957646489143372,
"eval_runtime": 1001.1222,
"eval_samples_per_second": 15.977,
"eval_steps_per_second": 1.998,
"eval_wer": 0.15831705751396533,
"step": 31000
},
{
"epoch": 2.28,
"learning_rate": 6.989088074824628e-06,
"loss": 0.7972,
"step": 31100
},
{
"epoch": 2.29,
"learning_rate": 6.755261106780982e-06,
"loss": 0.8031,
"step": 31200
},
{
"epoch": 2.3,
"learning_rate": 6.521434138737334e-06,
"loss": 0.8017,
"step": 31300
},
{
"epoch": 2.3,
"learning_rate": 6.287607170693686e-06,
"loss": 0.8036,
"step": 31400
},
{
"epoch": 2.31,
"learning_rate": 6.053780202650038e-06,
"loss": 0.8001,
"step": 31500
},
{
"epoch": 2.32,
"learning_rate": 5.819953234606391e-06,
"loss": 0.8133,
"step": 31600
},
{
"epoch": 2.33,
"learning_rate": 5.586126266562743e-06,
"loss": 0.8105,
"step": 31700
},
{
"epoch": 2.33,
"learning_rate": 5.352299298519096e-06,
"loss": 0.8104,
"step": 31800
},
{
"epoch": 2.34,
"learning_rate": 5.118472330475448e-06,
"loss": 0.7934,
"step": 31900
},
{
"epoch": 2.35,
"learning_rate": 4.8846453624318e-06,
"loss": 0.7959,
"step": 32000
},
{
"epoch": 2.35,
"eval_loss": 0.1389056146144867,
"eval_runtime": 1015.5862,
"eval_samples_per_second": 15.75,
"eval_steps_per_second": 1.969,
"eval_wer": 0.15688296433888305,
"step": 32000
},
{
"epoch": 2.36,
"learning_rate": 4.650818394388152e-06,
"loss": 0.8036,
"step": 32100
},
{
"epoch": 2.36,
"learning_rate": 4.416991426344505e-06,
"loss": 0.7954,
"step": 32200
},
{
"epoch": 2.37,
"learning_rate": 4.183164458300857e-06,
"loss": 0.7958,
"step": 32300
},
{
"epoch": 2.38,
"learning_rate": 3.949337490257209e-06,
"loss": 0.7973,
"step": 32400
},
{
"epoch": 2.38,
"learning_rate": 3.7155105222135617e-06,
"loss": 0.7989,
"step": 32500
},
{
"epoch": 2.39,
"learning_rate": 3.481683554169914e-06,
"loss": 0.7959,
"step": 32600
},
{
"epoch": 2.4,
"learning_rate": 3.2478565861262665e-06,
"loss": 0.7945,
"step": 32700
},
{
"epoch": 2.41,
"learning_rate": 3.0140296180826187e-06,
"loss": 0.8044,
"step": 32800
},
{
"epoch": 2.41,
"learning_rate": 2.780202650038971e-06,
"loss": 0.7979,
"step": 32900
},
{
"epoch": 2.42,
"learning_rate": 2.5463756819953235e-06,
"loss": 0.7835,
"step": 33000
},
{
"epoch": 2.42,
"eval_loss": 0.1362341344356537,
"eval_runtime": 1006.8998,
"eval_samples_per_second": 15.885,
"eval_steps_per_second": 1.986,
"eval_wer": 0.15451329609243755,
"step": 33000
},
{
"epoch": 2.43,
"learning_rate": 2.3125487139516753e-06,
"loss": 0.7871,
"step": 33100
},
{
"epoch": 2.44,
"learning_rate": 2.078721745908028e-06,
"loss": 0.7973,
"step": 33200
},
{
"epoch": 2.44,
"learning_rate": 1.84489477786438e-06,
"loss": 0.7855,
"step": 33300
},
{
"epoch": 2.45,
"learning_rate": 1.6110678098207325e-06,
"loss": 0.7884,
"step": 33400
},
{
"epoch": 2.46,
"learning_rate": 1.377240841777085e-06,
"loss": 0.7848,
"step": 33500
},
{
"epoch": 2.47,
"learning_rate": 1.1434138737334373e-06,
"loss": 0.7843,
"step": 33600
},
{
"epoch": 2.47,
"learning_rate": 9.095869056897894e-07,
"loss": 0.7996,
"step": 33700
},
{
"epoch": 2.48,
"learning_rate": 6.757599376461418e-07,
"loss": 0.7862,
"step": 33800
},
{
"epoch": 2.49,
"learning_rate": 4.419329696024941e-07,
"loss": 0.7888,
"step": 33900
},
{
"epoch": 2.49,
"learning_rate": 2.0810600155884645e-07,
"loss": 0.7959,
"step": 34000
},
{
"epoch": 2.49,
"eval_loss": 0.13550546765327454,
"eval_runtime": 1005.5212,
"eval_samples_per_second": 15.907,
"eval_steps_per_second": 1.989,
"eval_wer": 0.15314749306854966,
"step": 34000
},
{
"epoch": 2.49,
"step": 34000,
"total_flos": 5.0385420411785465e+20,
"train_loss": 0.0,
"train_runtime": 18.4382,
"train_samples_per_second": 70965.526,
"train_steps_per_second": 1108.841
}
],
"max_steps": 20445,
"num_train_epochs": 3,
"total_flos": 5.0385420411785465e+20,
"trial_name": null,
"trial_params": null
}