wav2vec2-xls-r-1B-german / trainer_state.json
AndrewMcDowell's picture
End of training
d53d90b
raw
history blame
40.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9999816584435355,
"global_step": 27260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.675e-06,
"loss": 11.4989,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 7.425e-06,
"loss": 3.2394,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.1174999999999999e-05,
"loss": 3.0303,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.4925e-05,
"loss": 2.9052,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.8675e-05,
"loss": 2.1033,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 2.2424999999999996e-05,
"loss": 1.674,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 2.6174999999999996e-05,
"loss": 1.5568,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 2.9925e-05,
"loss": 1.4654,
"step": 800
},
{
"epoch": 0.07,
"learning_rate": 3.3675e-05,
"loss": 1.3031,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 3.7424999999999995e-05,
"loss": 1.1842,
"step": 1000
},
{
"epoch": 0.07,
"eval_loss": 0.44609957933425903,
"eval_runtime": 1053.3237,
"eval_samples_per_second": 15.197,
"eval_steps_per_second": 1.9,
"eval_wer": 0.49177182344586473,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 4.1175e-05,
"loss": 1.1329,
"step": 1100
},
{
"epoch": 0.09,
"learning_rate": 4.4924999999999994e-05,
"loss": 1.1316,
"step": 1200
},
{
"epoch": 0.1,
"learning_rate": 4.8675e-05,
"loss": 1.1092,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 5.2424999999999994e-05,
"loss": 1.1215,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 5.6175e-05,
"loss": 1.1165,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 5.9925e-05,
"loss": 1.0946,
"step": 1600
},
{
"epoch": 0.12,
"learning_rate": 6.367499999999999e-05,
"loss": 1.1189,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 6.7425e-05,
"loss": 1.1175,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 7.1175e-05,
"loss": 1.1254,
"step": 1900
},
{
"epoch": 0.15,
"learning_rate": 7.492499999999999e-05,
"loss": 1.1317,
"step": 2000
},
{
"epoch": 0.15,
"eval_loss": 0.2668535113334656,
"eval_runtime": 988.5751,
"eval_samples_per_second": 16.192,
"eval_steps_per_second": 2.024,
"eval_wer": 0.2748006118212608,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 7.470902612826603e-05,
"loss": 1.1296,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 7.441211401425178e-05,
"loss": 1.1406,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 7.411520190023751e-05,
"loss": 1.1362,
"step": 2300
},
{
"epoch": 0.18,
"learning_rate": 7.381828978622327e-05,
"loss": 1.1292,
"step": 2400
},
{
"epoch": 0.18,
"learning_rate": 7.352137767220902e-05,
"loss": 1.105,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 7.322446555819477e-05,
"loss": 1.1231,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 7.292755344418051e-05,
"loss": 1.1187,
"step": 2700
},
{
"epoch": 0.21,
"learning_rate": 7.263064133016626e-05,
"loss": 1.1339,
"step": 2800
},
{
"epoch": 0.21,
"learning_rate": 7.233372921615201e-05,
"loss": 1.1241,
"step": 2900
},
{
"epoch": 0.22,
"learning_rate": 7.203681710213777e-05,
"loss": 1.1029,
"step": 3000
},
{
"epoch": 0.22,
"eval_loss": 0.2638496458530426,
"eval_runtime": 987.5568,
"eval_samples_per_second": 16.209,
"eval_steps_per_second": 2.026,
"eval_wer": 0.2705875122910521,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 7.173990498812351e-05,
"loss": 1.1215,
"step": 3100
},
{
"epoch": 0.23,
"learning_rate": 7.144299287410925e-05,
"loss": 1.1067,
"step": 3200
},
{
"epoch": 0.24,
"learning_rate": 7.114608076009501e-05,
"loss": 1.1126,
"step": 3300
},
{
"epoch": 0.25,
"learning_rate": 7.084916864608076e-05,
"loss": 1.109,
"step": 3400
},
{
"epoch": 0.26,
"learning_rate": 7.05522565320665e-05,
"loss": 1.1077,
"step": 3500
},
{
"epoch": 0.26,
"learning_rate": 7.025534441805225e-05,
"loss": 1.1,
"step": 3600
},
{
"epoch": 0.27,
"learning_rate": 6.9958432304038e-05,
"loss": 1.1061,
"step": 3700
},
{
"epoch": 0.28,
"learning_rate": 6.966152019002374e-05,
"loss": 1.103,
"step": 3800
},
{
"epoch": 0.29,
"learning_rate": 6.936460807600949e-05,
"loss": 1.0947,
"step": 3900
},
{
"epoch": 0.29,
"learning_rate": 6.906769596199525e-05,
"loss": 1.0949,
"step": 4000
},
{
"epoch": 0.29,
"eval_loss": 0.25188884139060974,
"eval_runtime": 996.0428,
"eval_samples_per_second": 16.071,
"eval_steps_per_second": 2.009,
"eval_wer": 0.26274172402490986,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 6.8770783847981e-05,
"loss": 1.1076,
"step": 4100
},
{
"epoch": 0.31,
"learning_rate": 6.847387173396674e-05,
"loss": 1.1012,
"step": 4200
},
{
"epoch": 0.32,
"learning_rate": 6.817695961995249e-05,
"loss": 1.081,
"step": 4300
},
{
"epoch": 0.32,
"learning_rate": 6.788004750593824e-05,
"loss": 1.0868,
"step": 4400
},
{
"epoch": 0.33,
"learning_rate": 6.758313539192398e-05,
"loss": 1.0956,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 6.728622327790973e-05,
"loss": 1.0953,
"step": 4600
},
{
"epoch": 0.34,
"learning_rate": 6.698931116389548e-05,
"loss": 1.0952,
"step": 4700
},
{
"epoch": 0.35,
"learning_rate": 6.669239904988122e-05,
"loss": 1.0968,
"step": 4800
},
{
"epoch": 0.36,
"learning_rate": 6.639548693586698e-05,
"loss": 1.0827,
"step": 4900
},
{
"epoch": 0.37,
"learning_rate": 6.609857482185273e-05,
"loss": 1.0923,
"step": 5000
},
{
"epoch": 0.37,
"eval_loss": 0.24751192331314087,
"eval_runtime": 984.8205,
"eval_samples_per_second": 16.254,
"eval_steps_per_second": 2.032,
"eval_wer": 0.25769556429585927,
"step": 5000
},
{
"epoch": 0.37,
"learning_rate": 6.580166270783846e-05,
"loss": 1.0895,
"step": 5100
},
{
"epoch": 0.38,
"learning_rate": 6.550771971496436e-05,
"loss": 1.0851,
"step": 5200
},
{
"epoch": 0.39,
"learning_rate": 6.521080760095011e-05,
"loss": 1.1124,
"step": 5300
},
{
"epoch": 0.4,
"learning_rate": 6.491686460807601e-05,
"loss": 1.0809,
"step": 5400
},
{
"epoch": 0.4,
"learning_rate": 6.461995249406176e-05,
"loss": 1.0985,
"step": 5500
},
{
"epoch": 0.41,
"learning_rate": 6.432304038004749e-05,
"loss": 1.086,
"step": 5600
},
{
"epoch": 0.42,
"learning_rate": 6.402612826603325e-05,
"loss": 1.0823,
"step": 5700
},
{
"epoch": 0.43,
"learning_rate": 6.3729216152019e-05,
"loss": 1.0732,
"step": 5800
},
{
"epoch": 0.43,
"learning_rate": 6.343230403800475e-05,
"loss": 1.076,
"step": 5900
},
{
"epoch": 0.44,
"learning_rate": 6.313539192399049e-05,
"loss": 1.0847,
"step": 6000
},
{
"epoch": 0.44,
"eval_loss": 0.24355509877204895,
"eval_runtime": 984.3756,
"eval_samples_per_second": 16.261,
"eval_steps_per_second": 2.033,
"eval_wer": 0.26121217087293785,
"step": 6000
},
{
"epoch": 0.45,
"learning_rate": 6.283847980997624e-05,
"loss": 1.0748,
"step": 6100
},
{
"epoch": 0.45,
"learning_rate": 6.254156769596199e-05,
"loss": 1.0836,
"step": 6200
},
{
"epoch": 0.46,
"learning_rate": 6.224465558194773e-05,
"loss": 1.084,
"step": 6300
},
{
"epoch": 0.47,
"learning_rate": 6.194774346793349e-05,
"loss": 1.0649,
"step": 6400
},
{
"epoch": 0.48,
"learning_rate": 6.165083135391923e-05,
"loss": 1.0751,
"step": 6500
},
{
"epoch": 0.48,
"learning_rate": 6.135391923990499e-05,
"loss": 1.0773,
"step": 6600
},
{
"epoch": 0.49,
"learning_rate": 6.105700712589073e-05,
"loss": 1.095,
"step": 6700
},
{
"epoch": 0.5,
"learning_rate": 6.076009501187648e-05,
"loss": 1.0629,
"step": 6800
},
{
"epoch": 0.51,
"learning_rate": 6.0463182897862234e-05,
"loss": 1.0904,
"step": 6900
},
{
"epoch": 0.51,
"learning_rate": 6.0166270783847974e-05,
"loss": 1.0667,
"step": 7000
},
{
"epoch": 0.51,
"eval_loss": 0.24724909663200378,
"eval_runtime": 983.1677,
"eval_samples_per_second": 16.281,
"eval_steps_per_second": 2.035,
"eval_wer": 0.26608762154484866,
"step": 7000
},
{
"epoch": 0.52,
"learning_rate": 5.986935866983372e-05,
"loss": 1.0825,
"step": 7100
},
{
"epoch": 0.53,
"learning_rate": 5.9572446555819474e-05,
"loss": 1.0811,
"step": 7200
},
{
"epoch": 0.54,
"learning_rate": 5.927553444180522e-05,
"loss": 1.0906,
"step": 7300
},
{
"epoch": 0.54,
"learning_rate": 5.8978622327790975e-05,
"loss": 1.0784,
"step": 7400
},
{
"epoch": 0.55,
"learning_rate": 5.8681710213776715e-05,
"loss": 1.0822,
"step": 7500
},
{
"epoch": 0.56,
"learning_rate": 5.838479809976246e-05,
"loss": 1.0802,
"step": 7600
},
{
"epoch": 0.56,
"learning_rate": 5.8087885985748215e-05,
"loss": 1.0805,
"step": 7700
},
{
"epoch": 0.57,
"learning_rate": 5.779097387173396e-05,
"loss": 1.093,
"step": 7800
},
{
"epoch": 0.58,
"learning_rate": 5.749406175771971e-05,
"loss": 1.0456,
"step": 7900
},
{
"epoch": 0.59,
"learning_rate": 5.7197149643705455e-05,
"loss": 1.0709,
"step": 8000
},
{
"epoch": 0.59,
"eval_loss": 0.24887976050376892,
"eval_runtime": 982.4054,
"eval_samples_per_second": 16.294,
"eval_steps_per_second": 2.037,
"eval_wer": 0.26095269310608543,
"step": 8000
},
{
"epoch": 0.59,
"learning_rate": 5.690023752969121e-05,
"loss": 1.0677,
"step": 8100
},
{
"epoch": 0.6,
"learning_rate": 5.6603325415676956e-05,
"loss": 1.0659,
"step": 8200
},
{
"epoch": 0.61,
"learning_rate": 5.630641330166271e-05,
"loss": 1.0788,
"step": 8300
},
{
"epoch": 0.62,
"learning_rate": 5.600950118764845e-05,
"loss": 1.071,
"step": 8400
},
{
"epoch": 0.62,
"learning_rate": 5.5712589073634196e-05,
"loss": 1.0669,
"step": 8500
},
{
"epoch": 0.63,
"learning_rate": 5.541567695961995e-05,
"loss": 1.0728,
"step": 8600
},
{
"epoch": 0.64,
"learning_rate": 5.512173396674584e-05,
"loss": 1.0652,
"step": 8700
},
{
"epoch": 0.65,
"learning_rate": 5.482482185273159e-05,
"loss": 1.0632,
"step": 8800
},
{
"epoch": 0.65,
"learning_rate": 5.452790973871733e-05,
"loss": 1.048,
"step": 8900
},
{
"epoch": 0.66,
"learning_rate": 5.4230997624703083e-05,
"loss": 1.0472,
"step": 9000
},
{
"epoch": 0.66,
"eval_loss": 0.23543120920658112,
"eval_runtime": 985.8901,
"eval_samples_per_second": 16.236,
"eval_steps_per_second": 2.03,
"eval_wer": 0.24997268655085764,
"step": 9000
},
{
"epoch": 0.67,
"learning_rate": 5.393408551068883e-05,
"loss": 1.0552,
"step": 9100
},
{
"epoch": 0.67,
"learning_rate": 5.3637173396674584e-05,
"loss": 1.0581,
"step": 9200
},
{
"epoch": 0.68,
"learning_rate": 5.3340261282660324e-05,
"loss": 1.0658,
"step": 9300
},
{
"epoch": 0.69,
"learning_rate": 5.304334916864607e-05,
"loss": 1.0603,
"step": 9400
},
{
"epoch": 0.7,
"learning_rate": 5.2746437054631824e-05,
"loss": 1.0661,
"step": 9500
},
{
"epoch": 0.7,
"learning_rate": 5.244952494061757e-05,
"loss": 1.0554,
"step": 9600
},
{
"epoch": 0.71,
"learning_rate": 5.2152612826603325e-05,
"loss": 1.0728,
"step": 9700
},
{
"epoch": 0.72,
"learning_rate": 5.1855700712589065e-05,
"loss": 1.0513,
"step": 9800
},
{
"epoch": 0.73,
"learning_rate": 5.155878859857482e-05,
"loss": 1.0379,
"step": 9900
},
{
"epoch": 0.73,
"learning_rate": 5.1261876484560565e-05,
"loss": 1.0604,
"step": 10000
},
{
"epoch": 0.73,
"eval_loss": 0.23458585143089294,
"eval_runtime": 986.1525,
"eval_samples_per_second": 16.232,
"eval_steps_per_second": 2.029,
"eval_wer": 0.2485182453840271,
"step": 10000
},
{
"epoch": 0.74,
"learning_rate": 5.096496437054632e-05,
"loss": 1.0632,
"step": 10100
},
{
"epoch": 0.75,
"learning_rate": 5.0668052256532065e-05,
"loss": 1.0526,
"step": 10200
},
{
"epoch": 0.76,
"learning_rate": 5.0371140142517805e-05,
"loss": 1.0314,
"step": 10300
},
{
"epoch": 0.76,
"learning_rate": 5.007422802850356e-05,
"loss": 1.0508,
"step": 10400
},
{
"epoch": 0.77,
"learning_rate": 4.9777315914489306e-05,
"loss": 1.0446,
"step": 10500
},
{
"epoch": 0.78,
"learning_rate": 4.948040380047506e-05,
"loss": 1.0361,
"step": 10600
},
{
"epoch": 0.79,
"learning_rate": 4.91834916864608e-05,
"loss": 1.0319,
"step": 10700
},
{
"epoch": 0.79,
"learning_rate": 4.8886579572446546e-05,
"loss": 1.0178,
"step": 10800
},
{
"epoch": 0.8,
"learning_rate": 4.85896674584323e-05,
"loss": 1.0301,
"step": 10900
},
{
"epoch": 0.81,
"learning_rate": 4.8292755344418046e-05,
"loss": 1.0375,
"step": 11000
},
{
"epoch": 0.81,
"eval_loss": 0.2285824865102768,
"eval_runtime": 979.8277,
"eval_samples_per_second": 16.337,
"eval_steps_per_second": 2.042,
"eval_wer": 0.23898585163334427,
"step": 11000
},
{
"epoch": 0.81,
"learning_rate": 4.79958432304038e-05,
"loss": 1.0398,
"step": 11100
},
{
"epoch": 0.82,
"learning_rate": 4.769893111638954e-05,
"loss": 1.0308,
"step": 11200
},
{
"epoch": 0.83,
"learning_rate": 4.7402019002375294e-05,
"loss": 1.0309,
"step": 11300
},
{
"epoch": 0.84,
"learning_rate": 4.710510688836104e-05,
"loss": 1.0287,
"step": 11400
},
{
"epoch": 0.84,
"learning_rate": 4.6808194774346794e-05,
"loss": 1.0195,
"step": 11500
},
{
"epoch": 0.85,
"learning_rate": 4.651128266033254e-05,
"loss": 1.0292,
"step": 11600
},
{
"epoch": 0.86,
"learning_rate": 4.621437054631828e-05,
"loss": 1.0147,
"step": 11700
},
{
"epoch": 0.87,
"learning_rate": 4.5917458432304034e-05,
"loss": 1.0242,
"step": 11800
},
{
"epoch": 0.87,
"learning_rate": 4.562054631828978e-05,
"loss": 1.029,
"step": 11900
},
{
"epoch": 0.88,
"learning_rate": 4.5326603325415675e-05,
"loss": 1.0193,
"step": 12000
},
{
"epoch": 0.88,
"eval_loss": 0.22122837603092194,
"eval_runtime": 981.4673,
"eval_samples_per_second": 16.309,
"eval_steps_per_second": 2.039,
"eval_wer": 0.23376215448486834,
"step": 12000
},
{
"epoch": 0.89,
"learning_rate": 4.502969121140143e-05,
"loss": 1.0249,
"step": 12100
},
{
"epoch": 0.9,
"learning_rate": 4.473277909738717e-05,
"loss": 1.0165,
"step": 12200
},
{
"epoch": 0.9,
"learning_rate": 4.4435866983372915e-05,
"loss": 1.0303,
"step": 12300
},
{
"epoch": 0.91,
"learning_rate": 4.413895486935867e-05,
"loss": 1.0295,
"step": 12400
},
{
"epoch": 0.92,
"learning_rate": 4.3842042755344415e-05,
"loss": 1.0112,
"step": 12500
},
{
"epoch": 0.92,
"learning_rate": 4.35480997624703e-05,
"loss": 1.0056,
"step": 12600
},
{
"epoch": 0.93,
"learning_rate": 4.325118764845605e-05,
"loss": 1.0108,
"step": 12700
},
{
"epoch": 0.94,
"learning_rate": 4.29542755344418e-05,
"loss": 1.0133,
"step": 12800
},
{
"epoch": 0.95,
"learning_rate": 4.265736342042755e-05,
"loss": 1.0063,
"step": 12900
},
{
"epoch": 0.95,
"learning_rate": 4.23604513064133e-05,
"loss": 1.0077,
"step": 13000
},
{
"epoch": 0.95,
"eval_loss": 0.21520280838012695,
"eval_runtime": 983.9086,
"eval_samples_per_second": 16.269,
"eval_steps_per_second": 2.034,
"eval_wer": 0.22689282202556538,
"step": 13000
},
{
"epoch": 0.96,
"learning_rate": 4.206353919239904e-05,
"loss": 1.0085,
"step": 13100
},
{
"epoch": 0.97,
"learning_rate": 4.176662707838479e-05,
"loss": 1.011,
"step": 13200
},
{
"epoch": 0.98,
"learning_rate": 4.146971496437054e-05,
"loss": 1.0131,
"step": 13300
},
{
"epoch": 0.98,
"learning_rate": 4.117280285035629e-05,
"loss": 0.998,
"step": 13400
},
{
"epoch": 0.99,
"learning_rate": 4.0875890736342043e-05,
"loss": 1.0002,
"step": 13500
},
{
"epoch": 1.0,
"learning_rate": 4.0578978622327783e-05,
"loss": 0.9916,
"step": 13600
},
{
"epoch": 1.01,
"learning_rate": 4.028206650831354e-05,
"loss": 0.9662,
"step": 13700
},
{
"epoch": 1.01,
"learning_rate": 3.9985154394299284e-05,
"loss": 0.9758,
"step": 13800
},
{
"epoch": 1.02,
"learning_rate": 3.968824228028504e-05,
"loss": 1.013,
"step": 13900
},
{
"epoch": 1.03,
"learning_rate": 3.939133016627078e-05,
"loss": 1.0004,
"step": 14000
},
{
"epoch": 1.03,
"eval_loss": 0.2093251347541809,
"eval_runtime": 986.9604,
"eval_samples_per_second": 16.218,
"eval_steps_per_second": 2.027,
"eval_wer": 0.22069949743253578,
"step": 14000
},
{
"epoch": 1.03,
"learning_rate": 3.9094418052256524e-05,
"loss": 0.9852,
"step": 14100
},
{
"epoch": 1.04,
"learning_rate": 3.879750593824228e-05,
"loss": 0.9765,
"step": 14200
},
{
"epoch": 1.05,
"learning_rate": 3.8500593824228025e-05,
"loss": 0.9978,
"step": 14300
},
{
"epoch": 1.06,
"learning_rate": 3.820368171021378e-05,
"loss": 0.9807,
"step": 14400
},
{
"epoch": 1.06,
"learning_rate": 3.790676959619952e-05,
"loss": 0.9988,
"step": 14500
},
{
"epoch": 1.07,
"learning_rate": 3.7609857482185265e-05,
"loss": 0.977,
"step": 14600
},
{
"epoch": 1.08,
"learning_rate": 3.731294536817102e-05,
"loss": 0.9735,
"step": 14700
},
{
"epoch": 1.09,
"learning_rate": 3.7016033254156765e-05,
"loss": 0.9767,
"step": 14800
},
{
"epoch": 1.09,
"learning_rate": 3.671912114014251e-05,
"loss": 0.9555,
"step": 14900
},
{
"epoch": 1.1,
"learning_rate": 3.6422209026128266e-05,
"loss": 0.9649,
"step": 15000
},
{
"epoch": 1.1,
"eval_loss": 0.19932541251182556,
"eval_runtime": 986.5773,
"eval_samples_per_second": 16.225,
"eval_steps_per_second": 2.028,
"eval_wer": 0.21130367092756475,
"step": 15000
},
{
"epoch": 1.11,
"learning_rate": 3.612529691211401e-05,
"loss": 0.9608,
"step": 15100
},
{
"epoch": 1.12,
"learning_rate": 3.582838479809976e-05,
"loss": 0.9549,
"step": 15200
},
{
"epoch": 1.12,
"learning_rate": 3.5531472684085506e-05,
"loss": 0.9636,
"step": 15300
},
{
"epoch": 1.13,
"learning_rate": 3.523456057007125e-05,
"loss": 0.9605,
"step": 15400
},
{
"epoch": 1.14,
"learning_rate": 3.4937648456057006e-05,
"loss": 0.962,
"step": 15500
},
{
"epoch": 1.14,
"learning_rate": 3.464073634204275e-05,
"loss": 0.9565,
"step": 15600
},
{
"epoch": 1.15,
"learning_rate": 3.43438242280285e-05,
"loss": 0.9609,
"step": 15700
},
{
"epoch": 1.16,
"learning_rate": 3.404691211401425e-05,
"loss": 0.9552,
"step": 15800
},
{
"epoch": 1.17,
"learning_rate": 3.375e-05,
"loss": 0.9503,
"step": 15900
},
{
"epoch": 1.17,
"learning_rate": 3.345308788598574e-05,
"loss": 0.9509,
"step": 16000
},
{
"epoch": 1.17,
"eval_loss": 0.19342663884162903,
"eval_runtime": 984.1094,
"eval_samples_per_second": 16.265,
"eval_steps_per_second": 2.033,
"eval_wer": 0.20888643067846607,
"step": 16000
},
{
"epoch": 1.18,
"learning_rate": 3.3156175771971494e-05,
"loss": 0.9369,
"step": 16100
},
{
"epoch": 1.19,
"learning_rate": 3.285926365795724e-05,
"loss": 0.9549,
"step": 16200
},
{
"epoch": 1.2,
"learning_rate": 3.256235154394299e-05,
"loss": 0.9503,
"step": 16300
},
{
"epoch": 1.2,
"learning_rate": 3.226543942992874e-05,
"loss": 0.9553,
"step": 16400
},
{
"epoch": 1.21,
"learning_rate": 3.196852731591449e-05,
"loss": 0.9508,
"step": 16500
},
{
"epoch": 1.22,
"learning_rate": 3.1671615201900235e-05,
"loss": 0.9411,
"step": 16600
},
{
"epoch": 1.23,
"learning_rate": 3.137470308788598e-05,
"loss": 0.9435,
"step": 16700
},
{
"epoch": 1.23,
"learning_rate": 3.107779097387173e-05,
"loss": 0.9439,
"step": 16800
},
{
"epoch": 1.24,
"learning_rate": 3.078087885985748e-05,
"loss": 0.946,
"step": 16900
},
{
"epoch": 1.25,
"learning_rate": 3.048396674584323e-05,
"loss": 0.9533,
"step": 17000
},
{
"epoch": 1.25,
"eval_loss": 0.18736572563648224,
"eval_runtime": 984.7341,
"eval_samples_per_second": 16.255,
"eval_steps_per_second": 2.032,
"eval_wer": 0.20231071779744347,
"step": 17000
},
{
"epoch": 1.25,
"learning_rate": 3.018705463182898e-05,
"loss": 0.9322,
"step": 17100
},
{
"epoch": 1.26,
"learning_rate": 2.9890142517814722e-05,
"loss": 0.94,
"step": 17200
},
{
"epoch": 1.27,
"learning_rate": 2.9593230403800473e-05,
"loss": 0.9373,
"step": 17300
},
{
"epoch": 1.28,
"learning_rate": 2.9299287410926363e-05,
"loss": 0.924,
"step": 17400
},
{
"epoch": 1.28,
"learning_rate": 2.9005344418052253e-05,
"loss": 0.9357,
"step": 17500
},
{
"epoch": 1.29,
"learning_rate": 2.8708432304038003e-05,
"loss": 0.9351,
"step": 17600
},
{
"epoch": 1.3,
"learning_rate": 2.841152019002375e-05,
"loss": 0.9371,
"step": 17700
},
{
"epoch": 1.31,
"learning_rate": 2.81146080760095e-05,
"loss": 0.9253,
"step": 17800
},
{
"epoch": 1.31,
"learning_rate": 2.7817695961995246e-05,
"loss": 0.9264,
"step": 17900
},
{
"epoch": 1.32,
"learning_rate": 2.7520783847980997e-05,
"loss": 0.9248,
"step": 18000
},
{
"epoch": 1.32,
"eval_loss": 0.1818237155675888,
"eval_runtime": 1114.2718,
"eval_samples_per_second": 14.365,
"eval_steps_per_second": 1.796,
"eval_wer": 0.19742843876324703,
"step": 18000
},
{
"epoch": 1.33,
"learning_rate": 2.722387173396674e-05,
"loss": 0.9448,
"step": 18100
},
{
"epoch": 1.34,
"learning_rate": 2.692695961995249e-05,
"loss": 0.9284,
"step": 18200
},
{
"epoch": 1.34,
"learning_rate": 2.663004750593824e-05,
"loss": 0.9141,
"step": 18300
},
{
"epoch": 1.35,
"learning_rate": 2.6333135391923987e-05,
"loss": 0.9117,
"step": 18400
},
{
"epoch": 1.36,
"learning_rate": 2.6036223277909737e-05,
"loss": 0.917,
"step": 18500
},
{
"epoch": 1.36,
"learning_rate": 2.5739311163895484e-05,
"loss": 0.9165,
"step": 18600
},
{
"epoch": 1.37,
"learning_rate": 2.5442399049881234e-05,
"loss": 0.9099,
"step": 18700
},
{
"epoch": 1.38,
"learning_rate": 2.5145486935866978e-05,
"loss": 0.9022,
"step": 18800
},
{
"epoch": 1.39,
"learning_rate": 2.4848574821852728e-05,
"loss": 0.9246,
"step": 18900
},
{
"epoch": 1.39,
"learning_rate": 2.4551662707838478e-05,
"loss": 0.9216,
"step": 19000
},
{
"epoch": 1.39,
"eval_loss": 0.17756715416908264,
"eval_runtime": 1032.2412,
"eval_samples_per_second": 15.507,
"eval_steps_per_second": 1.939,
"eval_wer": 0.19256664481590735,
"step": 19000
},
{
"epoch": 1.4,
"learning_rate": 2.4254750593824225e-05,
"loss": 0.9142,
"step": 19100
},
{
"epoch": 1.41,
"learning_rate": 2.3957838479809975e-05,
"loss": 0.9275,
"step": 19200
},
{
"epoch": 1.42,
"learning_rate": 2.3660926365795722e-05,
"loss": 0.9132,
"step": 19300
},
{
"epoch": 1.42,
"learning_rate": 2.3364014251781472e-05,
"loss": 0.9111,
"step": 19400
},
{
"epoch": 1.43,
"learning_rate": 2.3067102137767216e-05,
"loss": 0.8974,
"step": 19500
},
{
"epoch": 1.44,
"learning_rate": 2.2770190023752966e-05,
"loss": 0.9013,
"step": 19600
},
{
"epoch": 1.45,
"learning_rate": 2.2473277909738716e-05,
"loss": 0.9093,
"step": 19700
},
{
"epoch": 1.45,
"learning_rate": 2.2176365795724463e-05,
"loss": 0.8926,
"step": 19800
},
{
"epoch": 1.46,
"learning_rate": 2.1879453681710213e-05,
"loss": 0.9026,
"step": 19900
},
{
"epoch": 1.47,
"learning_rate": 2.158254156769596e-05,
"loss": 0.8964,
"step": 20000
},
{
"epoch": 1.47,
"eval_loss": 0.1722368746995926,
"eval_runtime": 1019.2936,
"eval_samples_per_second": 15.704,
"eval_steps_per_second": 1.963,
"eval_wer": 0.19043619578280346,
"step": 20000
},
{
"epoch": 1.47,
"learning_rate": 2.128859857482185e-05,
"loss": 0.8906,
"step": 20100
},
{
"epoch": 1.48,
"learning_rate": 2.09916864608076e-05,
"loss": 0.8878,
"step": 20200
},
{
"epoch": 1.49,
"learning_rate": 2.0694774346793347e-05,
"loss": 0.9024,
"step": 20300
},
{
"epoch": 1.5,
"learning_rate": 2.0397862232779097e-05,
"loss": 0.8903,
"step": 20400
},
{
"epoch": 1.5,
"learning_rate": 2.0100950118764844e-05,
"loss": 0.8843,
"step": 20500
},
{
"epoch": 1.51,
"learning_rate": 1.9804038004750594e-05,
"loss": 0.8911,
"step": 20600
},
{
"epoch": 1.52,
"learning_rate": 1.9507125890736337e-05,
"loss": 0.8795,
"step": 20700
},
{
"epoch": 1.53,
"learning_rate": 1.9210213776722087e-05,
"loss": 0.8777,
"step": 20800
},
{
"epoch": 1.53,
"learning_rate": 1.8913301662707838e-05,
"loss": 0.889,
"step": 20900
},
{
"epoch": 1.54,
"learning_rate": 1.8616389548693584e-05,
"loss": 0.8941,
"step": 21000
},
{
"epoch": 1.54,
"eval_loss": 0.16895848512649536,
"eval_runtime": 1022.9987,
"eval_samples_per_second": 15.647,
"eval_steps_per_second": 1.956,
"eval_wer": 0.18521932699661314,
"step": 21000
},
{
"epoch": 1.55,
"learning_rate": 1.831947743467933e-05,
"loss": 0.882,
"step": 21100
},
{
"epoch": 1.56,
"learning_rate": 1.802256532066508e-05,
"loss": 0.8801,
"step": 21200
},
{
"epoch": 1.56,
"learning_rate": 1.772565320665083e-05,
"loss": 0.8718,
"step": 21300
},
{
"epoch": 1.57,
"learning_rate": 1.742874109263658e-05,
"loss": 0.8904,
"step": 21400
},
{
"epoch": 1.58,
"learning_rate": 1.7131828978622325e-05,
"loss": 0.8729,
"step": 21500
},
{
"epoch": 1.58,
"learning_rate": 1.6834916864608075e-05,
"loss": 0.8722,
"step": 21600
},
{
"epoch": 1.59,
"learning_rate": 1.6538004750593822e-05,
"loss": 0.8739,
"step": 21700
},
{
"epoch": 1.6,
"learning_rate": 1.624109263657957e-05,
"loss": 0.8635,
"step": 21800
},
{
"epoch": 1.61,
"learning_rate": 1.594418052256532e-05,
"loss": 0.8767,
"step": 21900
},
{
"epoch": 1.61,
"learning_rate": 1.564726840855107e-05,
"loss": 0.871,
"step": 22000
},
{
"epoch": 1.61,
"eval_loss": 0.16269078850746155,
"eval_runtime": 1042.6643,
"eval_samples_per_second": 15.352,
"eval_steps_per_second": 1.919,
"eval_wer": 0.17805637495902982,
"step": 22000
},
{
"epoch": 1.62,
"learning_rate": 1.5350356294536816e-05,
"loss": 0.8663,
"step": 22100
},
{
"epoch": 1.63,
"learning_rate": 1.5056413301662706e-05,
"loss": 0.8732,
"step": 22200
},
{
"epoch": 1.64,
"learning_rate": 1.4759501187648455e-05,
"loss": 0.8625,
"step": 22300
},
{
"epoch": 1.64,
"learning_rate": 1.4462589073634203e-05,
"loss": 0.854,
"step": 22400
},
{
"epoch": 1.65,
"learning_rate": 1.416567695961995e-05,
"loss": 0.8692,
"step": 22500
},
{
"epoch": 1.66,
"learning_rate": 1.38687648456057e-05,
"loss": 0.8477,
"step": 22600
},
{
"epoch": 1.67,
"learning_rate": 1.3571852731591449e-05,
"loss": 0.8494,
"step": 22700
},
{
"epoch": 1.67,
"learning_rate": 1.3277909738717339e-05,
"loss": 0.8599,
"step": 22800
},
{
"epoch": 1.68,
"learning_rate": 1.2980997624703087e-05,
"loss": 0.863,
"step": 22900
},
{
"epoch": 1.69,
"learning_rate": 1.2684085510688834e-05,
"loss": 0.847,
"step": 23000
},
{
"epoch": 1.69,
"eval_loss": 0.15907420217990875,
"eval_runtime": 1036.4519,
"eval_samples_per_second": 15.444,
"eval_steps_per_second": 1.931,
"eval_wer": 0.17514066426308314,
"step": 23000
},
{
"epoch": 1.69,
"learning_rate": 1.2387173396674582e-05,
"loss": 0.8487,
"step": 23100
},
{
"epoch": 1.7,
"learning_rate": 1.2090261282660333e-05,
"loss": 0.8637,
"step": 23200
},
{
"epoch": 1.71,
"learning_rate": 1.1793349168646081e-05,
"loss": 0.8456,
"step": 23300
},
{
"epoch": 1.72,
"learning_rate": 1.1496437054631828e-05,
"loss": 0.8518,
"step": 23400
},
{
"epoch": 1.72,
"learning_rate": 1.1199524940617576e-05,
"loss": 0.8456,
"step": 23500
},
{
"epoch": 1.73,
"learning_rate": 1.0902612826603325e-05,
"loss": 0.8349,
"step": 23600
},
{
"epoch": 1.74,
"learning_rate": 1.0605700712589072e-05,
"loss": 0.8426,
"step": 23700
},
{
"epoch": 1.75,
"learning_rate": 1.030878859857482e-05,
"loss": 0.8503,
"step": 23800
},
{
"epoch": 1.75,
"learning_rate": 1.001187648456057e-05,
"loss": 0.844,
"step": 23900
},
{
"epoch": 1.76,
"learning_rate": 9.714964370546319e-06,
"loss": 0.822,
"step": 24000
},
{
"epoch": 1.76,
"eval_loss": 0.1550702005624771,
"eval_runtime": 1027.8442,
"eval_samples_per_second": 15.573,
"eval_steps_per_second": 1.947,
"eval_wer": 0.17010133289631815,
"step": 24000
},
{
"epoch": 1.77,
"learning_rate": 9.418052256532066e-06,
"loss": 0.8452,
"step": 24100
},
{
"epoch": 1.78,
"learning_rate": 9.121140142517814e-06,
"loss": 0.843,
"step": 24200
},
{
"epoch": 1.78,
"learning_rate": 8.824228028503563e-06,
"loss": 0.8429,
"step": 24300
},
{
"epoch": 1.79,
"learning_rate": 8.527315914489311e-06,
"loss": 0.8513,
"step": 24400
},
{
"epoch": 1.8,
"learning_rate": 8.23040380047506e-06,
"loss": 0.834,
"step": 24500
},
{
"epoch": 1.8,
"learning_rate": 7.933491686460806e-06,
"loss": 0.8383,
"step": 24600
},
{
"epoch": 1.81,
"learning_rate": 7.636579572446555e-06,
"loss": 0.8294,
"step": 24700
},
{
"epoch": 1.82,
"learning_rate": 7.339667458432303e-06,
"loss": 0.8335,
"step": 24800
},
{
"epoch": 1.83,
"learning_rate": 7.042755344418052e-06,
"loss": 0.8207,
"step": 24900
},
{
"epoch": 1.83,
"learning_rate": 6.745843230403799e-06,
"loss": 0.8188,
"step": 25000
},
{
"epoch": 1.83,
"eval_loss": 0.1527515947818756,
"eval_runtime": 1034.5359,
"eval_samples_per_second": 15.473,
"eval_steps_per_second": 1.934,
"eval_wer": 0.16672812192723696,
"step": 25000
},
{
"epoch": 1.84,
"learning_rate": 6.448931116389549e-06,
"loss": 0.8289,
"step": 25100
},
{
"epoch": 1.85,
"learning_rate": 6.152019002375296e-06,
"loss": 0.8306,
"step": 25200
},
{
"epoch": 1.86,
"learning_rate": 5.855106888361045e-06,
"loss": 0.8335,
"step": 25300
},
{
"epoch": 1.86,
"learning_rate": 5.5581947743467925e-06,
"loss": 0.8291,
"step": 25400
},
{
"epoch": 1.87,
"learning_rate": 5.261282660332541e-06,
"loss": 0.8206,
"step": 25500
},
{
"epoch": 1.88,
"learning_rate": 4.9643705463182895e-06,
"loss": 0.8242,
"step": 25600
},
{
"epoch": 1.89,
"learning_rate": 4.667458432304038e-06,
"loss": 0.8189,
"step": 25700
},
{
"epoch": 1.89,
"learning_rate": 4.370546318289786e-06,
"loss": 0.8275,
"step": 25800
},
{
"epoch": 1.9,
"learning_rate": 4.073634204275534e-06,
"loss": 0.8142,
"step": 25900
},
{
"epoch": 1.91,
"learning_rate": 3.776722090261282e-06,
"loss": 0.8305,
"step": 26000
},
{
"epoch": 1.91,
"eval_loss": 0.14921718835830688,
"eval_runtime": 1026.6478,
"eval_samples_per_second": 15.592,
"eval_steps_per_second": 1.949,
"eval_wer": 0.16312957500273134,
"step": 26000
},
{
"epoch": 1.91,
"learning_rate": 3.4798099762470307e-06,
"loss": 0.833,
"step": 26100
},
{
"epoch": 1.92,
"learning_rate": 3.1828978622327788e-06,
"loss": 0.8175,
"step": 26200
},
{
"epoch": 1.93,
"learning_rate": 2.888954869358669e-06,
"loss": 0.8259,
"step": 26300
},
{
"epoch": 1.94,
"learning_rate": 2.5920427553444177e-06,
"loss": 0.8262,
"step": 26400
},
{
"epoch": 1.94,
"learning_rate": 2.295130641330166e-06,
"loss": 0.8223,
"step": 26500
},
{
"epoch": 1.95,
"learning_rate": 1.9982185273159142e-06,
"loss": 0.8285,
"step": 26600
},
{
"epoch": 1.96,
"learning_rate": 1.7013064133016625e-06,
"loss": 0.8226,
"step": 26700
},
{
"epoch": 1.97,
"learning_rate": 1.404394299287411e-06,
"loss": 0.8154,
"step": 26800
},
{
"epoch": 1.97,
"learning_rate": 1.107482185273159e-06,
"loss": 0.8176,
"step": 26900
},
{
"epoch": 1.98,
"learning_rate": 8.105700712589074e-07,
"loss": 0.8122,
"step": 27000
},
{
"epoch": 1.98,
"eval_loss": 0.14789555966854095,
"eval_runtime": 1030.7995,
"eval_samples_per_second": 15.529,
"eval_steps_per_second": 1.941,
"eval_wer": 0.16106740959248333,
"step": 27000
},
{
"epoch": 1.99,
"learning_rate": 5.136579572446555e-07,
"loss": 0.818,
"step": 27100
},
{
"epoch": 2.0,
"learning_rate": 2.167458432304038e-07,
"loss": 0.8284,
"step": 27200
},
{
"epoch": 2.0,
"step": 27260,
"total_flos": 4.0396309180498005e+20,
"train_loss": 0.32739020716330625,
"train_runtime": 49115.8494,
"train_samples_per_second": 17.761,
"train_steps_per_second": 0.555
}
],
"max_steps": 27260,
"num_train_epochs": 2,
"total_flos": 4.0396309180498005e+20,
"trial_name": null,
"trial_params": null
}