{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9999816584435355, "global_step": 27260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.675e-06, "loss": 11.4989, "step": 100 }, { "epoch": 0.01, "learning_rate": 7.425e-06, "loss": 3.2394, "step": 200 }, { "epoch": 0.02, "learning_rate": 1.1174999999999999e-05, "loss": 3.0303, "step": 300 }, { "epoch": 0.03, "learning_rate": 1.4925e-05, "loss": 2.9052, "step": 400 }, { "epoch": 0.04, "learning_rate": 1.8675e-05, "loss": 2.1033, "step": 500 }, { "epoch": 0.04, "learning_rate": 2.2424999999999996e-05, "loss": 1.674, "step": 600 }, { "epoch": 0.05, "learning_rate": 2.6174999999999996e-05, "loss": 1.5568, "step": 700 }, { "epoch": 0.06, "learning_rate": 2.9925e-05, "loss": 1.4654, "step": 800 }, { "epoch": 0.07, "learning_rate": 3.3675e-05, "loss": 1.3031, "step": 900 }, { "epoch": 0.07, "learning_rate": 3.7424999999999995e-05, "loss": 1.1842, "step": 1000 }, { "epoch": 0.07, "eval_loss": 0.44609957933425903, "eval_runtime": 1053.3237, "eval_samples_per_second": 15.197, "eval_steps_per_second": 1.9, "eval_wer": 0.49177182344586473, "step": 1000 }, { "epoch": 0.08, "learning_rate": 4.1175e-05, "loss": 1.1329, "step": 1100 }, { "epoch": 0.09, "learning_rate": 4.4924999999999994e-05, "loss": 1.1316, "step": 1200 }, { "epoch": 0.1, "learning_rate": 4.8675e-05, "loss": 1.1092, "step": 1300 }, { "epoch": 0.1, "learning_rate": 5.2424999999999994e-05, "loss": 1.1215, "step": 1400 }, { "epoch": 0.11, "learning_rate": 5.6175e-05, "loss": 1.1165, "step": 1500 }, { "epoch": 0.12, "learning_rate": 5.9925e-05, "loss": 1.0946, "step": 1600 }, { "epoch": 0.12, "learning_rate": 6.367499999999999e-05, "loss": 1.1189, "step": 1700 }, { "epoch": 0.13, "learning_rate": 6.7425e-05, "loss": 1.1175, "step": 1800 }, { "epoch": 0.14, "learning_rate": 7.1175e-05, "loss": 1.1254, "step": 1900 }, { "epoch": 0.15, "learning_rate": 7.492499999999999e-05, "loss": 1.1317, "step": 2000 }, { "epoch": 0.15, "eval_loss": 0.2668535113334656, "eval_runtime": 988.5751, "eval_samples_per_second": 16.192, "eval_steps_per_second": 2.024, "eval_wer": 0.2748006118212608, "step": 2000 }, { "epoch": 0.15, "learning_rate": 7.470902612826603e-05, "loss": 1.1296, "step": 2100 }, { "epoch": 0.16, "learning_rate": 7.441211401425178e-05, "loss": 1.1406, "step": 2200 }, { "epoch": 0.17, "learning_rate": 7.411520190023751e-05, "loss": 1.1362, "step": 2300 }, { "epoch": 0.18, "learning_rate": 7.381828978622327e-05, "loss": 1.1292, "step": 2400 }, { "epoch": 0.18, "learning_rate": 7.352137767220902e-05, "loss": 1.105, "step": 2500 }, { "epoch": 0.19, "learning_rate": 7.322446555819477e-05, "loss": 1.1231, "step": 2600 }, { "epoch": 0.2, "learning_rate": 7.292755344418051e-05, "loss": 1.1187, "step": 2700 }, { "epoch": 0.21, "learning_rate": 7.263064133016626e-05, "loss": 1.1339, "step": 2800 }, { "epoch": 0.21, "learning_rate": 7.233372921615201e-05, "loss": 1.1241, "step": 2900 }, { "epoch": 0.22, "learning_rate": 7.203681710213777e-05, "loss": 1.1029, "step": 3000 }, { "epoch": 0.22, "eval_loss": 0.2638496458530426, "eval_runtime": 987.5568, "eval_samples_per_second": 16.209, "eval_steps_per_second": 2.026, "eval_wer": 0.2705875122910521, "step": 3000 }, { "epoch": 0.23, "learning_rate": 7.173990498812351e-05, "loss": 1.1215, "step": 3100 }, { "epoch": 0.23, "learning_rate": 7.144299287410925e-05, "loss": 1.1067, "step": 3200 }, { "epoch": 0.24, "learning_rate": 7.114608076009501e-05, "loss": 1.1126, "step": 3300 }, { "epoch": 0.25, "learning_rate": 7.084916864608076e-05, "loss": 1.109, "step": 3400 }, { "epoch": 0.26, "learning_rate": 7.05522565320665e-05, "loss": 1.1077, "step": 3500 }, { "epoch": 0.26, "learning_rate": 7.025534441805225e-05, "loss": 1.1, "step": 3600 }, { "epoch": 0.27, "learning_rate": 6.9958432304038e-05, "loss": 1.1061, "step": 3700 }, { "epoch": 0.28, "learning_rate": 6.966152019002374e-05, "loss": 1.103, "step": 3800 }, { "epoch": 0.29, "learning_rate": 6.936460807600949e-05, "loss": 1.0947, "step": 3900 }, { "epoch": 0.29, "learning_rate": 6.906769596199525e-05, "loss": 1.0949, "step": 4000 }, { "epoch": 0.29, "eval_loss": 0.25188884139060974, "eval_runtime": 996.0428, "eval_samples_per_second": 16.071, "eval_steps_per_second": 2.009, "eval_wer": 0.26274172402490986, "step": 4000 }, { "epoch": 0.3, "learning_rate": 6.8770783847981e-05, "loss": 1.1076, "step": 4100 }, { "epoch": 0.31, "learning_rate": 6.847387173396674e-05, "loss": 1.1012, "step": 4200 }, { "epoch": 0.32, "learning_rate": 6.817695961995249e-05, "loss": 1.081, "step": 4300 }, { "epoch": 0.32, "learning_rate": 6.788004750593824e-05, "loss": 1.0868, "step": 4400 }, { "epoch": 0.33, "learning_rate": 6.758313539192398e-05, "loss": 1.0956, "step": 4500 }, { "epoch": 0.34, "learning_rate": 6.728622327790973e-05, "loss": 1.0953, "step": 4600 }, { "epoch": 0.34, "learning_rate": 6.698931116389548e-05, "loss": 1.0952, "step": 4700 }, { "epoch": 0.35, "learning_rate": 6.669239904988122e-05, "loss": 1.0968, "step": 4800 }, { "epoch": 0.36, "learning_rate": 6.639548693586698e-05, "loss": 1.0827, "step": 4900 }, { "epoch": 0.37, "learning_rate": 6.609857482185273e-05, "loss": 1.0923, "step": 5000 }, { "epoch": 0.37, "eval_loss": 0.24751192331314087, "eval_runtime": 984.8205, "eval_samples_per_second": 16.254, "eval_steps_per_second": 2.032, "eval_wer": 0.25769556429585927, "step": 5000 }, { "epoch": 0.37, "learning_rate": 6.580166270783846e-05, "loss": 1.0895, "step": 5100 }, { "epoch": 0.38, "learning_rate": 6.550771971496436e-05, "loss": 1.0851, "step": 5200 }, { "epoch": 0.39, "learning_rate": 6.521080760095011e-05, "loss": 1.1124, "step": 5300 }, { "epoch": 0.4, "learning_rate": 6.491686460807601e-05, "loss": 1.0809, "step": 5400 }, { "epoch": 0.4, "learning_rate": 6.461995249406176e-05, "loss": 1.0985, "step": 5500 }, { "epoch": 0.41, "learning_rate": 6.432304038004749e-05, "loss": 1.086, "step": 5600 }, { "epoch": 0.42, "learning_rate": 6.402612826603325e-05, "loss": 1.0823, "step": 5700 }, { "epoch": 0.43, "learning_rate": 6.3729216152019e-05, "loss": 1.0732, "step": 5800 }, { "epoch": 0.43, "learning_rate": 6.343230403800475e-05, "loss": 1.076, "step": 5900 }, { "epoch": 0.44, "learning_rate": 6.313539192399049e-05, "loss": 1.0847, "step": 6000 }, { "epoch": 0.44, "eval_loss": 0.24355509877204895, "eval_runtime": 984.3756, "eval_samples_per_second": 16.261, "eval_steps_per_second": 2.033, "eval_wer": 0.26121217087293785, "step": 6000 }, { "epoch": 0.45, "learning_rate": 6.283847980997624e-05, "loss": 1.0748, "step": 6100 }, { "epoch": 0.45, "learning_rate": 6.254156769596199e-05, "loss": 1.0836, "step": 6200 }, { "epoch": 0.46, "learning_rate": 6.224465558194773e-05, "loss": 1.084, "step": 6300 }, { "epoch": 0.47, "learning_rate": 6.194774346793349e-05, "loss": 1.0649, "step": 6400 }, { "epoch": 0.48, "learning_rate": 6.165083135391923e-05, "loss": 1.0751, "step": 6500 }, { "epoch": 0.48, "learning_rate": 6.135391923990499e-05, "loss": 1.0773, "step": 6600 }, { "epoch": 0.49, "learning_rate": 6.105700712589073e-05, "loss": 1.095, "step": 6700 }, { "epoch": 0.5, "learning_rate": 6.076009501187648e-05, "loss": 1.0629, "step": 6800 }, { "epoch": 0.51, "learning_rate": 6.0463182897862234e-05, "loss": 1.0904, "step": 6900 }, { "epoch": 0.51, "learning_rate": 6.0166270783847974e-05, "loss": 1.0667, "step": 7000 }, { "epoch": 0.51, "eval_loss": 0.24724909663200378, "eval_runtime": 983.1677, "eval_samples_per_second": 16.281, "eval_steps_per_second": 2.035, "eval_wer": 0.26608762154484866, "step": 7000 }, { "epoch": 0.52, "learning_rate": 5.986935866983372e-05, "loss": 1.0825, "step": 7100 }, { "epoch": 0.53, "learning_rate": 5.9572446555819474e-05, "loss": 1.0811, "step": 7200 }, { "epoch": 0.54, "learning_rate": 5.927553444180522e-05, "loss": 1.0906, "step": 7300 }, { "epoch": 0.54, "learning_rate": 5.8978622327790975e-05, "loss": 1.0784, "step": 7400 }, { "epoch": 0.55, "learning_rate": 5.8681710213776715e-05, "loss": 1.0822, "step": 7500 }, { "epoch": 0.56, "learning_rate": 5.838479809976246e-05, "loss": 1.0802, "step": 7600 }, { "epoch": 0.56, "learning_rate": 5.8087885985748215e-05, "loss": 1.0805, "step": 7700 }, { "epoch": 0.57, "learning_rate": 5.779097387173396e-05, "loss": 1.093, "step": 7800 }, { "epoch": 0.58, "learning_rate": 5.749406175771971e-05, "loss": 1.0456, "step": 7900 }, { "epoch": 0.59, "learning_rate": 5.7197149643705455e-05, "loss": 1.0709, "step": 8000 }, { "epoch": 0.59, "eval_loss": 0.24887976050376892, "eval_runtime": 982.4054, "eval_samples_per_second": 16.294, "eval_steps_per_second": 2.037, "eval_wer": 0.26095269310608543, "step": 8000 }, { "epoch": 0.59, "learning_rate": 5.690023752969121e-05, "loss": 1.0677, "step": 8100 }, { "epoch": 0.6, "learning_rate": 5.6603325415676956e-05, "loss": 1.0659, "step": 8200 }, { "epoch": 0.61, "learning_rate": 5.630641330166271e-05, "loss": 1.0788, "step": 8300 }, { "epoch": 0.62, "learning_rate": 5.600950118764845e-05, "loss": 1.071, "step": 8400 }, { "epoch": 0.62, "learning_rate": 5.5712589073634196e-05, "loss": 1.0669, "step": 8500 }, { "epoch": 0.63, "learning_rate": 5.541567695961995e-05, "loss": 1.0728, "step": 8600 }, { "epoch": 0.64, "learning_rate": 5.512173396674584e-05, "loss": 1.0652, "step": 8700 }, { "epoch": 0.65, "learning_rate": 5.482482185273159e-05, "loss": 1.0632, "step": 8800 }, { "epoch": 0.65, "learning_rate": 5.452790973871733e-05, "loss": 1.048, "step": 8900 }, { "epoch": 0.66, "learning_rate": 5.4230997624703083e-05, "loss": 1.0472, "step": 9000 }, { "epoch": 0.66, "eval_loss": 0.23543120920658112, "eval_runtime": 985.8901, "eval_samples_per_second": 16.236, "eval_steps_per_second": 2.03, "eval_wer": 0.24997268655085764, "step": 9000 }, { "epoch": 0.67, "learning_rate": 5.393408551068883e-05, "loss": 1.0552, "step": 9100 }, { "epoch": 0.67, "learning_rate": 5.3637173396674584e-05, "loss": 1.0581, "step": 9200 }, { "epoch": 0.68, "learning_rate": 5.3340261282660324e-05, "loss": 1.0658, "step": 9300 }, { "epoch": 0.69, "learning_rate": 5.304334916864607e-05, "loss": 1.0603, "step": 9400 }, { "epoch": 0.7, "learning_rate": 5.2746437054631824e-05, "loss": 1.0661, "step": 9500 }, { "epoch": 0.7, "learning_rate": 5.244952494061757e-05, "loss": 1.0554, "step": 9600 }, { "epoch": 0.71, "learning_rate": 5.2152612826603325e-05, "loss": 1.0728, "step": 9700 }, { "epoch": 0.72, "learning_rate": 5.1855700712589065e-05, "loss": 1.0513, "step": 9800 }, { "epoch": 0.73, "learning_rate": 5.155878859857482e-05, "loss": 1.0379, "step": 9900 }, { "epoch": 0.73, "learning_rate": 5.1261876484560565e-05, "loss": 1.0604, "step": 10000 }, { "epoch": 0.73, "eval_loss": 0.23458585143089294, "eval_runtime": 986.1525, "eval_samples_per_second": 16.232, "eval_steps_per_second": 2.029, "eval_wer": 0.2485182453840271, "step": 10000 }, { "epoch": 0.74, "learning_rate": 5.096496437054632e-05, "loss": 1.0632, "step": 10100 }, { "epoch": 0.75, "learning_rate": 5.0668052256532065e-05, "loss": 1.0526, "step": 10200 }, { "epoch": 0.76, "learning_rate": 5.0371140142517805e-05, "loss": 1.0314, "step": 10300 }, { "epoch": 0.76, "learning_rate": 5.007422802850356e-05, "loss": 1.0508, "step": 10400 }, { "epoch": 0.77, "learning_rate": 4.9777315914489306e-05, "loss": 1.0446, "step": 10500 }, { "epoch": 0.78, "learning_rate": 4.948040380047506e-05, "loss": 1.0361, "step": 10600 }, { "epoch": 0.79, "learning_rate": 4.91834916864608e-05, "loss": 1.0319, "step": 10700 }, { "epoch": 0.79, "learning_rate": 4.8886579572446546e-05, "loss": 1.0178, "step": 10800 }, { "epoch": 0.8, "learning_rate": 4.85896674584323e-05, "loss": 1.0301, "step": 10900 }, { "epoch": 0.81, "learning_rate": 4.8292755344418046e-05, "loss": 1.0375, "step": 11000 }, { "epoch": 0.81, "eval_loss": 0.2285824865102768, "eval_runtime": 979.8277, "eval_samples_per_second": 16.337, "eval_steps_per_second": 2.042, "eval_wer": 0.23898585163334427, "step": 11000 }, { "epoch": 0.81, "learning_rate": 4.79958432304038e-05, "loss": 1.0398, "step": 11100 }, { "epoch": 0.82, "learning_rate": 4.769893111638954e-05, "loss": 1.0308, "step": 11200 }, { "epoch": 0.83, "learning_rate": 4.7402019002375294e-05, "loss": 1.0309, "step": 11300 }, { "epoch": 0.84, "learning_rate": 4.710510688836104e-05, "loss": 1.0287, "step": 11400 }, { "epoch": 0.84, "learning_rate": 4.6808194774346794e-05, "loss": 1.0195, "step": 11500 }, { "epoch": 0.85, "learning_rate": 4.651128266033254e-05, "loss": 1.0292, "step": 11600 }, { "epoch": 0.86, "learning_rate": 4.621437054631828e-05, "loss": 1.0147, "step": 11700 }, { "epoch": 0.87, "learning_rate": 4.5917458432304034e-05, "loss": 1.0242, "step": 11800 }, { "epoch": 0.87, "learning_rate": 4.562054631828978e-05, "loss": 1.029, "step": 11900 }, { "epoch": 0.88, "learning_rate": 4.5326603325415675e-05, "loss": 1.0193, "step": 12000 }, { "epoch": 0.88, "eval_loss": 0.22122837603092194, "eval_runtime": 981.4673, "eval_samples_per_second": 16.309, "eval_steps_per_second": 2.039, "eval_wer": 0.23376215448486834, "step": 12000 }, { "epoch": 0.89, "learning_rate": 4.502969121140143e-05, "loss": 1.0249, "step": 12100 }, { "epoch": 0.9, "learning_rate": 4.473277909738717e-05, "loss": 1.0165, "step": 12200 }, { "epoch": 0.9, "learning_rate": 4.4435866983372915e-05, "loss": 1.0303, "step": 12300 }, { "epoch": 0.91, "learning_rate": 4.413895486935867e-05, "loss": 1.0295, "step": 12400 }, { "epoch": 0.92, "learning_rate": 4.3842042755344415e-05, "loss": 1.0112, "step": 12500 }, { "epoch": 0.92, "learning_rate": 4.35480997624703e-05, "loss": 1.0056, "step": 12600 }, { "epoch": 0.93, "learning_rate": 4.325118764845605e-05, "loss": 1.0108, "step": 12700 }, { "epoch": 0.94, "learning_rate": 4.29542755344418e-05, "loss": 1.0133, "step": 12800 }, { "epoch": 0.95, "learning_rate": 4.265736342042755e-05, "loss": 1.0063, "step": 12900 }, { "epoch": 0.95, "learning_rate": 4.23604513064133e-05, "loss": 1.0077, "step": 13000 }, { "epoch": 0.95, "eval_loss": 0.21520280838012695, "eval_runtime": 983.9086, "eval_samples_per_second": 16.269, "eval_steps_per_second": 2.034, "eval_wer": 0.22689282202556538, "step": 13000 }, { "epoch": 0.96, "learning_rate": 4.206353919239904e-05, "loss": 1.0085, "step": 13100 }, { "epoch": 0.97, "learning_rate": 4.176662707838479e-05, "loss": 1.011, "step": 13200 }, { "epoch": 0.98, "learning_rate": 4.146971496437054e-05, "loss": 1.0131, "step": 13300 }, { "epoch": 0.98, "learning_rate": 4.117280285035629e-05, "loss": 0.998, "step": 13400 }, { "epoch": 0.99, "learning_rate": 4.0875890736342043e-05, "loss": 1.0002, "step": 13500 }, { "epoch": 1.0, "learning_rate": 4.0578978622327783e-05, "loss": 0.9916, "step": 13600 }, { "epoch": 1.01, "learning_rate": 4.028206650831354e-05, "loss": 0.9662, "step": 13700 }, { "epoch": 1.01, "learning_rate": 3.9985154394299284e-05, "loss": 0.9758, "step": 13800 }, { "epoch": 1.02, "learning_rate": 3.968824228028504e-05, "loss": 1.013, "step": 13900 }, { "epoch": 1.03, "learning_rate": 3.939133016627078e-05, "loss": 1.0004, "step": 14000 }, { "epoch": 1.03, "eval_loss": 0.2093251347541809, "eval_runtime": 986.9604, "eval_samples_per_second": 16.218, "eval_steps_per_second": 2.027, "eval_wer": 0.22069949743253578, "step": 14000 }, { "epoch": 1.03, "learning_rate": 3.9094418052256524e-05, "loss": 0.9852, "step": 14100 }, { "epoch": 1.04, "learning_rate": 3.879750593824228e-05, "loss": 0.9765, "step": 14200 }, { "epoch": 1.05, "learning_rate": 3.8500593824228025e-05, "loss": 0.9978, "step": 14300 }, { "epoch": 1.06, "learning_rate": 3.820368171021378e-05, "loss": 0.9807, "step": 14400 }, { "epoch": 1.06, "learning_rate": 3.790676959619952e-05, "loss": 0.9988, "step": 14500 }, { "epoch": 1.07, "learning_rate": 3.7609857482185265e-05, "loss": 0.977, "step": 14600 }, { "epoch": 1.08, "learning_rate": 3.731294536817102e-05, "loss": 0.9735, "step": 14700 }, { "epoch": 1.09, "learning_rate": 3.7016033254156765e-05, "loss": 0.9767, "step": 14800 }, { "epoch": 1.09, "learning_rate": 3.671912114014251e-05, "loss": 0.9555, "step": 14900 }, { "epoch": 1.1, "learning_rate": 3.6422209026128266e-05, "loss": 0.9649, "step": 15000 }, { "epoch": 1.1, "eval_loss": 0.19932541251182556, "eval_runtime": 986.5773, "eval_samples_per_second": 16.225, "eval_steps_per_second": 2.028, "eval_wer": 0.21130367092756475, "step": 15000 }, { "epoch": 1.11, "learning_rate": 3.612529691211401e-05, "loss": 0.9608, "step": 15100 }, { "epoch": 1.12, "learning_rate": 3.582838479809976e-05, "loss": 0.9549, "step": 15200 }, { "epoch": 1.12, "learning_rate": 3.5531472684085506e-05, "loss": 0.9636, "step": 15300 }, { "epoch": 1.13, "learning_rate": 3.523456057007125e-05, "loss": 0.9605, "step": 15400 }, { "epoch": 1.14, "learning_rate": 3.4937648456057006e-05, "loss": 0.962, "step": 15500 }, { "epoch": 1.14, "learning_rate": 3.464073634204275e-05, "loss": 0.9565, "step": 15600 }, { "epoch": 1.15, "learning_rate": 3.43438242280285e-05, "loss": 0.9609, "step": 15700 }, { "epoch": 1.16, "learning_rate": 3.404691211401425e-05, "loss": 0.9552, "step": 15800 }, { "epoch": 1.17, "learning_rate": 3.375e-05, "loss": 0.9503, "step": 15900 }, { "epoch": 1.17, "learning_rate": 3.345308788598574e-05, "loss": 0.9509, "step": 16000 }, { "epoch": 1.17, "eval_loss": 0.19342663884162903, "eval_runtime": 984.1094, "eval_samples_per_second": 16.265, "eval_steps_per_second": 2.033, "eval_wer": 0.20888643067846607, "step": 16000 }, { "epoch": 1.18, "learning_rate": 3.3156175771971494e-05, "loss": 0.9369, "step": 16100 }, { "epoch": 1.19, "learning_rate": 3.285926365795724e-05, "loss": 0.9549, "step": 16200 }, { "epoch": 1.2, "learning_rate": 3.256235154394299e-05, "loss": 0.9503, "step": 16300 }, { "epoch": 1.2, "learning_rate": 3.226543942992874e-05, "loss": 0.9553, "step": 16400 }, { "epoch": 1.21, "learning_rate": 3.196852731591449e-05, "loss": 0.9508, "step": 16500 }, { "epoch": 1.22, "learning_rate": 3.1671615201900235e-05, "loss": 0.9411, "step": 16600 }, { "epoch": 1.23, "learning_rate": 3.137470308788598e-05, "loss": 0.9435, "step": 16700 }, { "epoch": 1.23, "learning_rate": 3.107779097387173e-05, "loss": 0.9439, "step": 16800 }, { "epoch": 1.24, "learning_rate": 3.078087885985748e-05, "loss": 0.946, "step": 16900 }, { "epoch": 1.25, "learning_rate": 3.048396674584323e-05, "loss": 0.9533, "step": 17000 }, { "epoch": 1.25, "eval_loss": 0.18736572563648224, "eval_runtime": 984.7341, "eval_samples_per_second": 16.255, "eval_steps_per_second": 2.032, "eval_wer": 0.20231071779744347, "step": 17000 }, { "epoch": 1.25, "learning_rate": 3.018705463182898e-05, "loss": 0.9322, "step": 17100 }, { "epoch": 1.26, "learning_rate": 2.9890142517814722e-05, "loss": 0.94, "step": 17200 }, { "epoch": 1.27, "learning_rate": 2.9593230403800473e-05, "loss": 0.9373, "step": 17300 }, { "epoch": 1.28, "learning_rate": 2.9299287410926363e-05, "loss": 0.924, "step": 17400 }, { "epoch": 1.28, "learning_rate": 2.9005344418052253e-05, "loss": 0.9357, "step": 17500 }, { "epoch": 1.29, "learning_rate": 2.8708432304038003e-05, "loss": 0.9351, "step": 17600 }, { "epoch": 1.3, "learning_rate": 2.841152019002375e-05, "loss": 0.9371, "step": 17700 }, { "epoch": 1.31, "learning_rate": 2.81146080760095e-05, "loss": 0.9253, "step": 17800 }, { "epoch": 1.31, "learning_rate": 2.7817695961995246e-05, "loss": 0.9264, "step": 17900 }, { "epoch": 1.32, "learning_rate": 2.7520783847980997e-05, "loss": 0.9248, "step": 18000 }, { "epoch": 1.32, "eval_loss": 0.1818237155675888, "eval_runtime": 1114.2718, "eval_samples_per_second": 14.365, "eval_steps_per_second": 1.796, "eval_wer": 0.19742843876324703, "step": 18000 }, { "epoch": 1.33, "learning_rate": 2.722387173396674e-05, "loss": 0.9448, "step": 18100 }, { "epoch": 1.34, "learning_rate": 2.692695961995249e-05, "loss": 0.9284, "step": 18200 }, { "epoch": 1.34, "learning_rate": 2.663004750593824e-05, "loss": 0.9141, "step": 18300 }, { "epoch": 1.35, "learning_rate": 2.6333135391923987e-05, "loss": 0.9117, "step": 18400 }, { "epoch": 1.36, "learning_rate": 2.6036223277909737e-05, "loss": 0.917, "step": 18500 }, { "epoch": 1.36, "learning_rate": 2.5739311163895484e-05, "loss": 0.9165, "step": 18600 }, { "epoch": 1.37, "learning_rate": 2.5442399049881234e-05, "loss": 0.9099, "step": 18700 }, { "epoch": 1.38, "learning_rate": 2.5145486935866978e-05, "loss": 0.9022, "step": 18800 }, { "epoch": 1.39, "learning_rate": 2.4848574821852728e-05, "loss": 0.9246, "step": 18900 }, { "epoch": 1.39, "learning_rate": 2.4551662707838478e-05, "loss": 0.9216, "step": 19000 }, { "epoch": 1.39, "eval_loss": 0.17756715416908264, "eval_runtime": 1032.2412, "eval_samples_per_second": 15.507, "eval_steps_per_second": 1.939, "eval_wer": 0.19256664481590735, "step": 19000 }, { "epoch": 1.4, "learning_rate": 2.4254750593824225e-05, "loss": 0.9142, "step": 19100 }, { "epoch": 1.41, "learning_rate": 2.3957838479809975e-05, "loss": 0.9275, "step": 19200 }, { "epoch": 1.42, "learning_rate": 2.3660926365795722e-05, "loss": 0.9132, "step": 19300 }, { "epoch": 1.42, "learning_rate": 2.3364014251781472e-05, "loss": 0.9111, "step": 19400 }, { "epoch": 1.43, "learning_rate": 2.3067102137767216e-05, "loss": 0.8974, "step": 19500 }, { "epoch": 1.44, "learning_rate": 2.2770190023752966e-05, "loss": 0.9013, "step": 19600 }, { "epoch": 1.45, "learning_rate": 2.2473277909738716e-05, "loss": 0.9093, "step": 19700 }, { "epoch": 1.45, "learning_rate": 2.2176365795724463e-05, "loss": 0.8926, "step": 19800 }, { "epoch": 1.46, "learning_rate": 2.1879453681710213e-05, "loss": 0.9026, "step": 19900 }, { "epoch": 1.47, "learning_rate": 2.158254156769596e-05, "loss": 0.8964, "step": 20000 }, { "epoch": 1.47, "eval_loss": 0.1722368746995926, "eval_runtime": 1019.2936, "eval_samples_per_second": 15.704, "eval_steps_per_second": 1.963, "eval_wer": 0.19043619578280346, "step": 20000 }, { "epoch": 1.47, "learning_rate": 2.128859857482185e-05, "loss": 0.8906, "step": 20100 }, { "epoch": 1.48, "learning_rate": 2.09916864608076e-05, "loss": 0.8878, "step": 20200 }, { "epoch": 1.49, "learning_rate": 2.0694774346793347e-05, "loss": 0.9024, "step": 20300 }, { "epoch": 1.5, "learning_rate": 2.0397862232779097e-05, "loss": 0.8903, "step": 20400 }, { "epoch": 1.5, "learning_rate": 2.0100950118764844e-05, "loss": 0.8843, "step": 20500 }, { "epoch": 1.51, "learning_rate": 1.9804038004750594e-05, "loss": 0.8911, "step": 20600 }, { "epoch": 1.52, "learning_rate": 1.9507125890736337e-05, "loss": 0.8795, "step": 20700 }, { "epoch": 1.53, "learning_rate": 1.9210213776722087e-05, "loss": 0.8777, "step": 20800 }, { "epoch": 1.53, "learning_rate": 1.8913301662707838e-05, "loss": 0.889, "step": 20900 }, { "epoch": 1.54, "learning_rate": 1.8616389548693584e-05, "loss": 0.8941, "step": 21000 }, { "epoch": 1.54, "eval_loss": 0.16895848512649536, "eval_runtime": 1022.9987, "eval_samples_per_second": 15.647, "eval_steps_per_second": 1.956, "eval_wer": 0.18521932699661314, "step": 21000 }, { "epoch": 1.55, "learning_rate": 1.831947743467933e-05, "loss": 0.882, "step": 21100 }, { "epoch": 1.56, "learning_rate": 1.802256532066508e-05, "loss": 0.8801, "step": 21200 }, { "epoch": 1.56, "learning_rate": 1.772565320665083e-05, "loss": 0.8718, "step": 21300 }, { "epoch": 1.57, "learning_rate": 1.742874109263658e-05, "loss": 0.8904, "step": 21400 }, { "epoch": 1.58, "learning_rate": 1.7131828978622325e-05, "loss": 0.8729, "step": 21500 }, { "epoch": 1.58, "learning_rate": 1.6834916864608075e-05, "loss": 0.8722, "step": 21600 }, { "epoch": 1.59, "learning_rate": 1.6538004750593822e-05, "loss": 0.8739, "step": 21700 }, { "epoch": 1.6, "learning_rate": 1.624109263657957e-05, "loss": 0.8635, "step": 21800 }, { "epoch": 1.61, "learning_rate": 1.594418052256532e-05, "loss": 0.8767, "step": 21900 }, { "epoch": 1.61, "learning_rate": 1.564726840855107e-05, "loss": 0.871, "step": 22000 }, { "epoch": 1.61, "eval_loss": 0.16269078850746155, "eval_runtime": 1042.6643, "eval_samples_per_second": 15.352, "eval_steps_per_second": 1.919, "eval_wer": 0.17805637495902982, "step": 22000 }, { "epoch": 1.62, "learning_rate": 1.5350356294536816e-05, "loss": 0.8663, "step": 22100 }, { "epoch": 1.63, "learning_rate": 1.5056413301662706e-05, "loss": 0.8732, "step": 22200 }, { "epoch": 1.64, "learning_rate": 1.4759501187648455e-05, "loss": 0.8625, "step": 22300 }, { "epoch": 1.64, "learning_rate": 1.4462589073634203e-05, "loss": 0.854, "step": 22400 }, { "epoch": 1.65, "learning_rate": 1.416567695961995e-05, "loss": 0.8692, "step": 22500 }, { "epoch": 1.66, "learning_rate": 1.38687648456057e-05, "loss": 0.8477, "step": 22600 }, { "epoch": 1.67, "learning_rate": 1.3571852731591449e-05, "loss": 0.8494, "step": 22700 }, { "epoch": 1.67, "learning_rate": 1.3277909738717339e-05, "loss": 0.8599, "step": 22800 }, { "epoch": 1.68, "learning_rate": 1.2980997624703087e-05, "loss": 0.863, "step": 22900 }, { "epoch": 1.69, "learning_rate": 1.2684085510688834e-05, "loss": 0.847, "step": 23000 }, { "epoch": 1.69, "eval_loss": 0.15907420217990875, "eval_runtime": 1036.4519, "eval_samples_per_second": 15.444, "eval_steps_per_second": 1.931, "eval_wer": 0.17514066426308314, "step": 23000 }, { "epoch": 1.69, "learning_rate": 1.2387173396674582e-05, "loss": 0.8487, "step": 23100 }, { "epoch": 1.7, "learning_rate": 1.2090261282660333e-05, "loss": 0.8637, "step": 23200 }, { "epoch": 1.71, "learning_rate": 1.1793349168646081e-05, "loss": 0.8456, "step": 23300 }, { "epoch": 1.72, "learning_rate": 1.1496437054631828e-05, "loss": 0.8518, "step": 23400 }, { "epoch": 1.72, "learning_rate": 1.1199524940617576e-05, "loss": 0.8456, "step": 23500 }, { "epoch": 1.73, "learning_rate": 1.0902612826603325e-05, "loss": 0.8349, "step": 23600 }, { "epoch": 1.74, "learning_rate": 1.0605700712589072e-05, "loss": 0.8426, "step": 23700 }, { "epoch": 1.75, "learning_rate": 1.030878859857482e-05, "loss": 0.8503, "step": 23800 }, { "epoch": 1.75, "learning_rate": 1.001187648456057e-05, "loss": 0.844, "step": 23900 }, { "epoch": 1.76, "learning_rate": 9.714964370546319e-06, "loss": 0.822, "step": 24000 }, { "epoch": 1.76, "eval_loss": 0.1550702005624771, "eval_runtime": 1027.8442, "eval_samples_per_second": 15.573, "eval_steps_per_second": 1.947, "eval_wer": 0.17010133289631815, "step": 24000 }, { "epoch": 1.77, "learning_rate": 9.418052256532066e-06, "loss": 0.8452, "step": 24100 }, { "epoch": 1.78, "learning_rate": 9.121140142517814e-06, "loss": 0.843, "step": 24200 }, { "epoch": 1.78, "learning_rate": 8.824228028503563e-06, "loss": 0.8429, "step": 24300 }, { "epoch": 1.79, "learning_rate": 8.527315914489311e-06, "loss": 0.8513, "step": 24400 }, { "epoch": 1.8, "learning_rate": 8.23040380047506e-06, "loss": 0.834, "step": 24500 }, { "epoch": 1.8, "learning_rate": 7.933491686460806e-06, "loss": 0.8383, "step": 24600 }, { "epoch": 1.81, "learning_rate": 7.636579572446555e-06, "loss": 0.8294, "step": 24700 }, { "epoch": 1.82, "learning_rate": 7.339667458432303e-06, "loss": 0.8335, "step": 24800 }, { "epoch": 1.83, "learning_rate": 7.042755344418052e-06, "loss": 0.8207, "step": 24900 }, { "epoch": 1.83, "learning_rate": 6.745843230403799e-06, "loss": 0.8188, "step": 25000 }, { "epoch": 1.83, "eval_loss": 0.1527515947818756, "eval_runtime": 1034.5359, "eval_samples_per_second": 15.473, "eval_steps_per_second": 1.934, "eval_wer": 0.16672812192723696, "step": 25000 }, { "epoch": 1.84, "learning_rate": 6.448931116389549e-06, "loss": 0.8289, "step": 25100 }, { "epoch": 1.85, "learning_rate": 6.152019002375296e-06, "loss": 0.8306, "step": 25200 }, { "epoch": 1.86, "learning_rate": 5.855106888361045e-06, "loss": 0.8335, "step": 25300 }, { "epoch": 1.86, "learning_rate": 5.5581947743467925e-06, "loss": 0.8291, "step": 25400 }, { "epoch": 1.87, "learning_rate": 5.261282660332541e-06, "loss": 0.8206, "step": 25500 }, { "epoch": 1.88, "learning_rate": 4.9643705463182895e-06, "loss": 0.8242, "step": 25600 }, { "epoch": 1.89, "learning_rate": 4.667458432304038e-06, "loss": 0.8189, "step": 25700 }, { "epoch": 1.89, "learning_rate": 4.370546318289786e-06, "loss": 0.8275, "step": 25800 }, { "epoch": 1.9, "learning_rate": 4.073634204275534e-06, "loss": 0.8142, "step": 25900 }, { "epoch": 1.91, "learning_rate": 3.776722090261282e-06, "loss": 0.8305, "step": 26000 }, { "epoch": 1.91, "eval_loss": 0.14921718835830688, "eval_runtime": 1026.6478, "eval_samples_per_second": 15.592, "eval_steps_per_second": 1.949, "eval_wer": 0.16312957500273134, "step": 26000 }, { "epoch": 1.91, "learning_rate": 3.4798099762470307e-06, "loss": 0.833, "step": 26100 }, { "epoch": 1.92, "learning_rate": 3.1828978622327788e-06, "loss": 0.8175, "step": 26200 }, { "epoch": 1.93, "learning_rate": 2.888954869358669e-06, "loss": 0.8259, "step": 26300 }, { "epoch": 1.94, "learning_rate": 2.5920427553444177e-06, "loss": 0.8262, "step": 26400 }, { "epoch": 1.94, "learning_rate": 2.295130641330166e-06, "loss": 0.8223, "step": 26500 }, { "epoch": 1.95, "learning_rate": 1.9982185273159142e-06, "loss": 0.8285, "step": 26600 }, { "epoch": 1.96, "learning_rate": 1.7013064133016625e-06, "loss": 0.8226, "step": 26700 }, { "epoch": 1.97, "learning_rate": 1.404394299287411e-06, "loss": 0.8154, "step": 26800 }, { "epoch": 1.97, "learning_rate": 1.107482185273159e-06, "loss": 0.8176, "step": 26900 }, { "epoch": 1.98, "learning_rate": 8.105700712589074e-07, "loss": 0.8122, "step": 27000 }, { "epoch": 1.98, "eval_loss": 0.14789555966854095, "eval_runtime": 1030.7995, "eval_samples_per_second": 15.529, "eval_steps_per_second": 1.941, "eval_wer": 0.16106740959248333, "step": 27000 }, { "epoch": 1.99, "learning_rate": 5.136579572446555e-07, "loss": 0.818, "step": 27100 }, { "epoch": 2.0, "learning_rate": 2.167458432304038e-07, "loss": 0.8284, "step": 27200 }, { "epoch": 2.0, "step": 27260, "total_flos": 4.0396309180498005e+20, "train_loss": 0.32739020716330625, "train_runtime": 49115.8494, "train_samples_per_second": 17.761, "train_steps_per_second": 0.555 } ], "max_steps": 27260, "num_train_epochs": 2, "total_flos": 4.0396309180498005e+20, "trial_name": null, "trial_params": null }