{ "best_metric": 18.148300246851182, "best_model_checkpoint": "./checkpoint-9000", "epoch": 17.33102253032929, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 2.3000000000000004e-06, "loss": 2.8829, "step": 50 }, { "epoch": 0.17, "learning_rate": 4.800000000000001e-06, "loss": 1.5187, "step": 100 }, { "epoch": 0.26, "learning_rate": 7.3e-06, "loss": 0.8352, "step": 150 }, { "epoch": 0.35, "learning_rate": 9.800000000000001e-06, "loss": 0.4087, "step": 200 }, { "epoch": 0.43, "learning_rate": 9.953061224489797e-06, "loss": 0.3044, "step": 250 }, { "epoch": 0.52, "learning_rate": 9.902040816326531e-06, "loss": 0.2602, "step": 300 }, { "epoch": 0.61, "learning_rate": 9.851020408163267e-06, "loss": 0.2313, "step": 350 }, { "epoch": 0.69, "learning_rate": 9.800000000000001e-06, "loss": 0.2162, "step": 400 }, { "epoch": 0.78, "learning_rate": 9.748979591836735e-06, "loss": 0.2019, "step": 450 }, { "epoch": 0.87, "learning_rate": 9.697959183673469e-06, "loss": 0.1869, "step": 500 }, { "epoch": 0.95, "learning_rate": 9.646938775510205e-06, "loss": 0.1813, "step": 550 }, { "epoch": 1.04, "learning_rate": 9.595918367346939e-06, "loss": 0.1619, "step": 600 }, { "epoch": 1.13, "learning_rate": 9.544897959183675e-06, "loss": 0.1441, "step": 650 }, { "epoch": 1.21, "learning_rate": 9.493877551020409e-06, "loss": 0.1389, "step": 700 }, { "epoch": 1.3, "learning_rate": 9.442857142857144e-06, "loss": 0.1367, "step": 750 }, { "epoch": 1.39, "learning_rate": 9.391836734693878e-06, "loss": 0.1346, "step": 800 }, { "epoch": 1.47, "learning_rate": 9.340816326530612e-06, "loss": 0.1296, "step": 850 }, { "epoch": 1.56, "learning_rate": 9.289795918367348e-06, "loss": 0.1258, "step": 900 }, { "epoch": 1.65, "learning_rate": 9.238775510204082e-06, "loss": 0.1234, "step": 950 }, { "epoch": 1.73, "learning_rate": 9.187755102040818e-06, "loss": 0.1189, "step": 1000 }, { "epoch": 1.73, "eval_loss": 0.14634737372398376, "eval_runtime": 1874.6047, "eval_samples_per_second": 4.924, "eval_steps_per_second": 0.308, "eval_wer": 23.895374812153925, "step": 1000 }, { "epoch": 1.82, "learning_rate": 9.136734693877552e-06, "loss": 0.1164, "step": 1050 }, { "epoch": 1.91, "learning_rate": 9.085714285714286e-06, "loss": 0.1179, "step": 1100 }, { "epoch": 1.99, "learning_rate": 9.03469387755102e-06, "loss": 0.1155, "step": 1150 }, { "epoch": 2.08, "learning_rate": 8.983673469387756e-06, "loss": 0.0855, "step": 1200 }, { "epoch": 2.17, "learning_rate": 8.932653061224492e-06, "loss": 0.083, "step": 1250 }, { "epoch": 2.25, "learning_rate": 8.881632653061226e-06, "loss": 0.081, "step": 1300 }, { "epoch": 2.34, "learning_rate": 8.83061224489796e-06, "loss": 0.079, "step": 1350 }, { "epoch": 2.43, "learning_rate": 8.779591836734694e-06, "loss": 0.0773, "step": 1400 }, { "epoch": 2.51, "learning_rate": 8.72857142857143e-06, "loss": 0.0764, "step": 1450 }, { "epoch": 2.6, "learning_rate": 8.677551020408164e-06, "loss": 0.0804, "step": 1500 }, { "epoch": 2.69, "learning_rate": 8.6265306122449e-06, "loss": 0.0768, "step": 1550 }, { "epoch": 2.77, "learning_rate": 8.575510204081633e-06, "loss": 0.0765, "step": 1600 }, { "epoch": 2.86, "learning_rate": 8.524489795918367e-06, "loss": 0.081, "step": 1650 }, { "epoch": 2.95, "learning_rate": 8.473469387755101e-06, "loss": 0.0768, "step": 1700 }, { "epoch": 3.03, "learning_rate": 8.422448979591837e-06, "loss": 0.0676, "step": 1750 }, { "epoch": 3.12, "learning_rate": 8.371428571428573e-06, "loss": 0.0472, "step": 1800 }, { "epoch": 3.21, "learning_rate": 8.320408163265307e-06, "loss": 0.0503, "step": 1850 }, { "epoch": 3.29, "learning_rate": 8.269387755102043e-06, "loss": 0.0499, "step": 1900 }, { "epoch": 3.38, "learning_rate": 8.218367346938777e-06, "loss": 0.0492, "step": 1950 }, { "epoch": 3.47, "learning_rate": 8.16734693877551e-06, "loss": 0.0509, "step": 2000 }, { "epoch": 3.47, "eval_loss": 0.1334686577320099, "eval_runtime": 1883.4767, "eval_samples_per_second": 4.901, "eval_steps_per_second": 0.306, "eval_wer": 20.229612213492427, "step": 2000 }, { "epoch": 3.55, "learning_rate": 8.116326530612245e-06, "loss": 0.0501, "step": 2050 }, { "epoch": 3.64, "learning_rate": 8.06530612244898e-06, "loss": 0.0496, "step": 2100 }, { "epoch": 3.73, "learning_rate": 8.014285714285715e-06, "loss": 0.0493, "step": 2150 }, { "epoch": 3.81, "learning_rate": 7.96326530612245e-06, "loss": 0.0497, "step": 2200 }, { "epoch": 3.9, "learning_rate": 7.912244897959184e-06, "loss": 0.0509, "step": 2250 }, { "epoch": 3.99, "learning_rate": 7.861224489795918e-06, "loss": 0.049, "step": 2300 }, { "epoch": 4.07, "learning_rate": 7.810204081632654e-06, "loss": 0.031, "step": 2350 }, { "epoch": 4.16, "learning_rate": 7.759183673469388e-06, "loss": 0.0288, "step": 2400 }, { "epoch": 4.25, "learning_rate": 7.708163265306124e-06, "loss": 0.0293, "step": 2450 }, { "epoch": 4.33, "learning_rate": 7.657142857142858e-06, "loss": 0.0279, "step": 2500 }, { "epoch": 4.42, "learning_rate": 7.606122448979593e-06, "loss": 0.0304, "step": 2550 }, { "epoch": 4.51, "learning_rate": 7.555102040816327e-06, "loss": 0.0303, "step": 2600 }, { "epoch": 4.59, "learning_rate": 7.504081632653062e-06, "loss": 0.0317, "step": 2650 }, { "epoch": 4.68, "learning_rate": 7.4530612244897974e-06, "loss": 0.0324, "step": 2700 }, { "epoch": 4.77, "learning_rate": 7.4020408163265315e-06, "loss": 0.0294, "step": 2750 }, { "epoch": 4.85, "learning_rate": 7.351020408163266e-06, "loss": 0.0313, "step": 2800 }, { "epoch": 4.94, "learning_rate": 7.3e-06, "loss": 0.0313, "step": 2850 }, { "epoch": 5.03, "learning_rate": 7.248979591836735e-06, "loss": 0.0265, "step": 2900 }, { "epoch": 5.11, "learning_rate": 7.197959183673469e-06, "loss": 0.0185, "step": 2950 }, { "epoch": 5.2, "learning_rate": 7.146938775510205e-06, "loss": 0.0176, "step": 3000 }, { "epoch": 5.2, "eval_loss": 0.15767353773117065, "eval_runtime": 1882.3239, "eval_samples_per_second": 4.904, "eval_steps_per_second": 0.307, "eval_wer": 20.16237121543745, "step": 3000 }, { "epoch": 5.29, "learning_rate": 7.095918367346939e-06, "loss": 0.0168, "step": 3050 }, { "epoch": 5.37, "learning_rate": 7.044897959183674e-06, "loss": 0.017, "step": 3100 }, { "epoch": 5.46, "learning_rate": 6.993877551020408e-06, "loss": 0.0174, "step": 3150 }, { "epoch": 5.55, "learning_rate": 6.942857142857144e-06, "loss": 0.0181, "step": 3200 }, { "epoch": 5.63, "learning_rate": 6.891836734693879e-06, "loss": 0.0185, "step": 3250 }, { "epoch": 5.72, "learning_rate": 6.840816326530613e-06, "loss": 0.0159, "step": 3300 }, { "epoch": 5.81, "learning_rate": 6.789795918367348e-06, "loss": 0.0185, "step": 3350 }, { "epoch": 5.89, "learning_rate": 6.738775510204082e-06, "loss": 0.0187, "step": 3400 }, { "epoch": 5.98, "learning_rate": 6.687755102040817e-06, "loss": 0.0179, "step": 3450 }, { "epoch": 6.07, "learning_rate": 6.636734693877551e-06, "loss": 0.0119, "step": 3500 }, { "epoch": 6.15, "learning_rate": 6.585714285714286e-06, "loss": 0.0094, "step": 3550 }, { "epoch": 6.24, "learning_rate": 6.53469387755102e-06, "loss": 0.0101, "step": 3600 }, { "epoch": 6.33, "learning_rate": 6.483673469387755e-06, "loss": 0.01, "step": 3650 }, { "epoch": 6.41, "learning_rate": 6.432653061224491e-06, "loss": 0.0105, "step": 3700 }, { "epoch": 6.5, "learning_rate": 6.381632653061225e-06, "loss": 0.0111, "step": 3750 }, { "epoch": 6.59, "learning_rate": 6.33061224489796e-06, "loss": 0.0101, "step": 3800 }, { "epoch": 6.67, "learning_rate": 6.279591836734694e-06, "loss": 0.0101, "step": 3850 }, { "epoch": 6.76, "learning_rate": 6.22857142857143e-06, "loss": 0.0103, "step": 3900 }, { "epoch": 6.85, "learning_rate": 6.177551020408164e-06, "loss": 0.0102, "step": 3950 }, { "epoch": 6.93, "learning_rate": 6.126530612244899e-06, "loss": 0.0108, "step": 4000 }, { "epoch": 6.93, "eval_loss": 0.1795009821653366, "eval_runtime": 1880.0918, "eval_samples_per_second": 4.909, "eval_steps_per_second": 0.307, "eval_wer": 19.51523301939158, "step": 4000 }, { "epoch": 7.02, "learning_rate": 6.075510204081633e-06, "loss": 0.0093, "step": 4050 }, { "epoch": 7.11, "learning_rate": 6.0244897959183675e-06, "loss": 0.0054, "step": 4100 }, { "epoch": 7.19, "learning_rate": 5.973469387755102e-06, "loss": 0.0056, "step": 4150 }, { "epoch": 7.28, "learning_rate": 5.922448979591837e-06, "loss": 0.006, "step": 4200 }, { "epoch": 7.37, "learning_rate": 5.871428571428572e-06, "loss": 0.0062, "step": 4250 }, { "epoch": 7.45, "learning_rate": 5.820408163265306e-06, "loss": 0.0058, "step": 4300 }, { "epoch": 7.54, "learning_rate": 5.769387755102042e-06, "loss": 0.0064, "step": 4350 }, { "epoch": 7.63, "learning_rate": 5.718367346938776e-06, "loss": 0.0059, "step": 4400 }, { "epoch": 7.71, "learning_rate": 5.667346938775511e-06, "loss": 0.0064, "step": 4450 }, { "epoch": 7.8, "learning_rate": 5.616326530612245e-06, "loss": 0.0066, "step": 4500 }, { "epoch": 7.89, "learning_rate": 5.56530612244898e-06, "loss": 0.0061, "step": 4550 }, { "epoch": 7.97, "learning_rate": 5.514285714285714e-06, "loss": 0.0064, "step": 4600 }, { "epoch": 8.06, "learning_rate": 5.46326530612245e-06, "loss": 0.0049, "step": 4650 }, { "epoch": 8.15, "learning_rate": 5.4122448979591845e-06, "loss": 0.004, "step": 4700 }, { "epoch": 8.23, "learning_rate": 5.3612244897959186e-06, "loss": 0.0034, "step": 4750 }, { "epoch": 8.32, "learning_rate": 5.310204081632654e-06, "loss": 0.0043, "step": 4800 }, { "epoch": 8.41, "learning_rate": 5.259183673469388e-06, "loss": 0.0038, "step": 4850 }, { "epoch": 8.49, "learning_rate": 5.208163265306123e-06, "loss": 0.0039, "step": 4900 }, { "epoch": 8.58, "learning_rate": 5.157142857142857e-06, "loss": 0.0048, "step": 4950 }, { "epoch": 8.67, "learning_rate": 5.106122448979592e-06, "loss": 0.0042, "step": 5000 }, { "epoch": 8.67, "eval_loss": 0.21113774180412292, "eval_runtime": 1878.1595, "eval_samples_per_second": 4.914, "eval_steps_per_second": 0.307, "eval_wer": 18.799951261558455, "step": 5000 }, { "epoch": 8.75, "learning_rate": 5.055102040816326e-06, "loss": 0.0042, "step": 5050 }, { "epoch": 8.84, "learning_rate": 5.004081632653062e-06, "loss": 0.0043, "step": 5100 }, { "epoch": 8.93, "learning_rate": 4.953061224489796e-06, "loss": 0.004, "step": 5150 }, { "epoch": 9.01, "learning_rate": 4.902040816326531e-06, "loss": 0.004, "step": 5200 }, { "epoch": 9.1, "learning_rate": 4.851020408163266e-06, "loss": 0.0021, "step": 5250 }, { "epoch": 9.19, "learning_rate": 4.800000000000001e-06, "loss": 0.002, "step": 5300 }, { "epoch": 9.27, "learning_rate": 4.748979591836735e-06, "loss": 0.0022, "step": 5350 }, { "epoch": 9.36, "learning_rate": 4.69795918367347e-06, "loss": 0.0035, "step": 5400 }, { "epoch": 9.45, "learning_rate": 4.6469387755102044e-06, "loss": 0.0027, "step": 5450 }, { "epoch": 9.53, "learning_rate": 4.595918367346939e-06, "loss": 0.0032, "step": 5500 }, { "epoch": 9.62, "learning_rate": 4.544897959183674e-06, "loss": 0.0027, "step": 5550 }, { "epoch": 9.71, "learning_rate": 4.493877551020408e-06, "loss": 0.0029, "step": 5600 }, { "epoch": 9.79, "learning_rate": 4.442857142857143e-06, "loss": 0.0024, "step": 5650 }, { "epoch": 9.88, "learning_rate": 4.391836734693878e-06, "loss": 0.0026, "step": 5700 }, { "epoch": 9.97, "learning_rate": 4.340816326530612e-06, "loss": 0.0033, "step": 5750 }, { "epoch": 10.05, "learning_rate": 4.289795918367347e-06, "loss": 0.0021, "step": 5800 }, { "epoch": 10.14, "learning_rate": 4.238775510204082e-06, "loss": 0.0017, "step": 5850 }, { "epoch": 10.23, "learning_rate": 4.187755102040817e-06, "loss": 0.002, "step": 5900 }, { "epoch": 10.31, "learning_rate": 4.136734693877552e-06, "loss": 0.0019, "step": 5950 }, { "epoch": 10.4, "learning_rate": 4.0857142857142865e-06, "loss": 0.0023, "step": 6000 }, { "epoch": 10.4, "eval_loss": 0.23070654273033142, "eval_runtime": 1877.4278, "eval_samples_per_second": 4.916, "eval_steps_per_second": 0.307, "eval_wer": 18.98948964533758, "step": 6000 }, { "epoch": 10.49, "learning_rate": 4.0346938775510206e-06, "loss": 0.0016, "step": 6050 }, { "epoch": 10.57, "learning_rate": 3.9836734693877555e-06, "loss": 0.0017, "step": 6100 }, { "epoch": 10.66, "learning_rate": 3.93265306122449e-06, "loss": 0.0018, "step": 6150 }, { "epoch": 10.75, "learning_rate": 3.881632653061224e-06, "loss": 0.002, "step": 6200 }, { "epoch": 10.83, "learning_rate": 3.830612244897959e-06, "loss": 0.0017, "step": 6250 }, { "epoch": 10.92, "learning_rate": 3.779591836734694e-06, "loss": 0.0019, "step": 6300 }, { "epoch": 11.01, "learning_rate": 3.7285714285714286e-06, "loss": 0.002, "step": 6350 }, { "epoch": 11.09, "learning_rate": 3.677551020408164e-06, "loss": 0.0013, "step": 6400 }, { "epoch": 11.18, "learning_rate": 3.6265306122448984e-06, "loss": 0.0014, "step": 6450 }, { "epoch": 11.27, "learning_rate": 3.575510204081633e-06, "loss": 0.001, "step": 6500 }, { "epoch": 11.35, "learning_rate": 3.5244897959183678e-06, "loss": 0.0011, "step": 6550 }, { "epoch": 11.44, "learning_rate": 3.4734693877551022e-06, "loss": 0.0012, "step": 6600 }, { "epoch": 11.53, "learning_rate": 3.422448979591837e-06, "loss": 0.0015, "step": 6650 }, { "epoch": 11.61, "learning_rate": 3.3714285714285716e-06, "loss": 0.0013, "step": 6700 }, { "epoch": 11.7, "learning_rate": 3.320408163265306e-06, "loss": 0.0015, "step": 6750 }, { "epoch": 11.79, "learning_rate": 3.269387755102041e-06, "loss": 0.0013, "step": 6800 }, { "epoch": 11.87, "learning_rate": 3.2183673469387754e-06, "loss": 0.0015, "step": 6850 }, { "epoch": 11.96, "learning_rate": 3.1673469387755107e-06, "loss": 0.001, "step": 6900 }, { "epoch": 12.05, "learning_rate": 3.116326530612245e-06, "loss": 0.0009, "step": 6950 }, { "epoch": 12.13, "learning_rate": 3.06530612244898e-06, "loss": 0.0007, "step": 7000 }, { "epoch": 12.13, "eval_loss": 0.24875134229660034, "eval_runtime": 1886.6078, "eval_samples_per_second": 4.892, "eval_steps_per_second": 0.306, "eval_wer": 18.44569499663795, "step": 7000 }, { "epoch": 12.22, "learning_rate": 3.0142857142857145e-06, "loss": 0.0007, "step": 7050 }, { "epoch": 12.31, "learning_rate": 2.9632653061224494e-06, "loss": 0.0007, "step": 7100 }, { "epoch": 12.39, "learning_rate": 2.912244897959184e-06, "loss": 0.0009, "step": 7150 }, { "epoch": 12.48, "learning_rate": 2.8612244897959183e-06, "loss": 0.001, "step": 7200 }, { "epoch": 12.56, "learning_rate": 2.8102040816326532e-06, "loss": 0.0009, "step": 7250 }, { "epoch": 12.65, "learning_rate": 2.7591836734693877e-06, "loss": 0.0007, "step": 7300 }, { "epoch": 12.74, "learning_rate": 2.708163265306123e-06, "loss": 0.0006, "step": 7350 }, { "epoch": 12.82, "learning_rate": 2.6571428571428575e-06, "loss": 0.0006, "step": 7400 }, { "epoch": 12.91, "learning_rate": 2.6061224489795924e-06, "loss": 0.0006, "step": 7450 }, { "epoch": 13.0, "learning_rate": 2.555102040816327e-06, "loss": 0.0006, "step": 7500 }, { "epoch": 13.08, "learning_rate": 2.5040816326530613e-06, "loss": 0.0005, "step": 7550 }, { "epoch": 13.17, "learning_rate": 2.453061224489796e-06, "loss": 0.0006, "step": 7600 }, { "epoch": 13.26, "learning_rate": 2.4020408163265306e-06, "loss": 0.0006, "step": 7650 }, { "epoch": 13.34, "learning_rate": 2.3510204081632655e-06, "loss": 0.0005, "step": 7700 }, { "epoch": 13.43, "learning_rate": 2.3000000000000004e-06, "loss": 0.0005, "step": 7750 }, { "epoch": 13.52, "learning_rate": 2.248979591836735e-06, "loss": 0.0004, "step": 7800 }, { "epoch": 13.6, "learning_rate": 2.1979591836734694e-06, "loss": 0.0006, "step": 7850 }, { "epoch": 13.69, "learning_rate": 2.1469387755102042e-06, "loss": 0.0005, "step": 7900 }, { "epoch": 13.78, "learning_rate": 2.0959183673469387e-06, "loss": 0.0005, "step": 7950 }, { "epoch": 13.86, "learning_rate": 2.0448979591836736e-06, "loss": 0.0005, "step": 8000 }, { "epoch": 13.86, "eval_loss": 0.260904461145401, "eval_runtime": 1875.5399, "eval_samples_per_second": 4.921, "eval_steps_per_second": 0.308, "eval_wer": 18.28864890722096, "step": 8000 }, { "epoch": 13.95, "learning_rate": 1.9938775510204085e-06, "loss": 0.0005, "step": 8050 }, { "epoch": 14.04, "learning_rate": 1.942857142857143e-06, "loss": 0.0005, "step": 8100 }, { "epoch": 14.12, "learning_rate": 1.8918367346938776e-06, "loss": 0.0003, "step": 8150 }, { "epoch": 14.21, "learning_rate": 1.8408163265306123e-06, "loss": 0.0004, "step": 8200 }, { "epoch": 14.3, "learning_rate": 1.7897959183673472e-06, "loss": 0.0004, "step": 8250 }, { "epoch": 14.38, "learning_rate": 1.7387755102040819e-06, "loss": 0.0003, "step": 8300 }, { "epoch": 14.47, "learning_rate": 1.6877551020408163e-06, "loss": 0.0004, "step": 8350 }, { "epoch": 14.56, "learning_rate": 1.636734693877551e-06, "loss": 0.0003, "step": 8400 }, { "epoch": 14.64, "learning_rate": 1.5857142857142857e-06, "loss": 0.0002, "step": 8450 }, { "epoch": 14.73, "learning_rate": 1.5346938775510206e-06, "loss": 0.0004, "step": 8500 }, { "epoch": 14.82, "learning_rate": 1.4836734693877553e-06, "loss": 0.0003, "step": 8550 }, { "epoch": 14.9, "learning_rate": 1.43265306122449e-06, "loss": 0.0003, "step": 8600 }, { "epoch": 14.99, "learning_rate": 1.3816326530612246e-06, "loss": 0.0004, "step": 8650 }, { "epoch": 15.08, "learning_rate": 1.3306122448979595e-06, "loss": 0.0002, "step": 8700 }, { "epoch": 15.16, "learning_rate": 1.279591836734694e-06, "loss": 0.0001, "step": 8750 }, { "epoch": 15.25, "learning_rate": 1.2285714285714286e-06, "loss": 0.0002, "step": 8800 }, { "epoch": 15.34, "learning_rate": 1.1775510204081633e-06, "loss": 0.0001, "step": 8850 }, { "epoch": 15.42, "learning_rate": 1.1265306122448982e-06, "loss": 0.0001, "step": 8900 }, { "epoch": 15.51, "learning_rate": 1.0755102040816327e-06, "loss": 0.0001, "step": 8950 }, { "epoch": 15.6, "learning_rate": 1.0244897959183673e-06, "loss": 0.0001, "step": 9000 }, { "epoch": 15.6, "eval_loss": 0.27568453550338745, "eval_runtime": 1880.4752, "eval_samples_per_second": 4.908, "eval_steps_per_second": 0.307, "eval_wer": 18.148300246851182, "step": 9000 }, { "epoch": 15.68, "learning_rate": 9.734693877551022e-07, "loss": 0.0001, "step": 9050 }, { "epoch": 15.77, "learning_rate": 9.224489795918368e-07, "loss": 0.0002, "step": 9100 }, { "epoch": 15.86, "learning_rate": 8.714285714285716e-07, "loss": 0.0001, "step": 9150 }, { "epoch": 15.94, "learning_rate": 8.204081632653062e-07, "loss": 0.0001, "step": 9200 }, { "epoch": 16.03, "learning_rate": 7.693877551020409e-07, "loss": 0.0001, "step": 9250 }, { "epoch": 16.12, "learning_rate": 7.183673469387756e-07, "loss": 0.0001, "step": 9300 }, { "epoch": 16.2, "learning_rate": 6.673469387755102e-07, "loss": 0.0001, "step": 9350 }, { "epoch": 16.29, "learning_rate": 6.163265306122449e-07, "loss": 0.0001, "step": 9400 }, { "epoch": 16.38, "learning_rate": 5.653061224489796e-07, "loss": 0.0001, "step": 9450 }, { "epoch": 16.46, "learning_rate": 5.142857142857143e-07, "loss": 0.0001, "step": 9500 }, { "epoch": 16.55, "learning_rate": 4.63265306122449e-07, "loss": 0.0001, "step": 9550 }, { "epoch": 16.64, "learning_rate": 4.1224489795918373e-07, "loss": 0.0001, "step": 9600 }, { "epoch": 16.72, "learning_rate": 3.612244897959184e-07, "loss": 0.0001, "step": 9650 }, { "epoch": 16.81, "learning_rate": 3.102040816326531e-07, "loss": 0.0001, "step": 9700 }, { "epoch": 16.9, "learning_rate": 2.5918367346938776e-07, "loss": 0.0001, "step": 9750 }, { "epoch": 16.98, "learning_rate": 2.0816326530612246e-07, "loss": 0.0001, "step": 9800 }, { "epoch": 17.07, "learning_rate": 1.5714285714285717e-07, "loss": 0.0, "step": 9850 }, { "epoch": 17.16, "learning_rate": 1.0612244897959186e-07, "loss": 0.0, "step": 9900 }, { "epoch": 17.24, "learning_rate": 5.510204081632654e-08, "loss": 0.0, "step": 9950 }, { "epoch": 17.33, "learning_rate": 4.081632653061225e-09, "loss": 0.0, "step": 10000 }, { "epoch": 17.33, "eval_loss": 0.2883334159851074, "eval_runtime": 1887.5729, "eval_samples_per_second": 4.89, "eval_steps_per_second": 0.306, "eval_wer": 18.192525869732975, "step": 10000 }, { "epoch": 17.33, "step": 10000, "total_flos": 9.232770429517824e+19, "train_loss": 0.05581043657779228, "train_runtime": 40648.5911, "train_samples_per_second": 7.872, "train_steps_per_second": 0.246 } ], "max_steps": 10000, "num_train_epochs": 18, "total_flos": 9.232770429517824e+19, "trial_name": null, "trial_params": null }