{ "best_metric": null, "best_model_checkpoint": null, "epoch": 96.15347721822542, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.96, "learning_rate": 4.9500000000000004e-05, "loss": 14.732, "step": 100 }, { "epoch": 1.92, "learning_rate": 9.95e-05, "loss": 4.5706, "step": 200 }, { "epoch": 2.88, "learning_rate": 0.0001495, "loss": 3.4967, "step": 300 }, { "epoch": 3.84, "learning_rate": 0.00019950000000000002, "loss": 3.3326, "step": 400 }, { "epoch": 4.81, "learning_rate": 0.0002495, "loss": 3.0304, "step": 500 }, { "epoch": 4.81, "eval_loss": 1.5675914287567139, "eval_runtime": 78.6195, "eval_samples_per_second": 14.144, "eval_steps_per_second": 3.536, "eval_wer": 1.0554260089686098, "step": 500 }, { "epoch": 5.77, "learning_rate": 0.0002995, "loss": 2.0254, "step": 600 }, { "epoch": 6.73, "learning_rate": 0.0003495, "loss": 1.7226, "step": 700 }, { "epoch": 7.69, "learning_rate": 0.0003995, "loss": 1.6212, "step": 800 }, { "epoch": 8.65, "learning_rate": 0.00044950000000000003, "loss": 1.5773, "step": 900 }, { "epoch": 9.61, "learning_rate": 0.0004995, "loss": 1.5263, "step": 1000 }, { "epoch": 9.61, "eval_loss": 0.46925902366638184, "eval_runtime": 52.7689, "eval_samples_per_second": 21.073, "eval_steps_per_second": 5.268, "eval_wer": 0.8023318385650224, "step": 1000 }, { "epoch": 10.58, "learning_rate": 0.0005495, "loss": 1.5324, "step": 1100 }, { "epoch": 11.54, "learning_rate": 0.0005995000000000001, "loss": 1.5058, "step": 1200 }, { "epoch": 12.5, "learning_rate": 0.0006495, "loss": 1.512, "step": 1300 }, { "epoch": 13.46, "learning_rate": 0.0006995, "loss": 1.5222, "step": 1400 }, { "epoch": 14.42, "learning_rate": 0.0007495000000000001, "loss": 1.5299, "step": 1500 }, { "epoch": 14.42, "eval_loss": 0.43676647543907166, "eval_runtime": 52.3008, "eval_samples_per_second": 21.262, "eval_steps_per_second": 5.315, "eval_wer": 0.7311210762331839, "step": 1500 }, { "epoch": 15.38, "learning_rate": 0.0007995, "loss": 1.5168, "step": 1600 }, { "epoch": 16.35, "learning_rate": 0.0008495000000000001, "loss": 1.5392, "step": 1700 }, { "epoch": 17.31, "learning_rate": 0.0008995, "loss": 1.5365, "step": 1800 }, { "epoch": 18.27, "learning_rate": 0.0009495, "loss": 1.4991, "step": 1900 }, { "epoch": 19.23, "learning_rate": 0.0009995000000000002, "loss": 1.5063, "step": 2000 }, { "epoch": 19.23, "eval_loss": 0.43596330285072327, "eval_runtime": 50.4313, "eval_samples_per_second": 22.05, "eval_steps_per_second": 5.512, "eval_wer": 0.7302242152466367, "step": 2000 }, { "epoch": 20.19, "learning_rate": 0.0009882142857142856, "loss": 1.5283, "step": 2100 }, { "epoch": 21.15, "learning_rate": 0.0009763095238095238, "loss": 1.5146, "step": 2200 }, { "epoch": 22.12, "learning_rate": 0.0009644047619047619, "loss": 1.5236, "step": 2300 }, { "epoch": 23.08, "learning_rate": 0.0009525, "loss": 1.4972, "step": 2400 }, { "epoch": 24.04, "learning_rate": 0.0009405952380952381, "loss": 1.455, "step": 2500 }, { "epoch": 24.04, "eval_loss": 0.4213278293609619, "eval_runtime": 50.1354, "eval_samples_per_second": 22.18, "eval_steps_per_second": 5.545, "eval_wer": 0.6692376681614349, "step": 2500 }, { "epoch": 25.0, "learning_rate": 0.0009286904761904762, "loss": 1.4424, "step": 2600 }, { "epoch": 25.96, "learning_rate": 0.0009167857142857144, "loss": 1.4434, "step": 2700 }, { "epoch": 26.92, "learning_rate": 0.0009048809523809524, "loss": 1.4513, "step": 2800 }, { "epoch": 27.88, "learning_rate": 0.0008929761904761905, "loss": 1.4328, "step": 2900 }, { "epoch": 28.84, "learning_rate": 0.0008811904761904762, "loss": 1.4755, "step": 3000 }, { "epoch": 28.84, "eval_loss": 0.4329236149787903, "eval_runtime": 50.0668, "eval_samples_per_second": 22.21, "eval_steps_per_second": 5.553, "eval_wer": 0.5942600896860987, "step": 3000 }, { "epoch": 29.81, "learning_rate": 0.0008692857142857144, "loss": 1.4344, "step": 3100 }, { "epoch": 30.77, "learning_rate": 0.0008573809523809523, "loss": 1.4326, "step": 3200 }, { "epoch": 31.73, "learning_rate": 0.0008454761904761905, "loss": 1.4016, "step": 3300 }, { "epoch": 32.69, "learning_rate": 0.0008335714285714285, "loss": 1.3705, "step": 3400 }, { "epoch": 33.65, "learning_rate": 0.0008216666666666667, "loss": 1.352, "step": 3500 }, { "epoch": 33.65, "eval_loss": 0.40741658210754395, "eval_runtime": 76.8047, "eval_samples_per_second": 14.478, "eval_steps_per_second": 3.62, "eval_wer": 0.5765022421524664, "step": 3500 }, { "epoch": 34.61, "learning_rate": 0.0008097619047619048, "loss": 1.3511, "step": 3600 }, { "epoch": 35.58, "learning_rate": 0.0007978571428571428, "loss": 1.3237, "step": 3700 }, { "epoch": 36.54, "learning_rate": 0.000785952380952381, "loss": 1.3165, "step": 3800 }, { "epoch": 37.5, "learning_rate": 0.0007740476190476191, "loss": 1.3342, "step": 3900 }, { "epoch": 38.46, "learning_rate": 0.0007621428571428572, "loss": 1.3122, "step": 4000 }, { "epoch": 38.46, "eval_loss": 0.38659143447875977, "eval_runtime": 80.318, "eval_samples_per_second": 13.845, "eval_steps_per_second": 3.461, "eval_wer": 0.56304932735426, "step": 4000 }, { "epoch": 39.42, "learning_rate": 0.0007502380952380953, "loss": 1.3098, "step": 4100 }, { "epoch": 40.38, "learning_rate": 0.0007383333333333334, "loss": 1.3253, "step": 4200 }, { "epoch": 41.35, "learning_rate": 0.0007264285714285714, "loss": 1.2787, "step": 4300 }, { "epoch": 42.31, "learning_rate": 0.0007146428571428572, "loss": 1.274, "step": 4400 }, { "epoch": 43.27, "learning_rate": 0.0007027380952380952, "loss": 1.2799, "step": 4500 }, { "epoch": 43.27, "eval_loss": 0.3859865069389343, "eval_runtime": 77.9764, "eval_samples_per_second": 14.261, "eval_steps_per_second": 3.565, "eval_wer": 0.5479820627802691, "step": 4500 }, { "epoch": 44.23, "learning_rate": 0.0006908333333333333, "loss": 1.2525, "step": 4600 }, { "epoch": 45.19, "learning_rate": 0.0006789285714285714, "loss": 1.2245, "step": 4700 }, { "epoch": 46.15, "learning_rate": 0.0006670238095238096, "loss": 1.2158, "step": 4800 }, { "epoch": 47.12, "learning_rate": 0.0006551190476190476, "loss": 1.2028, "step": 4900 }, { "epoch": 48.08, "learning_rate": 0.0006432142857142857, "loss": 1.212, "step": 5000 }, { "epoch": 48.08, "eval_loss": 0.358958899974823, "eval_runtime": 78.2642, "eval_samples_per_second": 14.208, "eval_steps_per_second": 3.552, "eval_wer": 0.5316591928251121, "step": 5000 }, { "epoch": 49.04, "learning_rate": 0.0006313095238095238, "loss": 1.1808, "step": 5100 }, { "epoch": 50.0, "learning_rate": 0.0006194047619047619, "loss": 1.1783, "step": 5200 }, { "epoch": 50.96, "learning_rate": 0.0006075000000000001, "loss": 1.1675, "step": 5300 }, { "epoch": 51.92, "learning_rate": 0.0005955952380952381, "loss": 1.1867, "step": 5400 }, { "epoch": 52.88, "learning_rate": 0.0005836904761904763, "loss": 1.1645, "step": 5500 }, { "epoch": 52.88, "eval_loss": 0.328298419713974, "eval_runtime": 77.7284, "eval_samples_per_second": 14.306, "eval_steps_per_second": 3.577, "eval_wer": 0.475695067264574, "step": 5500 }, { "epoch": 53.84, "learning_rate": 0.0005717857142857142, "loss": 1.1407, "step": 5600 }, { "epoch": 54.81, "learning_rate": 0.0005598809523809523, "loss": 1.1217, "step": 5700 }, { "epoch": 55.77, "learning_rate": 0.0005479761904761905, "loss": 1.118, "step": 5800 }, { "epoch": 56.73, "learning_rate": 0.0005360714285714285, "loss": 1.0727, "step": 5900 }, { "epoch": 57.69, "learning_rate": 0.0005241666666666667, "loss": 1.0854, "step": 6000 }, { "epoch": 57.69, "eval_loss": 0.3161650598049164, "eval_runtime": 82.3243, "eval_samples_per_second": 13.508, "eval_steps_per_second": 3.377, "eval_wer": 0.4686995515695067, "step": 6000 }, { "epoch": 58.65, "learning_rate": 0.0005122619047619048, "loss": 1.0673, "step": 6100 }, { "epoch": 59.61, "learning_rate": 0.0005003571428571429, "loss": 1.0595, "step": 6200 }, { "epoch": 60.58, "learning_rate": 0.000488452380952381, "loss": 1.0474, "step": 6300 }, { "epoch": 61.54, "learning_rate": 0.00047654761904761906, "loss": 1.0424, "step": 6400 }, { "epoch": 62.5, "learning_rate": 0.00046464285714285715, "loss": 1.0292, "step": 6500 }, { "epoch": 62.5, "eval_loss": 0.3125934600830078, "eval_runtime": 78.0974, "eval_samples_per_second": 14.239, "eval_steps_per_second": 3.56, "eval_wer": 0.44161434977578473, "step": 6500 }, { "epoch": 63.46, "learning_rate": 0.00045285714285714287, "loss": 1.0259, "step": 6600 }, { "epoch": 64.42, "learning_rate": 0.00044095238095238096, "loss": 1.0141, "step": 6700 }, { "epoch": 65.38, "learning_rate": 0.00042916666666666667, "loss": 0.9839, "step": 6800 }, { "epoch": 66.35, "learning_rate": 0.00041726190476190476, "loss": 0.9553, "step": 6900 }, { "epoch": 67.31, "learning_rate": 0.00040535714285714285, "loss": 0.9607, "step": 7000 }, { "epoch": 67.31, "eval_loss": 0.2990323007106781, "eval_runtime": 78.5216, "eval_samples_per_second": 14.162, "eval_steps_per_second": 3.54, "eval_wer": 0.40663677130044845, "step": 7000 }, { "epoch": 68.27, "learning_rate": 0.00039345238095238094, "loss": 0.944, "step": 7100 }, { "epoch": 69.23, "learning_rate": 0.00038154761904761903, "loss": 0.9203, "step": 7200 }, { "epoch": 70.19, "learning_rate": 0.0003696428571428572, "loss": 0.9164, "step": 7300 }, { "epoch": 71.15, "learning_rate": 0.00035773809523809527, "loss": 0.8996, "step": 7400 }, { "epoch": 72.12, "learning_rate": 0.00034583333333333335, "loss": 0.9156, "step": 7500 }, { "epoch": 72.12, "eval_loss": 0.28695425391197205, "eval_runtime": 77.1644, "eval_samples_per_second": 14.411, "eval_steps_per_second": 3.603, "eval_wer": 0.4008968609865471, "step": 7500 }, { "epoch": 73.08, "learning_rate": 0.0003339285714285714, "loss": 0.8916, "step": 7600 }, { "epoch": 74.04, "learning_rate": 0.00032202380952380953, "loss": 0.8818, "step": 7700 }, { "epoch": 75.0, "learning_rate": 0.0003101190476190476, "loss": 0.8714, "step": 7800 }, { "epoch": 75.96, "learning_rate": 0.0002982142857142857, "loss": 0.8529, "step": 7900 }, { "epoch": 76.92, "learning_rate": 0.0002863095238095238, "loss": 0.8329, "step": 8000 }, { "epoch": 76.92, "eval_loss": 0.27907443046569824, "eval_runtime": 76.6666, "eval_samples_per_second": 14.504, "eval_steps_per_second": 3.626, "eval_wer": 0.39085201793721974, "step": 8000 }, { "epoch": 77.88, "learning_rate": 0.00027440476190476195, "loss": 0.822, "step": 8100 }, { "epoch": 78.84, "learning_rate": 0.00026250000000000004, "loss": 0.8164, "step": 8200 }, { "epoch": 79.81, "learning_rate": 0.0002505952380952381, "loss": 0.8086, "step": 8300 }, { "epoch": 80.77, "learning_rate": 0.00023869047619047622, "loss": 0.7969, "step": 8400 }, { "epoch": 81.73, "learning_rate": 0.00022678571428571428, "loss": 0.7979, "step": 8500 }, { "epoch": 81.73, "eval_loss": 0.27704960107803345, "eval_runtime": 76.769, "eval_samples_per_second": 14.485, "eval_steps_per_second": 3.621, "eval_wer": 0.3669955156950673, "step": 8500 }, { "epoch": 82.69, "learning_rate": 0.0002148809523809524, "loss": 0.7862, "step": 8600 }, { "epoch": 83.65, "learning_rate": 0.0002029761904761905, "loss": 0.7707, "step": 8700 }, { "epoch": 84.61, "learning_rate": 0.00019107142857142855, "loss": 0.7472, "step": 8800 }, { "epoch": 85.58, "learning_rate": 0.00017916666666666667, "loss": 0.7583, "step": 8900 }, { "epoch": 86.54, "learning_rate": 0.00016726190476190476, "loss": 0.7144, "step": 9000 }, { "epoch": 86.54, "eval_loss": 0.2840667963027954, "eval_runtime": 77.8679, "eval_samples_per_second": 14.281, "eval_steps_per_second": 3.57, "eval_wer": 0.36609865470852015, "step": 9000 }, { "epoch": 87.5, "learning_rate": 0.00015535714285714287, "loss": 0.7302, "step": 9100 }, { "epoch": 88.46, "learning_rate": 0.0001435714285714286, "loss": 0.7214, "step": 9200 }, { "epoch": 89.42, "learning_rate": 0.00013166666666666665, "loss": 0.7179, "step": 9300 }, { "epoch": 90.38, "learning_rate": 0.00011976190476190477, "loss": 0.7037, "step": 9400 }, { "epoch": 91.35, "learning_rate": 0.00010785714285714286, "loss": 0.6997, "step": 9500 }, { "epoch": 91.35, "eval_loss": 0.2721162438392639, "eval_runtime": 77.3441, "eval_samples_per_second": 14.377, "eval_steps_per_second": 3.594, "eval_wer": 0.3485201793721973, "step": 9500 }, { "epoch": 92.31, "learning_rate": 9.595238095238096e-05, "loss": 0.676, "step": 9600 }, { "epoch": 93.27, "learning_rate": 8.404761904761905e-05, "loss": 0.6748, "step": 9700 }, { "epoch": 94.23, "learning_rate": 7.214285714285715e-05, "loss": 0.6668, "step": 9800 }, { "epoch": 95.19, "learning_rate": 6.023809523809524e-05, "loss": 0.662, "step": 9900 }, { "epoch": 96.15, "learning_rate": 4.8333333333333334e-05, "loss": 0.6568, "step": 10000 }, { "epoch": 96.15, "eval_loss": 0.2680588364601135, "eval_runtime": 76.864, "eval_samples_per_second": 14.467, "eval_steps_per_second": 3.617, "eval_wer": 0.34367713004484307, "step": 10000 }, { "epoch": 96.15, "step": 10000, "total_flos": 4.5263494937120735e+19, "train_loss": 0.0, "train_runtime": 189.2748, "train_samples_per_second": 176.252, "train_steps_per_second": 2.747 } ], "max_steps": 520, "num_train_epochs": 10, "total_flos": 4.5263494937120735e+19, "trial_name": null, "trial_params": null }