{ "best_metric": 5.9265920052317504, "best_model_checkpoint": "./logs/whisper-small-cantonese/checkpoint-3600", "epoch": 3.00555, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.3e-06, "loss": 3.1962, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.800000000000001e-06, "loss": 1.1147, "step": 50 }, { "epoch": 0.0, "learning_rate": 7.2999999999999996e-06, "loss": 0.5054, "step": 75 }, { "epoch": 0.01, "learning_rate": 9.800000000000001e-06, "loss": 0.2708, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.23e-05, "loss": 0.209, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.48e-05, "loss": 0.1911, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.73e-05, "loss": 0.1912, "step": 175 }, { "epoch": 0.01, "learning_rate": 1.9800000000000004e-05, "loss": 0.1825, "step": 200 }, { "epoch": 0.01, "eval_cer": 16.815172075533393, "eval_loss": 0.17913122475147247, "eval_runtime": 193.2105, "eval_samples_per_second": 5.171, "eval_steps_per_second": 0.647, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.23e-05, "loss": 0.1855, "step": 225 }, { "epoch": 0.01, "learning_rate": 2.48e-05, "loss": 0.1854, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.7300000000000003e-05, "loss": 0.1807, "step": 275 }, { "epoch": 0.01, "learning_rate": 2.98e-05, "loss": 0.1683, "step": 300 }, { "epoch": 0.02, "learning_rate": 3.2300000000000006e-05, "loss": 0.1775, "step": 325 }, { "epoch": 0.02, "learning_rate": 3.48e-05, "loss": 0.1745, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.73e-05, "loss": 0.1837, "step": 375 }, { "epoch": 0.02, "learning_rate": 3.9800000000000005e-05, "loss": 0.1681, "step": 400 }, { "epoch": 0.02, "eval_cer": 45.50805199051745, "eval_loss": 0.17832206189632416, "eval_runtime": 190.8176, "eval_samples_per_second": 5.235, "eval_steps_per_second": 0.655, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.23e-05, "loss": 0.1827, "step": 425 }, { "epoch": 0.02, "learning_rate": 4.4800000000000005e-05, "loss": 0.1889, "step": 450 }, { "epoch": 0.02, "learning_rate": 4.73e-05, "loss": 0.1867, "step": 475 }, { "epoch": 0.03, "learning_rate": 4.9800000000000004e-05, "loss": 0.1924, "step": 500 }, { "epoch": 0.03, "learning_rate": 4.994102564102565e-05, "loss": 0.1906, "step": 525 }, { "epoch": 0.03, "learning_rate": 4.987692307692308e-05, "loss": 0.1954, "step": 550 }, { "epoch": 0.03, "learning_rate": 4.981282051282051e-05, "loss": 0.1805, "step": 575 }, { "epoch": 0.03, "learning_rate": 4.974871794871795e-05, "loss": 0.1908, "step": 600 }, { "epoch": 0.03, "eval_cer": 16.095806425243193, "eval_loss": 0.18026253581047058, "eval_runtime": 194.9477, "eval_samples_per_second": 5.124, "eval_steps_per_second": 0.641, "step": 600 }, { "epoch": 0.03, "learning_rate": 4.9684615384615384e-05, "loss": 0.1812, "step": 625 }, { "epoch": 0.03, "learning_rate": 4.962051282051282e-05, "loss": 0.1759, "step": 650 }, { "epoch": 0.03, "learning_rate": 4.9556410256410256e-05, "loss": 0.1768, "step": 675 }, { "epoch": 0.04, "learning_rate": 4.9492307692307695e-05, "loss": 0.1803, "step": 700 }, { "epoch": 0.04, "learning_rate": 4.942820512820513e-05, "loss": 0.1823, "step": 725 }, { "epoch": 0.04, "learning_rate": 4.9364102564102567e-05, "loss": 0.1549, "step": 750 }, { "epoch": 0.04, "learning_rate": 4.93e-05, "loss": 0.1749, "step": 775 }, { "epoch": 0.04, "learning_rate": 4.923589743589744e-05, "loss": 0.1634, "step": 800 }, { "epoch": 0.04, "eval_cer": 10.537071854818933, "eval_loss": 0.17489749193191528, "eval_runtime": 196.2031, "eval_samples_per_second": 5.092, "eval_steps_per_second": 0.637, "step": 800 }, { "epoch": 0.04, "learning_rate": 4.917179487179488e-05, "loss": 0.1752, "step": 825 }, { "epoch": 0.04, "learning_rate": 4.910769230769231e-05, "loss": 0.1602, "step": 850 }, { "epoch": 0.04, "learning_rate": 4.904358974358975e-05, "loss": 0.1629, "step": 875 }, { "epoch": 0.04, "learning_rate": 4.897948717948718e-05, "loss": 0.1623, "step": 900 }, { "epoch": 0.05, "learning_rate": 4.891538461538462e-05, "loss": 0.1663, "step": 925 }, { "epoch": 0.05, "learning_rate": 4.885128205128205e-05, "loss": 0.1596, "step": 950 }, { "epoch": 0.05, "learning_rate": 4.878717948717949e-05, "loss": 0.1543, "step": 975 }, { "epoch": 0.05, "learning_rate": 4.8723076923076925e-05, "loss": 0.15, "step": 1000 }, { "epoch": 0.05, "eval_cer": 7.65143464399575, "eval_loss": 0.1505207121372223, "eval_runtime": 194.4944, "eval_samples_per_second": 5.136, "eval_steps_per_second": 0.643, "step": 1000 }, { "epoch": 0.05, "learning_rate": 4.8658974358974364e-05, "loss": 0.1561, "step": 1025 }, { "epoch": 0.05, "learning_rate": 4.8594871794871796e-05, "loss": 0.1521, "step": 1050 }, { "epoch": 0.05, "learning_rate": 4.8530769230769236e-05, "loss": 0.1609, "step": 1075 }, { "epoch": 0.06, "learning_rate": 4.8466666666666675e-05, "loss": 0.1552, "step": 1100 }, { "epoch": 0.06, "learning_rate": 4.840256410256411e-05, "loss": 0.1526, "step": 1125 }, { "epoch": 0.06, "learning_rate": 4.833846153846154e-05, "loss": 0.157, "step": 1150 }, { "epoch": 1.0, "learning_rate": 4.827435897435897e-05, "loss": 0.1468, "step": 1175 }, { "epoch": 1.0, "learning_rate": 4.821025641025641e-05, "loss": 0.1459, "step": 1200 }, { "epoch": 1.0, "eval_cer": 7.455244012098422, "eval_loss": 0.15504512190818787, "eval_runtime": 195.0837, "eval_samples_per_second": 5.121, "eval_steps_per_second": 0.641, "step": 1200 }, { "epoch": 1.0, "learning_rate": 4.8146153846153844e-05, "loss": 0.1382, "step": 1225 }, { "epoch": 1.0, "learning_rate": 4.808205128205128e-05, "loss": 0.1417, "step": 1250 }, { "epoch": 1.01, "learning_rate": 4.8017948717948715e-05, "loss": 0.1308, "step": 1275 }, { "epoch": 1.01, "learning_rate": 4.7953846153846154e-05, "loss": 0.1178, "step": 1300 }, { "epoch": 1.01, "learning_rate": 4.7889743589743594e-05, "loss": 0.1091, "step": 1325 }, { "epoch": 1.01, "learning_rate": 4.7825641025641026e-05, "loss": 0.1084, "step": 1350 }, { "epoch": 1.01, "learning_rate": 4.7761538461538465e-05, "loss": 0.1044, "step": 1375 }, { "epoch": 1.01, "learning_rate": 4.76974358974359e-05, "loss": 0.111, "step": 1400 }, { "epoch": 1.01, "eval_cer": 6.645957655521949, "eval_loss": 0.135099858045578, "eval_runtime": 193.2354, "eval_samples_per_second": 5.17, "eval_steps_per_second": 0.647, "step": 1400 }, { "epoch": 1.01, "learning_rate": 4.763333333333334e-05, "loss": 0.0939, "step": 1425 }, { "epoch": 1.01, "learning_rate": 4.756923076923077e-05, "loss": 0.0914, "step": 1450 }, { "epoch": 1.02, "learning_rate": 4.750512820512821e-05, "loss": 0.0924, "step": 1475 }, { "epoch": 1.02, "learning_rate": 4.744102564102564e-05, "loss": 0.0873, "step": 1500 }, { "epoch": 1.02, "learning_rate": 4.737692307692308e-05, "loss": 0.0796, "step": 1525 }, { "epoch": 1.02, "learning_rate": 4.731282051282051e-05, "loss": 0.0828, "step": 1550 }, { "epoch": 1.02, "learning_rate": 4.724871794871795e-05, "loss": 0.0749, "step": 1575 }, { "epoch": 1.02, "learning_rate": 4.718461538461539e-05, "loss": 0.0824, "step": 1600 }, { "epoch": 1.02, "eval_cer": 6.7277037521458345, "eval_loss": 0.13797622919082642, "eval_runtime": 193.8345, "eval_samples_per_second": 5.154, "eval_steps_per_second": 0.645, "step": 1600 }, { "epoch": 1.02, "learning_rate": 4.7120512820512823e-05, "loss": 0.0786, "step": 1625 }, { "epoch": 1.02, "learning_rate": 4.705641025641026e-05, "loss": 0.0888, "step": 1650 }, { "epoch": 1.03, "learning_rate": 4.6992307692307695e-05, "loss": 0.0738, "step": 1675 }, { "epoch": 1.03, "learning_rate": 4.6928205128205134e-05, "loss": 0.0748, "step": 1700 }, { "epoch": 1.03, "learning_rate": 4.686410256410257e-05, "loss": 0.0782, "step": 1725 }, { "epoch": 1.03, "learning_rate": 4.6800000000000006e-05, "loss": 0.0754, "step": 1750 }, { "epoch": 1.03, "learning_rate": 4.673589743589744e-05, "loss": 0.0845, "step": 1775 }, { "epoch": 1.03, "learning_rate": 4.667179487179487e-05, "loss": 0.0749, "step": 1800 }, { "epoch": 1.03, "eval_cer": 7.29175181885065, "eval_loss": 0.14153698086738586, "eval_runtime": 196.5228, "eval_samples_per_second": 5.083, "eval_steps_per_second": 0.636, "step": 1800 }, { "epoch": 1.03, "learning_rate": 4.660769230769231e-05, "loss": 0.0754, "step": 1825 }, { "epoch": 1.03, "learning_rate": 4.654358974358974e-05, "loss": 0.0733, "step": 1850 }, { "epoch": 1.04, "learning_rate": 4.647948717948718e-05, "loss": 0.0823, "step": 1875 }, { "epoch": 1.04, "learning_rate": 4.6415384615384614e-05, "loss": 0.0656, "step": 1900 }, { "epoch": 1.04, "learning_rate": 4.635128205128205e-05, "loss": 0.0643, "step": 1925 }, { "epoch": 1.04, "learning_rate": 4.6287179487179486e-05, "loss": 0.0658, "step": 1950 }, { "epoch": 1.04, "learning_rate": 4.6223076923076925e-05, "loss": 0.0652, "step": 1975 }, { "epoch": 1.04, "learning_rate": 4.615897435897436e-05, "loss": 0.0739, "step": 2000 }, { "epoch": 1.04, "eval_cer": 6.744052971470612, "eval_loss": 0.13748891651630402, "eval_runtime": 193.5163, "eval_samples_per_second": 5.162, "eval_steps_per_second": 0.646, "step": 2000 }, { "epoch": 1.04, "learning_rate": 4.6094871794871797e-05, "loss": 0.0636, "step": 2025 }, { "epoch": 1.04, "learning_rate": 4.6030769230769236e-05, "loss": 0.0727, "step": 2050 }, { "epoch": 1.05, "learning_rate": 4.596666666666667e-05, "loss": 0.0695, "step": 2075 }, { "epoch": 1.05, "learning_rate": 4.590256410256411e-05, "loss": 0.0647, "step": 2100 }, { "epoch": 1.05, "learning_rate": 4.583846153846154e-05, "loss": 0.0736, "step": 2125 }, { "epoch": 1.05, "learning_rate": 4.577435897435898e-05, "loss": 0.064, "step": 2150 }, { "epoch": 1.05, "learning_rate": 4.571025641025641e-05, "loss": 0.0628, "step": 2175 }, { "epoch": 1.05, "learning_rate": 4.564615384615385e-05, "loss": 0.0597, "step": 2200 }, { "epoch": 1.05, "eval_cer": 6.269925611052073, "eval_loss": 0.13366959989070892, "eval_runtime": 197.479, "eval_samples_per_second": 5.059, "eval_steps_per_second": 0.633, "step": 2200 }, { "epoch": 1.05, "learning_rate": 4.558205128205128e-05, "loss": 0.0616, "step": 2225 }, { "epoch": 1.05, "learning_rate": 4.551794871794872e-05, "loss": 0.0674, "step": 2250 }, { "epoch": 1.06, "learning_rate": 4.5453846153846155e-05, "loss": 0.0615, "step": 2275 }, { "epoch": 1.06, "learning_rate": 4.5389743589743594e-05, "loss": 0.0644, "step": 2300 }, { "epoch": 1.06, "learning_rate": 4.532564102564103e-05, "loss": 0.0602, "step": 2325 }, { "epoch": 2.0, "learning_rate": 4.5261538461538466e-05, "loss": 0.0609, "step": 2350 }, { "epoch": 2.0, "learning_rate": 4.51974358974359e-05, "loss": 0.0575, "step": 2375 }, { "epoch": 2.0, "learning_rate": 4.513333333333333e-05, "loss": 0.0626, "step": 2400 }, { "epoch": 2.0, "eval_cer": 6.4088939753126795, "eval_loss": 0.13802234828472137, "eval_runtime": 195.604, "eval_samples_per_second": 5.107, "eval_steps_per_second": 0.639, "step": 2400 }, { "epoch": 2.0, "learning_rate": 4.506923076923077e-05, "loss": 0.0611, "step": 2425 }, { "epoch": 2.01, "learning_rate": 4.50051282051282e-05, "loss": 0.0558, "step": 2450 }, { "epoch": 2.01, "learning_rate": 4.494102564102564e-05, "loss": 0.047, "step": 2475 }, { "epoch": 2.01, "learning_rate": 4.487692307692308e-05, "loss": 0.0517, "step": 2500 }, { "epoch": 2.01, "learning_rate": 4.481282051282051e-05, "loss": 0.0575, "step": 2525 }, { "epoch": 2.01, "learning_rate": 4.474871794871795e-05, "loss": 0.0481, "step": 2550 }, { "epoch": 2.01, "learning_rate": 4.4684615384615384e-05, "loss": 0.0477, "step": 2575 }, { "epoch": 2.01, "learning_rate": 4.4620512820512824e-05, "loss": 0.049, "step": 2600 }, { "epoch": 2.01, "eval_cer": 5.779449031308755, "eval_loss": 0.132478266954422, "eval_runtime": 194.062, "eval_samples_per_second": 5.148, "eval_steps_per_second": 0.644, "step": 2600 }, { "epoch": 2.01, "learning_rate": 4.4556410256410256e-05, "loss": 0.0455, "step": 2625 }, { "epoch": 2.02, "learning_rate": 4.4492307692307695e-05, "loss": 0.0473, "step": 2650 }, { "epoch": 2.02, "learning_rate": 4.442820512820513e-05, "loss": 0.0458, "step": 2675 }, { "epoch": 2.02, "learning_rate": 4.436410256410257e-05, "loss": 0.0432, "step": 2700 }, { "epoch": 2.02, "learning_rate": 4.43e-05, "loss": 0.0397, "step": 2725 }, { "epoch": 2.02, "learning_rate": 4.423589743589744e-05, "loss": 0.0387, "step": 2750 }, { "epoch": 2.02, "learning_rate": 4.417179487179488e-05, "loss": 0.043, "step": 2775 }, { "epoch": 2.02, "learning_rate": 4.410769230769231e-05, "loss": 0.0482, "step": 2800 }, { "epoch": 2.02, "eval_cer": 6.081909588817134, "eval_loss": 0.13691607117652893, "eval_runtime": 198.4463, "eval_samples_per_second": 5.034, "eval_steps_per_second": 0.63, "step": 2800 }, { "epoch": 2.02, "learning_rate": 4.404358974358975e-05, "loss": 0.0441, "step": 2825 }, { "epoch": 2.03, "learning_rate": 4.397948717948718e-05, "loss": 0.0385, "step": 2850 }, { "epoch": 2.03, "learning_rate": 4.391538461538462e-05, "loss": 0.0411, "step": 2875 }, { "epoch": 2.03, "learning_rate": 4.3851282051282053e-05, "loss": 0.0375, "step": 2900 }, { "epoch": 2.03, "learning_rate": 4.378717948717949e-05, "loss": 0.0416, "step": 2925 }, { "epoch": 2.03, "learning_rate": 4.3723076923076925e-05, "loss": 0.0364, "step": 2950 }, { "epoch": 2.03, "learning_rate": 4.3658974358974364e-05, "loss": 0.0384, "step": 2975 }, { "epoch": 2.03, "learning_rate": 4.35948717948718e-05, "loss": 0.0353, "step": 3000 }, { "epoch": 2.03, "eval_cer": 6.964767432355106, "eval_loss": 0.14869514107704163, "eval_runtime": 199.0379, "eval_samples_per_second": 5.019, "eval_steps_per_second": 0.628, "step": 3000 }, { "epoch": 2.03, "learning_rate": 4.353076923076923e-05, "loss": 0.0413, "step": 3025 }, { "epoch": 2.04, "learning_rate": 4.346666666666667e-05, "loss": 0.0389, "step": 3050 }, { "epoch": 2.04, "learning_rate": 4.34025641025641e-05, "loss": 0.0349, "step": 3075 }, { "epoch": 2.04, "learning_rate": 4.333846153846154e-05, "loss": 0.0346, "step": 3100 }, { "epoch": 2.04, "learning_rate": 4.327435897435897e-05, "loss": 0.0387, "step": 3125 }, { "epoch": 2.04, "learning_rate": 4.321025641025641e-05, "loss": 0.037, "step": 3150 }, { "epoch": 2.04, "learning_rate": 4.3146153846153844e-05, "loss": 0.0308, "step": 3175 }, { "epoch": 2.04, "learning_rate": 4.308205128205128e-05, "loss": 0.0352, "step": 3200 }, { "epoch": 2.04, "eval_cer": 6.425243194637456, "eval_loss": 0.14293982088565826, "eval_runtime": 194.7471, "eval_samples_per_second": 5.13, "eval_steps_per_second": 0.642, "step": 3200 }, { "epoch": 2.04, "learning_rate": 4.301794871794872e-05, "loss": 0.0393, "step": 3225 }, { "epoch": 2.05, "learning_rate": 4.2953846153846155e-05, "loss": 0.0334, "step": 3250 }, { "epoch": 2.05, "learning_rate": 4.2889743589743594e-05, "loss": 0.0342, "step": 3275 }, { "epoch": 2.05, "learning_rate": 4.2825641025641027e-05, "loss": 0.0384, "step": 3300 }, { "epoch": 2.05, "learning_rate": 4.2761538461538466e-05, "loss": 0.0328, "step": 3325 }, { "epoch": 2.05, "learning_rate": 4.26974358974359e-05, "loss": 0.0337, "step": 3350 }, { "epoch": 2.05, "learning_rate": 4.263333333333334e-05, "loss": 0.0318, "step": 3375 }, { "epoch": 2.05, "learning_rate": 4.256923076923077e-05, "loss": 0.0335, "step": 3400 }, { "epoch": 2.05, "eval_cer": 6.188179514428186, "eval_loss": 0.1369846612215042, "eval_runtime": 192.0655, "eval_samples_per_second": 5.201, "eval_steps_per_second": 0.651, "step": 3400 }, { "epoch": 2.05, "learning_rate": 4.250512820512821e-05, "loss": 0.0327, "step": 3425 }, { "epoch": 2.06, "learning_rate": 4.244102564102564e-05, "loss": 0.0359, "step": 3450 }, { "epoch": 2.06, "learning_rate": 4.237692307692308e-05, "loss": 0.0335, "step": 3475 }, { "epoch": 3.0, "learning_rate": 4.231282051282052e-05, "loss": 0.0349, "step": 3500 }, { "epoch": 3.0, "learning_rate": 4.224871794871795e-05, "loss": 0.0328, "step": 3525 }, { "epoch": 3.0, "learning_rate": 4.218461538461539e-05, "loss": 0.0346, "step": 3550 }, { "epoch": 3.0, "learning_rate": 4.2120512820512824e-05, "loss": 0.0343, "step": 3575 }, { "epoch": 3.01, "learning_rate": 4.2056410256410256e-05, "loss": 0.0298, "step": 3600 }, { "epoch": 3.01, "eval_cer": 5.9265920052317504, "eval_loss": 0.1369575709104538, "eval_runtime": 194.5941, "eval_samples_per_second": 5.134, "eval_steps_per_second": 0.642, "step": 3600 } ], "max_steps": 20000, "num_train_epochs": 9223372036854775807, "total_flos": 5.817189065490432e+19, "trial_name": null, "trial_params": null }