|
{ |
|
"best_metric": 5.9265920052317504, |
|
"best_model_checkpoint": "./logs/whisper-small-cantonese/checkpoint-3600", |
|
"epoch": 3.00555, |
|
"global_step": 3600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.3e-06, |
|
"loss": 3.1962, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 1.1147, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 7.2999999999999996e-06, |
|
"loss": 0.5054, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.2708, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.23e-05, |
|
"loss": 0.209, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.48e-05, |
|
"loss": 0.1911, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.73e-05, |
|
"loss": 0.1912, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9800000000000004e-05, |
|
"loss": 0.1825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_cer": 16.815172075533393, |
|
"eval_loss": 0.17913122475147247, |
|
"eval_runtime": 193.2105, |
|
"eval_samples_per_second": 5.171, |
|
"eval_steps_per_second": 0.647, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.23e-05, |
|
"loss": 0.1855, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.48e-05, |
|
"loss": 0.1854, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.7300000000000003e-05, |
|
"loss": 0.1807, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.98e-05, |
|
"loss": 0.1683, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.2300000000000006e-05, |
|
"loss": 0.1775, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.48e-05, |
|
"loss": 0.1745, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.73e-05, |
|
"loss": 0.1837, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.9800000000000005e-05, |
|
"loss": 0.1681, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_cer": 45.50805199051745, |
|
"eval_loss": 0.17832206189632416, |
|
"eval_runtime": 190.8176, |
|
"eval_samples_per_second": 5.235, |
|
"eval_steps_per_second": 0.655, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.23e-05, |
|
"loss": 0.1827, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.4800000000000005e-05, |
|
"loss": 0.1889, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.73e-05, |
|
"loss": 0.1867, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9800000000000004e-05, |
|
"loss": 0.1924, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.994102564102565e-05, |
|
"loss": 0.1906, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.987692307692308e-05, |
|
"loss": 0.1954, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.981282051282051e-05, |
|
"loss": 0.1805, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.974871794871795e-05, |
|
"loss": 0.1908, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_cer": 16.095806425243193, |
|
"eval_loss": 0.18026253581047058, |
|
"eval_runtime": 194.9477, |
|
"eval_samples_per_second": 5.124, |
|
"eval_steps_per_second": 0.641, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9684615384615384e-05, |
|
"loss": 0.1812, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.962051282051282e-05, |
|
"loss": 0.1759, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9556410256410256e-05, |
|
"loss": 0.1768, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9492307692307695e-05, |
|
"loss": 0.1803, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.942820512820513e-05, |
|
"loss": 0.1823, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9364102564102567e-05, |
|
"loss": 0.1549, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.93e-05, |
|
"loss": 0.1749, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.923589743589744e-05, |
|
"loss": 0.1634, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_cer": 10.537071854818933, |
|
"eval_loss": 0.17489749193191528, |
|
"eval_runtime": 196.2031, |
|
"eval_samples_per_second": 5.092, |
|
"eval_steps_per_second": 0.637, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.917179487179488e-05, |
|
"loss": 0.1752, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.910769230769231e-05, |
|
"loss": 0.1602, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.904358974358975e-05, |
|
"loss": 0.1629, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.897948717948718e-05, |
|
"loss": 0.1623, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.891538461538462e-05, |
|
"loss": 0.1663, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.885128205128205e-05, |
|
"loss": 0.1596, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.878717948717949e-05, |
|
"loss": 0.1543, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.8723076923076925e-05, |
|
"loss": 0.15, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_cer": 7.65143464399575, |
|
"eval_loss": 0.1505207121372223, |
|
"eval_runtime": 194.4944, |
|
"eval_samples_per_second": 5.136, |
|
"eval_steps_per_second": 0.643, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.8658974358974364e-05, |
|
"loss": 0.1561, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.8594871794871796e-05, |
|
"loss": 0.1521, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.8530769230769236e-05, |
|
"loss": 0.1609, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8466666666666675e-05, |
|
"loss": 0.1552, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.840256410256411e-05, |
|
"loss": 0.1526, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.833846153846154e-05, |
|
"loss": 0.157, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.827435897435897e-05, |
|
"loss": 0.1468, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.821025641025641e-05, |
|
"loss": 0.1459, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_cer": 7.455244012098422, |
|
"eval_loss": 0.15504512190818787, |
|
"eval_runtime": 195.0837, |
|
"eval_samples_per_second": 5.121, |
|
"eval_steps_per_second": 0.641, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8146153846153844e-05, |
|
"loss": 0.1382, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.808205128205128e-05, |
|
"loss": 0.1417, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8017948717948715e-05, |
|
"loss": 0.1308, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.7953846153846154e-05, |
|
"loss": 0.1178, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.7889743589743594e-05, |
|
"loss": 0.1091, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.7825641025641026e-05, |
|
"loss": 0.1084, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.7761538461538465e-05, |
|
"loss": 0.1044, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.76974358974359e-05, |
|
"loss": 0.111, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_cer": 6.645957655521949, |
|
"eval_loss": 0.135099858045578, |
|
"eval_runtime": 193.2354, |
|
"eval_samples_per_second": 5.17, |
|
"eval_steps_per_second": 0.647, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.763333333333334e-05, |
|
"loss": 0.0939, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.756923076923077e-05, |
|
"loss": 0.0914, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.750512820512821e-05, |
|
"loss": 0.0924, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.744102564102564e-05, |
|
"loss": 0.0873, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.737692307692308e-05, |
|
"loss": 0.0796, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.731282051282051e-05, |
|
"loss": 0.0828, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.724871794871795e-05, |
|
"loss": 0.0749, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.718461538461539e-05, |
|
"loss": 0.0824, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_cer": 6.7277037521458345, |
|
"eval_loss": 0.13797622919082642, |
|
"eval_runtime": 193.8345, |
|
"eval_samples_per_second": 5.154, |
|
"eval_steps_per_second": 0.645, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7120512820512823e-05, |
|
"loss": 0.0786, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.705641025641026e-05, |
|
"loss": 0.0888, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.6992307692307695e-05, |
|
"loss": 0.0738, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.6928205128205134e-05, |
|
"loss": 0.0748, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.686410256410257e-05, |
|
"loss": 0.0782, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.6800000000000006e-05, |
|
"loss": 0.0754, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.673589743589744e-05, |
|
"loss": 0.0845, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.667179487179487e-05, |
|
"loss": 0.0749, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_cer": 7.29175181885065, |
|
"eval_loss": 0.14153698086738586, |
|
"eval_runtime": 196.5228, |
|
"eval_samples_per_second": 5.083, |
|
"eval_steps_per_second": 0.636, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.660769230769231e-05, |
|
"loss": 0.0754, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.654358974358974e-05, |
|
"loss": 0.0733, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.647948717948718e-05, |
|
"loss": 0.0823, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.6415384615384614e-05, |
|
"loss": 0.0656, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.635128205128205e-05, |
|
"loss": 0.0643, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.6287179487179486e-05, |
|
"loss": 0.0658, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.6223076923076925e-05, |
|
"loss": 0.0652, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.615897435897436e-05, |
|
"loss": 0.0739, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_cer": 6.744052971470612, |
|
"eval_loss": 0.13748891651630402, |
|
"eval_runtime": 193.5163, |
|
"eval_samples_per_second": 5.162, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.6094871794871797e-05, |
|
"loss": 0.0636, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.6030769230769236e-05, |
|
"loss": 0.0727, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.596666666666667e-05, |
|
"loss": 0.0695, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.590256410256411e-05, |
|
"loss": 0.0647, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.583846153846154e-05, |
|
"loss": 0.0736, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.577435897435898e-05, |
|
"loss": 0.064, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.571025641025641e-05, |
|
"loss": 0.0628, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.564615384615385e-05, |
|
"loss": 0.0597, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_cer": 6.269925611052073, |
|
"eval_loss": 0.13366959989070892, |
|
"eval_runtime": 197.479, |
|
"eval_samples_per_second": 5.059, |
|
"eval_steps_per_second": 0.633, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.558205128205128e-05, |
|
"loss": 0.0616, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.551794871794872e-05, |
|
"loss": 0.0674, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.5453846153846155e-05, |
|
"loss": 0.0615, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.5389743589743594e-05, |
|
"loss": 0.0644, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.532564102564103e-05, |
|
"loss": 0.0602, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.5261538461538466e-05, |
|
"loss": 0.0609, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.51974358974359e-05, |
|
"loss": 0.0575, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.513333333333333e-05, |
|
"loss": 0.0626, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 6.4088939753126795, |
|
"eval_loss": 0.13802234828472137, |
|
"eval_runtime": 195.604, |
|
"eval_samples_per_second": 5.107, |
|
"eval_steps_per_second": 0.639, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.506923076923077e-05, |
|
"loss": 0.0611, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.50051282051282e-05, |
|
"loss": 0.0558, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.494102564102564e-05, |
|
"loss": 0.047, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.487692307692308e-05, |
|
"loss": 0.0517, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.481282051282051e-05, |
|
"loss": 0.0575, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.474871794871795e-05, |
|
"loss": 0.0481, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.4684615384615384e-05, |
|
"loss": 0.0477, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.4620512820512824e-05, |
|
"loss": 0.049, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_cer": 5.779449031308755, |
|
"eval_loss": 0.132478266954422, |
|
"eval_runtime": 194.062, |
|
"eval_samples_per_second": 5.148, |
|
"eval_steps_per_second": 0.644, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.4556410256410256e-05, |
|
"loss": 0.0455, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.4492307692307695e-05, |
|
"loss": 0.0473, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.442820512820513e-05, |
|
"loss": 0.0458, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.436410256410257e-05, |
|
"loss": 0.0432, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.43e-05, |
|
"loss": 0.0397, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.423589743589744e-05, |
|
"loss": 0.0387, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.417179487179488e-05, |
|
"loss": 0.043, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.410769230769231e-05, |
|
"loss": 0.0482, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_cer": 6.081909588817134, |
|
"eval_loss": 0.13691607117652893, |
|
"eval_runtime": 198.4463, |
|
"eval_samples_per_second": 5.034, |
|
"eval_steps_per_second": 0.63, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.404358974358975e-05, |
|
"loss": 0.0441, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.397948717948718e-05, |
|
"loss": 0.0385, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.391538461538462e-05, |
|
"loss": 0.0411, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.3851282051282053e-05, |
|
"loss": 0.0375, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.378717948717949e-05, |
|
"loss": 0.0416, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.3723076923076925e-05, |
|
"loss": 0.0364, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.3658974358974364e-05, |
|
"loss": 0.0384, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.35948717948718e-05, |
|
"loss": 0.0353, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_cer": 6.964767432355106, |
|
"eval_loss": 0.14869514107704163, |
|
"eval_runtime": 199.0379, |
|
"eval_samples_per_second": 5.019, |
|
"eval_steps_per_second": 0.628, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 4.353076923076923e-05, |
|
"loss": 0.0413, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.346666666666667e-05, |
|
"loss": 0.0389, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.34025641025641e-05, |
|
"loss": 0.0349, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.333846153846154e-05, |
|
"loss": 0.0346, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.327435897435897e-05, |
|
"loss": 0.0387, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.321025641025641e-05, |
|
"loss": 0.037, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.3146153846153844e-05, |
|
"loss": 0.0308, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.308205128205128e-05, |
|
"loss": 0.0352, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_cer": 6.425243194637456, |
|
"eval_loss": 0.14293982088565826, |
|
"eval_runtime": 194.7471, |
|
"eval_samples_per_second": 5.13, |
|
"eval_steps_per_second": 0.642, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.301794871794872e-05, |
|
"loss": 0.0393, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.2953846153846155e-05, |
|
"loss": 0.0334, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.2889743589743594e-05, |
|
"loss": 0.0342, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.2825641025641027e-05, |
|
"loss": 0.0384, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.2761538461538466e-05, |
|
"loss": 0.0328, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.26974358974359e-05, |
|
"loss": 0.0337, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.263333333333334e-05, |
|
"loss": 0.0318, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.256923076923077e-05, |
|
"loss": 0.0335, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_cer": 6.188179514428186, |
|
"eval_loss": 0.1369846612215042, |
|
"eval_runtime": 192.0655, |
|
"eval_samples_per_second": 5.201, |
|
"eval_steps_per_second": 0.651, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 4.250512820512821e-05, |
|
"loss": 0.0327, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.244102564102564e-05, |
|
"loss": 0.0359, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.237692307692308e-05, |
|
"loss": 0.0335, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.231282051282052e-05, |
|
"loss": 0.0349, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.224871794871795e-05, |
|
"loss": 0.0328, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.218461538461539e-05, |
|
"loss": 0.0346, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.2120512820512824e-05, |
|
"loss": 0.0343, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.2056410256410256e-05, |
|
"loss": 0.0298, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_cer": 5.9265920052317504, |
|
"eval_loss": 0.1369575709104538, |
|
"eval_runtime": 194.5941, |
|
"eval_samples_per_second": 5.134, |
|
"eval_steps_per_second": 0.642, |
|
"step": 3600 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 9223372036854775807, |
|
"total_flos": 5.817189065490432e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|