{ "best_metric": 0.2793618440628052, "best_model_checkpoint": "ai-light-dance_drums_ft_pretrain_wav2vec2-base-new_onset-idmt-2/checkpoint-279", "epoch": 100.0, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 0.30894508957862854, "eval_runtime": 2.673, "eval_samples_per_second": 6.36, "eval_steps_per_second": 1.871, "eval_wer": 0.28555555555555556, "step": 9 }, { "epoch": 1.11, "learning_rate": 9.999999999999999e-05, "loss": 0.2871, "step": 10 }, { "epoch": 2.0, "eval_loss": 0.3208143711090088, "eval_runtime": 2.862, "eval_samples_per_second": 5.94, "eval_steps_per_second": 1.747, "eval_wer": 0.28, "step": 18 }, { "epoch": 2.22, "learning_rate": 0.00019999999999999998, "loss": 0.2997, "step": 20 }, { "epoch": 3.0, "eval_loss": 0.39483940601348877, "eval_runtime": 2.7822, "eval_samples_per_second": 6.11, "eval_steps_per_second": 1.797, "eval_wer": 0.2877777777777778, "step": 27 }, { "epoch": 3.33, "learning_rate": 0.0003, "loss": 0.299, "step": 30 }, { "epoch": 4.0, "eval_loss": 0.3137438893318176, "eval_runtime": 2.7766, "eval_samples_per_second": 6.123, "eval_steps_per_second": 1.801, "eval_wer": 0.3011111111111111, "step": 36 }, { "epoch": 4.44, "learning_rate": 0.00029655172413793103, "loss": 0.3462, "step": 40 }, { "epoch": 5.0, "eval_loss": 0.30665457248687744, "eval_runtime": 2.8279, "eval_samples_per_second": 6.012, "eval_steps_per_second": 1.768, "eval_wer": 0.2688888888888889, "step": 45 }, { "epoch": 5.56, "learning_rate": 0.00029310344827586203, "loss": 0.3098, "step": 50 }, { "epoch": 6.0, "eval_loss": 0.3271433115005493, "eval_runtime": 2.7609, "eval_samples_per_second": 6.157, "eval_steps_per_second": 1.811, "eval_wer": 0.2811111111111111, "step": 54 }, { "epoch": 6.67, "learning_rate": 0.0002896551724137931, "loss": 0.2812, "step": 60 }, { "epoch": 7.0, "eval_loss": 0.4907187819480896, "eval_runtime": 2.7909, "eval_samples_per_second": 6.091, "eval_steps_per_second": 1.792, "eval_wer": 0.26, "step": 63 }, { "epoch": 7.78, "learning_rate": 0.0002862068965517241, "loss": 0.3151, "step": 70 }, { "epoch": 8.0, "eval_loss": 0.5852011442184448, "eval_runtime": 2.7422, "eval_samples_per_second": 6.199, "eval_steps_per_second": 1.823, "eval_wer": 0.2777777777777778, "step": 72 }, { "epoch": 8.89, "learning_rate": 0.00028275862068965514, "loss": 0.3038, "step": 80 }, { "epoch": 9.0, "eval_loss": 0.2981448471546173, "eval_runtime": 2.7964, "eval_samples_per_second": 6.079, "eval_steps_per_second": 1.788, "eval_wer": 0.27666666666666667, "step": 81 }, { "epoch": 10.0, "learning_rate": 0.0002793103448275862, "loss": 0.3248, "step": 90 }, { "epoch": 10.0, "eval_loss": 0.3129429519176483, "eval_runtime": 2.7598, "eval_samples_per_second": 6.16, "eval_steps_per_second": 1.812, "eval_wer": 0.2811111111111111, "step": 90 }, { "epoch": 11.0, "eval_loss": 0.4089965522289276, "eval_runtime": 2.7275, "eval_samples_per_second": 6.233, "eval_steps_per_second": 1.833, "eval_wer": 0.27666666666666667, "step": 99 }, { "epoch": 11.11, "learning_rate": 0.0002762068965517241, "loss": 0.3106, "step": 100 }, { "epoch": 12.0, "eval_loss": 0.5353591442108154, "eval_runtime": 2.7463, "eval_samples_per_second": 6.19, "eval_steps_per_second": 1.821, "eval_wer": 0.3, "step": 108 }, { "epoch": 12.22, "learning_rate": 0.0002727586206896551, "loss": 0.2702, "step": 110 }, { "epoch": 13.0, "eval_loss": 0.5543289184570312, "eval_runtime": 2.737, "eval_samples_per_second": 6.211, "eval_steps_per_second": 1.827, "eval_wer": 0.3, "step": 117 }, { "epoch": 13.33, "learning_rate": 0.00026931034482758617, "loss": 0.3021, "step": 120 }, { "epoch": 14.0, "eval_loss": 0.5436674952507019, "eval_runtime": 2.6623, "eval_samples_per_second": 6.386, "eval_steps_per_second": 1.878, "eval_wer": 0.2688888888888889, "step": 126 }, { "epoch": 14.44, "learning_rate": 0.0002658620689655172, "loss": 0.2622, "step": 130 }, { "epoch": 15.0, "eval_loss": 0.5897889733314514, "eval_runtime": 2.6886, "eval_samples_per_second": 6.323, "eval_steps_per_second": 1.86, "eval_wer": 0.2777777777777778, "step": 135 }, { "epoch": 15.56, "learning_rate": 0.0002624137931034483, "loss": 0.2465, "step": 140 }, { "epoch": 16.0, "eval_loss": 0.2900051176548004, "eval_runtime": 2.6745, "eval_samples_per_second": 6.356, "eval_steps_per_second": 1.869, "eval_wer": 0.2722222222222222, "step": 144 }, { "epoch": 16.67, "learning_rate": 0.0002589655172413793, "loss": 0.3077, "step": 150 }, { "epoch": 17.0, "eval_loss": 0.44073647260665894, "eval_runtime": 2.6947, "eval_samples_per_second": 6.309, "eval_steps_per_second": 1.855, "eval_wer": 0.2544444444444444, "step": 153 }, { "epoch": 17.78, "learning_rate": 0.00025551724137931034, "loss": 0.2959, "step": 160 }, { "epoch": 18.0, "eval_loss": 0.4078582227230072, "eval_runtime": 2.6646, "eval_samples_per_second": 6.38, "eval_steps_per_second": 1.876, "eval_wer": 0.29444444444444445, "step": 162 }, { "epoch": 18.89, "learning_rate": 0.0002520689655172414, "loss": 0.2843, "step": 170 }, { "epoch": 19.0, "eval_loss": 0.504199743270874, "eval_runtime": 2.6688, "eval_samples_per_second": 6.37, "eval_steps_per_second": 1.874, "eval_wer": 0.2722222222222222, "step": 171 }, { "epoch": 20.0, "learning_rate": 0.0002486206896551724, "loss": 0.254, "step": 180 }, { "epoch": 20.0, "eval_loss": 0.38511529564857483, "eval_runtime": 2.6831, "eval_samples_per_second": 6.336, "eval_steps_per_second": 1.864, "eval_wer": 0.2877777777777778, "step": 180 }, { "epoch": 21.0, "eval_loss": 0.39123114943504333, "eval_runtime": 2.7483, "eval_samples_per_second": 6.186, "eval_steps_per_second": 1.819, "eval_wer": 0.2677777777777778, "step": 189 }, { "epoch": 21.11, "learning_rate": 0.0002451724137931034, "loss": 0.2532, "step": 190 }, { "epoch": 22.0, "eval_loss": 0.4699125289916992, "eval_runtime": 2.6552, "eval_samples_per_second": 6.402, "eval_steps_per_second": 1.883, "eval_wer": 0.2577777777777778, "step": 198 }, { "epoch": 22.22, "learning_rate": 0.00024172413793103445, "loss": 0.3011, "step": 200 }, { "epoch": 23.0, "eval_loss": 0.746636688709259, "eval_runtime": 2.6717, "eval_samples_per_second": 6.363, "eval_steps_per_second": 1.871, "eval_wer": 0.27444444444444444, "step": 207 }, { "epoch": 23.33, "learning_rate": 0.0002382758620689655, "loss": 0.2601, "step": 210 }, { "epoch": 24.0, "eval_loss": 0.42384907603263855, "eval_runtime": 2.671, "eval_samples_per_second": 6.365, "eval_steps_per_second": 1.872, "eval_wer": 0.28, "step": 216 }, { "epoch": 24.44, "learning_rate": 0.00023482758620689653, "loss": 0.2873, "step": 220 }, { "epoch": 25.0, "eval_loss": 0.3816550672054291, "eval_runtime": 2.7122, "eval_samples_per_second": 6.268, "eval_steps_per_second": 1.844, "eval_wer": 0.24555555555555555, "step": 225 }, { "epoch": 25.56, "learning_rate": 0.00023137931034482756, "loss": 0.2791, "step": 230 }, { "epoch": 26.0, "eval_loss": 0.3488330543041229, "eval_runtime": 2.7083, "eval_samples_per_second": 6.277, "eval_steps_per_second": 1.846, "eval_wer": 0.24888888888888888, "step": 234 }, { "epoch": 26.67, "learning_rate": 0.00022793103448275862, "loss": 0.2399, "step": 240 }, { "epoch": 27.0, "eval_loss": 0.29803043603897095, "eval_runtime": 2.6607, "eval_samples_per_second": 6.389, "eval_steps_per_second": 1.879, "eval_wer": 0.2611111111111111, "step": 243 }, { "epoch": 27.78, "learning_rate": 0.00022448275862068965, "loss": 0.2592, "step": 250 }, { "epoch": 28.0, "eval_loss": 0.29419198632240295, "eval_runtime": 2.6723, "eval_samples_per_second": 6.361, "eval_steps_per_second": 1.871, "eval_wer": 0.27, "step": 252 }, { "epoch": 28.89, "learning_rate": 0.00022103448275862065, "loss": 0.2191, "step": 260 }, { "epoch": 29.0, "eval_loss": 0.2920997440814972, "eval_runtime": 2.6701, "eval_samples_per_second": 6.367, "eval_steps_per_second": 1.873, "eval_wer": 0.2833333333333333, "step": 261 }, { "epoch": 30.0, "learning_rate": 0.0002175862068965517, "loss": 0.2285, "step": 270 }, { "epoch": 30.0, "eval_loss": 0.2851235568523407, "eval_runtime": 2.6768, "eval_samples_per_second": 6.351, "eval_steps_per_second": 1.868, "eval_wer": 0.27444444444444444, "step": 270 }, { "epoch": 31.0, "eval_loss": 0.2793618440628052, "eval_runtime": 2.6674, "eval_samples_per_second": 6.373, "eval_steps_per_second": 1.874, "eval_wer": 0.2733333333333333, "step": 279 }, { "epoch": 31.11, "learning_rate": 0.00021413793103448273, "loss": 0.2489, "step": 280 }, { "epoch": 32.0, "eval_loss": 0.30359551310539246, "eval_runtime": 2.6757, "eval_samples_per_second": 6.353, "eval_steps_per_second": 1.869, "eval_wer": 0.2677777777777778, "step": 288 }, { "epoch": 32.22, "learning_rate": 0.0002106896551724138, "loss": 0.2445, "step": 290 }, { "epoch": 33.0, "eval_loss": 0.28508973121643066, "eval_runtime": 2.7054, "eval_samples_per_second": 6.284, "eval_steps_per_second": 1.848, "eval_wer": 0.2677777777777778, "step": 297 }, { "epoch": 33.33, "learning_rate": 0.00020724137931034482, "loss": 0.2261, "step": 300 }, { "epoch": 34.0, "eval_loss": 0.2863776385784149, "eval_runtime": 2.7065, "eval_samples_per_second": 6.281, "eval_steps_per_second": 1.847, "eval_wer": 0.2733333333333333, "step": 306 }, { "epoch": 34.44, "learning_rate": 0.00020379310344827584, "loss": 0.2391, "step": 310 }, { "epoch": 35.0, "eval_loss": 0.3054599165916443, "eval_runtime": 2.6546, "eval_samples_per_second": 6.404, "eval_steps_per_second": 1.884, "eval_wer": 0.2611111111111111, "step": 315 }, { "epoch": 35.56, "learning_rate": 0.0002006896551724138, "loss": 0.3939, "step": 320 }, { "epoch": 36.0, "eval_loss": 0.2927255928516388, "eval_runtime": 2.6884, "eval_samples_per_second": 6.324, "eval_steps_per_second": 1.86, "eval_wer": 0.26, "step": 324 }, { "epoch": 36.67, "learning_rate": 0.00019724137931034482, "loss": 0.2521, "step": 330 }, { "epoch": 37.0, "eval_loss": 0.34698253870010376, "eval_runtime": 2.673, "eval_samples_per_second": 6.36, "eval_steps_per_second": 1.871, "eval_wer": 0.2577777777777778, "step": 333 }, { "epoch": 37.78, "learning_rate": 0.00019379310344827584, "loss": 0.2378, "step": 340 }, { "epoch": 38.0, "eval_loss": 0.2841470539569855, "eval_runtime": 2.6684, "eval_samples_per_second": 6.371, "eval_steps_per_second": 1.874, "eval_wer": 0.26555555555555554, "step": 342 }, { "epoch": 38.89, "learning_rate": 0.0001903448275862069, "loss": 0.2653, "step": 350 }, { "epoch": 39.0, "eval_loss": 0.2888979911804199, "eval_runtime": 2.6688, "eval_samples_per_second": 6.37, "eval_steps_per_second": 1.874, "eval_wer": 0.2388888888888889, "step": 351 }, { "epoch": 40.0, "learning_rate": 0.0001868965517241379, "loss": 0.2235, "step": 360 }, { "epoch": 40.0, "eval_loss": 0.31759706139564514, "eval_runtime": 2.6864, "eval_samples_per_second": 6.328, "eval_steps_per_second": 1.861, "eval_wer": 0.25, "step": 360 }, { "epoch": 41.0, "eval_loss": 0.31878939270973206, "eval_runtime": 2.69, "eval_samples_per_second": 6.32, "eval_steps_per_second": 1.859, "eval_wer": 0.26666666666666666, "step": 369 }, { "epoch": 41.11, "learning_rate": 0.00018344827586206893, "loss": 0.2474, "step": 370 }, { "epoch": 42.0, "eval_loss": 0.37820303440093994, "eval_runtime": 2.703, "eval_samples_per_second": 6.289, "eval_steps_per_second": 1.85, "eval_wer": 0.2633333333333333, "step": 378 }, { "epoch": 42.22, "learning_rate": 0.00017999999999999998, "loss": 0.222, "step": 380 }, { "epoch": 43.0, "eval_loss": 0.320123553276062, "eval_runtime": 2.6965, "eval_samples_per_second": 6.304, "eval_steps_per_second": 1.854, "eval_wer": 0.27666666666666667, "step": 387 }, { "epoch": 43.33, "learning_rate": 0.000176551724137931, "loss": 0.2411, "step": 390 }, { "epoch": 44.0, "eval_loss": 0.3416379392147064, "eval_runtime": 2.672, "eval_samples_per_second": 6.362, "eval_steps_per_second": 1.871, "eval_wer": 0.2722222222222222, "step": 396 }, { "epoch": 44.44, "learning_rate": 0.00017310344827586207, "loss": 0.2561, "step": 400 }, { "epoch": 45.0, "eval_loss": 0.3050036132335663, "eval_runtime": 2.675, "eval_samples_per_second": 6.355, "eval_steps_per_second": 1.869, "eval_wer": 0.27111111111111114, "step": 405 }, { "epoch": 45.56, "learning_rate": 0.0001696551724137931, "loss": 0.2169, "step": 410 }, { "epoch": 46.0, "eval_loss": 0.39679646492004395, "eval_runtime": 2.6457, "eval_samples_per_second": 6.425, "eval_steps_per_second": 1.89, "eval_wer": 0.2511111111111111, "step": 414 }, { "epoch": 46.67, "learning_rate": 0.00016620689655172412, "loss": 0.2296, "step": 420 }, { "epoch": 47.0, "eval_loss": 0.3721458911895752, "eval_runtime": 2.6664, "eval_samples_per_second": 6.376, "eval_steps_per_second": 1.875, "eval_wer": 0.25666666666666665, "step": 423 }, { "epoch": 47.78, "learning_rate": 0.00016275862068965518, "loss": 0.1989, "step": 430 }, { "epoch": 48.0, "eval_loss": 0.3204912543296814, "eval_runtime": 2.6839, "eval_samples_per_second": 6.334, "eval_steps_per_second": 1.863, "eval_wer": 0.26666666666666666, "step": 432 }, { "epoch": 48.89, "learning_rate": 0.00015931034482758618, "loss": 0.2408, "step": 440 }, { "epoch": 49.0, "eval_loss": 0.4523827135562897, "eval_runtime": 2.6923, "eval_samples_per_second": 6.314, "eval_steps_per_second": 1.857, "eval_wer": 0.24888888888888888, "step": 441 }, { "epoch": 50.0, "learning_rate": 0.0001558620689655172, "loss": 0.2163, "step": 450 }, { "epoch": 50.0, "eval_loss": 0.48503080010414124, "eval_runtime": 2.7497, "eval_samples_per_second": 6.183, "eval_steps_per_second": 1.818, "eval_wer": 0.25666666666666665, "step": 450 }, { "epoch": 51.0, "eval_loss": 0.3777279555797577, "eval_runtime": 2.6773, "eval_samples_per_second": 6.35, "eval_steps_per_second": 1.868, "eval_wer": 0.27111111111111114, "step": 459 }, { "epoch": 51.11, "learning_rate": 0.00015241379310344826, "loss": 0.2001, "step": 460 }, { "epoch": 52.0, "eval_loss": 0.5525699853897095, "eval_runtime": 2.7459, "eval_samples_per_second": 6.191, "eval_steps_per_second": 1.821, "eval_wer": 0.2644444444444444, "step": 468 }, { "epoch": 52.22, "learning_rate": 0.0001489655172413793, "loss": 0.2373, "step": 470 }, { "epoch": 53.0, "eval_loss": 0.514091968536377, "eval_runtime": 2.6662, "eval_samples_per_second": 6.376, "eval_steps_per_second": 1.875, "eval_wer": 0.2588888888888889, "step": 477 }, { "epoch": 53.33, "learning_rate": 0.00014551724137931032, "loss": 0.2132, "step": 480 }, { "epoch": 54.0, "eval_loss": 0.5407584309577942, "eval_runtime": 2.6748, "eval_samples_per_second": 6.356, "eval_steps_per_second": 1.869, "eval_wer": 0.2611111111111111, "step": 486 }, { "epoch": 54.44, "learning_rate": 0.00014206896551724138, "loss": 0.2687, "step": 490 }, { "epoch": 55.0, "eval_loss": 0.5389086604118347, "eval_runtime": 2.6621, "eval_samples_per_second": 6.386, "eval_steps_per_second": 1.878, "eval_wer": 0.2677777777777778, "step": 495 }, { "epoch": 55.56, "learning_rate": 0.0001386206896551724, "loss": 0.2244, "step": 500 }, { "epoch": 56.0, "eval_loss": 0.5729237794876099, "eval_runtime": 2.6691, "eval_samples_per_second": 6.369, "eval_steps_per_second": 1.873, "eval_wer": 0.2577777777777778, "step": 504 }, { "epoch": 56.67, "learning_rate": 0.00013517241379310343, "loss": 0.2102, "step": 510 }, { "epoch": 57.0, "eval_loss": 0.6249426603317261, "eval_runtime": 2.6765, "eval_samples_per_second": 6.351, "eval_steps_per_second": 1.868, "eval_wer": 0.24888888888888888, "step": 513 }, { "epoch": 57.78, "learning_rate": 0.00013172413793103446, "loss": 0.2076, "step": 520 }, { "epoch": 58.0, "eval_loss": 0.5538017749786377, "eval_runtime": 2.6836, "eval_samples_per_second": 6.335, "eval_steps_per_second": 1.863, "eval_wer": 0.25, "step": 522 }, { "epoch": 58.89, "learning_rate": 0.00012827586206896552, "loss": 0.208, "step": 530 }, { "epoch": 59.0, "eval_loss": 0.549868643283844, "eval_runtime": 2.7033, "eval_samples_per_second": 6.289, "eval_steps_per_second": 1.85, "eval_wer": 0.24666666666666667, "step": 531 }, { "epoch": 60.0, "learning_rate": 0.00012482758620689654, "loss": 0.2167, "step": 540 }, { "epoch": 60.0, "eval_loss": 0.6480904817581177, "eval_runtime": 2.6966, "eval_samples_per_second": 6.304, "eval_steps_per_second": 1.854, "eval_wer": 0.24333333333333335, "step": 540 }, { "epoch": 61.0, "eval_loss": 0.6797173619270325, "eval_runtime": 2.6683, "eval_samples_per_second": 6.371, "eval_steps_per_second": 1.874, "eval_wer": 0.2588888888888889, "step": 549 }, { "epoch": 61.11, "learning_rate": 0.00012137931034482757, "loss": 0.2218, "step": 550 }, { "epoch": 62.0, "eval_loss": 0.5400562882423401, "eval_runtime": 2.6672, "eval_samples_per_second": 6.374, "eval_steps_per_second": 1.875, "eval_wer": 0.26555555555555554, "step": 558 }, { "epoch": 62.22, "learning_rate": 0.00011793103448275861, "loss": 0.2102, "step": 560 }, { "epoch": 63.0, "eval_loss": 0.5152256488800049, "eval_runtime": 2.6712, "eval_samples_per_second": 6.364, "eval_steps_per_second": 1.872, "eval_wer": 0.26, "step": 567 }, { "epoch": 63.33, "learning_rate": 0.00011448275862068966, "loss": 0.2176, "step": 570 }, { "epoch": 64.0, "eval_loss": 0.5581179857254028, "eval_runtime": 2.6803, "eval_samples_per_second": 6.343, "eval_steps_per_second": 1.865, "eval_wer": 0.26, "step": 576 }, { "epoch": 64.44, "learning_rate": 0.00011103448275862067, "loss": 0.2068, "step": 580 }, { "epoch": 65.0, "eval_loss": 0.7224693298339844, "eval_runtime": 2.6758, "eval_samples_per_second": 6.353, "eval_steps_per_second": 1.869, "eval_wer": 0.25333333333333335, "step": 585 }, { "epoch": 65.56, "learning_rate": 0.00010758620689655171, "loss": 0.2123, "step": 590 }, { "epoch": 66.0, "eval_loss": 0.6330079436302185, "eval_runtime": 2.667, "eval_samples_per_second": 6.374, "eval_steps_per_second": 1.875, "eval_wer": 0.2633333333333333, "step": 594 }, { "epoch": 66.67, "learning_rate": 0.00010413793103448275, "loss": 0.2212, "step": 600 }, { "epoch": 67.0, "eval_loss": 0.5943337082862854, "eval_runtime": 2.6815, "eval_samples_per_second": 6.34, "eval_steps_per_second": 1.865, "eval_wer": 0.2588888888888889, "step": 603 }, { "epoch": 67.78, "learning_rate": 0.0001006896551724138, "loss": 0.2013, "step": 610 }, { "epoch": 68.0, "eval_loss": 0.7557083368301392, "eval_runtime": 2.7707, "eval_samples_per_second": 6.136, "eval_steps_per_second": 1.805, "eval_wer": 0.25, "step": 612 }, { "epoch": 68.89, "learning_rate": 9.724137931034481e-05, "loss": 0.2304, "step": 620 }, { "epoch": 69.0, "eval_loss": 0.9143911004066467, "eval_runtime": 2.682, "eval_samples_per_second": 6.339, "eval_steps_per_second": 1.864, "eval_wer": 0.24666666666666667, "step": 621 }, { "epoch": 70.0, "learning_rate": 9.379310344827585e-05, "loss": 0.209, "step": 630 }, { "epoch": 70.0, "eval_loss": 0.7790440320968628, "eval_runtime": 2.6874, "eval_samples_per_second": 6.326, "eval_steps_per_second": 1.861, "eval_wer": 0.24, "step": 630 }, { "epoch": 71.0, "eval_loss": 0.6202754974365234, "eval_runtime": 2.6775, "eval_samples_per_second": 6.349, "eval_steps_per_second": 1.867, "eval_wer": 0.2411111111111111, "step": 639 }, { "epoch": 71.11, "learning_rate": 9.03448275862069e-05, "loss": 0.191, "step": 640 }, { "epoch": 72.0, "eval_loss": 0.628011167049408, "eval_runtime": 2.6846, "eval_samples_per_second": 6.332, "eval_steps_per_second": 1.862, "eval_wer": 0.23222222222222222, "step": 648 }, { "epoch": 72.22, "learning_rate": 8.689655172413794e-05, "loss": 0.2313, "step": 650 }, { "epoch": 73.0, "eval_loss": 0.5491462349891663, "eval_runtime": 2.7834, "eval_samples_per_second": 6.108, "eval_steps_per_second": 1.796, "eval_wer": 0.23777777777777778, "step": 657 }, { "epoch": 73.33, "learning_rate": 8.344827586206895e-05, "loss": 0.1869, "step": 660 }, { "epoch": 74.0, "eval_loss": 0.465344101190567, "eval_runtime": 2.6446, "eval_samples_per_second": 6.428, "eval_steps_per_second": 1.891, "eval_wer": 0.2411111111111111, "step": 666 }, { "epoch": 74.44, "learning_rate": 7.999999999999999e-05, "loss": 0.2313, "step": 670 }, { "epoch": 75.0, "eval_loss": 0.6015957593917847, "eval_runtime": 2.6596, "eval_samples_per_second": 6.392, "eval_steps_per_second": 1.88, "eval_wer": 0.24888888888888888, "step": 675 }, { "epoch": 75.56, "learning_rate": 7.655172413793103e-05, "loss": 0.1806, "step": 680 }, { "epoch": 76.0, "eval_loss": 0.6491662859916687, "eval_runtime": 2.7026, "eval_samples_per_second": 6.29, "eval_steps_per_second": 1.85, "eval_wer": 0.2477777777777778, "step": 684 }, { "epoch": 76.67, "learning_rate": 7.310344827586206e-05, "loss": 0.1934, "step": 690 }, { "epoch": 77.0, "eval_loss": 0.6185407042503357, "eval_runtime": 2.7206, "eval_samples_per_second": 6.249, "eval_steps_per_second": 1.838, "eval_wer": 0.2477777777777778, "step": 693 }, { "epoch": 77.78, "learning_rate": 6.96551724137931e-05, "loss": 0.1954, "step": 700 }, { "epoch": 78.0, "eval_loss": 0.5618109703063965, "eval_runtime": 2.6904, "eval_samples_per_second": 6.319, "eval_steps_per_second": 1.858, "eval_wer": 0.24888888888888888, "step": 702 }, { "epoch": 78.89, "learning_rate": 6.620689655172413e-05, "loss": 0.2077, "step": 710 }, { "epoch": 79.0, "eval_loss": 0.5759626030921936, "eval_runtime": 2.6815, "eval_samples_per_second": 6.34, "eval_steps_per_second": 1.865, "eval_wer": 0.25222222222222224, "step": 711 }, { "epoch": 80.0, "learning_rate": 6.275862068965517e-05, "loss": 0.2052, "step": 720 }, { "epoch": 80.0, "eval_loss": 0.6171647310256958, "eval_runtime": 2.6846, "eval_samples_per_second": 6.332, "eval_steps_per_second": 1.862, "eval_wer": 0.25, "step": 720 }, { "epoch": 81.0, "eval_loss": 0.6858766078948975, "eval_runtime": 2.7081, "eval_samples_per_second": 6.278, "eval_steps_per_second": 1.846, "eval_wer": 0.24666666666666667, "step": 729 }, { "epoch": 81.11, "learning_rate": 5.93103448275862e-05, "loss": 0.1804, "step": 730 }, { "epoch": 82.0, "eval_loss": 0.7642585039138794, "eval_runtime": 2.6849, "eval_samples_per_second": 6.332, "eval_steps_per_second": 1.862, "eval_wer": 0.24222222222222223, "step": 738 }, { "epoch": 82.22, "learning_rate": 5.586206896551724e-05, "loss": 0.1995, "step": 740 }, { "epoch": 83.0, "eval_loss": 0.8360317349433899, "eval_runtime": 2.6914, "eval_samples_per_second": 6.316, "eval_steps_per_second": 1.858, "eval_wer": 0.23666666666666666, "step": 747 }, { "epoch": 83.33, "learning_rate": 5.241379310344827e-05, "loss": 0.1869, "step": 750 }, { "epoch": 84.0, "eval_loss": 0.6984127759933472, "eval_runtime": 2.6921, "eval_samples_per_second": 6.315, "eval_steps_per_second": 1.857, "eval_wer": 0.24888888888888888, "step": 756 }, { "epoch": 84.44, "learning_rate": 4.896551724137931e-05, "loss": 0.2135, "step": 760 }, { "epoch": 85.0, "eval_loss": 0.6759489178657532, "eval_runtime": 2.6874, "eval_samples_per_second": 6.326, "eval_steps_per_second": 1.861, "eval_wer": 0.24222222222222223, "step": 765 }, { "epoch": 85.56, "learning_rate": 4.551724137931034e-05, "loss": 0.178, "step": 770 }, { "epoch": 86.0, "eval_loss": 0.6790638566017151, "eval_runtime": 2.6812, "eval_samples_per_second": 6.34, "eval_steps_per_second": 1.865, "eval_wer": 0.24444444444444444, "step": 774 }, { "epoch": 86.67, "learning_rate": 4.206896551724138e-05, "loss": 0.1734, "step": 780 }, { "epoch": 87.0, "eval_loss": 0.7283732891082764, "eval_runtime": 2.6818, "eval_samples_per_second": 6.339, "eval_steps_per_second": 1.864, "eval_wer": 0.2411111111111111, "step": 783 }, { "epoch": 87.78, "learning_rate": 3.862068965517241e-05, "loss": 0.1881, "step": 790 }, { "epoch": 88.0, "eval_loss": 0.8172265291213989, "eval_runtime": 2.6717, "eval_samples_per_second": 6.363, "eval_steps_per_second": 1.871, "eval_wer": 0.23444444444444446, "step": 792 }, { "epoch": 88.89, "learning_rate": 3.517241379310344e-05, "loss": 0.1625, "step": 800 }, { "epoch": 89.0, "eval_loss": 0.8061416745185852, "eval_runtime": 2.719, "eval_samples_per_second": 6.252, "eval_steps_per_second": 1.839, "eval_wer": 0.23555555555555555, "step": 801 }, { "epoch": 90.0, "learning_rate": 3.1724137931034476e-05, "loss": 0.181, "step": 810 }, { "epoch": 90.0, "eval_loss": 0.7644045948982239, "eval_runtime": 2.7, "eval_samples_per_second": 6.296, "eval_steps_per_second": 1.852, "eval_wer": 0.2388888888888889, "step": 810 }, { "epoch": 91.0, "eval_loss": 0.7413058876991272, "eval_runtime": 2.6915, "eval_samples_per_second": 6.316, "eval_steps_per_second": 1.858, "eval_wer": 0.24, "step": 819 }, { "epoch": 91.11, "learning_rate": 2.8275862068965515e-05, "loss": 0.1942, "step": 820 }, { "epoch": 92.0, "eval_loss": 0.6439424157142639, "eval_runtime": 2.668, "eval_samples_per_second": 6.372, "eval_steps_per_second": 1.874, "eval_wer": 0.24333333333333335, "step": 828 }, { "epoch": 92.22, "learning_rate": 2.482758620689655e-05, "loss": 0.1806, "step": 830 }, { "epoch": 93.0, "eval_loss": 0.6250465512275696, "eval_runtime": 2.7104, "eval_samples_per_second": 6.272, "eval_steps_per_second": 1.845, "eval_wer": 0.24666666666666667, "step": 837 }, { "epoch": 93.33, "learning_rate": 2.1379310344827585e-05, "loss": 0.1651, "step": 840 }, { "epoch": 94.0, "eval_loss": 0.6516769528388977, "eval_runtime": 2.6556, "eval_samples_per_second": 6.402, "eval_steps_per_second": 1.883, "eval_wer": 0.24333333333333335, "step": 846 }, { "epoch": 94.44, "learning_rate": 1.793103448275862e-05, "loss": 0.1833, "step": 850 }, { "epoch": 95.0, "eval_loss": 0.6628164052963257, "eval_runtime": 2.7043, "eval_samples_per_second": 6.286, "eval_steps_per_second": 1.849, "eval_wer": 0.2388888888888889, "step": 855 }, { "epoch": 95.56, "learning_rate": 1.4482758620689653e-05, "loss": 0.1873, "step": 860 }, { "epoch": 96.0, "eval_loss": 0.6581894159317017, "eval_runtime": 2.6489, "eval_samples_per_second": 6.418, "eval_steps_per_second": 1.888, "eval_wer": 0.23777777777777778, "step": 864 }, { "epoch": 96.67, "learning_rate": 1.1034482758620688e-05, "loss": 0.1672, "step": 870 }, { "epoch": 97.0, "eval_loss": 0.6548393964767456, "eval_runtime": 2.6503, "eval_samples_per_second": 6.414, "eval_steps_per_second": 1.887, "eval_wer": 0.2388888888888889, "step": 873 }, { "epoch": 97.78, "learning_rate": 7.586206896551723e-06, "loss": 0.1871, "step": 880 }, { "epoch": 98.0, "eval_loss": 0.6655335426330566, "eval_runtime": 2.6305, "eval_samples_per_second": 6.463, "eval_steps_per_second": 1.901, "eval_wer": 0.24, "step": 882 }, { "epoch": 98.89, "learning_rate": 4.137931034482758e-06, "loss": 0.2429, "step": 890 }, { "epoch": 99.0, "eval_loss": 0.6694830060005188, "eval_runtime": 2.6212, "eval_samples_per_second": 6.486, "eval_steps_per_second": 1.908, "eval_wer": 0.24, "step": 891 }, { "epoch": 100.0, "learning_rate": 6.89655172413793e-07, "loss": 0.1832, "step": 900 }, { "epoch": 100.0, "eval_loss": 0.6699547171592712, "eval_runtime": 2.7031, "eval_samples_per_second": 6.289, "eval_steps_per_second": 1.85, "eval_wer": 0.2388888888888889, "step": 900 }, { "epoch": 100.0, "step": 900, "total_flos": 1.389987159899058e+18, "train_loss": 0.2346874083413018, "train_runtime": 3461.7851, "train_samples_per_second": 4.073, "train_steps_per_second": 0.26 } ], "max_steps": 900, "num_train_epochs": 100, "total_flos": 1.389987159899058e+18, "trial_name": null, "trial_params": null }