{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 5100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.96, "learning_rate": 5.069866666666666e-05, "loss": 10.7052, "step": 100 }, { "epoch": 1.96, "eval_loss": 3.4682674407958984, "eval_runtime": 13.0004, "eval_samples_per_second": 26.23, "eval_steps_per_second": 3.308, "eval_wer": 1.0, "step": 100 }, { "epoch": 3.92, "learning_rate": 0.000102432, "loss": 3.2395, "step": 200 }, { "epoch": 3.92, "eval_loss": 3.14892840385437, "eval_runtime": 12.7764, "eval_samples_per_second": 26.69, "eval_steps_per_second": 3.366, "eval_wer": 1.0, "step": 200 }, { "epoch": 5.88, "learning_rate": 0.00015416533333333332, "loss": 2.9951, "step": 300 }, { "epoch": 5.88, "eval_loss": 2.982297897338867, "eval_runtime": 12.7866, "eval_samples_per_second": 26.668, "eval_steps_per_second": 3.363, "eval_wer": 1.0007380073800738, "step": 300 }, { "epoch": 7.84, "learning_rate": 0.00020589866666666665, "loss": 2.3574, "step": 400 }, { "epoch": 7.84, "eval_loss": 1.2614495754241943, "eval_runtime": 12.8042, "eval_samples_per_second": 26.632, "eval_steps_per_second": 3.358, "eval_wer": 0.7597785977859779, "step": 400 }, { "epoch": 9.8, "learning_rate": 0.000257632, "loss": 1.7287, "step": 500 }, { "epoch": 9.8, "eval_loss": 1.1816928386688232, "eval_runtime": 12.7085, "eval_samples_per_second": 26.833, "eval_steps_per_second": 3.384, "eval_wer": 0.7420664206642067, "step": 500 }, { "epoch": 11.76, "learning_rate": 0.00030936533333333335, "loss": 1.6144, "step": 600 }, { "epoch": 11.76, "eval_loss": 1.131492257118225, "eval_runtime": 12.8371, "eval_samples_per_second": 26.564, "eval_steps_per_second": 3.35, "eval_wer": 0.7321033210332103, "step": 600 }, { "epoch": 13.73, "learning_rate": 0.00036109866666666666, "loss": 1.5598, "step": 700 }, { "epoch": 13.73, "eval_loss": 1.232160210609436, "eval_runtime": 12.7503, "eval_samples_per_second": 26.745, "eval_steps_per_second": 3.372, "eval_wer": 0.7549815498154981, "step": 700 }, { "epoch": 15.69, "learning_rate": 0.0003837186206896552, "loss": 1.5418, "step": 800 }, { "epoch": 15.69, "eval_loss": 1.272075891494751, "eval_runtime": 12.8108, "eval_samples_per_second": 26.618, "eval_steps_per_second": 3.357, "eval_wer": 0.7819188191881918, "step": 800 }, { "epoch": 17.65, "learning_rate": 0.0003747990804597701, "loss": 1.4578, "step": 900 }, { "epoch": 17.65, "eval_loss": 1.1709508895874023, "eval_runtime": 12.7848, "eval_samples_per_second": 26.672, "eval_steps_per_second": 3.363, "eval_wer": 0.7531365313653137, "step": 900 }, { "epoch": 19.61, "learning_rate": 0.0003658795402298851, "loss": 1.4311, "step": 1000 }, { "epoch": 19.61, "eval_loss": 1.2042367458343506, "eval_runtime": 12.6761, "eval_samples_per_second": 26.901, "eval_steps_per_second": 3.392, "eval_wer": 0.7490774907749077, "step": 1000 }, { "epoch": 21.57, "learning_rate": 0.00035696, "loss": 1.3483, "step": 1100 }, { "epoch": 21.57, "eval_loss": 1.1702170372009277, "eval_runtime": 12.5762, "eval_samples_per_second": 27.115, "eval_steps_per_second": 3.419, "eval_wer": 0.7464944649446494, "step": 1100 }, { "epoch": 23.53, "learning_rate": 0.0003480404597701149, "loss": 1.3078, "step": 1200 }, { "epoch": 23.53, "eval_loss": 1.1963412761688232, "eval_runtime": 12.6937, "eval_samples_per_second": 26.864, "eval_steps_per_second": 3.387, "eval_wer": 0.7420664206642067, "step": 1200 }, { "epoch": 25.49, "learning_rate": 0.00033912091954022987, "loss": 1.2576, "step": 1300 }, { "epoch": 25.49, "eval_loss": 1.1501450538635254, "eval_runtime": 12.6626, "eval_samples_per_second": 26.93, "eval_steps_per_second": 3.396, "eval_wer": 0.7280442804428044, "step": 1300 }, { "epoch": 27.45, "learning_rate": 0.0003302013793103448, "loss": 1.2173, "step": 1400 }, { "epoch": 27.45, "eval_loss": 1.2525919675827026, "eval_runtime": 12.6262, "eval_samples_per_second": 27.007, "eval_steps_per_second": 3.406, "eval_wer": 0.7298892988929889, "step": 1400 }, { "epoch": 29.41, "learning_rate": 0.00032128183908045977, "loss": 1.2217, "step": 1500 }, { "epoch": 29.41, "eval_loss": 1.2478744983673096, "eval_runtime": 12.8026, "eval_samples_per_second": 26.635, "eval_steps_per_second": 3.359, "eval_wer": 0.7309963099630996, "step": 1500 }, { "epoch": 31.37, "learning_rate": 0.0003123622988505747, "loss": 1.1536, "step": 1600 }, { "epoch": 31.37, "eval_loss": 1.2567418813705444, "eval_runtime": 12.6734, "eval_samples_per_second": 26.907, "eval_steps_per_second": 3.393, "eval_wer": 0.7431734317343174, "step": 1600 }, { "epoch": 33.33, "learning_rate": 0.00030344275862068966, "loss": 1.0939, "step": 1700 }, { "epoch": 33.33, "eval_loss": 1.2800976037979126, "eval_runtime": 12.5686, "eval_samples_per_second": 27.131, "eval_steps_per_second": 3.421, "eval_wer": 0.7247232472324723, "step": 1700 }, { "epoch": 35.29, "learning_rate": 0.0002945232183908046, "loss": 1.0745, "step": 1800 }, { "epoch": 35.29, "eval_loss": 1.2340304851531982, "eval_runtime": 12.6385, "eval_samples_per_second": 26.981, "eval_steps_per_second": 3.402, "eval_wer": 0.7151291512915129, "step": 1800 }, { "epoch": 37.25, "learning_rate": 0.00028560367816091956, "loss": 1.0454, "step": 1900 }, { "epoch": 37.25, "eval_loss": 1.237194299697876, "eval_runtime": 12.4912, "eval_samples_per_second": 27.299, "eval_steps_per_second": 3.442, "eval_wer": 0.7151291512915129, "step": 1900 }, { "epoch": 39.22, "learning_rate": 0.00027668413793103446, "loss": 1.0101, "step": 2000 }, { "epoch": 39.22, "eval_loss": 1.2461133003234863, "eval_runtime": 12.6855, "eval_samples_per_second": 26.881, "eval_steps_per_second": 3.39, "eval_wer": 0.7376383763837638, "step": 2000 }, { "epoch": 41.18, "learning_rate": 0.0002677645977011494, "loss": 0.9833, "step": 2100 }, { "epoch": 41.18, "eval_loss": 1.2552708387374878, "eval_runtime": 12.6273, "eval_samples_per_second": 27.005, "eval_steps_per_second": 3.405, "eval_wer": 0.7269372693726938, "step": 2100 }, { "epoch": 43.14, "learning_rate": 0.00025884505747126435, "loss": 0.9314, "step": 2200 }, { "epoch": 43.14, "eval_loss": 1.2371633052825928, "eval_runtime": 12.5794, "eval_samples_per_second": 27.108, "eval_steps_per_second": 3.418, "eval_wer": 0.7014760147601476, "step": 2200 }, { "epoch": 45.1, "learning_rate": 0.0002499255172413793, "loss": 0.9147, "step": 2300 }, { "epoch": 45.1, "eval_loss": 1.3035242557525635, "eval_runtime": 12.8232, "eval_samples_per_second": 26.592, "eval_steps_per_second": 3.353, "eval_wer": 0.7357933579335794, "step": 2300 }, { "epoch": 47.06, "learning_rate": 0.00024109517241379313, "loss": 0.8758, "step": 2400 }, { "epoch": 47.06, "eval_loss": 1.2598013877868652, "eval_runtime": 12.7812, "eval_samples_per_second": 26.68, "eval_steps_per_second": 3.364, "eval_wer": 0.7092250922509226, "step": 2400 }, { "epoch": 49.02, "learning_rate": 0.00023217563218390802, "loss": 0.8356, "step": 2500 }, { "epoch": 49.02, "eval_loss": 1.255703091621399, "eval_runtime": 12.549, "eval_samples_per_second": 27.173, "eval_steps_per_second": 3.427, "eval_wer": 0.7143911439114391, "step": 2500 }, { "epoch": 50.98, "learning_rate": 0.00022325609195402297, "loss": 0.8105, "step": 2600 }, { "epoch": 50.98, "eval_loss": 1.2618709802627563, "eval_runtime": 12.4348, "eval_samples_per_second": 27.423, "eval_steps_per_second": 3.458, "eval_wer": 0.7236162361623616, "step": 2600 }, { "epoch": 52.94, "learning_rate": 0.00021433655172413795, "loss": 0.7947, "step": 2700 }, { "epoch": 52.94, "eval_loss": 1.399444818496704, "eval_runtime": 12.9361, "eval_samples_per_second": 26.36, "eval_steps_per_second": 3.324, "eval_wer": 0.7490774907749077, "step": 2700 }, { "epoch": 54.9, "learning_rate": 0.0002054170114942529, "loss": 0.7623, "step": 2800 }, { "epoch": 54.9, "eval_loss": 1.2931541204452515, "eval_runtime": 12.7092, "eval_samples_per_second": 26.831, "eval_steps_per_second": 3.383, "eval_wer": 0.7132841328413284, "step": 2800 }, { "epoch": 56.86, "learning_rate": 0.0001964974712643678, "loss": 0.7282, "step": 2900 }, { "epoch": 56.86, "eval_loss": 1.2799276113510132, "eval_runtime": 12.7788, "eval_samples_per_second": 26.685, "eval_steps_per_second": 3.365, "eval_wer": 0.7088560885608856, "step": 2900 }, { "epoch": 58.82, "learning_rate": 0.00018757793103448274, "loss": 0.7108, "step": 3000 }, { "epoch": 58.82, "eval_loss": 1.3615078926086426, "eval_runtime": 12.6509, "eval_samples_per_second": 26.955, "eval_steps_per_second": 3.399, "eval_wer": 0.714760147601476, "step": 3000 }, { "epoch": 60.78, "learning_rate": 0.00017865839080459772, "loss": 0.6896, "step": 3100 }, { "epoch": 60.78, "eval_loss": 1.312876582145691, "eval_runtime": 12.6527, "eval_samples_per_second": 26.951, "eval_steps_per_second": 3.398, "eval_wer": 0.7040590405904059, "step": 3100 }, { "epoch": 62.75, "learning_rate": 0.00016973885057471264, "loss": 0.6496, "step": 3200 }, { "epoch": 62.75, "eval_loss": 1.4050244092941284, "eval_runtime": 12.6982, "eval_samples_per_second": 26.854, "eval_steps_per_second": 3.386, "eval_wer": 0.6933579335793358, "step": 3200 }, { "epoch": 64.71, "learning_rate": 0.0001608193103448276, "loss": 0.6075, "step": 3300 }, { "epoch": 64.71, "eval_loss": 1.35708749294281, "eval_runtime": 12.752, "eval_samples_per_second": 26.741, "eval_steps_per_second": 3.372, "eval_wer": 0.7025830258302583, "step": 3300 }, { "epoch": 66.67, "learning_rate": 0.00015189977011494254, "loss": 0.6242, "step": 3400 }, { "epoch": 66.67, "eval_loss": 1.3368754386901855, "eval_runtime": 12.8446, "eval_samples_per_second": 26.548, "eval_steps_per_second": 3.348, "eval_wer": 0.7062730627306273, "step": 3400 }, { "epoch": 68.63, "learning_rate": 0.00014298022988505749, "loss": 0.5865, "step": 3500 }, { "epoch": 68.63, "eval_loss": 1.4367624521255493, "eval_runtime": 12.5885, "eval_samples_per_second": 27.088, "eval_steps_per_second": 3.416, "eval_wer": 0.7140221402214022, "step": 3500 }, { "epoch": 70.59, "learning_rate": 0.0001340606896551724, "loss": 0.5721, "step": 3600 }, { "epoch": 70.59, "eval_loss": 1.4223829507827759, "eval_runtime": 12.6692, "eval_samples_per_second": 26.916, "eval_steps_per_second": 3.394, "eval_wer": 0.7066420664206642, "step": 3600 }, { "epoch": 72.55, "learning_rate": 0.00012514114942528736, "loss": 0.5475, "step": 3700 }, { "epoch": 72.55, "eval_loss": 1.4797747135162354, "eval_runtime": 12.8068, "eval_samples_per_second": 26.626, "eval_steps_per_second": 3.358, "eval_wer": 0.7118081180811808, "step": 3700 }, { "epoch": 74.51, "learning_rate": 0.00011622160919540229, "loss": 0.5086, "step": 3800 }, { "epoch": 74.51, "eval_loss": 1.510679841041565, "eval_runtime": 12.607, "eval_samples_per_second": 27.048, "eval_steps_per_second": 3.411, "eval_wer": 0.7232472324723247, "step": 3800 }, { "epoch": 76.47, "learning_rate": 0.00010730206896551725, "loss": 0.4958, "step": 3900 }, { "epoch": 76.47, "eval_loss": 1.4849300384521484, "eval_runtime": 12.6845, "eval_samples_per_second": 26.883, "eval_steps_per_second": 3.39, "eval_wer": 0.7088560885608856, "step": 3900 }, { "epoch": 78.43, "learning_rate": 9.838252873563218e-05, "loss": 0.5046, "step": 4000 }, { "epoch": 78.43, "eval_loss": 1.4450523853302002, "eval_runtime": 12.6526, "eval_samples_per_second": 26.951, "eval_steps_per_second": 3.399, "eval_wer": 0.7114391143911439, "step": 4000 }, { "epoch": 80.39, "learning_rate": 8.946298850574712e-05, "loss": 0.4694, "step": 4100 }, { "epoch": 80.39, "eval_loss": 1.4674367904663086, "eval_runtime": 12.49, "eval_samples_per_second": 27.302, "eval_steps_per_second": 3.443, "eval_wer": 0.7088560885608856, "step": 4100 }, { "epoch": 82.35, "learning_rate": 8.054344827586206e-05, "loss": 0.4386, "step": 4200 }, { "epoch": 82.35, "eval_loss": 1.524474859237671, "eval_runtime": 12.6393, "eval_samples_per_second": 26.979, "eval_steps_per_second": 3.402, "eval_wer": 0.7103321033210332, "step": 4200 }, { "epoch": 84.31, "learning_rate": 7.162390804597701e-05, "loss": 0.4516, "step": 4300 }, { "epoch": 84.31, "eval_loss": 1.5031787157058716, "eval_runtime": 12.6415, "eval_samples_per_second": 26.975, "eval_steps_per_second": 3.401, "eval_wer": 0.7103321033210332, "step": 4300 }, { "epoch": 86.27, "learning_rate": 6.27935632183908e-05, "loss": 0.4113, "step": 4400 }, { "epoch": 86.27, "eval_loss": 1.5246329307556152, "eval_runtime": 12.6508, "eval_samples_per_second": 26.955, "eval_steps_per_second": 3.399, "eval_wer": 0.7195571955719557, "step": 4400 }, { "epoch": 88.24, "learning_rate": 5.3874022988505745e-05, "loss": 0.3972, "step": 4500 }, { "epoch": 88.24, "eval_loss": 1.5318336486816406, "eval_runtime": 12.5704, "eval_samples_per_second": 27.127, "eval_steps_per_second": 3.421, "eval_wer": 0.7114391143911439, "step": 4500 }, { "epoch": 90.2, "learning_rate": 4.4954482758620694e-05, "loss": 0.4006, "step": 4600 }, { "epoch": 90.2, "eval_loss": 1.554287314414978, "eval_runtime": 12.5754, "eval_samples_per_second": 27.116, "eval_steps_per_second": 3.419, "eval_wer": 0.6981549815498155, "step": 4600 }, { "epoch": 92.16, "learning_rate": 3.603494252873563e-05, "loss": 0.4014, "step": 4700 }, { "epoch": 92.16, "eval_loss": 1.5442076921463013, "eval_runtime": 12.6146, "eval_samples_per_second": 27.032, "eval_steps_per_second": 3.409, "eval_wer": 0.7047970479704797, "step": 4700 }, { "epoch": 94.12, "learning_rate": 2.711540229885057e-05, "loss": 0.3672, "step": 4800 }, { "epoch": 94.12, "eval_loss": 1.5541517734527588, "eval_runtime": 12.6543, "eval_samples_per_second": 26.947, "eval_steps_per_second": 3.398, "eval_wer": 0.7136531365313653, "step": 4800 }, { "epoch": 96.08, "learning_rate": 1.8195862068965517e-05, "loss": 0.3666, "step": 4900 }, { "epoch": 96.08, "eval_loss": 1.5414179563522339, "eval_runtime": 12.5239, "eval_samples_per_second": 27.228, "eval_steps_per_second": 3.433, "eval_wer": 0.7018450184501845, "step": 4900 }, { "epoch": 98.04, "learning_rate": 9.27632183908046e-06, "loss": 0.3574, "step": 5000 }, { "epoch": 98.04, "eval_loss": 1.5465455055236816, "eval_runtime": 12.7119, "eval_samples_per_second": 26.825, "eval_steps_per_second": 3.383, "eval_wer": 0.7059040590405904, "step": 5000 }, { "epoch": 100.0, "learning_rate": 3.567816091954023e-07, "loss": 0.3428, "step": 5100 }, { "epoch": 100.0, "eval_loss": 1.5443140268325806, "eval_runtime": 12.8582, "eval_samples_per_second": 26.52, "eval_steps_per_second": 3.344, "eval_wer": 0.7029520295202952, "step": 5100 }, { "epoch": 100.0, "step": 5100, "total_flos": 9.838577578075728e+18, "train_loss": 1.1548647121354645, "train_runtime": 5602.3197, "train_samples_per_second": 14.458, "train_steps_per_second": 0.91 } ], "max_steps": 5100, "num_train_epochs": 100, "total_flos": 9.838577578075728e+18, "trial_name": null, "trial_params": null }