{ "best_metric": 32.82730701996504, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-listen-dsw-base-id/checkpoint-600", "epoch": 12.064, "eval_steps": 200, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.898977360288234e-06, "loss": 1.4272, "step": 25 }, { "epoch": 0.03, "learning_rate": 6.160712527409633e-06, "loss": 1.0004, "step": 50 }, { "epoch": 0.04, "learning_rate": 6.85912902234906e-06, "loss": 0.8231, "step": 75 }, { "epoch": 0.05, "learning_rate": 7.344547104469332e-06, "loss": 0.7054, "step": 100 }, { "epoch": 0.06, "learning_rate": 7.716963756434345e-06, "loss": 0.531, "step": 125 }, { "epoch": 0.07, "learning_rate": 8.019180844200955e-06, "loss": 0.4107, "step": 150 }, { "epoch": 1.01, "learning_rate": 8.27351214279797e-06, "loss": 0.546, "step": 175 }, { "epoch": 1.02, "learning_rate": 8.49307723936858e-06, "loss": 0.5452, "step": 200 }, { "epoch": 1.02, "eval_loss": 0.54638671875, "eval_runtime": 2392.0752, "eval_samples_per_second": 1.523, "eval_steps_per_second": 0.095, "eval_wer": 35.168828779096515, "step": 200 }, { "epoch": 1.03, "learning_rate": 8.686247975778677e-06, "loss": 0.5942, "step": 225 }, { "epoch": 1.05, "learning_rate": 8.858694625217149e-06, "loss": 0.4591, "step": 250 }, { "epoch": 1.06, "learning_rate": 9.014436199608479e-06, "loss": 0.3818, "step": 275 }, { "epoch": 1.07, "learning_rate": 9.156425255148058e-06, "loss": 0.2725, "step": 300 }, { "epoch": 2.01, "learning_rate": 9.28689473531776e-06, "loss": 0.3299, "step": 325 }, { "epoch": 2.02, "learning_rate": 9.407574351377137e-06, "loss": 0.3841, "step": 350 }, { "epoch": 2.03, "learning_rate": 9.519831289296397e-06, "loss": 0.4134, "step": 375 }, { "epoch": 2.04, "learning_rate": 9.624764935335318e-06, "loss": 0.3445, "step": 400 }, { "epoch": 2.04, "eval_loss": 0.54052734375, "eval_runtime": 2421.7938, "eval_samples_per_second": 1.504, "eval_steps_per_second": 0.094, "eval_wer": 34.06937160732358, "step": 400 }, { "epoch": 2.06, "learning_rate": 9.723272550712454e-06, "loss": 0.2894, "step": 425 }, { "epoch": 2.07, "learning_rate": 9.816095971633122e-06, "loss": 0.199, "step": 450 }, { "epoch": 3.0, "learning_rate": 9.90385555539545e-06, "loss": 0.1939, "step": 475 }, { "epoch": 3.02, "learning_rate": 9.987075336738768e-06, "loss": 0.2661, "step": 500 }, { "epoch": 3.03, "learning_rate": 9.866666666666668e-06, "loss": 0.2885, "step": 525 }, { "epoch": 3.04, "learning_rate": 9.7e-06, "loss": 0.2474, "step": 550 }, { "epoch": 3.05, "learning_rate": 9.533333333333334e-06, "loss": 0.195, "step": 575 }, { "epoch": 3.07, "learning_rate": 9.366666666666668e-06, "loss": 0.1397, "step": 600 }, { "epoch": 3.07, "eval_loss": 0.53466796875, "eval_runtime": 2319.7619, "eval_samples_per_second": 1.57, "eval_steps_per_second": 0.098, "eval_wer": 32.82730701996504, "step": 600 }, { "epoch": 4.0, "learning_rate": 9.200000000000002e-06, "loss": 0.1158, "step": 625 }, { "epoch": 4.01, "learning_rate": 9.033333333333334e-06, "loss": 0.1686, "step": 650 }, { "epoch": 4.03, "learning_rate": 8.866666666666668e-06, "loss": 0.183, "step": 675 }, { "epoch": 4.04, "learning_rate": 8.700000000000001e-06, "loss": 0.1702, "step": 700 }, { "epoch": 4.05, "learning_rate": 8.533333333333335e-06, "loss": 0.138, "step": 725 }, { "epoch": 4.06, "learning_rate": 8.366666666666667e-06, "loss": 0.1002, "step": 750 }, { "epoch": 4.08, "learning_rate": 8.2e-06, "loss": 0.0697, "step": 775 }, { "epoch": 5.01, "learning_rate": 8.033333333333335e-06, "loss": 0.0988, "step": 800 }, { "epoch": 5.01, "eval_loss": 0.5654296875, "eval_runtime": 2396.345, "eval_samples_per_second": 1.52, "eval_steps_per_second": 0.095, "eval_wer": 35.67485509246481, "step": 800 }, { "epoch": 5.02, "learning_rate": 7.866666666666667e-06, "loss": 0.1161, "step": 825 }, { "epoch": 5.04, "learning_rate": 7.7e-06, "loss": 0.1152, "step": 850 }, { "epoch": 5.05, "learning_rate": 7.533333333333334e-06, "loss": 0.0922, "step": 875 }, { "epoch": 5.06, "learning_rate": 7.3666666666666676e-06, "loss": 0.073, "step": 900 }, { "epoch": 5.07, "learning_rate": 7.2000000000000005e-06, "loss": 0.0481, "step": 925 }, { "epoch": 6.01, "learning_rate": 7.033333333333334e-06, "loss": 0.056, "step": 950 }, { "epoch": 6.02, "learning_rate": 6.866666666666667e-06, "loss": 0.0705, "step": 975 }, { "epoch": 6.03, "learning_rate": 6.700000000000001e-06, "loss": 0.077, "step": 1000 }, { "epoch": 6.03, "eval_loss": 0.57861328125, "eval_runtime": 2311.9939, "eval_samples_per_second": 1.575, "eval_steps_per_second": 0.099, "eval_wer": 33.945165148587726, "step": 1000 }, { "epoch": 6.04, "learning_rate": 6.533333333333334e-06, "loss": 0.0629, "step": 1025 }, { "epoch": 6.06, "learning_rate": 6.366666666666668e-06, "loss": 0.0496, "step": 1050 }, { "epoch": 6.07, "learning_rate": 6.200000000000001e-06, "loss": 0.0328, "step": 1075 }, { "epoch": 7.0, "learning_rate": 6.033333333333335e-06, "loss": 0.0331, "step": 1100 }, { "epoch": 7.02, "learning_rate": 5.8666666666666675e-06, "loss": 0.0432, "step": 1125 }, { "epoch": 7.03, "learning_rate": 5.7e-06, "loss": 0.0475, "step": 1150 }, { "epoch": 7.04, "learning_rate": 5.533333333333334e-06, "loss": 0.0446, "step": 1175 }, { "epoch": 7.05, "learning_rate": 5.366666666666666e-06, "loss": 0.0338, "step": 1200 }, { "epoch": 7.05, "eval_loss": 0.60498046875, "eval_runtime": 2281.4213, "eval_samples_per_second": 1.596, "eval_steps_per_second": 0.1, "eval_wer": 33.98196706228724, "step": 1200 }, { "epoch": 7.07, "learning_rate": 5.2e-06, "loss": 0.0242, "step": 1225 }, { "epoch": 8.0, "learning_rate": 5.033333333333333e-06, "loss": 0.0197, "step": 1250 }, { "epoch": 8.01, "learning_rate": 4.866666666666667e-06, "loss": 0.0269, "step": 1275 }, { "epoch": 8.03, "learning_rate": 4.7e-06, "loss": 0.0319, "step": 1300 }, { "epoch": 8.04, "learning_rate": 4.533333333333334e-06, "loss": 0.0293, "step": 1325 }, { "epoch": 8.05, "learning_rate": 4.366666666666667e-06, "loss": 0.023, "step": 1350 }, { "epoch": 8.06, "learning_rate": 4.2000000000000004e-06, "loss": 0.0189, "step": 1375 }, { "epoch": 8.08, "learning_rate": 4.033333333333333e-06, "loss": 0.0137, "step": 1400 }, { "epoch": 8.08, "eval_loss": 0.6220703125, "eval_runtime": 2300.7263, "eval_samples_per_second": 1.583, "eval_steps_per_second": 0.099, "eval_wer": 34.10157328181065, "step": 1400 }, { "epoch": 9.01, "learning_rate": 3.866666666666667e-06, "loss": 0.0189, "step": 1425 }, { "epoch": 9.02, "learning_rate": 3.7e-06, "loss": 0.02, "step": 1450 }, { "epoch": 9.04, "learning_rate": 3.5333333333333335e-06, "loss": 0.0213, "step": 1475 }, { "epoch": 9.05, "learning_rate": 3.366666666666667e-06, "loss": 0.0179, "step": 1500 }, { "epoch": 9.06, "learning_rate": 3.2000000000000003e-06, "loss": 0.0139, "step": 1525 }, { "epoch": 9.07, "learning_rate": 3.0333333333333337e-06, "loss": 0.0111, "step": 1550 }, { "epoch": 10.01, "learning_rate": 2.866666666666667e-06, "loss": 0.0129, "step": 1575 }, { "epoch": 10.02, "learning_rate": 2.7000000000000004e-06, "loss": 0.0153, "step": 1600 }, { "epoch": 10.02, "eval_loss": 0.64306640625, "eval_runtime": 2254.9316, "eval_samples_per_second": 1.615, "eval_steps_per_second": 0.101, "eval_wer": 33.90376299567578, "step": 1600 }, { "epoch": 10.03, "learning_rate": 2.5333333333333338e-06, "loss": 0.0166, "step": 1625 }, { "epoch": 10.04, "learning_rate": 2.3666666666666667e-06, "loss": 0.0142, "step": 1650 }, { "epoch": 10.06, "learning_rate": 2.2e-06, "loss": 0.0119, "step": 1675 }, { "epoch": 10.07, "learning_rate": 2.0333333333333335e-06, "loss": 0.0101, "step": 1700 }, { "epoch": 11.0, "learning_rate": 1.8666666666666669e-06, "loss": 0.0101, "step": 1725 }, { "epoch": 11.02, "learning_rate": 1.7000000000000002e-06, "loss": 0.0125, "step": 1750 }, { "epoch": 11.03, "learning_rate": 1.5333333333333334e-06, "loss": 0.0141, "step": 1775 }, { "epoch": 11.04, "learning_rate": 1.3666666666666668e-06, "loss": 0.0125, "step": 1800 }, { "epoch": 11.04, "eval_loss": 0.6513671875, "eval_runtime": 2241.116, "eval_samples_per_second": 1.625, "eval_steps_per_second": 0.102, "eval_wer": 33.75195510166529, "step": 1800 }, { "epoch": 11.05, "learning_rate": 1.2000000000000002e-06, "loss": 0.0114, "step": 1825 }, { "epoch": 11.07, "learning_rate": 1.0333333333333333e-06, "loss": 0.0093, "step": 1850 }, { "epoch": 12.0, "learning_rate": 8.666666666666668e-07, "loss": 0.0087, "step": 1875 }, { "epoch": 12.01, "learning_rate": 7.000000000000001e-07, "loss": 0.0111, "step": 1900 }, { "epoch": 12.03, "learning_rate": 5.333333333333335e-07, "loss": 0.0129, "step": 1925 }, { "epoch": 12.04, "learning_rate": 3.666666666666667e-07, "loss": 0.0118, "step": 1950 }, { "epoch": 12.05, "learning_rate": 2.0000000000000002e-07, "loss": 0.011, "step": 1975 }, { "epoch": 12.06, "learning_rate": 3.333333333333334e-08, "loss": 0.0092, "step": 2000 }, { "epoch": 12.06, "eval_loss": 0.65283203125, "eval_runtime": 2242.3057, "eval_samples_per_second": 1.624, "eval_steps_per_second": 0.102, "eval_wer": 33.82555892906431, "step": 2000 }, { "epoch": 12.06, "step": 2000, "total_flos": 4.1331384945643356e+18, "train_loss": 0.16917743301391602, "train_runtime": 75059.9366, "train_samples_per_second": 0.853, "train_steps_per_second": 0.027 } ], "logging_steps": 25, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 200, "total_flos": 4.1331384945643356e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }