{ "best_metric": 0.20606324076652527, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-swagen-female-model/checkpoint-2100", "epoch": 2.9411764705882355, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11764705882352941, "grad_norm": 42.9699821472168, "learning_rate": 0.00029099999999999997, "loss": 8.5907, "step": 100 }, { "epoch": 0.11764705882352941, "eval_loss": 4.55902099609375, "eval_runtime": 32.5172, "eval_samples_per_second": 17.744, "eval_steps_per_second": 4.459, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.23529411764705882, "grad_norm": 23.00685691833496, "learning_rate": 0.00028812499999999997, "loss": 4.3006, "step": 200 }, { "epoch": 0.23529411764705882, "eval_loss": 4.2573113441467285, "eval_runtime": 32.1998, "eval_samples_per_second": 17.919, "eval_steps_per_second": 4.503, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.35294117647058826, "grad_norm": 7.895191192626953, "learning_rate": 0.00027575, "loss": 4.1906, "step": 300 }, { "epoch": 0.35294117647058826, "eval_loss": 3.843606948852539, "eval_runtime": 32.1765, "eval_samples_per_second": 17.932, "eval_steps_per_second": 4.506, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.47058823529411764, "grad_norm": 1.9934626817703247, "learning_rate": 0.00026325, "loss": 1.1198, "step": 400 }, { "epoch": 0.47058823529411764, "eval_loss": 0.25689366459846497, "eval_runtime": 32.1, "eval_samples_per_second": 17.975, "eval_steps_per_second": 4.517, "eval_wer": 0.19961427193828352, "step": 400 }, { "epoch": 0.5882352941176471, "grad_norm": 1.5836255550384521, "learning_rate": 0.00025075, "loss": 0.3012, "step": 500 }, { "epoch": 0.5882352941176471, "eval_loss": 0.2465960681438446, "eval_runtime": 32.4617, "eval_samples_per_second": 17.775, "eval_steps_per_second": 4.467, "eval_wer": 0.19421407907425264, "step": 500 }, { "epoch": 0.7058823529411765, "grad_norm": 1.9752272367477417, "learning_rate": 0.00023825, "loss": 0.2919, "step": 600 }, { "epoch": 0.7058823529411765, "eval_loss": 0.25010332465171814, "eval_runtime": 32.532, "eval_samples_per_second": 17.736, "eval_steps_per_second": 4.457, "eval_wer": 0.19903567984570877, "step": 600 }, { "epoch": 0.8235294117647058, "grad_norm": 4.631561279296875, "learning_rate": 0.00022574999999999996, "loss": 0.267, "step": 700 }, { "epoch": 0.8235294117647058, "eval_loss": 0.2353355437517166, "eval_runtime": 32.2905, "eval_samples_per_second": 17.869, "eval_steps_per_second": 4.49, "eval_wer": 0.18939247830279654, "step": 700 }, { "epoch": 0.9411764705882353, "grad_norm": 3.183828830718994, "learning_rate": 0.00021324999999999998, "loss": 0.2666, "step": 800 }, { "epoch": 0.9411764705882353, "eval_loss": 0.22800631821155548, "eval_runtime": 32.2537, "eval_samples_per_second": 17.889, "eval_steps_per_second": 4.496, "eval_wer": 0.19035679845708775, "step": 800 }, { "epoch": 1.0588235294117647, "grad_norm": 1.4982892274856567, "learning_rate": 0.00020075, "loss": 0.2396, "step": 900 }, { "epoch": 1.0588235294117647, "eval_loss": 0.22375665605068207, "eval_runtime": 32.5796, "eval_samples_per_second": 17.71, "eval_steps_per_second": 4.451, "eval_wer": 0.1891996142719383, "step": 900 }, { "epoch": 1.1764705882352942, "grad_norm": 1.2470210790634155, "learning_rate": 0.00018824999999999997, "loss": 0.258, "step": 1000 }, { "epoch": 1.1764705882352942, "eval_loss": 0.22019432485103607, "eval_runtime": 32.5522, "eval_samples_per_second": 17.725, "eval_steps_per_second": 4.454, "eval_wer": 0.189778206364513, "step": 1000 }, { "epoch": 1.2941176470588236, "grad_norm": 2.3243045806884766, "learning_rate": 0.00017575, "loss": 0.2192, "step": 1100 }, { "epoch": 1.2941176470588236, "eval_loss": 0.21961148083209991, "eval_runtime": 32.5996, "eval_samples_per_second": 17.7, "eval_steps_per_second": 4.448, "eval_wer": 0.19382835101253615, "step": 1100 }, { "epoch": 1.4117647058823528, "grad_norm": 2.2011804580688477, "learning_rate": 0.00016324999999999998, "loss": 0.2353, "step": 1200 }, { "epoch": 1.4117647058823528, "eval_loss": 0.2168532758951187, "eval_runtime": 32.3432, "eval_samples_per_second": 17.84, "eval_steps_per_second": 4.483, "eval_wer": 0.18746383799421407, "step": 1200 }, { "epoch": 1.5294117647058822, "grad_norm": 2.5243468284606934, "learning_rate": 0.00015074999999999998, "loss": 0.2398, "step": 1300 }, { "epoch": 1.5294117647058822, "eval_loss": 0.2163960486650467, "eval_runtime": 32.6916, "eval_samples_per_second": 17.65, "eval_steps_per_second": 4.435, "eval_wer": 0.18958534233365476, "step": 1300 }, { "epoch": 1.6470588235294117, "grad_norm": 2.1168859004974365, "learning_rate": 0.00013824999999999997, "loss": 0.2419, "step": 1400 }, { "epoch": 1.6470588235294117, "eval_loss": 0.21455629169940948, "eval_runtime": 32.7536, "eval_samples_per_second": 17.616, "eval_steps_per_second": 4.427, "eval_wer": 0.191321118611379, "step": 1400 }, { "epoch": 1.7647058823529411, "grad_norm": 2.818471670150757, "learning_rate": 0.00012575, "loss": 0.2582, "step": 1500 }, { "epoch": 1.7647058823529411, "eval_loss": 0.21221759915351868, "eval_runtime": 32.4823, "eval_samples_per_second": 17.764, "eval_steps_per_second": 4.464, "eval_wer": 0.190549662487946, "step": 1500 }, { "epoch": 1.8823529411764706, "grad_norm": 1.6471282243728638, "learning_rate": 0.00011324999999999999, "loss": 0.2417, "step": 1600 }, { "epoch": 1.8823529411764706, "eval_loss": 0.2105189561843872, "eval_runtime": 32.2537, "eval_samples_per_second": 17.889, "eval_steps_per_second": 4.496, "eval_wer": 0.18611378977820636, "step": 1600 }, { "epoch": 2.0, "grad_norm": 1.615028977394104, "learning_rate": 0.00010074999999999998, "loss": 0.2375, "step": 1700 }, { "epoch": 2.0, "eval_loss": 0.20913735032081604, "eval_runtime": 32.7492, "eval_samples_per_second": 17.619, "eval_steps_per_second": 4.428, "eval_wer": 0.1891996142719383, "step": 1700 }, { "epoch": 2.1176470588235294, "grad_norm": 2.387188673019409, "learning_rate": 8.825e-05, "loss": 0.2353, "step": 1800 }, { "epoch": 2.1176470588235294, "eval_loss": 0.20865142345428467, "eval_runtime": 32.6893, "eval_samples_per_second": 17.651, "eval_steps_per_second": 4.436, "eval_wer": 0.18823529411764706, "step": 1800 }, { "epoch": 2.235294117647059, "grad_norm": 7.139828681945801, "learning_rate": 7.575e-05, "loss": 0.23, "step": 1900 }, { "epoch": 2.235294117647059, "eval_loss": 0.20906075835227966, "eval_runtime": 32.4818, "eval_samples_per_second": 17.764, "eval_steps_per_second": 4.464, "eval_wer": 0.19035679845708775, "step": 1900 }, { "epoch": 2.3529411764705883, "grad_norm": 1.5875645875930786, "learning_rate": 6.324999999999999e-05, "loss": 0.2378, "step": 2000 }, { "epoch": 2.3529411764705883, "eval_loss": 0.20674680173397064, "eval_runtime": 32.5201, "eval_samples_per_second": 17.743, "eval_steps_per_second": 4.459, "eval_wer": 0.189778206364513, "step": 2000 }, { "epoch": 2.4705882352941178, "grad_norm": 3.247025966644287, "learning_rate": 5.0749999999999994e-05, "loss": 0.2343, "step": 2100 }, { "epoch": 2.4705882352941178, "eval_loss": 0.20606324076652527, "eval_runtime": 32.7292, "eval_samples_per_second": 17.63, "eval_steps_per_second": 4.43, "eval_wer": 0.18900675024108005, "step": 2100 }, { "epoch": 2.588235294117647, "grad_norm": 15.340932846069336, "learning_rate": 3.8249999999999995e-05, "loss": 0.2083, "step": 2200 }, { "epoch": 2.588235294117647, "eval_loss": 0.2084166258573532, "eval_runtime": 32.8037, "eval_samples_per_second": 17.59, "eval_steps_per_second": 4.42, "eval_wer": 0.1884281581485053, "step": 2200 }, { "epoch": 2.7058823529411766, "grad_norm": 1.6703847646713257, "learning_rate": 2.5749999999999996e-05, "loss": 0.2058, "step": 2300 }, { "epoch": 2.7058823529411766, "eval_loss": 0.20847396552562714, "eval_runtime": 32.6507, "eval_samples_per_second": 17.672, "eval_steps_per_second": 4.441, "eval_wer": 0.18823529411764706, "step": 2300 }, { "epoch": 2.8235294117647056, "grad_norm": 0.9312336444854736, "learning_rate": 1.3249999999999999e-05, "loss": 0.2197, "step": 2400 }, { "epoch": 2.8235294117647056, "eval_loss": 0.2061544954776764, "eval_runtime": 32.4321, "eval_samples_per_second": 17.791, "eval_steps_per_second": 4.471, "eval_wer": 0.19035679845708775, "step": 2400 }, { "epoch": 2.9411764705882355, "grad_norm": 1.8859847784042358, "learning_rate": 7.499999999999999e-07, "loss": 0.2317, "step": 2500 }, { "epoch": 2.9411764705882355, "eval_loss": 0.2064579278230667, "eval_runtime": 33.0285, "eval_samples_per_second": 17.47, "eval_steps_per_second": 4.39, "eval_wer": 0.18958534233365476, "step": 2500 }, { "epoch": 2.9411764705882355, "step": 2500, "total_flos": 5.091528497919981e+18, "train_loss": 0.9320957969665528, "train_runtime": 2465.2512, "train_samples_per_second": 4.056, "train_steps_per_second": 1.014 } ], "logging_steps": 100, "max_steps": 2500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.091528497919981e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }