{ "best_metric": 0.508959949016571, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bigcgen-baseline-model/checkpoint-3200", "epoch": 11.009174311926605, "eval_steps": 100, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3058103975535168, "grad_norm": 6.544164657592773, "learning_rate": 0.000285, "loss": 12.3367, "step": 100 }, { "epoch": 0.3058103975535168, "eval_loss": 1.3241018056869507, "eval_runtime": 33.411, "eval_samples_per_second": 13.648, "eval_steps_per_second": 3.412, "eval_wer": 0.8893355403168598, "step": 100 }, { "epoch": 0.6116207951070336, "grad_norm": 25.74690818786621, "learning_rate": 0.00029706488156539646, "loss": 1.8764, "step": 200 }, { "epoch": 0.6116207951070336, "eval_loss": 0.689376950263977, "eval_runtime": 31.5961, "eval_samples_per_second": 14.432, "eval_steps_per_second": 3.608, "eval_wer": 0.5814613383778671, "step": 200 }, { "epoch": 0.9174311926605505, "grad_norm": 6.327740669250488, "learning_rate": 0.00029397528321318227, "loss": 1.6712, "step": 300 }, { "epoch": 0.9174311926605505, "eval_loss": 0.6389656662940979, "eval_runtime": 31.4047, "eval_samples_per_second": 14.52, "eval_steps_per_second": 3.63, "eval_wer": 0.5514305982501774, "step": 300 }, { "epoch": 1.2232415902140672, "grad_norm": 1.2617219686508179, "learning_rate": 0.00029088568486096807, "loss": 1.5044, "step": 400 }, { "epoch": 1.2232415902140672, "eval_loss": 0.6300708651542664, "eval_runtime": 31.4218, "eval_samples_per_second": 14.512, "eval_steps_per_second": 3.628, "eval_wer": 0.5351146843225348, "step": 400 }, { "epoch": 1.529051987767584, "grad_norm": 2.4596145153045654, "learning_rate": 0.0002877960865087538, "loss": 1.6648, "step": 500 }, { "epoch": 1.529051987767584, "eval_loss": 0.6076481342315674, "eval_runtime": 31.6805, "eval_samples_per_second": 14.394, "eval_steps_per_second": 3.598, "eval_wer": 0.5282572712225112, "step": 500 }, { "epoch": 1.834862385321101, "grad_norm": 3.1689512729644775, "learning_rate": 0.0002847064881565396, "loss": 1.6411, "step": 600 }, { "epoch": 1.834862385321101, "eval_loss": 0.6073117256164551, "eval_runtime": 31.4984, "eval_samples_per_second": 14.477, "eval_steps_per_second": 3.619, "eval_wer": 0.5282572712225112, "step": 600 }, { "epoch": 2.140672782874618, "grad_norm": 3.601820468902588, "learning_rate": 0.00028161688980432543, "loss": 1.4016, "step": 700 }, { "epoch": 2.140672782874618, "eval_loss": 0.5994192361831665, "eval_runtime": 31.3579, "eval_samples_per_second": 14.542, "eval_steps_per_second": 3.635, "eval_wer": 0.5124142823362497, "step": 700 }, { "epoch": 2.4464831804281344, "grad_norm": 1.8951566219329834, "learning_rate": 0.0002785272914521112, "loss": 1.5703, "step": 800 }, { "epoch": 2.4464831804281344, "eval_loss": 0.5996993184089661, "eval_runtime": 31.3279, "eval_samples_per_second": 14.556, "eval_steps_per_second": 3.639, "eval_wer": 0.5161976826672973, "step": 800 }, { "epoch": 2.7522935779816513, "grad_norm": 2.722160577774048, "learning_rate": 0.00027546858908341916, "loss": 1.4165, "step": 900 }, { "epoch": 2.7522935779816513, "eval_loss": 0.5849923491477966, "eval_runtime": 31.6108, "eval_samples_per_second": 14.425, "eval_steps_per_second": 3.606, "eval_wer": 0.5083944194845117, "step": 900 }, { "epoch": 3.058103975535168, "grad_norm": 6.126498699188232, "learning_rate": 0.0002723789907312049, "loss": 1.4703, "step": 1000 }, { "epoch": 3.058103975535168, "eval_loss": 0.5912389755249023, "eval_runtime": 31.5441, "eval_samples_per_second": 14.456, "eval_steps_per_second": 3.614, "eval_wer": 0.5126507448569402, "step": 1000 }, { "epoch": 3.363914373088685, "grad_norm": 2.3694710731506348, "learning_rate": 0.0002692893923789907, "loss": 1.48, "step": 1100 }, { "epoch": 3.363914373088685, "eval_loss": 0.570675253868103, "eval_runtime": 31.3265, "eval_samples_per_second": 14.556, "eval_steps_per_second": 3.639, "eval_wer": 0.4998817687396548, "step": 1100 }, { "epoch": 3.669724770642202, "grad_norm": 2.2272789478302, "learning_rate": 0.0002661997940267765, "loss": 1.4769, "step": 1200 }, { "epoch": 3.669724770642202, "eval_loss": 0.5674562454223633, "eval_runtime": 31.4068, "eval_samples_per_second": 14.519, "eval_steps_per_second": 3.63, "eval_wer": 0.4949160558051549, "step": 1200 }, { "epoch": 3.9755351681957185, "grad_norm": 1.9602124691009521, "learning_rate": 0.0002631101956745623, "loss": 1.312, "step": 1300 }, { "epoch": 3.9755351681957185, "eval_loss": 0.585631787776947, "eval_runtime": 31.7069, "eval_samples_per_second": 14.382, "eval_steps_per_second": 3.595, "eval_wer": 0.497990068574131, "step": 1300 }, { "epoch": 4.281345565749236, "grad_norm": 2.401339054107666, "learning_rate": 0.00026002059732234807, "loss": 1.3821, "step": 1400 }, { "epoch": 4.281345565749236, "eval_loss": 0.5641556978225708, "eval_runtime": 31.5622, "eval_samples_per_second": 14.448, "eval_steps_per_second": 3.612, "eval_wer": 0.49917238117758334, "step": 1400 }, { "epoch": 4.587155963302752, "grad_norm": 3.3140265941619873, "learning_rate": 0.0002569309989701339, "loss": 1.457, "step": 1500 }, { "epoch": 4.587155963302752, "eval_loss": 0.558822751045227, "eval_runtime": 31.4842, "eval_samples_per_second": 14.483, "eval_steps_per_second": 3.621, "eval_wer": 0.5053204067155356, "step": 1500 }, { "epoch": 4.892966360856269, "grad_norm": 1.205606460571289, "learning_rate": 0.00025384140061791963, "loss": 1.3606, "step": 1600 }, { "epoch": 4.892966360856269, "eval_loss": 0.5637312531471252, "eval_runtime": 31.5743, "eval_samples_per_second": 14.442, "eval_steps_per_second": 3.611, "eval_wer": 0.48663986758098843, "step": 1600 }, { "epoch": 5.198776758409786, "grad_norm": 5.104424953460693, "learning_rate": 0.00025075180226570543, "loss": 1.3986, "step": 1700 }, { "epoch": 5.198776758409786, "eval_loss": 0.5511406064033508, "eval_runtime": 31.6463, "eval_samples_per_second": 14.409, "eval_steps_per_second": 3.602, "eval_wer": 0.48663986758098843, "step": 1700 }, { "epoch": 5.504587155963303, "grad_norm": 2.85310435295105, "learning_rate": 0.00024766220391349124, "loss": 1.421, "step": 1800 }, { "epoch": 5.504587155963303, "eval_loss": 0.5845613479614258, "eval_runtime": 31.614, "eval_samples_per_second": 14.424, "eval_steps_per_second": 3.606, "eval_wer": 0.534641759281154, "step": 1800 }, { "epoch": 5.81039755351682, "grad_norm": 1.2034293413162231, "learning_rate": 0.00024457260556127704, "loss": 1.3004, "step": 1900 }, { "epoch": 5.81039755351682, "eval_loss": 0.5440376400947571, "eval_runtime": 31.6052, "eval_samples_per_second": 14.428, "eval_steps_per_second": 3.607, "eval_wer": 0.47363442894301255, "step": 1900 }, { "epoch": 6.116207951070336, "grad_norm": 5.384260654449463, "learning_rate": 0.0002414830072090628, "loss": 1.3319, "step": 2000 }, { "epoch": 6.116207951070336, "eval_loss": 0.531833291053772, "eval_runtime": 31.4837, "eval_samples_per_second": 14.484, "eval_steps_per_second": 3.621, "eval_wer": 0.47860014187751243, "step": 2000 }, { "epoch": 6.422018348623853, "grad_norm": 1.9580897092819214, "learning_rate": 0.0002383934088568486, "loss": 1.2665, "step": 2100 }, { "epoch": 6.422018348623853, "eval_loss": 0.5487645268440247, "eval_runtime": 31.7917, "eval_samples_per_second": 14.343, "eval_steps_per_second": 3.586, "eval_wer": 0.5065027193189879, "step": 2100 }, { "epoch": 6.72782874617737, "grad_norm": 2.0100314617156982, "learning_rate": 0.00023530381050463438, "loss": 1.3703, "step": 2200 }, { "epoch": 6.72782874617737, "eval_loss": 0.53043133020401, "eval_runtime": 31.6267, "eval_samples_per_second": 14.418, "eval_steps_per_second": 3.605, "eval_wer": 0.4878221801844408, "step": 2200 }, { "epoch": 7.033639143730887, "grad_norm": 2.1251752376556396, "learning_rate": 0.00023221421215242018, "loss": 1.1954, "step": 2300 }, { "epoch": 7.033639143730887, "eval_loss": 0.5297590494155884, "eval_runtime": 31.5678, "eval_samples_per_second": 14.445, "eval_steps_per_second": 3.611, "eval_wer": 0.48072830456372667, "step": 2300 }, { "epoch": 7.339449541284404, "grad_norm": 3.3306617736816406, "learning_rate": 0.00022912461380020596, "loss": 1.2973, "step": 2400 }, { "epoch": 7.339449541284404, "eval_loss": 0.5258393883705139, "eval_runtime": 31.6969, "eval_samples_per_second": 14.386, "eval_steps_per_second": 3.597, "eval_wer": 0.47056041617403643, "step": 2400 }, { "epoch": 7.6452599388379205, "grad_norm": 1.3215351104736328, "learning_rate": 0.00022603501544799176, "loss": 1.2086, "step": 2500 }, { "epoch": 7.6452599388379205, "eval_loss": 0.5230885744094849, "eval_runtime": 31.7287, "eval_samples_per_second": 14.372, "eval_steps_per_second": 3.593, "eval_wer": 0.48072830456372667, "step": 2500 }, { "epoch": 7.951070336391437, "grad_norm": 7.100344181060791, "learning_rate": 0.0002229454170957775, "loss": 1.2796, "step": 2600 }, { "epoch": 7.951070336391437, "eval_loss": 0.5403640866279602, "eval_runtime": 31.7077, "eval_samples_per_second": 14.381, "eval_steps_per_second": 3.595, "eval_wer": 0.473870891463703, "step": 2600 }, { "epoch": 8.256880733944953, "grad_norm": 2.845608949661255, "learning_rate": 0.00021985581874356332, "loss": 1.1428, "step": 2700 }, { "epoch": 8.256880733944953, "eval_loss": 0.5328220725059509, "eval_runtime": 31.486, "eval_samples_per_second": 14.483, "eval_steps_per_second": 3.621, "eval_wer": 0.48309292977063134, "step": 2700 }, { "epoch": 8.562691131498472, "grad_norm": 2.0753355026245117, "learning_rate": 0.0002167662203913491, "loss": 1.3118, "step": 2800 }, { "epoch": 8.562691131498472, "eval_loss": 0.5198370814323425, "eval_runtime": 31.5982, "eval_samples_per_second": 14.431, "eval_steps_per_second": 3.608, "eval_wer": 0.4769449042326791, "step": 2800 }, { "epoch": 8.868501529051988, "grad_norm": 1.6540861129760742, "learning_rate": 0.0002136766220391349, "loss": 1.2569, "step": 2900 }, { "epoch": 8.868501529051988, "eval_loss": 0.530635416507721, "eval_runtime": 31.7976, "eval_samples_per_second": 14.341, "eval_steps_per_second": 3.585, "eval_wer": 0.48474816741546467, "step": 2900 }, { "epoch": 9.174311926605505, "grad_norm": 3.073190450668335, "learning_rate": 0.00021058702368692068, "loss": 1.1718, "step": 3000 }, { "epoch": 9.174311926605505, "eval_loss": 0.5160026550292969, "eval_runtime": 31.7143, "eval_samples_per_second": 14.378, "eval_steps_per_second": 3.595, "eval_wer": 0.4648853156774651, "step": 3000 }, { "epoch": 9.480122324159021, "grad_norm": 5.7398881912231445, "learning_rate": 0.00020749742533470648, "loss": 1.1354, "step": 3100 }, { "epoch": 9.480122324159021, "eval_loss": 0.5265021920204163, "eval_runtime": 31.5861, "eval_samples_per_second": 14.437, "eval_steps_per_second": 3.609, "eval_wer": 0.47765429179475055, "step": 3100 }, { "epoch": 9.785932721712538, "grad_norm": 7.869950294494629, "learning_rate": 0.00020440782698249226, "loss": 1.2795, "step": 3200 }, { "epoch": 9.785932721712538, "eval_loss": 0.508959949016571, "eval_runtime": 31.4657, "eval_samples_per_second": 14.492, "eval_steps_per_second": 3.623, "eval_wer": 0.45897375266020335, "step": 3200 }, { "epoch": 10.091743119266056, "grad_norm": 2.3973193168640137, "learning_rate": 0.00020131822863027807, "loss": 1.1793, "step": 3300 }, { "epoch": 10.091743119266056, "eval_loss": 0.5265406370162964, "eval_runtime": 31.7563, "eval_samples_per_second": 14.359, "eval_steps_per_second": 3.59, "eval_wer": 0.4684322534878222, "step": 3300 }, { "epoch": 10.397553516819572, "grad_norm": 2.35862398147583, "learning_rate": 0.00019822863027806382, "loss": 1.1647, "step": 3400 }, { "epoch": 10.397553516819572, "eval_loss": 0.538512110710144, "eval_runtime": 31.6591, "eval_samples_per_second": 14.403, "eval_steps_per_second": 3.601, "eval_wer": 0.4762355166706077, "step": 3400 }, { "epoch": 10.703363914373089, "grad_norm": 1.5017459392547607, "learning_rate": 0.00019513903192584962, "loss": 1.1978, "step": 3500 }, { "epoch": 10.703363914373089, "eval_loss": 0.5132156610488892, "eval_runtime": 31.4578, "eval_samples_per_second": 14.496, "eval_steps_per_second": 3.624, "eval_wer": 0.4715062662567983, "step": 3500 }, { "epoch": 11.009174311926605, "grad_norm": 8.986639022827148, "learning_rate": 0.0001920494335736354, "loss": 1.1802, "step": 3600 }, { "epoch": 11.009174311926605, "eval_loss": 0.5129914283752441, "eval_runtime": 31.4287, "eval_samples_per_second": 14.509, "eval_steps_per_second": 3.627, "eval_wer": 0.45968314022227474, "step": 3600 }, { "epoch": 11.009174311926605, "step": 3600, "total_flos": 1.8690129169621533e+19, "train_loss": 1.669767551422119, "train_runtime": 5274.2639, "train_samples_per_second": 14.874, "train_steps_per_second": 1.86 } ], "logging_steps": 100, "max_steps": 9810, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 4 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.8690129169621533e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }