|
{ |
|
"best_metric": 0.20606324076652527, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-swagen-female-model/checkpoint-2100", |
|
"epoch": 2.9411764705882355, |
|
"eval_steps": 100, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 42.9699821472168, |
|
"learning_rate": 0.00029099999999999997, |
|
"loss": 8.5907, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"eval_loss": 4.55902099609375, |
|
"eval_runtime": 32.5172, |
|
"eval_samples_per_second": 17.744, |
|
"eval_steps_per_second": 4.459, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"grad_norm": 23.00685691833496, |
|
"learning_rate": 0.00028812499999999997, |
|
"loss": 4.3006, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23529411764705882, |
|
"eval_loss": 4.2573113441467285, |
|
"eval_runtime": 32.1998, |
|
"eval_samples_per_second": 17.919, |
|
"eval_steps_per_second": 4.503, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"grad_norm": 7.895191192626953, |
|
"learning_rate": 0.00027575, |
|
"loss": 4.1906, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.35294117647058826, |
|
"eval_loss": 3.843606948852539, |
|
"eval_runtime": 32.1765, |
|
"eval_samples_per_second": 17.932, |
|
"eval_steps_per_second": 4.506, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"grad_norm": 1.9934626817703247, |
|
"learning_rate": 0.00026325, |
|
"loss": 1.1198, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47058823529411764, |
|
"eval_loss": 0.25689366459846497, |
|
"eval_runtime": 32.1, |
|
"eval_samples_per_second": 17.975, |
|
"eval_steps_per_second": 4.517, |
|
"eval_wer": 0.19961427193828352, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 1.5836255550384521, |
|
"learning_rate": 0.00025075, |
|
"loss": 0.3012, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"eval_loss": 0.2465960681438446, |
|
"eval_runtime": 32.4617, |
|
"eval_samples_per_second": 17.775, |
|
"eval_steps_per_second": 4.467, |
|
"eval_wer": 0.19421407907425264, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"grad_norm": 1.9752272367477417, |
|
"learning_rate": 0.00023825, |
|
"loss": 0.2919, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7058823529411765, |
|
"eval_loss": 0.25010332465171814, |
|
"eval_runtime": 32.532, |
|
"eval_samples_per_second": 17.736, |
|
"eval_steps_per_second": 4.457, |
|
"eval_wer": 0.19903567984570877, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8235294117647058, |
|
"grad_norm": 4.631561279296875, |
|
"learning_rate": 0.00022574999999999996, |
|
"loss": 0.267, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8235294117647058, |
|
"eval_loss": 0.2353355437517166, |
|
"eval_runtime": 32.2905, |
|
"eval_samples_per_second": 17.869, |
|
"eval_steps_per_second": 4.49, |
|
"eval_wer": 0.18939247830279654, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"grad_norm": 3.183828830718994, |
|
"learning_rate": 0.00021324999999999998, |
|
"loss": 0.2666, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9411764705882353, |
|
"eval_loss": 0.22800631821155548, |
|
"eval_runtime": 32.2537, |
|
"eval_samples_per_second": 17.889, |
|
"eval_steps_per_second": 4.496, |
|
"eval_wer": 0.19035679845708775, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.0588235294117647, |
|
"grad_norm": 1.4982892274856567, |
|
"learning_rate": 0.00020075, |
|
"loss": 0.2396, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.0588235294117647, |
|
"eval_loss": 0.22375665605068207, |
|
"eval_runtime": 32.5796, |
|
"eval_samples_per_second": 17.71, |
|
"eval_steps_per_second": 4.451, |
|
"eval_wer": 0.1891996142719383, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"grad_norm": 1.2470210790634155, |
|
"learning_rate": 0.00018824999999999997, |
|
"loss": 0.258, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.1764705882352942, |
|
"eval_loss": 0.22019432485103607, |
|
"eval_runtime": 32.5522, |
|
"eval_samples_per_second": 17.725, |
|
"eval_steps_per_second": 4.454, |
|
"eval_wer": 0.189778206364513, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.2941176470588236, |
|
"grad_norm": 2.3243045806884766, |
|
"learning_rate": 0.00017575, |
|
"loss": 0.2192, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.2941176470588236, |
|
"eval_loss": 0.21961148083209991, |
|
"eval_runtime": 32.5996, |
|
"eval_samples_per_second": 17.7, |
|
"eval_steps_per_second": 4.448, |
|
"eval_wer": 0.19382835101253615, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.4117647058823528, |
|
"grad_norm": 2.2011804580688477, |
|
"learning_rate": 0.00016324999999999998, |
|
"loss": 0.2353, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.4117647058823528, |
|
"eval_loss": 0.2168532758951187, |
|
"eval_runtime": 32.3432, |
|
"eval_samples_per_second": 17.84, |
|
"eval_steps_per_second": 4.483, |
|
"eval_wer": 0.18746383799421407, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.5294117647058822, |
|
"grad_norm": 2.5243468284606934, |
|
"learning_rate": 0.00015074999999999998, |
|
"loss": 0.2398, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.5294117647058822, |
|
"eval_loss": 0.2163960486650467, |
|
"eval_runtime": 32.6916, |
|
"eval_samples_per_second": 17.65, |
|
"eval_steps_per_second": 4.435, |
|
"eval_wer": 0.18958534233365476, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.6470588235294117, |
|
"grad_norm": 2.1168859004974365, |
|
"learning_rate": 0.00013824999999999997, |
|
"loss": 0.2419, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.6470588235294117, |
|
"eval_loss": 0.21455629169940948, |
|
"eval_runtime": 32.7536, |
|
"eval_samples_per_second": 17.616, |
|
"eval_steps_per_second": 4.427, |
|
"eval_wer": 0.191321118611379, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"grad_norm": 2.818471670150757, |
|
"learning_rate": 0.00012575, |
|
"loss": 0.2582, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.7647058823529411, |
|
"eval_loss": 0.21221759915351868, |
|
"eval_runtime": 32.4823, |
|
"eval_samples_per_second": 17.764, |
|
"eval_steps_per_second": 4.464, |
|
"eval_wer": 0.190549662487946, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"grad_norm": 1.6471282243728638, |
|
"learning_rate": 0.00011324999999999999, |
|
"loss": 0.2417, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.8823529411764706, |
|
"eval_loss": 0.2105189561843872, |
|
"eval_runtime": 32.2537, |
|
"eval_samples_per_second": 17.889, |
|
"eval_steps_per_second": 4.496, |
|
"eval_wer": 0.18611378977820636, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.615028977394104, |
|
"learning_rate": 0.00010074999999999998, |
|
"loss": 0.2375, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.20913735032081604, |
|
"eval_runtime": 32.7492, |
|
"eval_samples_per_second": 17.619, |
|
"eval_steps_per_second": 4.428, |
|
"eval_wer": 0.1891996142719383, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.1176470588235294, |
|
"grad_norm": 2.387188673019409, |
|
"learning_rate": 8.825e-05, |
|
"loss": 0.2353, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.1176470588235294, |
|
"eval_loss": 0.20865142345428467, |
|
"eval_runtime": 32.6893, |
|
"eval_samples_per_second": 17.651, |
|
"eval_steps_per_second": 4.436, |
|
"eval_wer": 0.18823529411764706, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.235294117647059, |
|
"grad_norm": 7.139828681945801, |
|
"learning_rate": 7.575e-05, |
|
"loss": 0.23, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.235294117647059, |
|
"eval_loss": 0.20906075835227966, |
|
"eval_runtime": 32.4818, |
|
"eval_samples_per_second": 17.764, |
|
"eval_steps_per_second": 4.464, |
|
"eval_wer": 0.19035679845708775, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 1.5875645875930786, |
|
"learning_rate": 6.324999999999999e-05, |
|
"loss": 0.2378, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"eval_loss": 0.20674680173397064, |
|
"eval_runtime": 32.5201, |
|
"eval_samples_per_second": 17.743, |
|
"eval_steps_per_second": 4.459, |
|
"eval_wer": 0.189778206364513, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.4705882352941178, |
|
"grad_norm": 3.247025966644287, |
|
"learning_rate": 5.0749999999999994e-05, |
|
"loss": 0.2343, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.4705882352941178, |
|
"eval_loss": 0.20606324076652527, |
|
"eval_runtime": 32.7292, |
|
"eval_samples_per_second": 17.63, |
|
"eval_steps_per_second": 4.43, |
|
"eval_wer": 0.18900675024108005, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.588235294117647, |
|
"grad_norm": 15.340932846069336, |
|
"learning_rate": 3.8249999999999995e-05, |
|
"loss": 0.2083, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.588235294117647, |
|
"eval_loss": 0.2084166258573532, |
|
"eval_runtime": 32.8037, |
|
"eval_samples_per_second": 17.59, |
|
"eval_steps_per_second": 4.42, |
|
"eval_wer": 0.1884281581485053, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.7058823529411766, |
|
"grad_norm": 1.6703847646713257, |
|
"learning_rate": 2.5749999999999996e-05, |
|
"loss": 0.2058, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.7058823529411766, |
|
"eval_loss": 0.20847396552562714, |
|
"eval_runtime": 32.6507, |
|
"eval_samples_per_second": 17.672, |
|
"eval_steps_per_second": 4.441, |
|
"eval_wer": 0.18823529411764706, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.8235294117647056, |
|
"grad_norm": 0.9312336444854736, |
|
"learning_rate": 1.3249999999999999e-05, |
|
"loss": 0.2197, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.8235294117647056, |
|
"eval_loss": 0.2061544954776764, |
|
"eval_runtime": 32.4321, |
|
"eval_samples_per_second": 17.791, |
|
"eval_steps_per_second": 4.471, |
|
"eval_wer": 0.19035679845708775, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"grad_norm": 1.8859847784042358, |
|
"learning_rate": 7.499999999999999e-07, |
|
"loss": 0.2317, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"eval_loss": 0.2064579278230667, |
|
"eval_runtime": 33.0285, |
|
"eval_samples_per_second": 17.47, |
|
"eval_steps_per_second": 4.39, |
|
"eval_wer": 0.18958534233365476, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.9411764705882355, |
|
"step": 2500, |
|
"total_flos": 5.091528497919981e+18, |
|
"train_loss": 0.9320957969665528, |
|
"train_runtime": 2465.2512, |
|
"train_samples_per_second": 4.056, |
|
"train_steps_per_second": 1.014 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.091528497919981e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|