|
{ |
|
"best_metric": 0.2767798602581024, |
|
"best_model_checkpoint": "/scratch/skscla001/results/mms-zeroshot-300m-genbed-combined-model/checkpoint-5000", |
|
"epoch": 7.6923076923076925, |
|
"eval_steps": 200, |
|
"global_step": 5600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27472527472527475, |
|
"eval_loss": 2.203052520751953, |
|
"eval_runtime": 115.6497, |
|
"eval_samples_per_second": 16.766, |
|
"eval_steps_per_second": 2.101, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"eval_loss": 0.4235461354255676, |
|
"eval_runtime": 114.7031, |
|
"eval_samples_per_second": 16.905, |
|
"eval_steps_per_second": 2.119, |
|
"eval_wer": 0.6042162162162162, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6868131868131868, |
|
"grad_norm": 1.5520302057266235, |
|
"learning_rate": 0.00029452161913523457, |
|
"loss": 2.7468, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8241758241758241, |
|
"eval_loss": 0.3791254162788391, |
|
"eval_runtime": 114.91, |
|
"eval_samples_per_second": 16.874, |
|
"eval_steps_per_second": 2.115, |
|
"eval_wer": 0.5721621621621622, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.098901098901099, |
|
"eval_loss": 0.35884976387023926, |
|
"eval_runtime": 115.1243, |
|
"eval_samples_per_second": 16.843, |
|
"eval_steps_per_second": 2.111, |
|
"eval_wer": 0.5631891891891891, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3736263736263736, |
|
"grad_norm": 0.5800592303276062, |
|
"learning_rate": 0.00028762189512419504, |
|
"loss": 0.5446, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.3736263736263736, |
|
"eval_loss": 0.34893664717674255, |
|
"eval_runtime": 115.1834, |
|
"eval_samples_per_second": 16.834, |
|
"eval_steps_per_second": 2.11, |
|
"eval_wer": 0.5446486486486487, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6483516483516483, |
|
"eval_loss": 0.34060972929000854, |
|
"eval_runtime": 116.725, |
|
"eval_samples_per_second": 16.612, |
|
"eval_steps_per_second": 2.082, |
|
"eval_wer": 0.5457837837837838, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"eval_loss": 0.33255019783973694, |
|
"eval_runtime": 114.9472, |
|
"eval_samples_per_second": 16.869, |
|
"eval_steps_per_second": 2.114, |
|
"eval_wer": 0.5182702702702703, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0604395604395602, |
|
"grad_norm": 0.7607436776161194, |
|
"learning_rate": 0.00028072217111315545, |
|
"loss": 0.4932, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.197802197802198, |
|
"eval_loss": 0.3234807848930359, |
|
"eval_runtime": 116.1043, |
|
"eval_samples_per_second": 16.701, |
|
"eval_steps_per_second": 2.093, |
|
"eval_wer": 0.5237297297297298, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4725274725274726, |
|
"eval_loss": 0.3202188313007355, |
|
"eval_runtime": 114.8249, |
|
"eval_samples_per_second": 16.887, |
|
"eval_steps_per_second": 2.116, |
|
"eval_wer": 0.5057837837837837, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.7472527472527473, |
|
"grad_norm": 0.899978756904602, |
|
"learning_rate": 0.0002738224471021159, |
|
"loss": 0.4644, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.7472527472527473, |
|
"eval_loss": 0.32043078541755676, |
|
"eval_runtime": 114.5219, |
|
"eval_samples_per_second": 16.931, |
|
"eval_steps_per_second": 2.122, |
|
"eval_wer": 0.504, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.021978021978022, |
|
"eval_loss": 0.31440961360931396, |
|
"eval_runtime": 115.5011, |
|
"eval_samples_per_second": 16.788, |
|
"eval_steps_per_second": 2.104, |
|
"eval_wer": 0.490972972972973, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.2967032967032965, |
|
"eval_loss": 0.30763551592826843, |
|
"eval_runtime": 115.3262, |
|
"eval_samples_per_second": 16.813, |
|
"eval_steps_per_second": 2.107, |
|
"eval_wer": 0.49572972972972973, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.4340659340659343, |
|
"grad_norm": 0.6096507906913757, |
|
"learning_rate": 0.00026692272309107633, |
|
"loss": 0.4505, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"eval_loss": 0.3038763701915741, |
|
"eval_runtime": 114.948, |
|
"eval_samples_per_second": 16.868, |
|
"eval_steps_per_second": 2.114, |
|
"eval_wer": 0.4750810810810811, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"eval_loss": 0.3026913106441498, |
|
"eval_runtime": 115.6051, |
|
"eval_samples_per_second": 16.773, |
|
"eval_steps_per_second": 2.102, |
|
"eval_wer": 0.4785405405405405, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.1208791208791204, |
|
"grad_norm": 0.8457227945327759, |
|
"learning_rate": 0.00026002299908003674, |
|
"loss": 0.433, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.1208791208791204, |
|
"eval_loss": 0.3013380765914917, |
|
"eval_runtime": 116.3739, |
|
"eval_samples_per_second": 16.662, |
|
"eval_steps_per_second": 2.088, |
|
"eval_wer": 0.46816216216216217, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.395604395604396, |
|
"eval_loss": 0.2960599362850189, |
|
"eval_runtime": 115.9661, |
|
"eval_samples_per_second": 16.72, |
|
"eval_steps_per_second": 2.095, |
|
"eval_wer": 0.4765405405405405, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.670329670329671, |
|
"eval_loss": 0.29141488671302795, |
|
"eval_runtime": 115.6468, |
|
"eval_samples_per_second": 16.767, |
|
"eval_steps_per_second": 2.101, |
|
"eval_wer": 0.468, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.8076923076923075, |
|
"grad_norm": 0.6512793302536011, |
|
"learning_rate": 0.0002531232750689972, |
|
"loss": 0.4203, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.945054945054945, |
|
"eval_loss": 0.29088592529296875, |
|
"eval_runtime": 116.2657, |
|
"eval_samples_per_second": 16.677, |
|
"eval_steps_per_second": 2.09, |
|
"eval_wer": 0.46562162162162163, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.21978021978022, |
|
"eval_loss": 0.29455265402793884, |
|
"eval_runtime": 116.5267, |
|
"eval_samples_per_second": 16.64, |
|
"eval_steps_per_second": 2.085, |
|
"eval_wer": 0.4508108108108108, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.45442482829093933, |
|
"learning_rate": 0.0002462235510579577, |
|
"loss": 0.4042, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"eval_loss": 0.2901473343372345, |
|
"eval_runtime": 116.1982, |
|
"eval_samples_per_second": 16.687, |
|
"eval_steps_per_second": 2.091, |
|
"eval_wer": 0.44335135135135134, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.769230769230769, |
|
"eval_loss": 0.2899898886680603, |
|
"eval_runtime": 115.6122, |
|
"eval_samples_per_second": 16.772, |
|
"eval_steps_per_second": 2.102, |
|
"eval_wer": 0.44632432432432434, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.043956043956044, |
|
"eval_loss": 0.2839508354663849, |
|
"eval_runtime": 115.0777, |
|
"eval_samples_per_second": 16.849, |
|
"eval_steps_per_second": 2.112, |
|
"eval_wer": 0.4448648648648649, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 6.181318681318682, |
|
"grad_norm": 0.6338995695114136, |
|
"learning_rate": 0.0002393238270469181, |
|
"loss": 0.3962, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 6.318681318681318, |
|
"eval_loss": 0.2820078432559967, |
|
"eval_runtime": 114.3206, |
|
"eval_samples_per_second": 16.961, |
|
"eval_steps_per_second": 2.126, |
|
"eval_wer": 0.44475675675675674, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.593406593406593, |
|
"eval_loss": 0.27714216709136963, |
|
"eval_runtime": 115.2116, |
|
"eval_samples_per_second": 16.83, |
|
"eval_steps_per_second": 2.109, |
|
"eval_wer": 0.4325945945945946, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 6.868131868131869, |
|
"grad_norm": 0.5752519965171814, |
|
"learning_rate": 0.00023242410303587856, |
|
"loss": 0.3881, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.868131868131869, |
|
"eval_loss": 0.2767798602581024, |
|
"eval_runtime": 114.4373, |
|
"eval_samples_per_second": 16.944, |
|
"eval_steps_per_second": 2.123, |
|
"eval_wer": 0.4294054054054054, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"eval_loss": 0.27870669960975647, |
|
"eval_runtime": 117.3195, |
|
"eval_samples_per_second": 16.528, |
|
"eval_steps_per_second": 2.071, |
|
"eval_wer": 0.43854054054054054, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 7.417582417582418, |
|
"eval_loss": 0.2795184254646301, |
|
"eval_runtime": 116.3581, |
|
"eval_samples_per_second": 16.664, |
|
"eval_steps_per_second": 2.088, |
|
"eval_wer": 0.4309189189189189, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 7.554945054945055, |
|
"grad_norm": 0.30598416924476624, |
|
"learning_rate": 0.000225524379024839, |
|
"loss": 0.3784, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"eval_loss": 0.2802658975124359, |
|
"eval_runtime": 115.2901, |
|
"eval_samples_per_second": 16.818, |
|
"eval_steps_per_second": 2.108, |
|
"eval_wer": 0.43567567567567567, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"step": 5600, |
|
"total_flos": 8.779281950497536e+18, |
|
"train_loss": 0.6424136958803449, |
|
"train_runtime": 7260.5039, |
|
"train_samples_per_second": 24.036, |
|
"train_steps_per_second": 3.008 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21840, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.779281950497536e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|