|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 197.5483870967742,
  "global_step": 6124,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.23,
      "learning_rate": 1.2639705882352941e-05,
      "loss": 14.7415,
      "step": 100
    },
    {
      "epoch": 6.45,
      "learning_rate": 2.477205882352941e-05,
      "loss": 6.4415,
      "step": 200
    },
    {
      "epoch": 9.68,
      "learning_rate": 3.690441176470588e-05,
      "loss": 4.4273,
      "step": 300
    },
    {
      "epoch": 12.9,
      "learning_rate": 4.9036764705882346e-05,
      "loss": 3.5184,
      "step": 400
    },
    {
      "epoch": 12.9,
      "eval_cer": 1.0,
      "eval_loss": 3.4210262298583984,
      "eval_runtime": 96.9515,
      "eval_samples_per_second": 18.628,
      "eval_steps_per_second": 0.299,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 16.13,
      "learning_rate": 6.116911764705881e-05,
      "loss": 3.3554,
      "step": 500
    },
    {
      "epoch": 19.35,
      "learning_rate": 7.330147058823529e-05,
      "loss": 3.2617,
      "step": 600
    },
    {
      "epoch": 22.58,
      "learning_rate": 7.5e-05,
      "loss": 3.0887,
      "step": 700
    },
    {
      "epoch": 25.81,
      "learning_rate": 7.5e-05,
      "loss": 2.3797,
      "step": 800
    },
    {
      "epoch": 25.81,
      "eval_cer": 0.2583848190644307,
      "eval_loss": 1.1068178415298462,
      "eval_runtime": 99.1553,
      "eval_samples_per_second": 18.214,
      "eval_steps_per_second": 0.292,
      "eval_wer": 0.8389036136789708,
      "step": 800
    },
    {
      "epoch": 29.03,
      "learning_rate": 7.5e-05,
      "loss": 1.8558,
      "step": 900
    },
    {
      "epoch": 32.26,
      "learning_rate": 7.5e-05,
      "loss": 1.6732,
      "step": 1000
    },
    {
      "epoch": 35.48,
      "learning_rate": 7.5e-05,
      "loss": 1.5624,
      "step": 1100
    },
    {
      "epoch": 38.71,
      "learning_rate": 7.5e-05,
      "loss": 1.5022,
      "step": 1200
    },
    {
      "epoch": 38.71,
      "eval_cer": 0.1516846387044783,
      "eval_loss": 0.5277877449989319,
      "eval_runtime": 102.5824,
      "eval_samples_per_second": 17.605,
      "eval_steps_per_second": 0.283,
      "eval_wer": 0.6279656086366281,
      "step": 1200
    },
    {
      "epoch": 41.94,
      "learning_rate": 7.5e-05,
      "loss": 1.4358,
      "step": 1300
    },
    {
      "epoch": 45.16,
      "learning_rate": 7.5e-05,
      "loss": 1.388,
      "step": 1400
    },
    {
      "epoch": 48.39,
      "learning_rate": 7.5e-05,
      "loss": 1.3493,
      "step": 1500
    },
    {
      "epoch": 51.61,
      "learning_rate": 7.5e-05,
      "loss": 1.3181,
      "step": 1600
    },
    {
      "epoch": 51.61,
      "eval_cer": 0.12968648067846042,
      "eval_loss": 0.4253957271575928,
      "eval_runtime": 98.985,
      "eval_samples_per_second": 18.245,
      "eval_steps_per_second": 0.293,
      "eval_wer": 0.5587303639537139,
      "step": 1600
    },
    {
      "epoch": 54.84,
      "learning_rate": 7.5e-05,
      "loss": 1.2814,
      "step": 1700
    },
    {
      "epoch": 58.06,
      "learning_rate": 7.5e-05,
      "loss": 1.2547,
      "step": 1800
    },
    {
      "epoch": 61.29,
      "learning_rate": 7.5e-05,
      "loss": 1.2244,
      "step": 1900
    },
    {
      "epoch": 64.52,
      "learning_rate": 7.5e-05,
      "loss": 1.2037,
      "step": 2000
    },
    {
      "epoch": 64.52,
      "eval_cer": 0.11762730726428489,
      "eval_loss": 0.3835846781730652,
      "eval_runtime": 99.4181,
      "eval_samples_per_second": 18.166,
      "eval_steps_per_second": 0.292,
      "eval_wer": 0.5142543150817764,
      "step": 2000
    },
    {
      "epoch": 67.74,
      "learning_rate": 7.5e-05,
      "loss": 1.1751,
      "step": 2100
    },
    {
      "epoch": 70.97,
      "learning_rate": 7.5e-05,
      "loss": 1.1642,
      "step": 2200
    },
    {
      "epoch": 74.19,
      "learning_rate": 7.5e-05,
      "loss": 1.1397,
      "step": 2300
    },
    {
      "epoch": 77.42,
      "learning_rate": 7.5e-05,
      "loss": 1.1245,
      "step": 2400
    },
    {
      "epoch": 77.42,
      "eval_cer": 0.11111324302544226,
      "eval_loss": 0.36429011821746826,
      "eval_runtime": 98.0055,
      "eval_samples_per_second": 18.428,
      "eval_steps_per_second": 0.296,
      "eval_wer": 0.48710323873553557,
      "step": 2400
    },
    {
      "epoch": 80.65,
      "learning_rate": 7.5e-05,
      "loss": 1.1057,
      "step": 2500
    },
    {
      "epoch": 83.87,
      "learning_rate": 7.5e-05,
      "loss": 1.0852,
      "step": 2600
    },
    {
      "epoch": 87.1,
      "learning_rate": 7.5e-05,
      "loss": 1.073,
      "step": 2700
    },
    {
      "epoch": 90.32,
      "learning_rate": 7.5e-05,
      "loss": 1.0582,
      "step": 2800
    },
    {
      "epoch": 90.32,
      "eval_cer": 0.10623009324993285,
      "eval_loss": 0.3561805486679077,
      "eval_runtime": 94.8015,
      "eval_samples_per_second": 19.05,
      "eval_steps_per_second": 0.306,
      "eval_wer": 0.46758032193419097,
      "step": 2800
    },
    {
      "epoch": 93.55,
      "learning_rate": 7.5e-05,
      "loss": 1.0376,
      "step": 2900
    },
    {
      "epoch": 96.77,
      "learning_rate": 7.5e-05,
      "loss": 1.0341,
      "step": 3000
    },
    {
      "epoch": 100.0,
      "learning_rate": 7.416258570029382e-05,
      "loss": 1.017,
      "step": 3100
    },
    {
      "epoch": 103.23,
      "learning_rate": 7.183643486777668e-05,
      "loss": 1.0027,
      "step": 3200
    },
    {
      "epoch": 103.23,
      "eval_cer": 0.10575041252542308,
      "eval_loss": 0.3529968559741974,
      "eval_runtime": 98.9163,
      "eval_samples_per_second": 18.258,
      "eval_steps_per_second": 0.293,
      "eval_wer": 0.4625379791841748,
      "step": 3200
    },
    {
      "epoch": 106.45,
      "learning_rate": 6.951028403525955e-05,
      "loss": 0.9864,
      "step": 3300
    },
    {
      "epoch": 109.68,
      "learning_rate": 6.71841332027424e-05,
      "loss": 0.9769,
      "step": 3400
    },
    {
      "epoch": 112.9,
      "learning_rate": 6.485798237022525e-05,
      "loss": 0.9587,
      "step": 3500
    },
    {
      "epoch": 116.13,
      "learning_rate": 6.253183153770813e-05,
      "loss": 0.9382,
      "step": 3600
    },
    {
      "epoch": 116.13,
      "eval_cer": 0.10022449057907057,
      "eval_loss": 0.3388434052467346,
      "eval_runtime": 98.6206,
      "eval_samples_per_second": 18.313,
      "eval_steps_per_second": 0.294,
      "eval_wer": 0.44424332536039823,
      "step": 3600
    },
    {
      "epoch": 119.35,
      "learning_rate": 6.020568070519098e-05,
      "loss": 0.9296,
      "step": 3700
    },
    {
      "epoch": 122.58,
      "learning_rate": 5.787952987267385e-05,
      "loss": 0.9145,
      "step": 3800
    },
    {
      "epoch": 125.81,
      "learning_rate": 5.5553379040156705e-05,
      "loss": 0.9043,
      "step": 3900
    },
    {
      "epoch": 129.03,
      "learning_rate": 5.3227228207639564e-05,
      "loss": 0.8915,
      "step": 4000
    },
    {
      "epoch": 129.03,
      "eval_cer": 0.10004221190375685,
      "eval_loss": 0.3429908752441406,
      "eval_runtime": 98.3419,
      "eval_samples_per_second": 18.364,
      "eval_steps_per_second": 0.295,
      "eval_wer": 0.44269183528347017,
      "step": 4000
    },
    {
      "epoch": 132.26,
      "learning_rate": 5.090107737512243e-05,
      "loss": 0.8768,
      "step": 4100
    },
    {
      "epoch": 135.48,
      "learning_rate": 4.857492654260528e-05,
      "loss": 0.8697,
      "step": 4200
    },
    {
      "epoch": 138.71,
      "learning_rate": 4.6248775710088145e-05,
      "loss": 0.8604,
      "step": 4300
    },
    {
      "epoch": 141.94,
      "learning_rate": 4.394588638589617e-05,
      "loss": 0.853,
      "step": 4400
    },
    {
      "epoch": 141.94,
      "eval_cer": 0.09997505660232549,
      "eval_loss": 0.35362082719802856,
      "eval_runtime": 98.9414,
      "eval_samples_per_second": 18.253,
      "eval_steps_per_second": 0.293,
      "eval_wer": 0.437455556273838,
      "step": 4400
    },
    {
      "epoch": 145.16,
      "learning_rate": 4.161973555337904e-05,
      "loss": 0.8447,
      "step": 4500
    },
    {
      "epoch": 148.39,
      "learning_rate": 3.9293584720861896e-05,
      "loss": 0.8309,
      "step": 4600
    },
    {
      "epoch": 151.61,
      "learning_rate": 3.6967433888344755e-05,
      "loss": 0.827,
      "step": 4700
    },
    {
      "epoch": 154.84,
      "learning_rate": 3.464128305582762e-05,
      "loss": 0.8127,
      "step": 4800
    },
    {
      "epoch": 154.84,
      "eval_cer": 0.09857438888675697,
      "eval_loss": 0.3511004149913788,
      "eval_runtime": 97.5708,
      "eval_samples_per_second": 18.51,
      "eval_steps_per_second": 0.297,
      "eval_wer": 0.4343525761199819,
      "step": 4800
    },
    {
      "epoch": 158.06,
      "learning_rate": 3.231513222331048e-05,
      "loss": 0.8123,
      "step": 4900
    },
    {
      "epoch": 161.29,
      "learning_rate": 2.9988981390793336e-05,
      "loss": 0.8063,
      "step": 5000
    },
    {
      "epoch": 164.52,
      "learning_rate": 2.76628305582762e-05,
      "loss": 0.8046,
      "step": 5100
    },
    {
      "epoch": 167.74,
      "learning_rate": 2.5336679725759056e-05,
      "loss": 0.7861,
      "step": 5200
    },
    {
      "epoch": 167.74,
      "eval_cer": 0.09927472274454123,
      "eval_loss": 0.3595349192619324,
      "eval_runtime": 97.9334,
      "eval_samples_per_second": 18.441,
      "eval_steps_per_second": 0.296,
      "eval_wer": 0.43719697459435,
      "step": 5200
    },
    {
      "epoch": 170.97,
      "learning_rate": 2.301052889324192e-05,
      "loss": 0.7904,
      "step": 5300
    },
    {
      "epoch": 174.19,
      "learning_rate": 2.068437806072478e-05,
      "loss": 0.775,
      "step": 5400
    },
    {
      "epoch": 177.42,
      "learning_rate": 1.8358227228207637e-05,
      "loss": 0.7737,
      "step": 5500
    },
    {
      "epoch": 180.65,
      "learning_rate": 1.6032076395690502e-05,
      "loss": 0.7619,
      "step": 5600
    },
    {
      "epoch": 180.65,
      "eval_cer": 0.0985456080432864,
      "eval_loss": 0.3628048300743103,
      "eval_runtime": 97.525,
      "eval_samples_per_second": 18.518,
      "eval_steps_per_second": 0.297,
      "eval_wer": 0.43157282306548583,
      "step": 5600
    },
    {
      "epoch": 183.87,
      "learning_rate": 1.3705925563173359e-05,
      "loss": 0.7654,
      "step": 5700
    },
    {
      "epoch": 187.1,
      "learning_rate": 1.1379774730656215e-05,
      "loss": 0.7586,
      "step": 5800
    },
    {
      "epoch": 190.32,
      "learning_rate": 9.053623898139082e-06,
      "loss": 0.7537,
      "step": 5900
    },
    {
      "epoch": 193.55,
      "learning_rate": 6.727473065621939e-06,
      "loss": 0.7537,
      "step": 6000
    },
    {
      "epoch": 193.55,
      "eval_cer": 0.09426685598065927,
      "eval_loss": 0.3633102476596832,
      "eval_runtime": 94.1997,
      "eval_samples_per_second": 19.172,
      "eval_steps_per_second": 0.308,
      "eval_wer": 0.41735083069364537,
      "step": 6000
    },
    {
      "epoch": 196.77,
      "learning_rate": 4.4245837414299736e-06,
      "loss": 0.7486,
      "step": 6100
    },
    {
      "epoch": 197.55,
      "step": 6124,
      "total_flos": 1.4975736611439493e+20,
      "train_loss": 1.5647427196365646,
      "train_runtime": 50885.6899,
      "train_samples_per_second": 15.405,
      "train_steps_per_second": 0.12
    }
  ],
  "max_steps": 6124,
  "num_train_epochs": 198,
  "total_flos": 1.4975736611439493e+20,
  "trial_name": null,
  "trial_params": null
}
|
|