{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 299.9818181818182,
  "global_step": 8100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.69,
      "learning_rate": 1.9600000000000002e-05,
      "loss": 12.2644,
      "step": 100
    },
    {
      "epoch": 7.4,
      "learning_rate": 3.960000000000001e-05,
      "loss": 4.6184,
      "step": 200
    },
    {
      "epoch": 11.11,
      "learning_rate": 5.96e-05,
      "loss": 3.5365,
      "step": 300
    },
    {
      "epoch": 14.8,
      "learning_rate": 7.960000000000001e-05,
      "loss": 3.2788,
      "step": 400
    },
    {
      "epoch": 18.51,
      "learning_rate": 9.960000000000001e-05,
      "loss": 3.1079,
      "step": 500
    },
    {
      "epoch": 18.51,
      "eval_cer": 0.9825416052243522,
      "eval_loss": 2.679511547088623,
      "eval_runtime": 34.6418,
      "eval_samples_per_second": 23.296,
      "eval_steps_per_second": 2.916,
      "eval_wer": 0.9996329601761791,
      "step": 500
    },
    {
      "epoch": 22.22,
      "learning_rate": 9.871052631578948e-05,
      "loss": 2.193,
      "step": 600
    },
    {
      "epoch": 25.91,
      "learning_rate": 9.739473684210527e-05,
      "loss": 1.3185,
      "step": 700
    },
    {
      "epoch": 29.62,
      "learning_rate": 9.607894736842105e-05,
      "loss": 1.0508,
      "step": 800
    },
    {
      "epoch": 33.33,
      "learning_rate": 9.476315789473684e-05,
      "loss": 0.9319,
      "step": 900
    },
    {
      "epoch": 37.04,
      "learning_rate": 9.344736842105263e-05,
      "loss": 0.8506,
      "step": 1000
    },
    {
      "epoch": 37.04,
      "eval_cer": 0.096087002317253,
      "eval_loss": 0.43226873874664307,
      "eval_runtime": 34.4996,
      "eval_samples_per_second": 23.392,
      "eval_steps_per_second": 2.928,
      "eval_wer": 0.37181134153055606,
      "step": 1000
    },
    {
      "epoch": 40.73,
      "learning_rate": 9.213157894736843e-05,
      "loss": 0.8002,
      "step": 1100
    },
    {
      "epoch": 44.44,
      "learning_rate": 9.081578947368421e-05,
      "loss": 0.765,
      "step": 1200
    },
    {
      "epoch": 48.15,
      "learning_rate": 8.950000000000001e-05,
      "loss": 0.7327,
      "step": 1300
    },
    {
      "epoch": 51.84,
      "learning_rate": 8.818421052631579e-05,
      "loss": 0.7094,
      "step": 1400
    },
    {
      "epoch": 55.55,
      "learning_rate": 8.686842105263159e-05,
      "loss": 0.6821,
      "step": 1500
    },
    {
      "epoch": 55.55,
      "eval_cer": 0.0877922898672846,
      "eval_loss": 0.41051027178764343,
      "eval_runtime": 34.0911,
      "eval_samples_per_second": 23.672,
      "eval_steps_per_second": 2.963,
      "eval_wer": 0.3310699210864379,
      "step": 1500
    },
    {
      "epoch": 59.25,
      "learning_rate": 8.555263157894737e-05,
      "loss": 0.6763,
      "step": 1600
    },
    {
      "epoch": 62.95,
      "learning_rate": 8.423684210526316e-05,
      "loss": 0.6484,
      "step": 1700
    },
    {
      "epoch": 66.65,
      "learning_rate": 8.292105263157896e-05,
      "loss": 0.6371,
      "step": 1800
    },
    {
      "epoch": 70.36,
      "learning_rate": 8.160526315789474e-05,
      "loss": 0.6149,
      "step": 1900
    },
    {
      "epoch": 74.07,
      "learning_rate": 8.028947368421052e-05,
      "loss": 0.6091,
      "step": 2000
    },
    {
      "epoch": 74.07,
      "eval_cer": 0.08513271539919949,
      "eval_loss": 0.4281017482280731,
      "eval_runtime": 36.3671,
      "eval_samples_per_second": 22.19,
      "eval_steps_per_second": 2.777,
      "eval_wer": 0.3167553679574234,
      "step": 2000
    },
    {
      "epoch": 77.76,
      "learning_rate": 7.897368421052632e-05,
      "loss": 0.5908,
      "step": 2100
    },
    {
      "epoch": 81.47,
      "learning_rate": 7.76578947368421e-05,
      "loss": 0.5809,
      "step": 2200
    },
    {
      "epoch": 85.18,
      "learning_rate": 7.63421052631579e-05,
      "loss": 0.568,
      "step": 2300
    },
    {
      "epoch": 88.87,
      "learning_rate": 7.50263157894737e-05,
      "loss": 0.5531,
      "step": 2400
    },
    {
      "epoch": 92.58,
      "learning_rate": 7.371052631578948e-05,
      "loss": 0.5429,
      "step": 2500
    },
    {
      "epoch": 92.58,
      "eval_cer": 0.08421108068253634,
      "eval_loss": 0.4524887800216675,
      "eval_runtime": 33.8823,
      "eval_samples_per_second": 23.818,
      "eval_steps_per_second": 2.981,
      "eval_wer": 0.3147366489264085,
      "step": 2500
    },
    {
      "epoch": 96.29,
      "learning_rate": 7.239473684210527e-05,
      "loss": 0.5476,
      "step": 2600
    },
    {
      "epoch": 99.98,
      "learning_rate": 7.107894736842106e-05,
      "loss": 0.5312,
      "step": 2700
    },
    {
      "epoch": 103.69,
      "learning_rate": 6.976315789473684e-05,
      "loss": 0.5228,
      "step": 2800
    },
    {
      "epoch": 107.4,
      "learning_rate": 6.846052631578947e-05,
      "loss": 0.5085,
      "step": 2900
    },
    {
      "epoch": 111.11,
      "learning_rate": 6.714473684210527e-05,
      "loss": 0.5063,
      "step": 3000
    },
    {
      "epoch": 111.11,
      "eval_cer": 0.08392142405729934,
      "eval_loss": 0.46191853284835815,
      "eval_runtime": 34.7379,
      "eval_samples_per_second": 23.231,
      "eval_steps_per_second": 2.907,
      "eval_wer": 0.31436960910258765,
      "step": 3000
    },
    {
      "epoch": 114.8,
      "learning_rate": 6.582894736842105e-05,
      "loss": 0.4929,
      "step": 3100
    },
    {
      "epoch": 118.51,
      "learning_rate": 6.451315789473685e-05,
      "loss": 0.4921,
      "step": 3200
    },
    {
      "epoch": 122.22,
      "learning_rate": 6.319736842105264e-05,
      "loss": 0.486,
      "step": 3300
    },
    {
      "epoch": 125.91,
      "learning_rate": 6.188157894736843e-05,
      "loss": 0.4775,
      "step": 3400
    },
    {
      "epoch": 129.62,
      "learning_rate": 6.056578947368421e-05,
      "loss": 0.4661,
      "step": 3500
    },
    {
      "epoch": 129.62,
      "eval_cer": 0.08178849799873604,
      "eval_loss": 0.4659934639930725,
      "eval_runtime": 33.3081,
      "eval_samples_per_second": 24.228,
      "eval_steps_per_second": 3.032,
      "eval_wer": 0.30390897412369244,
      "step": 3500
    },
    {
      "epoch": 133.33,
      "learning_rate": 5.9250000000000004e-05,
      "loss": 0.4571,
      "step": 3600
    },
    {
      "epoch": 137.04,
      "learning_rate": 5.793421052631579e-05,
      "loss": 0.4525,
      "step": 3700
    },
    {
      "epoch": 140.73,
      "learning_rate": 5.6618421052631575e-05,
      "loss": 0.4502,
      "step": 3800
    },
    {
      "epoch": 144.44,
      "learning_rate": 5.530263157894737e-05,
      "loss": 0.4458,
      "step": 3900
    },
    {
      "epoch": 148.15,
      "learning_rate": 5.398684210526316e-05,
      "loss": 0.4353,
      "step": 4000
    },
    {
      "epoch": 148.15,
      "eval_cer": 0.08202548978302085,
      "eval_loss": 0.46947482228279114,
      "eval_runtime": 33.4569,
      "eval_samples_per_second": 24.121,
      "eval_steps_per_second": 3.019,
      "eval_wer": 0.308313452009543,
      "step": 4000
    },
    {
      "epoch": 151.84,
      "learning_rate": 5.2671052631578957e-05,
      "loss": 0.4299,
      "step": 4100
    },
    {
      "epoch": 155.55,
      "learning_rate": 5.135526315789474e-05,
      "loss": 0.4253,
      "step": 4200
    },
    {
      "epoch": 159.25,
      "learning_rate": 5.003947368421053e-05,
      "loss": 0.4202,
      "step": 4300
    },
    {
      "epoch": 162.95,
      "learning_rate": 4.872368421052632e-05,
      "loss": 0.4133,
      "step": 4400
    },
    {
      "epoch": 166.65,
      "learning_rate": 4.740789473684211e-05,
      "loss": 0.4048,
      "step": 4500
    },
    {
      "epoch": 166.65,
      "eval_cer": 0.08244680851063829,
      "eval_loss": 0.49092647433280945,
      "eval_runtime": 33.6459,
      "eval_samples_per_second": 23.985,
      "eval_steps_per_second": 3.002,
      "eval_wer": 0.3084969719214535,
      "step": 4500
    },
    {
      "epoch": 170.36,
      "learning_rate": 4.6092105263157896e-05,
      "loss": 0.3994,
      "step": 4600
    },
    {
      "epoch": 174.07,
      "learning_rate": 4.4776315789473685e-05,
      "loss": 0.397,
      "step": 4700
    },
    {
      "epoch": 177.76,
      "learning_rate": 4.3460526315789474e-05,
      "loss": 0.3836,
      "step": 4800
    },
    {
      "epoch": 181.47,
      "learning_rate": 4.2144736842105264e-05,
      "loss": 0.3922,
      "step": 4900
    },
    {
      "epoch": 185.18,
      "learning_rate": 4.082894736842105e-05,
      "loss": 0.3852,
      "step": 5000
    },
    {
      "epoch": 185.18,
      "eval_cer": 0.08120918474826207,
      "eval_loss": 0.5073934197425842,
      "eval_runtime": 34.5846,
      "eval_samples_per_second": 23.334,
      "eval_steps_per_second": 2.92,
      "eval_wer": 0.3048265736832446,
      "step": 5000
    },
    {
      "epoch": 188.87,
      "learning_rate": 3.951315789473685e-05,
      "loss": 0.3784,
      "step": 5100
    },
    {
      "epoch": 192.58,
      "learning_rate": 3.819736842105263e-05,
      "loss": 0.3733,
      "step": 5200
    },
    {
      "epoch": 196.29,
      "learning_rate": 3.688157894736842e-05,
      "loss": 0.3775,
      "step": 5300
    },
    {
      "epoch": 199.98,
      "learning_rate": 3.5565789473684217e-05,
      "loss": 0.3574,
      "step": 5400
    },
    {
      "epoch": 203.69,
      "learning_rate": 3.4250000000000006e-05,
      "loss": 0.3567,
      "step": 5500
    },
    {
      "epoch": 203.69,
      "eval_cer": 0.08099852538445333,
      "eval_loss": 0.5110859274864197,
      "eval_runtime": 34.3591,
      "eval_samples_per_second": 23.487,
      "eval_steps_per_second": 2.94,
      "eval_wer": 0.3011561754450358,
      "step": 5500
    },
    {
      "epoch": 207.4,
      "learning_rate": 3.293421052631579e-05,
      "loss": 0.3585,
      "step": 5600
    },
    {
      "epoch": 211.11,
      "learning_rate": 3.161842105263158e-05,
      "loss": 0.3503,
      "step": 5700
    },
    {
      "epoch": 214.8,
      "learning_rate": 3.030263157894737e-05,
      "loss": 0.3452,
      "step": 5800
    },
    {
      "epoch": 218.51,
      "learning_rate": 2.8986842105263156e-05,
      "loss": 0.3446,
      "step": 5900
    },
    {
      "epoch": 222.22,
      "learning_rate": 2.768421052631579e-05,
      "loss": 0.3451,
      "step": 6000
    },
    {
      "epoch": 222.22,
      "eval_cer": 0.08041921213397936,
      "eval_loss": 0.5224971175193787,
      "eval_runtime": 33.9572,
      "eval_samples_per_second": 23.765,
      "eval_steps_per_second": 2.974,
      "eval_wer": 0.2982198568544687,
      "step": 6000
    },
    {
      "epoch": 225.91,
      "learning_rate": 2.6368421052631582e-05,
      "loss": 0.3334,
      "step": 6100
    },
    {
      "epoch": 229.62,
      "learning_rate": 2.505263157894737e-05,
      "loss": 0.338,
      "step": 6200
    },
    {
      "epoch": 233.33,
      "learning_rate": 2.373684210526316e-05,
      "loss": 0.332,
      "step": 6300
    },
    {
      "epoch": 237.04,
      "learning_rate": 2.242105263157895e-05,
      "loss": 0.3301,
      "step": 6400
    },
    {
      "epoch": 240.73,
      "learning_rate": 2.110526315789474e-05,
      "loss": 0.325,
      "step": 6500
    },
    {
      "epoch": 240.73,
      "eval_cer": 0.07955024225826839,
      "eval_loss": 0.5269995927810669,
      "eval_runtime": 33.5845,
      "eval_samples_per_second": 24.029,
      "eval_steps_per_second": 3.007,
      "eval_wer": 0.2954670581758121,
      "step": 6500
    },
    {
      "epoch": 244.44,
      "learning_rate": 1.9789473684210528e-05,
      "loss": 0.327,
      "step": 6600
    },
    {
      "epoch": 248.15,
      "learning_rate": 1.8473684210526317e-05,
      "loss": 0.3161,
      "step": 6700
    },
    {
      "epoch": 251.84,
      "learning_rate": 1.7157894736842107e-05,
      "loss": 0.3193,
      "step": 6800
    },
    {
      "epoch": 255.55,
      "learning_rate": 1.5842105263157896e-05,
      "loss": 0.3104,
      "step": 6900
    },
    {
      "epoch": 259.25,
      "learning_rate": 1.4539473684210528e-05,
      "loss": 0.3089,
      "step": 7000
    },
    {
      "epoch": 259.25,
      "eval_cer": 0.07933958289445966,
      "eval_loss": 0.5381476283073425,
      "eval_runtime": 35.2467,
      "eval_samples_per_second": 22.896,
      "eval_steps_per_second": 2.866,
      "eval_wer": 0.29289777940906586,
      "step": 7000
    },
    {
      "epoch": 262.95,
      "learning_rate": 1.3223684210526315e-05,
      "loss": 0.3087,
      "step": 7100
    },
    {
      "epoch": 266.65,
      "learning_rate": 1.1907894736842106e-05,
      "loss": 0.305,
      "step": 7200
    },
    {
      "epoch": 270.36,
      "learning_rate": 1.0592105263157895e-05,
      "loss": 0.3028,
      "step": 7300
    },
    {
      "epoch": 274.07,
      "learning_rate": 9.276315789473685e-06,
      "loss": 0.2989,
      "step": 7400
    },
    {
      "epoch": 277.76,
      "learning_rate": 7.960526315789474e-06,
      "loss": 0.2941,
      "step": 7500
    },
    {
      "epoch": 277.76,
      "eval_cer": 0.07941858015588793,
      "eval_loss": 0.5565056204795837,
      "eval_runtime": 34.8538,
      "eval_samples_per_second": 23.154,
      "eval_steps_per_second": 2.898,
      "eval_wer": 0.29234721967333455,
      "step": 7500
    },
    {
      "epoch": 281.47,
      "learning_rate": 6.644736842105263e-06,
      "loss": 0.2983,
      "step": 7600
    },
    {
      "epoch": 285.18,
      "learning_rate": 5.328947368421053e-06,
      "loss": 0.2944,
      "step": 7700
    },
    {
      "epoch": 288.87,
      "learning_rate": 4.013157894736842e-06,
      "loss": 0.2941,
      "step": 7800
    },
    {
      "epoch": 292.58,
      "learning_rate": 2.6973684210526316e-06,
      "loss": 0.2947,
      "step": 7900
    },
    {
      "epoch": 296.29,
      "learning_rate": 1.381578947368421e-06,
      "loss": 0.2945,
      "step": 8000
    },
    {
      "epoch": 296.29,
      "eval_cer": 0.07894459658731831,
      "eval_loss": 0.549480140209198,
      "eval_runtime": 34.1061,
      "eval_samples_per_second": 23.661,
      "eval_steps_per_second": 2.961,
      "eval_wer": 0.2951000183519912,
      "step": 8000
    },
    {
      "epoch": 299.98,
      "learning_rate": 6.578947368421053e-08,
      "loss": 0.2913,
      "step": 8100
    },
    {
      "epoch": 299.98,
      "step": 8100,
      "total_flos": 1.5339574491038086e+20,
      "train_loss": 0.7936776961809323,
      "train_runtime": 54993.0112,
      "train_samples_per_second": 19.142,
      "train_steps_per_second": 0.147
    }
  ],
  "max_steps": 8100,
  "num_train_epochs": 300,
  "total_flos": 1.5339574491038086e+20,
  "trial_name": null,
  "trial_params": null
}