|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 24.449877750611247, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.3047, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.905050505050506e-06, |
|
"loss": 0.179, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.804040404040405e-06, |
|
"loss": 0.1628, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.703030303030305e-06, |
|
"loss": 0.1468, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 9.602020202020203e-06, |
|
"loss": 0.0966, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.501010101010102e-06, |
|
"loss": 0.0854, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.0881, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.2989898989899e-06, |
|
"loss": 0.0841, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.197979797979799e-06, |
|
"loss": 0.0541, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.096969696969698e-06, |
|
"loss": 0.0523, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 0.21230381727218628, |
|
"eval_runtime": 1923.3381, |
|
"eval_samples_per_second": 1.505, |
|
"eval_steps_per_second": 0.188, |
|
"eval_wer": 0.2664437484127656, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 8.995959595959598e-06, |
|
"loss": 0.0502, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.896969696969697e-06, |
|
"loss": 0.0468, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 8.795959595959596e-06, |
|
"loss": 0.0328, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 8.694949494949496e-06, |
|
"loss": 0.0273, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.593939393939395e-06, |
|
"loss": 0.0283, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.492929292929295e-06, |
|
"loss": 0.0316, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 8.391919191919192e-06, |
|
"loss": 0.0194, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.290909090909092e-06, |
|
"loss": 0.0151, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 8.18989898989899e-06, |
|
"loss": 0.0164, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 8.08888888888889e-06, |
|
"loss": 0.0187, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_loss": 0.22370614111423492, |
|
"eval_runtime": 1885.8595, |
|
"eval_samples_per_second": 1.535, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.23702700414797256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 7.987878787878789e-06, |
|
"loss": 0.0122, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 7.886868686868686e-06, |
|
"loss": 0.0104, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 7.785858585858586e-06, |
|
"loss": 0.0102, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 7.684848484848485e-06, |
|
"loss": 0.0124, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 7.583838383838384e-06, |
|
"loss": 0.0084, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 7.4828282828282835e-06, |
|
"loss": 0.0067, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 7.381818181818182e-06, |
|
"loss": 0.0067, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 7.280808080808082e-06, |
|
"loss": 0.0074, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 7.17979797979798e-06, |
|
"loss": 0.0059, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 7.07878787878788e-06, |
|
"loss": 0.0041, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_loss": 0.26467418670654297, |
|
"eval_runtime": 1880.1582, |
|
"eval_samples_per_second": 1.539, |
|
"eval_steps_per_second": 0.193, |
|
"eval_wer": 0.23097435029205113, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 6.977777777777779e-06, |
|
"loss": 0.0038, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.876767676767677e-06, |
|
"loss": 0.005, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 6.7757575757575765e-06, |
|
"loss": 0.0035, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 6.674747474747475e-06, |
|
"loss": 0.0034, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 6.5737373737373746e-06, |
|
"loss": 0.0034, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.472727272727272e-06, |
|
"loss": 0.0045, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 6.371717171717172e-06, |
|
"loss": 0.0034, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 6.270707070707071e-06, |
|
"loss": 0.0025, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 6.16969696969697e-06, |
|
"loss": 0.0038, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 6.068686868686869e-06, |
|
"loss": 0.0028, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 0.29039227962493896, |
|
"eval_runtime": 1891.4656, |
|
"eval_samples_per_second": 1.53, |
|
"eval_steps_per_second": 0.191, |
|
"eval_wer": 0.23436045035130787, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 5.967676767676768e-06, |
|
"loss": 0.0021, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 5.8666666666666675e-06, |
|
"loss": 0.0015, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 5.765656565656567e-06, |
|
"loss": 0.0018, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 5.664646464646465e-06, |
|
"loss": 0.0019, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 5.563636363636364e-06, |
|
"loss": 0.0023, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 5.462626262626263e-06, |
|
"loss": 0.0015, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 5.361616161616162e-06, |
|
"loss": 0.0018, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 5.26060606060606e-06, |
|
"loss": 0.002, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.98, |
|
"learning_rate": 5.15959595959596e-06, |
|
"loss": 0.0014, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 5.058585858585859e-06, |
|
"loss": 0.0015, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"eval_loss": 0.29083308577537537, |
|
"eval_runtime": 1888.3025, |
|
"eval_samples_per_second": 1.533, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.2268263777194616, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 4.957575757575758e-06, |
|
"loss": 0.0012, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 4.856565656565657e-06, |
|
"loss": 0.0012, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 4.755555555555556e-06, |
|
"loss": 0.001, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 4.654545454545455e-06, |
|
"loss": 0.0008, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 4.553535353535354e-06, |
|
"loss": 0.0007, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 4.452525252525253e-06, |
|
"loss": 0.0004, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 4.351515151515152e-06, |
|
"loss": 0.0005, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 4.250505050505051e-06, |
|
"loss": 0.0004, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 4.14949494949495e-06, |
|
"loss": 0.0004, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 4.048484848484849e-06, |
|
"loss": 0.0003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"eval_loss": 0.3021999001502991, |
|
"eval_runtime": 1889.0309, |
|
"eval_samples_per_second": 1.532, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.21967324134428173, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 3.9474747474747474e-06, |
|
"loss": 0.0003, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 3.846464646464647e-06, |
|
"loss": 0.0002, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 3.745454545454546e-06, |
|
"loss": 0.0001, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 3.644444444444445e-06, |
|
"loss": 0.0004, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 3.5434343434343437e-06, |
|
"loss": 0.0001, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"learning_rate": 3.4424242424242427e-06, |
|
"loss": 0.0001, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 16.38, |
|
"learning_rate": 3.3414141414141413e-06, |
|
"loss": 0.0001, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 3.2404040404040404e-06, |
|
"loss": 0.0001, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 16.87, |
|
"learning_rate": 3.13939393939394e-06, |
|
"loss": 0.0001, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 3.038383838383839e-06, |
|
"loss": 0.0003, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"eval_loss": 0.3248833417892456, |
|
"eval_runtime": 1885.9913, |
|
"eval_samples_per_second": 1.534, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.2195462625920596, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"learning_rate": 2.9373737373737376e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 2.8363636363636366e-06, |
|
"loss": 0.0004, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 2.7353535353535353e-06, |
|
"loss": 0.0001, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 2.6343434343434343e-06, |
|
"loss": 0.0001, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"loss": 0.0002, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 18.58, |
|
"learning_rate": 2.432323232323233e-06, |
|
"loss": 0.0001, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 2.3313131313131315e-06, |
|
"loss": 0.0002, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 2.2303030303030305e-06, |
|
"loss": 0.0001, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 2.1292929292929296e-06, |
|
"loss": 0.0001, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 2.0282828282828286e-06, |
|
"loss": 0.0003, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"eval_loss": 0.3216637372970581, |
|
"eval_runtime": 1889.5891, |
|
"eval_samples_per_second": 1.532, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.21611783628206213, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 1.9272727272727273e-06, |
|
"loss": 0.0001, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"learning_rate": 1.8262626262626265e-06, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"learning_rate": 1.7252525252525254e-06, |
|
"loss": 0.0002, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 1.6242424242424242e-06, |
|
"loss": 0.0001, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 20.78, |
|
"learning_rate": 1.5232323232323235e-06, |
|
"loss": 0.0, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 1.4222222222222223e-06, |
|
"loss": 0.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 21.27, |
|
"learning_rate": 1.3212121212121212e-06, |
|
"loss": 0.0, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 21.52, |
|
"learning_rate": 1.2202020202020202e-06, |
|
"loss": 0.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 1.1191919191919193e-06, |
|
"loss": 0.0, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 1.0181818181818183e-06, |
|
"loss": 0.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.3335433900356293, |
|
"eval_runtime": 1886.0688, |
|
"eval_samples_per_second": 1.534, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.21814949631761618, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 9.171717171717172e-07, |
|
"loss": 0.0, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 22.49, |
|
"learning_rate": 8.161616161616162e-07, |
|
"loss": 0.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 22.74, |
|
"learning_rate": 7.151515151515153e-07, |
|
"loss": 0.0, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"learning_rate": 6.141414141414142e-07, |
|
"loss": 0.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 5.131313131313132e-07, |
|
"loss": 0.0, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 4.121212121212122e-07, |
|
"loss": 0.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 3.111111111111111e-07, |
|
"loss": 0.0, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 23.96, |
|
"learning_rate": 2.1010101010101013e-07, |
|
"loss": 0.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 1.090909090909091e-07, |
|
"loss": 0.0, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"learning_rate": 8.080808080808081e-09, |
|
"loss": 0.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"eval_loss": 0.33888712525367737, |
|
"eval_runtime": 1887.2294, |
|
"eval_samples_per_second": 1.533, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 0.21857275882502328, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"step": 5000, |
|
"total_flos": 3.395035389100032e+20, |
|
"train_loss": 0.016903733740281313, |
|
"train_runtime": 68439.6404, |
|
"train_samples_per_second": 2.338, |
|
"train_steps_per_second": 0.073 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5000, |
|
"num_train_epochs": 25, |
|
"save_steps": 500, |
|
"total_flos": 3.395035389100032e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|