|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.99842519685039, |
|
"global_step": 31700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 0.5203462243080139, |
|
"eval_runtime": 141.5768, |
|
"eval_samples_per_second": 35.853, |
|
"eval_steps_per_second": 2.246, |
|
"eval_wer": 0.4333326279704594, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.0009674592833876221, |
|
"loss": 1.4284, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"eval_loss": 0.48156219720840454, |
|
"eval_runtime": 142.5636, |
|
"eval_samples_per_second": 35.605, |
|
"eval_steps_per_second": 2.231, |
|
"eval_wer": 0.3950949065746873, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"eval_loss": 0.4314565062522888, |
|
"eval_runtime": 139.8322, |
|
"eval_samples_per_second": 36.301, |
|
"eval_steps_per_second": 2.274, |
|
"eval_wer": 0.3545506485811626, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.0009023452768729642, |
|
"loss": 1.283, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"eval_loss": 0.42783403396606445, |
|
"eval_runtime": 141.0912, |
|
"eval_samples_per_second": 35.977, |
|
"eval_steps_per_second": 2.254, |
|
"eval_wer": 0.34039401570137756, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.77, |
|
"eval_loss": 0.40902915596961975, |
|
"eval_runtime": 140.9561, |
|
"eval_samples_per_second": 36.011, |
|
"eval_steps_per_second": 2.256, |
|
"eval_wer": 0.3053939098969465, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 0.0008372312703583062, |
|
"loss": 1.1777, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"eval_loss": 0.3892641067504883, |
|
"eval_runtime": 139.9163, |
|
"eval_samples_per_second": 36.279, |
|
"eval_steps_per_second": 2.273, |
|
"eval_wer": 0.30056922783926193, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"eval_loss": 0.3967570960521698, |
|
"eval_runtime": 139.2725, |
|
"eval_samples_per_second": 36.447, |
|
"eval_steps_per_second": 2.283, |
|
"eval_wer": 0.28565080305563195, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 0.0007720846905537459, |
|
"loss": 1.0994, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"eval_loss": 0.3892391324043274, |
|
"eval_runtime": 138.7844, |
|
"eval_samples_per_second": 36.575, |
|
"eval_steps_per_second": 2.291, |
|
"eval_wer": 0.27509152083289246, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"eval_loss": 0.4061281681060791, |
|
"eval_runtime": 139.2312, |
|
"eval_samples_per_second": 36.457, |
|
"eval_steps_per_second": 2.284, |
|
"eval_wer": 0.2689760247159151, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 31.54, |
|
"learning_rate": 0.000706970684039088, |
|
"loss": 1.0323, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 31.54, |
|
"eval_loss": 0.41136494278907776, |
|
"eval_runtime": 139.4432, |
|
"eval_samples_per_second": 36.402, |
|
"eval_steps_per_second": 2.28, |
|
"eval_wer": 0.25065069725120087, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 34.7, |
|
"eval_loss": 0.40214526653289795, |
|
"eval_runtime": 139.4093, |
|
"eval_samples_per_second": 36.411, |
|
"eval_steps_per_second": 2.281, |
|
"eval_wer": 0.2508411452271621, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"learning_rate": 0.00064185667752443, |
|
"loss": 0.9623, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"eval_loss": 0.40321338176727295, |
|
"eval_runtime": 139.9917, |
|
"eval_samples_per_second": 36.259, |
|
"eval_steps_per_second": 2.272, |
|
"eval_wer": 0.2378060393169266, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"eval_loss": 0.4147748053073883, |
|
"eval_runtime": 139.3612, |
|
"eval_samples_per_second": 36.423, |
|
"eval_steps_per_second": 2.282, |
|
"eval_wer": 0.23744630425122204, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"learning_rate": 0.0005767100977198697, |
|
"loss": 0.9077, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"eval_loss": 0.4350396394729614, |
|
"eval_runtime": 138.8108, |
|
"eval_samples_per_second": 36.568, |
|
"eval_steps_per_second": 2.291, |
|
"eval_wer": 0.23230420890026873, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 47.32, |
|
"eval_loss": 0.4514589309692383, |
|
"eval_runtime": 138.719, |
|
"eval_samples_per_second": 36.592, |
|
"eval_steps_per_second": 2.292, |
|
"eval_wer": 0.22464396808938358, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 50.47, |
|
"learning_rate": 0.0005115960912052118, |
|
"loss": 0.8573, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 50.47, |
|
"eval_loss": 0.4473990499973297, |
|
"eval_runtime": 140.3605, |
|
"eval_samples_per_second": 36.164, |
|
"eval_steps_per_second": 2.266, |
|
"eval_wer": 0.21797828893074042, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 53.63, |
|
"eval_loss": 0.4649062752723694, |
|
"eval_runtime": 137.3039, |
|
"eval_samples_per_second": 36.969, |
|
"eval_steps_per_second": 2.316, |
|
"eval_wer": 0.21713185348202382, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 56.78, |
|
"learning_rate": 0.00044651465798045605, |
|
"loss": 0.8083, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 56.78, |
|
"eval_loss": 0.44551119208335876, |
|
"eval_runtime": 139.4699, |
|
"eval_samples_per_second": 36.395, |
|
"eval_steps_per_second": 2.28, |
|
"eval_wer": 0.2102334045749836, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 59.94, |
|
"eval_loss": 0.4586869478225708, |
|
"eval_runtime": 139.1403, |
|
"eval_samples_per_second": 36.481, |
|
"eval_steps_per_second": 2.285, |
|
"eval_wer": 0.20917536026408787, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 63.09, |
|
"learning_rate": 0.00038140065146579803, |
|
"loss": 0.769, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 63.09, |
|
"eval_loss": 0.4793929159641266, |
|
"eval_runtime": 139.562, |
|
"eval_samples_per_second": 36.371, |
|
"eval_steps_per_second": 2.279, |
|
"eval_wer": 0.20117654527371606, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 66.25, |
|
"eval_loss": 0.4844733476638794, |
|
"eval_runtime": 138.9678, |
|
"eval_samples_per_second": 36.526, |
|
"eval_steps_per_second": 2.288, |
|
"eval_wer": 0.20073216666313987, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"learning_rate": 0.00031628664495114006, |
|
"loss": 0.7308, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 69.4, |
|
"eval_loss": 0.49372631311416626, |
|
"eval_runtime": 139.7927, |
|
"eval_samples_per_second": 36.311, |
|
"eval_steps_per_second": 2.275, |
|
"eval_wer": 0.20075332754935776, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 72.55, |
|
"eval_loss": 0.4920376241207123, |
|
"eval_runtime": 138.7644, |
|
"eval_samples_per_second": 36.58, |
|
"eval_steps_per_second": 2.292, |
|
"eval_wer": 0.1894745751952092, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 75.71, |
|
"learning_rate": 0.0002511400651465798, |
|
"loss": 0.6927, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 75.71, |
|
"eval_loss": 0.5178954005241394, |
|
"eval_runtime": 139.799, |
|
"eval_samples_per_second": 36.309, |
|
"eval_steps_per_second": 2.275, |
|
"eval_wer": 0.19114628520642443, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 78.86, |
|
"eval_loss": 0.520152747631073, |
|
"eval_runtime": 140.3812, |
|
"eval_samples_per_second": 36.159, |
|
"eval_steps_per_second": 2.265, |
|
"eval_wer": 0.18767589986668642, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 82.02, |
|
"learning_rate": 0.00018602605863192182, |
|
"loss": 0.6622, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 82.02, |
|
"eval_loss": 0.5265706181526184, |
|
"eval_runtime": 138.1289, |
|
"eval_samples_per_second": 36.748, |
|
"eval_steps_per_second": 2.302, |
|
"eval_wer": 0.18401506655098715, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 85.17, |
|
"eval_loss": 0.5350863933563232, |
|
"eval_runtime": 140.1605, |
|
"eval_samples_per_second": 36.216, |
|
"eval_steps_per_second": 2.269, |
|
"eval_wer": 0.18541168504136954, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 88.33, |
|
"learning_rate": 0.00012091205211726384, |
|
"loss": 0.6315, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 88.33, |
|
"eval_loss": 0.5373002290725708, |
|
"eval_runtime": 138.125, |
|
"eval_samples_per_second": 36.749, |
|
"eval_steps_per_second": 2.302, |
|
"eval_wer": 0.18113718602535075, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 91.48, |
|
"eval_loss": 0.5330832600593567, |
|
"eval_runtime": 139.0156, |
|
"eval_samples_per_second": 36.514, |
|
"eval_steps_per_second": 2.288, |
|
"eval_wer": 0.17923270626573842, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 94.64, |
|
"learning_rate": 5.576547231270358e-05, |
|
"loss": 0.6075, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 94.64, |
|
"eval_loss": 0.538992166519165, |
|
"eval_runtime": 138.185, |
|
"eval_samples_per_second": 36.733, |
|
"eval_steps_per_second": 2.301, |
|
"eval_wer": 0.17787840954779185, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 97.79, |
|
"eval_loss": 0.5459240078926086, |
|
"eval_runtime": 137.8608, |
|
"eval_samples_per_second": 36.82, |
|
"eval_steps_per_second": 2.307, |
|
"eval_wer": 0.17730706561990817, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 31700, |
|
"total_flos": 3.173184730349909e+20, |
|
"train_loss": 0.8928292760036721, |
|
"train_runtime": 80759.6589, |
|
"train_samples_per_second": 25.144, |
|
"train_steps_per_second": 0.393 |
|
} |
|
], |
|
"max_steps": 31700, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.173184730349909e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|