{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2224,
  "eval_steps": 500,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 1.9600000000000003e-06,
      "loss": 4.1002,
      "step": 100
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.96e-06,
      "loss": 3.4625,
      "step": 200
    },
    {
      "epoch": 0.06,
      "learning_rate": 5.9600000000000005e-06,
      "loss": 2.4907,
      "step": 300
    },
    {
      "epoch": 0.08,
      "learning_rate": 7.960000000000002e-06,
      "loss": 1.5881,
      "step": 400
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.960000000000001e-06,
      "loss": 1.1583,
      "step": 500
    },
    {
      "epoch": 0.1,
      "eval_combined_wer": 0.3216761184625079,
      "eval_f1_score": 0.07455306200076076,
      "eval_label_f1": 0.1414986686953214,
      "eval_loss": 1.0361335277557373,
      "eval_runtime": 337.7385,
      "eval_samples_per_second": 2.961,
      "eval_steps_per_second": 0.047,
      "eval_wer": 0.20672983948946044,
      "step": 500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.98830238119205e-06,
      "loss": 0.9384,
      "step": 600
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.952307128483257e-06,
      "loss": 0.7396,
      "step": 700
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.892184733248666e-06,
      "loss": 0.4787,
      "step": 800
    },
    {
      "epoch": 0.18,
      "learning_rate": 9.808228105754378e-06,
      "loss": 0.436,
      "step": 900
    },
    {
      "epoch": 0.2,
      "learning_rate": 9.700846274250252e-06,
      "loss": 0.4069,
      "step": 1000
    },
    {
      "epoch": 0.2,
      "eval_combined_wer": 0.22034499054820417,
      "eval_f1_score": 0.4222737819025522,
      "eval_label_f1": 0.5939675174013921,
      "eval_loss": 0.4110757112503052,
      "eval_runtime": 259.1805,
      "eval_samples_per_second": 3.858,
      "eval_steps_per_second": 0.062,
      "eval_wer": 0.12349642235544382,
      "step": 1000
    },
    {
      "epoch": 0.22,
      "learning_rate": 9.570562392225395e-06,
      "loss": 0.4044,
      "step": 1100
    },
    {
      "epoch": 0.24,
      "learning_rate": 9.418011189656942e-06,
      "loss": 0.3882,
      "step": 1200
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.24393588066941e-06,
      "loss": 0.3855,
      "step": 1300
    },
    {
      "epoch": 0.28,
      "learning_rate": 9.0491845426702e-06,
      "loss": 0.3774,
      "step": 1400
    },
    {
      "epoch": 0.3,
      "learning_rate": 8.834705984601708e-06,
      "loss": 0.3708,
      "step": 1500
    },
    {
      "epoch": 0.3,
      "eval_combined_wer": 0.2200693131695022,
      "eval_f1_score": 0.4609408718170047,
      "eval_label_f1": 0.6266724212343547,
      "eval_loss": 0.3768249750137329,
      "eval_runtime": 253.1584,
      "eval_samples_per_second": 3.95,
      "eval_steps_per_second": 0.063,
      "eval_wer": 0.12949139431444595,
      "step": 1500
    },
    {
      "epoch": 0.32,
      "learning_rate": 8.601545124439535e-06,
      "loss": 0.3629,
      "step": 1600
    },
    {
      "epoch": 0.34,
      "learning_rate": 8.350837898457142e-06,
      "loss": 0.3609,
      "step": 1700
    },
    {
      "epoch": 0.36,
      "learning_rate": 8.083805727058514e-06,
      "loss": 0.3605,
      "step": 1800
    },
    {
      "epoch": 0.38,
      "learning_rate": 7.801749564140724e-06,
      "loss": 0.3534,
      "step": 1900
    },
    {
      "epoch": 0.4,
      "learning_rate": 7.5060435589773215e-06,
      "loss": 0.3512,
      "step": 2000
    },
    {
      "epoch": 0.4,
      "eval_combined_wer": 0.22231411468178955,
      "eval_f1_score": 0.5141859450021825,
      "eval_label_f1": 0.6835443037974683,
      "eval_loss": 0.3623911738395691,
      "eval_runtime": 245.3233,
      "eval_samples_per_second": 4.076,
      "eval_steps_per_second": 0.065,
      "eval_wer": 0.13587313865789982,
      "step": 2000
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.1981283615012e-06,
      "loss": 0.3504,
      "step": 2100
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.879504103602934e-06,
      "loss": 0.3475,
      "step": 2200
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.551723090639008e-06,
      "loss": 0.3433,
      "step": 2300
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.216382238756147e-06,
      "loss": 0.3397,
      "step": 2400
    },
    {
      "epoch": 0.5,
      "learning_rate": 5.8751152948763815e-06,
      "loss": 0.3411,
      "step": 2500
    },
    {
      "epoch": 0.5,
      "eval_combined_wer": 0.22038437303087588,
      "eval_f1_score": 0.5225073622212874,
      "eval_label_f1": 0.6882625157761885,
      "eval_loss": 0.3542915880680084,
      "eval_runtime": 266.2481,
      "eval_samples_per_second": 3.756,
      "eval_steps_per_second": 0.06,
      "eval_wer": 0.13738155095726165,
      "step": 2500
    },
    {
      "epoch": 0.52,
      "learning_rate": 5.52958487724626e-06,
      "loss": 0.337,
      "step": 2600
    },
    {
      "epoch": 0.54,
      "learning_rate": 5.18147437532788e-06,
      "loss": 0.3346,
      "step": 2700
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.832479748494643e-06,
      "loss": 0.3348,
      "step": 2800
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.484301263487664e-06,
      "loss": 0.3316,
      "step": 2900
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.138635210887117e-06,
      "loss": 0.3313,
      "step": 3000
    },
    {
      "epoch": 1.02,
      "eval_combined_wer": 0.22353497164461247,
      "eval_f1_score": 0.5193370165745856,
      "eval_label_f1": 0.6808329791755205,
      "eval_loss": 0.34923481941223145,
      "eval_runtime": 238.9408,
      "eval_samples_per_second": 4.185,
      "eval_steps_per_second": 0.067,
      "eval_wer": 0.1397795397408625,
      "step": 3000
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.797165640955041e-06,
      "loss": 0.3322,
      "step": 3100
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.4615561591117486e-06,
      "loss": 0.3258,
      "step": 3200
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.1334418210174268e-06,
      "loss": 0.3297,
      "step": 3300
    },
    {
      "epoch": 1.1,
      "learning_rate": 2.814421166745337e-06,
      "loss": 0.328,
      "step": 3400
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.506048432855247e-06,
      "loss": 0.3252,
      "step": 3500
    },
    {
      "epoch": 1.12,
      "eval_combined_wer": 0.22511027095148078,
      "eval_f1_score": 0.533276812208563,
      "eval_label_f1": 0.6892751165748198,
      "eval_loss": 0.34593451023101807,
      "eval_runtime": 242.7141,
      "eval_samples_per_second": 4.12,
      "eval_steps_per_second": 0.066,
      "eval_wer": 0.14356990910848966,
      "step": 3500
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.209825980309151e-06,
      "loss": 0.3262,
      "step": 3600
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.927196975119678e-06,
      "loss": 0.3247,
      "step": 3700
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.6595383573903412e-06,
      "loss": 0.3246,
      "step": 3800
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.4081541330017706e-06,
      "loss": 0.3197,
      "step": 3900
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.1742690206261293e-06,
      "loss": 0.3293,
      "step": 4000
    },
    {
      "epoch": 1.22,
      "eval_combined_wer": 0.2236531190926276,
      "eval_f1_score": 0.5324947589098533,
      "eval_label_f1": 0.6859538784067086,
      "eval_loss": 0.34473055601119995,
      "eval_runtime": 244.5187,
      "eval_samples_per_second": 4.09,
      "eval_steps_per_second": 0.065,
      "eval_wer": 0.14159736994778574,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "total_flos": 1.4916379341570048e+20,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}