|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.2950819672131146, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.5e-06, |
|
"loss": 3.5867, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-06, |
|
"loss": 3.5457, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.5e-06, |
|
"loss": 3.4513, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1e-05, |
|
"loss": 3.3432, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.25e-05, |
|
"loss": 3.3533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 3.2806732654571533, |
|
"eval_runtime": 190.4728, |
|
"eval_samples_per_second": 25.426, |
|
"eval_steps_per_second": 0.798, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.5e-05, |
|
"loss": 3.2217, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 3.1765, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2e-05, |
|
"loss": 3.1408, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 3.1165, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.1709, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 3.1325438022613525, |
|
"eval_runtime": 192.4978, |
|
"eval_samples_per_second": 25.159, |
|
"eval_steps_per_second": 0.79, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.75e-05, |
|
"loss": 3.079, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3e-05, |
|
"loss": 3.0677, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 3.0656, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.5000000000000004e-05, |
|
"loss": 3.1463, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.75e-05, |
|
"loss": 3.0573, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 3.0614514350891113, |
|
"eval_runtime": 194.36, |
|
"eval_samples_per_second": 24.918, |
|
"eval_steps_per_second": 0.782, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
} |
|
], |
|
"max_steps": 4550, |
|
"num_train_epochs": 50, |
|
"total_flos": 4.675293533891495e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|