{
  "best_metric": 30.12351433232347,
  "best_model_checkpoint": "./the-final-whisper/checkpoint-250",
  "epoch": 0.41186161449752884,
  "eval_steps": 125,
  "global_step": 250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04118616144975288,
      "grad_norm": 52.4793701171875,
      "learning_rate": 4.2000000000000006e-07,
      "loss": 2.7503,
      "step": 25
    },
    {
      "epoch": 0.08237232289950576,
      "grad_norm": 16.525333404541016,
      "learning_rate": 9.200000000000001e-07,
      "loss": 2.0994,
      "step": 50
    },
    {
      "epoch": 0.12355848434925865,
      "grad_norm": 14.271560668945312,
      "learning_rate": 1.42e-06,
      "loss": 1.3213,
      "step": 75
    },
    {
      "epoch": 0.16474464579901152,
      "grad_norm": 8.628070831298828,
      "learning_rate": 1.9200000000000003e-06,
      "loss": 0.7342,
      "step": 100
    },
    {
      "epoch": 0.20593080724876442,
      "grad_norm": 8.307537078857422,
      "learning_rate": 2.42e-06,
      "loss": 0.5674,
      "step": 125
    },
    {
      "epoch": 0.20593080724876442,
      "eval_loss": 0.6090311408042908,
      "eval_runtime": 2805.8489,
      "eval_samples_per_second": 1.442,
      "eval_steps_per_second": 0.18,
      "eval_wer": 45.15497553017945,
      "step": 125
    },
    {
      "epoch": 0.2471169686985173,
      "grad_norm": 6.402714729309082,
      "learning_rate": 2.92e-06,
      "loss": 0.4853,
      "step": 150
    },
    {
      "epoch": 0.2883031301482702,
      "grad_norm": 7.138896942138672,
      "learning_rate": 3.4200000000000007e-06,
      "loss": 0.424,
      "step": 175
    },
    {
      "epoch": 0.32948929159802304,
      "grad_norm": 6.78637170791626,
      "learning_rate": 3.920000000000001e-06,
      "loss": 0.3443,
      "step": 200
    },
    {
      "epoch": 0.37067545304777594,
      "grad_norm": 6.042959690093994,
      "learning_rate": 4.42e-06,
      "loss": 0.2769,
      "step": 225
    },
    {
      "epoch": 0.41186161449752884,
      "grad_norm": 2.8671882152557373,
      "learning_rate": 4.92e-06,
      "loss": 0.1545,
      "step": 250
    },
    {
      "epoch": 0.41186161449752884,
      "eval_loss": 0.18588024377822876,
      "eval_runtime": 2810.3027,
      "eval_samples_per_second": 1.439,
      "eval_steps_per_second": 0.18,
      "eval_wer": 30.12351433232347,
      "step": 250
    }
  ],
  "logging_steps": 25,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 250,
  "total_flos": 1.15434160128e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}