|
{ |
|
"best_metric": 77.37530328335532, |
|
"best_model_checkpoint": "/root/turkic_qa/en_kaz_models/en_kaz_xlm_roberta_large_squad_model/checkpoint-3260", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 3260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 652, |
|
"train_exact_match": 68.63136863136863, |
|
"train_f1": 83.20724970751985, |
|
"train_runtime": 28.0635, |
|
"train_samples_per_second": 43.722, |
|
"train_steps_per_second": 1.568 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 45.333194732666016, |
|
"learning_rate": 1e-05, |
|
"loss": 1.2106, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 60.65625, |
|
"eval_f1": 75.59481049132827, |
|
"eval_runtime": 87.9399, |
|
"eval_samples_per_second": 43.871, |
|
"eval_steps_per_second": 1.569, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1304, |
|
"train_exact_match": 75.52447552447552, |
|
"train_f1": 89.04467674156216, |
|
"train_runtime": 28.5171, |
|
"train_samples_per_second": 43.518, |
|
"train_steps_per_second": 1.578 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 26.666345596313477, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.8497, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 62.71875, |
|
"eval_f1": 77.20303172730397, |
|
"eval_runtime": 88.007, |
|
"eval_samples_per_second": 43.837, |
|
"eval_steps_per_second": 1.568, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1956, |
|
"train_exact_match": 83.91608391608392, |
|
"train_f1": 93.33096969615053, |
|
"train_runtime": 27.7404, |
|
"train_samples_per_second": 43.474, |
|
"train_steps_per_second": 1.586 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 30.600830078125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5689, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 63.40625, |
|
"eval_f1": 77.30509811671915, |
|
"eval_runtime": 88.124, |
|
"eval_samples_per_second": 43.779, |
|
"eval_steps_per_second": 1.566, |
|
"step": 1956 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2608, |
|
"train_exact_match": 86.61338661338661, |
|
"train_f1": 95.19866566649596, |
|
"train_runtime": 28.7793, |
|
"train_samples_per_second": 43.782, |
|
"train_steps_per_second": 1.564 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 76.71251678466797, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.4001, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 62.71875, |
|
"eval_f1": 77.01339638544334, |
|
"eval_runtime": 88.1308, |
|
"eval_samples_per_second": 43.776, |
|
"eval_steps_per_second": 1.566, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3260, |
|
"train_exact_match": 90.20979020979021, |
|
"train_f1": 96.69185942456909, |
|
"train_runtime": 27.889, |
|
"train_samples_per_second": 43.386, |
|
"train_steps_per_second": 1.578 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 29.443225860595703, |
|
"learning_rate": 0.0, |
|
"loss": 0.3113, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 63.40625, |
|
"eval_f1": 77.37530328335532, |
|
"eval_runtime": 88.2621, |
|
"eval_samples_per_second": 43.711, |
|
"eval_steps_per_second": 1.564, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3260, |
|
"total_flos": 6.354793682009856e+16, |
|
"train_loss": 0.6681148458843582, |
|
"train_runtime": 5891.9748, |
|
"train_samples_per_second": 15.485, |
|
"train_steps_per_second": 0.553 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3260, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 6.354793682009856e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|