|
{ |
|
"best_metric": 79.20930220037307, |
|
"best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_model/checkpoint-1674", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 2790, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 558, |
|
"train_exact_match": 58.64135864135864, |
|
"train_f1": 78.03999717537812, |
|
"train_runtime": 24.1085, |
|
"train_samples_per_second": 43.802, |
|
"train_steps_per_second": 1.576 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 71.476318359375, |
|
"learning_rate": 1e-05, |
|
"loss": 3.094, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 55.96875, |
|
"eval_f1": 75.1917557548502, |
|
"eval_runtime": 76.2534, |
|
"eval_samples_per_second": 44.05, |
|
"eval_steps_per_second": 1.574, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1116, |
|
"train_exact_match": 69.43056943056943, |
|
"train_f1": 86.54359079483112, |
|
"train_runtime": 23.8497, |
|
"train_samples_per_second": 43.648, |
|
"train_steps_per_second": 1.593 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 64.74529266357422, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 1.1571, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 60.25, |
|
"eval_f1": 78.34951496921676, |
|
"eval_runtime": 76.3189, |
|
"eval_samples_per_second": 44.013, |
|
"eval_steps_per_second": 1.572, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1674, |
|
"train_exact_match": 77.32267732267732, |
|
"train_f1": 90.90190658126197, |
|
"train_runtime": 24.1109, |
|
"train_samples_per_second": 43.632, |
|
"train_steps_per_second": 1.576 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 59.769203186035156, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8046, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 60.71875, |
|
"eval_f1": 79.20930220037307, |
|
"eval_runtime": 76.4543, |
|
"eval_samples_per_second": 43.935, |
|
"eval_steps_per_second": 1.57, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2232, |
|
"train_exact_match": 79.12087912087912, |
|
"train_f1": 91.91790747073001, |
|
"train_runtime": 24.69, |
|
"train_samples_per_second": 42.365, |
|
"train_steps_per_second": 1.539 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 10.912280082702637, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.612, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 61.15625, |
|
"eval_f1": 79.20198937913115, |
|
"eval_runtime": 76.6624, |
|
"eval_samples_per_second": 43.816, |
|
"eval_steps_per_second": 1.565, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2790, |
|
"train_exact_match": 82.71728271728271, |
|
"train_f1": 92.99953055921905, |
|
"train_runtime": 23.9309, |
|
"train_samples_per_second": 43.542, |
|
"train_steps_per_second": 1.588 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 39.581809997558594, |
|
"learning_rate": 0.0, |
|
"loss": 0.4905, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 61.03125, |
|
"eval_f1": 78.98703726982994, |
|
"eval_runtime": 76.5756, |
|
"eval_samples_per_second": 43.865, |
|
"eval_steps_per_second": 1.567, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 2790, |
|
"total_flos": 5.432587638826752e+16, |
|
"train_loss": 1.231632082265765, |
|
"train_runtime": 5057.1596, |
|
"train_samples_per_second": 15.423, |
|
"train_steps_per_second": 0.552 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2790, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 5.432587638826752e+16, |
|
"train_batch_size": 28, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|