med-alex's picture
End of training
b1b27ef verified
{
"best_metric": 79.20930220037307,
"best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_model/checkpoint-1674",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 2790,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"step": 558,
"train_exact_match": 58.64135864135864,
"train_f1": 78.03999717537812,
"train_runtime": 24.1085,
"train_samples_per_second": 43.802,
"train_steps_per_second": 1.576
},
{
"epoch": 1.0,
"grad_norm": 71.476318359375,
"learning_rate": 1e-05,
"loss": 3.094,
"step": 558
},
{
"epoch": 1.0,
"eval_exact_match": 55.96875,
"eval_f1": 75.1917557548502,
"eval_runtime": 76.2534,
"eval_samples_per_second": 44.05,
"eval_steps_per_second": 1.574,
"step": 558
},
{
"epoch": 2.0,
"step": 1116,
"train_exact_match": 69.43056943056943,
"train_f1": 86.54359079483112,
"train_runtime": 23.8497,
"train_samples_per_second": 43.648,
"train_steps_per_second": 1.593
},
{
"epoch": 2.0,
"grad_norm": 64.74529266357422,
"learning_rate": 7.500000000000001e-06,
"loss": 1.1571,
"step": 1116
},
{
"epoch": 2.0,
"eval_exact_match": 60.25,
"eval_f1": 78.34951496921676,
"eval_runtime": 76.3189,
"eval_samples_per_second": 44.013,
"eval_steps_per_second": 1.572,
"step": 1116
},
{
"epoch": 3.0,
"step": 1674,
"train_exact_match": 77.32267732267732,
"train_f1": 90.90190658126197,
"train_runtime": 24.1109,
"train_samples_per_second": 43.632,
"train_steps_per_second": 1.576
},
{
"epoch": 3.0,
"grad_norm": 59.769203186035156,
"learning_rate": 5e-06,
"loss": 0.8046,
"step": 1674
},
{
"epoch": 3.0,
"eval_exact_match": 60.71875,
"eval_f1": 79.20930220037307,
"eval_runtime": 76.4543,
"eval_samples_per_second": 43.935,
"eval_steps_per_second": 1.57,
"step": 1674
},
{
"epoch": 4.0,
"step": 2232,
"train_exact_match": 79.12087912087912,
"train_f1": 91.91790747073001,
"train_runtime": 24.69,
"train_samples_per_second": 42.365,
"train_steps_per_second": 1.539
},
{
"epoch": 4.0,
"grad_norm": 10.912280082702637,
"learning_rate": 2.5e-06,
"loss": 0.612,
"step": 2232
},
{
"epoch": 4.0,
"eval_exact_match": 61.15625,
"eval_f1": 79.20198937913115,
"eval_runtime": 76.6624,
"eval_samples_per_second": 43.816,
"eval_steps_per_second": 1.565,
"step": 2232
},
{
"epoch": 5.0,
"step": 2790,
"train_exact_match": 82.71728271728271,
"train_f1": 92.99953055921905,
"train_runtime": 23.9309,
"train_samples_per_second": 43.542,
"train_steps_per_second": 1.588
},
{
"epoch": 5.0,
"grad_norm": 39.581809997558594,
"learning_rate": 0.0,
"loss": 0.4905,
"step": 2790
},
{
"epoch": 5.0,
"eval_exact_match": 61.03125,
"eval_f1": 78.98703726982994,
"eval_runtime": 76.5756,
"eval_samples_per_second": 43.865,
"eval_steps_per_second": 1.567,
"step": 2790
},
{
"epoch": 5.0,
"step": 2790,
"total_flos": 5.432587638826752e+16,
"train_loss": 1.231632082265765,
"train_runtime": 5057.1596,
"train_samples_per_second": 15.423,
"train_steps_per_second": 0.552
}
],
"logging_steps": 500,
"max_steps": 2790,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 5.432587638826752e+16,
"train_batch_size": 28,
"trial_name": null,
"trial_params": null
}