|
{ |
|
"best_metric": 60.62761059424247, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mlqa/distillbeto/epochs_3_bs_16_lr_5e-5/checkpoint-12300", |
|
"epoch": 3.0, |
|
"global_step": 15381, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"eval_exact_match": 17.4, |
|
"eval_f1": 30.32932709945737, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.838437032702685e-05, |
|
"loss": 3.719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_exact_match": 20.6, |
|
"eval_f1": 37.076640043048776, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_exact_match": 24.8, |
|
"eval_f1": 41.84428151388094, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6758988362265134e-05, |
|
"loss": 2.9745, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_exact_match": 24.4, |
|
"eval_f1": 42.64360101358078, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5133606397503415e-05, |
|
"loss": 2.7579, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_exact_match": 26.8, |
|
"eval_f1": 48.268680757504505, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_exact_match": 28.2, |
|
"eval_f1": 49.756554071203084, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3508224432741696e-05, |
|
"loss": 2.5993, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_exact_match": 29.0, |
|
"eval_f1": 48.999017405606445, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_exact_match": 28.2, |
|
"eval_f1": 50.21409850637284, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.188284246797998e-05, |
|
"loss": 2.5658, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_exact_match": 30.8, |
|
"eval_f1": 51.69176784487257, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0260711267147784e-05, |
|
"loss": 2.4763, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_exact_match": 32.6, |
|
"eval_f1": 53.82386360217891, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_exact_match": 31.6, |
|
"eval_f1": 53.311489367004775, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8635329302386065e-05, |
|
"loss": 2.3893, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_exact_match": 32.0, |
|
"eval_f1": 53.23265973484611, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_exact_match": 31.8, |
|
"eval_f1": 54.46584036020615, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7009947337624346e-05, |
|
"loss": 2.3713, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_exact_match": 30.4, |
|
"eval_f1": 53.87761850361369, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5387816136792146e-05, |
|
"loss": 2.3229, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_exact_match": 33.8, |
|
"eval_f1": 56.159600258090194, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_exact_match": 34.8, |
|
"eval_f1": 57.45088180281838, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.376243417203043e-05, |
|
"loss": 2.2892, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_exact_match": 33.4, |
|
"eval_f1": 57.09233458055284, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_exact_match": 33.6, |
|
"eval_f1": 57.65540686906378, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.213705220726871e-05, |
|
"loss": 2.0818, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_exact_match": 34.0, |
|
"eval_f1": 56.63309020916699, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.051167024250699e-05, |
|
"loss": 1.9928, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_exact_match": 34.8, |
|
"eval_f1": 57.55733406275571, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_exact_match": 35.4, |
|
"eval_f1": 56.60799657495974, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.888628827774527e-05, |
|
"loss": 1.9304, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_exact_match": 33.8, |
|
"eval_f1": 56.337531192657984, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_exact_match": 35.8, |
|
"eval_f1": 58.39477883856857, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7260906312983555e-05, |
|
"loss": 1.984, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_exact_match": 35.2, |
|
"eval_f1": 57.36112838371099, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5635524348221836e-05, |
|
"loss": 1.9726, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_exact_match": 35.6, |
|
"eval_f1": 57.98892993007854, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_exact_match": 36.8, |
|
"eval_f1": 57.73167903988771, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4010142383460113e-05, |
|
"loss": 1.9706, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_exact_match": 34.2, |
|
"eval_f1": 57.1473434081718, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_exact_match": 36.6, |
|
"eval_f1": 58.059032558136906, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2384760418698394e-05, |
|
"loss": 1.9159, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_exact_match": 36.8, |
|
"eval_f1": 57.88380136273412, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.07626292178662e-05, |
|
"loss": 1.9019, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_exact_match": 36.6, |
|
"eval_f1": 58.80808564736181, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_exact_match": 36.2, |
|
"eval_f1": 58.473738166006726, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9137247253104482e-05, |
|
"loss": 1.9478, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_exact_match": 35.6, |
|
"eval_f1": 58.20909169330069, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_exact_match": 35.0, |
|
"eval_f1": 58.29322380617596, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.751186528834276e-05, |
|
"loss": 1.8909, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_exact_match": 35.8, |
|
"eval_f1": 59.20379968263694, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5889734087510566e-05, |
|
"loss": 1.7441, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_exact_match": 36.6, |
|
"eval_f1": 59.70561445422752, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_exact_match": 36.0, |
|
"eval_f1": 59.06336874221, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4264352122748847e-05, |
|
"loss": 1.6342, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_exact_match": 34.8, |
|
"eval_f1": 58.16179505760981, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_exact_match": 35.0, |
|
"eval_f1": 58.02777082755699, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2638970157987128e-05, |
|
"loss": 1.6129, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_exact_match": 34.6, |
|
"eval_f1": 58.29141560339857, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.101358819322541e-05, |
|
"loss": 1.5814, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_exact_match": 36.6, |
|
"eval_f1": 58.77641537872069, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_exact_match": 37.2, |
|
"eval_f1": 60.62761059424247, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.388206228463689e-06, |
|
"loss": 1.6041, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_exact_match": 36.0, |
|
"eval_f1": 59.05191316875973, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_exact_match": 34.0, |
|
"eval_f1": 58.3915453062455, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.76282426370197e-06, |
|
"loss": 1.6054, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_exact_match": 35.8, |
|
"eval_f1": 59.222805863165625, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.137442298940251e-06, |
|
"loss": 1.5882, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_exact_match": 34.6, |
|
"eval_f1": 58.24542684372941, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_exact_match": 35.4, |
|
"eval_f1": 58.46770769703503, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.512060334178532e-06, |
|
"loss": 1.5841, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_exact_match": 35.8, |
|
"eval_f1": 58.797998172208, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_exact_match": 35.4, |
|
"eval_f1": 59.277714258266194, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.8899291333463367e-06, |
|
"loss": 1.5605, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_exact_match": 35.8, |
|
"eval_f1": 59.45930485720972, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2645471685846174e-06, |
|
"loss": 1.5615, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_exact_match": 35.2, |
|
"eval_f1": 58.96715598234879, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_exact_match": 35.0, |
|
"eval_f1": 58.67411209812842, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 15381, |
|
"total_flos": 2.2092851276461056e+16, |
|
"train_loss": 2.057734964200962, |
|
"train_runtime": 14184.383, |
|
"train_samples_per_second": 17.348, |
|
"train_steps_per_second": 1.084 |
|
} |
|
], |
|
"max_steps": 15381, |
|
"num_train_epochs": 3, |
|
"total_flos": 2.2092851276461056e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|