|
{ |
|
"best_metric": 1.3909834623336792, |
|
"best_model_checkpoint": "finetuning/output/bart-base-finetuned_xe_ey_fae/checkpoint-25000", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 25377, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.804547424833511e-06, |
|
"loss": 5.4226, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.3627901941481408, |
|
"eval_loss": 3.8137550354003906, |
|
"eval_runtime": 98.6024, |
|
"eval_samples_per_second": 171.679, |
|
"eval_steps_per_second": 21.46, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.607518619222132e-06, |
|
"loss": 4.0408, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.46300121473546585, |
|
"eval_loss": 3.057621717453003, |
|
"eval_runtime": 99.414, |
|
"eval_samples_per_second": 170.278, |
|
"eval_steps_per_second": 21.285, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.41048981361075e-06, |
|
"loss": 3.4979, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.5132904448434071, |
|
"eval_loss": 2.70158314704895, |
|
"eval_runtime": 99.9098, |
|
"eval_samples_per_second": 169.433, |
|
"eval_steps_per_second": 21.179, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.21346100799937e-06, |
|
"loss": 3.1691, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5430825323065444, |
|
"eval_loss": 2.4879872798919678, |
|
"eval_runtime": 99.733, |
|
"eval_samples_per_second": 169.733, |
|
"eval_steps_per_second": 21.217, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.01643220238799e-06, |
|
"loss": 2.9564, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5644360825553116, |
|
"eval_loss": 2.330946445465088, |
|
"eval_runtime": 100.0932, |
|
"eval_samples_per_second": 169.122, |
|
"eval_steps_per_second": 21.14, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.819797454387831e-06, |
|
"loss": 2.8078, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5792018144043999, |
|
"eval_loss": 2.232025384902954, |
|
"eval_runtime": 100.0923, |
|
"eval_samples_per_second": 169.124, |
|
"eval_steps_per_second": 21.14, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.622768648776452e-06, |
|
"loss": 2.6741, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.592379386392151, |
|
"eval_loss": 2.1506171226501465, |
|
"eval_runtime": 99.9507, |
|
"eval_samples_per_second": 169.364, |
|
"eval_steps_per_second": 21.17, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.425739843165071e-06, |
|
"loss": 2.5323, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.617633758132823, |
|
"eval_loss": 1.9845681190490723, |
|
"eval_runtime": 100.0279, |
|
"eval_samples_per_second": 169.233, |
|
"eval_steps_per_second": 21.154, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.22871103755369e-06, |
|
"loss": 2.3678, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.6374534268418744, |
|
"eval_loss": 1.8812607526779175, |
|
"eval_runtime": 100.101, |
|
"eval_samples_per_second": 169.109, |
|
"eval_steps_per_second": 21.139, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.03168223194231e-06, |
|
"loss": 2.25, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.6496838449438552, |
|
"eval_loss": 1.809983253479004, |
|
"eval_runtime": 100.2479, |
|
"eval_samples_per_second": 168.861, |
|
"eval_steps_per_second": 21.108, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.83465342633093e-06, |
|
"loss": 2.1795, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.6579494225370981, |
|
"eval_loss": 1.7632389068603516, |
|
"eval_runtime": 100.0951, |
|
"eval_samples_per_second": 169.119, |
|
"eval_steps_per_second": 21.14, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.63762462071955e-06, |
|
"loss": 2.1203, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.664559097259069, |
|
"eval_loss": 1.7238309383392334, |
|
"eval_runtime": 99.9087, |
|
"eval_samples_per_second": 169.435, |
|
"eval_steps_per_second": 21.179, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.440595815108169e-06, |
|
"loss": 2.0764, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.6713205569113848, |
|
"eval_loss": 1.6855953931808472, |
|
"eval_runtime": 100.047, |
|
"eval_samples_per_second": 169.201, |
|
"eval_steps_per_second": 21.15, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.2435670094967895e-06, |
|
"loss": 2.026, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.6759595736369565, |
|
"eval_loss": 1.6568557024002075, |
|
"eval_runtime": 99.903, |
|
"eval_samples_per_second": 169.444, |
|
"eval_steps_per_second": 21.181, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.046932261496632e-06, |
|
"loss": 1.9942, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.6803347736385223, |
|
"eval_loss": 1.6309233903884888, |
|
"eval_runtime": 100.1047, |
|
"eval_samples_per_second": 169.103, |
|
"eval_steps_per_second": 21.138, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.849903455885251e-06, |
|
"loss": 1.9665, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.6836478246699454, |
|
"eval_loss": 1.612231731414795, |
|
"eval_runtime": 206.2817, |
|
"eval_samples_per_second": 82.063, |
|
"eval_steps_per_second": 10.258, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.652874650273871e-06, |
|
"loss": 1.9395, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6866433413548132, |
|
"eval_loss": 1.5912940502166748, |
|
"eval_runtime": 206.5703, |
|
"eval_samples_per_second": 81.948, |
|
"eval_steps_per_second": 10.243, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.455845844662491e-06, |
|
"loss": 1.9155, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.6894629039599454, |
|
"eval_loss": 1.5758066177368164, |
|
"eval_runtime": 206.7537, |
|
"eval_samples_per_second": 81.875, |
|
"eval_steps_per_second": 10.234, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.25881703905111e-06, |
|
"loss": 1.8828, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.6918324332777558, |
|
"eval_loss": 1.5607072114944458, |
|
"eval_runtime": 203.7553, |
|
"eval_samples_per_second": 83.08, |
|
"eval_steps_per_second": 10.385, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.06178823343973e-06, |
|
"loss": 1.8721, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.6948063170580184, |
|
"eval_loss": 1.5421587228775024, |
|
"eval_runtime": 205.9617, |
|
"eval_samples_per_second": 82.19, |
|
"eval_steps_per_second": 10.274, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.8647594278283496e-06, |
|
"loss": 1.8474, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_accuracy": 0.6963892745418871, |
|
"eval_loss": 1.5320152044296265, |
|
"eval_runtime": 206.4027, |
|
"eval_samples_per_second": 82.014, |
|
"eval_steps_per_second": 10.252, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.667730622216968e-06, |
|
"loss": 1.8293, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.6978303363523796, |
|
"eval_loss": 1.5213782787322998, |
|
"eval_runtime": 206.4515, |
|
"eval_samples_per_second": 81.995, |
|
"eval_steps_per_second": 10.249, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.471095874216811e-06, |
|
"loss": 1.8129, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.6997515674908317, |
|
"eval_loss": 1.5102019309997559, |
|
"eval_runtime": 203.4625, |
|
"eval_samples_per_second": 83.2, |
|
"eval_steps_per_second": 10.4, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.274067068605431e-06, |
|
"loss": 1.8148, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.7013130680794967, |
|
"eval_loss": 1.5009928941726685, |
|
"eval_runtime": 206.7456, |
|
"eval_samples_per_second": 81.878, |
|
"eval_steps_per_second": 10.235, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.077038262994051e-06, |
|
"loss": 1.7903, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.7037519606361885, |
|
"eval_loss": 1.484366774559021, |
|
"eval_runtime": 207.0125, |
|
"eval_samples_per_second": 81.773, |
|
"eval_steps_per_second": 10.222, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.88000945738267e-06, |
|
"loss": 1.7815, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.7039102273054718, |
|
"eval_loss": 1.4823458194732666, |
|
"eval_runtime": 206.0669, |
|
"eval_samples_per_second": 82.148, |
|
"eval_steps_per_second": 10.269, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.68298065177129e-06, |
|
"loss": 1.7637, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.705173223800616, |
|
"eval_loss": 1.4746402502059937, |
|
"eval_runtime": 202.4173, |
|
"eval_samples_per_second": 83.629, |
|
"eval_steps_per_second": 10.454, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.485951846159909e-06, |
|
"loss": 1.7623, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.706123367116372, |
|
"eval_loss": 1.470130205154419, |
|
"eval_runtime": 205.8377, |
|
"eval_samples_per_second": 82.24, |
|
"eval_steps_per_second": 10.28, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.289317098159752e-06, |
|
"loss": 1.7402, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_accuracy": 0.7075649407306767, |
|
"eval_loss": 1.4597938060760498, |
|
"eval_runtime": 206.177, |
|
"eval_samples_per_second": 82.104, |
|
"eval_steps_per_second": 10.263, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.092288292548371e-06, |
|
"loss": 1.7376, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.7089666967285505, |
|
"eval_loss": 1.451911449432373, |
|
"eval_runtime": 206.3085, |
|
"eval_samples_per_second": 82.052, |
|
"eval_steps_per_second": 10.256, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.89525948693699e-06, |
|
"loss": 1.7287, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.7101150715078346, |
|
"eval_loss": 1.4501255750656128, |
|
"eval_runtime": 100.0594, |
|
"eval_samples_per_second": 169.18, |
|
"eval_steps_per_second": 21.147, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.6982306813256103e-06, |
|
"loss": 1.7273, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.7106747872019036, |
|
"eval_loss": 1.4408985376358032, |
|
"eval_runtime": 100.2351, |
|
"eval_samples_per_second": 168.883, |
|
"eval_steps_per_second": 21.11, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.5012018757142298e-06, |
|
"loss": 1.7119, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.7125312598082394, |
|
"eval_loss": 1.431384563446045, |
|
"eval_runtime": 100.206, |
|
"eval_samples_per_second": 168.932, |
|
"eval_steps_per_second": 21.117, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3045671277140724e-06, |
|
"loss": 1.7098, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.712873669928985, |
|
"eval_loss": 1.4268542528152466, |
|
"eval_runtime": 99.9713, |
|
"eval_samples_per_second": 169.329, |
|
"eval_steps_per_second": 21.166, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.1075383221026915e-06, |
|
"loss": 1.6978, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.7132452679915875, |
|
"eval_loss": 1.4275221824645996, |
|
"eval_runtime": 100.0415, |
|
"eval_samples_per_second": 169.21, |
|
"eval_steps_per_second": 21.151, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.910509516491311e-06, |
|
"loss": 1.698, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.7139832935058783, |
|
"eval_loss": 1.421799898147583, |
|
"eval_runtime": 100.2878, |
|
"eval_samples_per_second": 168.794, |
|
"eval_steps_per_second": 21.099, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.713480710879931e-06, |
|
"loss": 1.6837, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.7146896815582429, |
|
"eval_loss": 1.4150662422180176, |
|
"eval_runtime": 100.1729, |
|
"eval_samples_per_second": 168.988, |
|
"eval_steps_per_second": 21.123, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.5164519052685504e-06, |
|
"loss": 1.6908, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.7148777636104067, |
|
"eval_loss": 1.413697361946106, |
|
"eval_runtime": 100.0403, |
|
"eval_samples_per_second": 169.212, |
|
"eval_steps_per_second": 21.151, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.3194230996571703e-06, |
|
"loss": 1.6902, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.7161167332062431, |
|
"eval_loss": 1.4084678888320923, |
|
"eval_runtime": 99.9514, |
|
"eval_samples_per_second": 169.362, |
|
"eval_steps_per_second": 21.17, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.12239429404579e-06, |
|
"loss": 1.6741, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.7153571848548731, |
|
"eval_loss": 1.4121222496032715, |
|
"eval_runtime": 99.7721, |
|
"eval_samples_per_second": 169.667, |
|
"eval_steps_per_second": 21.208, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.925759546045632e-06, |
|
"loss": 1.6823, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.7164751883355099, |
|
"eval_loss": 1.4036943912506104, |
|
"eval_runtime": 96.9786, |
|
"eval_samples_per_second": 174.554, |
|
"eval_steps_per_second": 21.819, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.7287307404342515e-06, |
|
"loss": 1.6692, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.7164227335870778, |
|
"eval_loss": 1.4038887023925781, |
|
"eval_runtime": 96.6299, |
|
"eval_samples_per_second": 175.184, |
|
"eval_steps_per_second": 21.898, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5317019348228712e-06, |
|
"loss": 1.6669, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_accuracy": 0.7171810007042829, |
|
"eval_loss": 1.4014757871627808, |
|
"eval_runtime": 96.5289, |
|
"eval_samples_per_second": 175.367, |
|
"eval_steps_per_second": 21.921, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.334673129211491e-06, |
|
"loss": 1.6613, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.7179443895145537, |
|
"eval_loss": 1.3979177474975586, |
|
"eval_runtime": 96.4739, |
|
"eval_samples_per_second": 175.467, |
|
"eval_steps_per_second": 21.933, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1376443236001104e-06, |
|
"loss": 1.664, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.7180417425737022, |
|
"eval_loss": 1.3960251808166504, |
|
"eval_runtime": 96.2769, |
|
"eval_samples_per_second": 175.826, |
|
"eval_steps_per_second": 21.978, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.406155179887299e-07, |
|
"loss": 1.6615, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.71719773048631, |
|
"eval_loss": 1.4012339115142822, |
|
"eval_runtime": 96.2915, |
|
"eval_samples_per_second": 175.8, |
|
"eval_steps_per_second": 21.975, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.435867123773496e-07, |
|
"loss": 1.6627, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_accuracy": 0.7177754487686726, |
|
"eval_loss": 1.3974287509918213, |
|
"eval_runtime": 96.2242, |
|
"eval_samples_per_second": 175.922, |
|
"eval_steps_per_second": 21.99, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.465579067659692e-07, |
|
"loss": 1.6489, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_accuracy": 0.7182007239397646, |
|
"eval_loss": 1.3947515487670898, |
|
"eval_runtime": 96.06, |
|
"eval_samples_per_second": 176.223, |
|
"eval_steps_per_second": 22.028, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.499231587658116e-07, |
|
"loss": 1.6429, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.7183795073646381, |
|
"eval_loss": 1.3920938968658447, |
|
"eval_runtime": 96.1507, |
|
"eval_samples_per_second": 176.057, |
|
"eval_steps_per_second": 22.007, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.528943531544312e-07, |
|
"loss": 1.6477, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7182484820177487, |
|
"eval_loss": 1.3909834623336792, |
|
"eval_runtime": 96.1141, |
|
"eval_samples_per_second": 176.124, |
|
"eval_steps_per_second": 22.016, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 25377, |
|
"total_flos": 1.2378168378261504e+17, |
|
"train_loss": 2.057705193860182, |
|
"train_runtime": 15314.6638, |
|
"train_samples_per_second": 26.512, |
|
"train_steps_per_second": 1.657 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 25377, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.2378168378261504e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|