{ "best_metric": 1.3909834623336792, "best_model_checkpoint": "finetuning/output/bart-base-finetuned_xe_ey_fae/checkpoint-25000", "epoch": 3.0, "eval_steps": 500, "global_step": 25377, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 9.804547424833511e-06, "loss": 5.4226, "step": 500 }, { "epoch": 0.06, "eval_accuracy": 0.3627901941481408, "eval_loss": 3.8137550354003906, "eval_runtime": 98.6024, "eval_samples_per_second": 171.679, "eval_steps_per_second": 21.46, "step": 500 }, { "epoch": 0.12, "learning_rate": 9.607518619222132e-06, "loss": 4.0408, "step": 1000 }, { "epoch": 0.12, "eval_accuracy": 0.46300121473546585, "eval_loss": 3.057621717453003, "eval_runtime": 99.414, "eval_samples_per_second": 170.278, "eval_steps_per_second": 21.285, "step": 1000 }, { "epoch": 0.18, "learning_rate": 9.41048981361075e-06, "loss": 3.4979, "step": 1500 }, { "epoch": 0.18, "eval_accuracy": 0.5132904448434071, "eval_loss": 2.70158314704895, "eval_runtime": 99.9098, "eval_samples_per_second": 169.433, "eval_steps_per_second": 21.179, "step": 1500 }, { "epoch": 0.24, "learning_rate": 9.21346100799937e-06, "loss": 3.1691, "step": 2000 }, { "epoch": 0.24, "eval_accuracy": 0.5430825323065444, "eval_loss": 2.4879872798919678, "eval_runtime": 99.733, "eval_samples_per_second": 169.733, "eval_steps_per_second": 21.217, "step": 2000 }, { "epoch": 0.3, "learning_rate": 9.01643220238799e-06, "loss": 2.9564, "step": 2500 }, { "epoch": 0.3, "eval_accuracy": 0.5644360825553116, "eval_loss": 2.330946445465088, "eval_runtime": 100.0932, "eval_samples_per_second": 169.122, "eval_steps_per_second": 21.14, "step": 2500 }, { "epoch": 0.35, "learning_rate": 8.819797454387831e-06, "loss": 2.8078, "step": 3000 }, { "epoch": 0.35, "eval_accuracy": 0.5792018144043999, "eval_loss": 2.232025384902954, "eval_runtime": 100.0923, "eval_samples_per_second": 169.124, "eval_steps_per_second": 21.14, "step": 3000 }, { "epoch": 0.41, "learning_rate": 8.622768648776452e-06, "loss": 2.6741, "step": 3500 }, { "epoch": 0.41, "eval_accuracy": 0.592379386392151, "eval_loss": 2.1506171226501465, "eval_runtime": 99.9507, "eval_samples_per_second": 169.364, "eval_steps_per_second": 21.17, "step": 3500 }, { "epoch": 0.47, "learning_rate": 8.425739843165071e-06, "loss": 2.5323, "step": 4000 }, { "epoch": 0.47, "eval_accuracy": 0.617633758132823, "eval_loss": 1.9845681190490723, "eval_runtime": 100.0279, "eval_samples_per_second": 169.233, "eval_steps_per_second": 21.154, "step": 4000 }, { "epoch": 0.53, "learning_rate": 8.22871103755369e-06, "loss": 2.3678, "step": 4500 }, { "epoch": 0.53, "eval_accuracy": 0.6374534268418744, "eval_loss": 1.8812607526779175, "eval_runtime": 100.101, "eval_samples_per_second": 169.109, "eval_steps_per_second": 21.139, "step": 4500 }, { "epoch": 0.59, "learning_rate": 8.03168223194231e-06, "loss": 2.25, "step": 5000 }, { "epoch": 0.59, "eval_accuracy": 0.6496838449438552, "eval_loss": 1.809983253479004, "eval_runtime": 100.2479, "eval_samples_per_second": 168.861, "eval_steps_per_second": 21.108, "step": 5000 }, { "epoch": 0.65, "learning_rate": 7.83465342633093e-06, "loss": 2.1795, "step": 5500 }, { "epoch": 0.65, "eval_accuracy": 0.6579494225370981, "eval_loss": 1.7632389068603516, "eval_runtime": 100.0951, "eval_samples_per_second": 169.119, "eval_steps_per_second": 21.14, "step": 5500 }, { "epoch": 0.71, "learning_rate": 7.63762462071955e-06, "loss": 2.1203, "step": 6000 }, { "epoch": 0.71, "eval_accuracy": 0.664559097259069, "eval_loss": 1.7238309383392334, "eval_runtime": 99.9087, "eval_samples_per_second": 169.435, "eval_steps_per_second": 21.179, "step": 6000 }, { "epoch": 0.77, "learning_rate": 7.440595815108169e-06, "loss": 2.0764, "step": 6500 }, { "epoch": 0.77, "eval_accuracy": 0.6713205569113848, "eval_loss": 1.6855953931808472, "eval_runtime": 100.047, "eval_samples_per_second": 169.201, "eval_steps_per_second": 21.15, "step": 6500 }, { "epoch": 0.83, "learning_rate": 7.2435670094967895e-06, "loss": 2.026, "step": 7000 }, { "epoch": 0.83, "eval_accuracy": 0.6759595736369565, "eval_loss": 1.6568557024002075, "eval_runtime": 99.903, "eval_samples_per_second": 169.444, "eval_steps_per_second": 21.181, "step": 7000 }, { "epoch": 0.89, "learning_rate": 7.046932261496632e-06, "loss": 1.9942, "step": 7500 }, { "epoch": 0.89, "eval_accuracy": 0.6803347736385223, "eval_loss": 1.6309233903884888, "eval_runtime": 100.1047, "eval_samples_per_second": 169.103, "eval_steps_per_second": 21.138, "step": 7500 }, { "epoch": 0.95, "learning_rate": 6.849903455885251e-06, "loss": 1.9665, "step": 8000 }, { "epoch": 0.95, "eval_accuracy": 0.6836478246699454, "eval_loss": 1.612231731414795, "eval_runtime": 206.2817, "eval_samples_per_second": 82.063, "eval_steps_per_second": 10.258, "step": 8000 }, { "epoch": 1.0, "learning_rate": 6.652874650273871e-06, "loss": 1.9395, "step": 8500 }, { "epoch": 1.0, "eval_accuracy": 0.6866433413548132, "eval_loss": 1.5912940502166748, "eval_runtime": 206.5703, "eval_samples_per_second": 81.948, "eval_steps_per_second": 10.243, "step": 8500 }, { "epoch": 1.06, "learning_rate": 6.455845844662491e-06, "loss": 1.9155, "step": 9000 }, { "epoch": 1.06, "eval_accuracy": 0.6894629039599454, "eval_loss": 1.5758066177368164, "eval_runtime": 206.7537, "eval_samples_per_second": 81.875, "eval_steps_per_second": 10.234, "step": 9000 }, { "epoch": 1.12, "learning_rate": 6.25881703905111e-06, "loss": 1.8828, "step": 9500 }, { "epoch": 1.12, "eval_accuracy": 0.6918324332777558, "eval_loss": 1.5607072114944458, "eval_runtime": 203.7553, "eval_samples_per_second": 83.08, "eval_steps_per_second": 10.385, "step": 9500 }, { "epoch": 1.18, "learning_rate": 6.06178823343973e-06, "loss": 1.8721, "step": 10000 }, { "epoch": 1.18, "eval_accuracy": 0.6948063170580184, "eval_loss": 1.5421587228775024, "eval_runtime": 205.9617, "eval_samples_per_second": 82.19, "eval_steps_per_second": 10.274, "step": 10000 }, { "epoch": 1.24, "learning_rate": 5.8647594278283496e-06, "loss": 1.8474, "step": 10500 }, { "epoch": 1.24, "eval_accuracy": 0.6963892745418871, "eval_loss": 1.5320152044296265, "eval_runtime": 206.4027, "eval_samples_per_second": 82.014, "eval_steps_per_second": 10.252, "step": 10500 }, { "epoch": 1.3, "learning_rate": 5.667730622216968e-06, "loss": 1.8293, "step": 11000 }, { "epoch": 1.3, "eval_accuracy": 0.6978303363523796, "eval_loss": 1.5213782787322998, "eval_runtime": 206.4515, "eval_samples_per_second": 81.995, "eval_steps_per_second": 10.249, "step": 11000 }, { "epoch": 1.36, "learning_rate": 5.471095874216811e-06, "loss": 1.8129, "step": 11500 }, { "epoch": 1.36, "eval_accuracy": 0.6997515674908317, "eval_loss": 1.5102019309997559, "eval_runtime": 203.4625, "eval_samples_per_second": 83.2, "eval_steps_per_second": 10.4, "step": 11500 }, { "epoch": 1.42, "learning_rate": 5.274067068605431e-06, "loss": 1.8148, "step": 12000 }, { "epoch": 1.42, "eval_accuracy": 0.7013130680794967, "eval_loss": 1.5009928941726685, "eval_runtime": 206.7456, "eval_samples_per_second": 81.878, "eval_steps_per_second": 10.235, "step": 12000 }, { "epoch": 1.48, "learning_rate": 5.077038262994051e-06, "loss": 1.7903, "step": 12500 }, { "epoch": 1.48, "eval_accuracy": 0.7037519606361885, "eval_loss": 1.484366774559021, "eval_runtime": 207.0125, "eval_samples_per_second": 81.773, "eval_steps_per_second": 10.222, "step": 12500 }, { "epoch": 1.54, "learning_rate": 4.88000945738267e-06, "loss": 1.7815, "step": 13000 }, { "epoch": 1.54, "eval_accuracy": 0.7039102273054718, "eval_loss": 1.4823458194732666, "eval_runtime": 206.0669, "eval_samples_per_second": 82.148, "eval_steps_per_second": 10.269, "step": 13000 }, { "epoch": 1.6, "learning_rate": 4.68298065177129e-06, "loss": 1.7637, "step": 13500 }, { "epoch": 1.6, "eval_accuracy": 0.705173223800616, "eval_loss": 1.4746402502059937, "eval_runtime": 202.4173, "eval_samples_per_second": 83.629, "eval_steps_per_second": 10.454, "step": 13500 }, { "epoch": 1.66, "learning_rate": 4.485951846159909e-06, "loss": 1.7623, "step": 14000 }, { "epoch": 1.66, "eval_accuracy": 0.706123367116372, "eval_loss": 1.470130205154419, "eval_runtime": 205.8377, "eval_samples_per_second": 82.24, "eval_steps_per_second": 10.28, "step": 14000 }, { "epoch": 1.71, "learning_rate": 4.289317098159752e-06, "loss": 1.7402, "step": 14500 }, { "epoch": 1.71, "eval_accuracy": 0.7075649407306767, "eval_loss": 1.4597938060760498, "eval_runtime": 206.177, "eval_samples_per_second": 82.104, "eval_steps_per_second": 10.263, "step": 14500 }, { "epoch": 1.77, "learning_rate": 4.092288292548371e-06, "loss": 1.7376, "step": 15000 }, { "epoch": 1.77, "eval_accuracy": 0.7089666967285505, "eval_loss": 1.451911449432373, "eval_runtime": 206.3085, "eval_samples_per_second": 82.052, "eval_steps_per_second": 10.256, "step": 15000 }, { "epoch": 1.83, "learning_rate": 3.89525948693699e-06, "loss": 1.7287, "step": 15500 }, { "epoch": 1.83, "eval_accuracy": 0.7101150715078346, "eval_loss": 1.4501255750656128, "eval_runtime": 100.0594, "eval_samples_per_second": 169.18, "eval_steps_per_second": 21.147, "step": 15500 }, { "epoch": 1.89, "learning_rate": 3.6982306813256103e-06, "loss": 1.7273, "step": 16000 }, { "epoch": 1.89, "eval_accuracy": 0.7106747872019036, "eval_loss": 1.4408985376358032, "eval_runtime": 100.2351, "eval_samples_per_second": 168.883, "eval_steps_per_second": 21.11, "step": 16000 }, { "epoch": 1.95, "learning_rate": 3.5012018757142298e-06, "loss": 1.7119, "step": 16500 }, { "epoch": 1.95, "eval_accuracy": 0.7125312598082394, "eval_loss": 1.431384563446045, "eval_runtime": 100.206, "eval_samples_per_second": 168.932, "eval_steps_per_second": 21.117, "step": 16500 }, { "epoch": 2.01, "learning_rate": 3.3045671277140724e-06, "loss": 1.7098, "step": 17000 }, { "epoch": 2.01, "eval_accuracy": 0.712873669928985, "eval_loss": 1.4268542528152466, "eval_runtime": 99.9713, "eval_samples_per_second": 169.329, "eval_steps_per_second": 21.166, "step": 17000 }, { "epoch": 2.07, "learning_rate": 3.1075383221026915e-06, "loss": 1.6978, "step": 17500 }, { "epoch": 2.07, "eval_accuracy": 0.7132452679915875, "eval_loss": 1.4275221824645996, "eval_runtime": 100.0415, "eval_samples_per_second": 169.21, "eval_steps_per_second": 21.151, "step": 17500 }, { "epoch": 2.13, "learning_rate": 2.910509516491311e-06, "loss": 1.698, "step": 18000 }, { "epoch": 2.13, "eval_accuracy": 0.7139832935058783, "eval_loss": 1.421799898147583, "eval_runtime": 100.2878, "eval_samples_per_second": 168.794, "eval_steps_per_second": 21.099, "step": 18000 }, { "epoch": 2.19, "learning_rate": 2.713480710879931e-06, "loss": 1.6837, "step": 18500 }, { "epoch": 2.19, "eval_accuracy": 0.7146896815582429, "eval_loss": 1.4150662422180176, "eval_runtime": 100.1729, "eval_samples_per_second": 168.988, "eval_steps_per_second": 21.123, "step": 18500 }, { "epoch": 2.25, "learning_rate": 2.5164519052685504e-06, "loss": 1.6908, "step": 19000 }, { "epoch": 2.25, "eval_accuracy": 0.7148777636104067, "eval_loss": 1.413697361946106, "eval_runtime": 100.0403, "eval_samples_per_second": 169.212, "eval_steps_per_second": 21.151, "step": 19000 }, { "epoch": 2.31, "learning_rate": 2.3194230996571703e-06, "loss": 1.6902, "step": 19500 }, { "epoch": 2.31, "eval_accuracy": 0.7161167332062431, "eval_loss": 1.4084678888320923, "eval_runtime": 99.9514, "eval_samples_per_second": 169.362, "eval_steps_per_second": 21.17, "step": 19500 }, { "epoch": 2.36, "learning_rate": 2.12239429404579e-06, "loss": 1.6741, "step": 20000 }, { "epoch": 2.36, "eval_accuracy": 0.7153571848548731, "eval_loss": 1.4121222496032715, "eval_runtime": 99.7721, "eval_samples_per_second": 169.667, "eval_steps_per_second": 21.208, "step": 20000 }, { "epoch": 2.42, "learning_rate": 1.925759546045632e-06, "loss": 1.6823, "step": 20500 }, { "epoch": 2.42, "eval_accuracy": 0.7164751883355099, "eval_loss": 1.4036943912506104, "eval_runtime": 96.9786, "eval_samples_per_second": 174.554, "eval_steps_per_second": 21.819, "step": 20500 }, { "epoch": 2.48, "learning_rate": 1.7287307404342515e-06, "loss": 1.6692, "step": 21000 }, { "epoch": 2.48, "eval_accuracy": 0.7164227335870778, "eval_loss": 1.4038887023925781, "eval_runtime": 96.6299, "eval_samples_per_second": 175.184, "eval_steps_per_second": 21.898, "step": 21000 }, { "epoch": 2.54, "learning_rate": 1.5317019348228712e-06, "loss": 1.6669, "step": 21500 }, { "epoch": 2.54, "eval_accuracy": 0.7171810007042829, "eval_loss": 1.4014757871627808, "eval_runtime": 96.5289, "eval_samples_per_second": 175.367, "eval_steps_per_second": 21.921, "step": 21500 }, { "epoch": 2.6, "learning_rate": 1.334673129211491e-06, "loss": 1.6613, "step": 22000 }, { "epoch": 2.6, "eval_accuracy": 0.7179443895145537, "eval_loss": 1.3979177474975586, "eval_runtime": 96.4739, "eval_samples_per_second": 175.467, "eval_steps_per_second": 21.933, "step": 22000 }, { "epoch": 2.66, "learning_rate": 1.1376443236001104e-06, "loss": 1.664, "step": 22500 }, { "epoch": 2.66, "eval_accuracy": 0.7180417425737022, "eval_loss": 1.3960251808166504, "eval_runtime": 96.2769, "eval_samples_per_second": 175.826, "eval_steps_per_second": 21.978, "step": 22500 }, { "epoch": 2.72, "learning_rate": 9.406155179887299e-07, "loss": 1.6615, "step": 23000 }, { "epoch": 2.72, "eval_accuracy": 0.71719773048631, "eval_loss": 1.4012339115142822, "eval_runtime": 96.2915, "eval_samples_per_second": 175.8, "eval_steps_per_second": 21.975, "step": 23000 }, { "epoch": 2.78, "learning_rate": 7.435867123773496e-07, "loss": 1.6627, "step": 23500 }, { "epoch": 2.78, "eval_accuracy": 0.7177754487686726, "eval_loss": 1.3974287509918213, "eval_runtime": 96.2242, "eval_samples_per_second": 175.922, "eval_steps_per_second": 21.99, "step": 23500 }, { "epoch": 2.84, "learning_rate": 5.465579067659692e-07, "loss": 1.6489, "step": 24000 }, { "epoch": 2.84, "eval_accuracy": 0.7182007239397646, "eval_loss": 1.3947515487670898, "eval_runtime": 96.06, "eval_samples_per_second": 176.223, "eval_steps_per_second": 22.028, "step": 24000 }, { "epoch": 2.9, "learning_rate": 3.499231587658116e-07, "loss": 1.6429, "step": 24500 }, { "epoch": 2.9, "eval_accuracy": 0.7183795073646381, "eval_loss": 1.3920938968658447, "eval_runtime": 96.1507, "eval_samples_per_second": 176.057, "eval_steps_per_second": 22.007, "step": 24500 }, { "epoch": 2.96, "learning_rate": 1.528943531544312e-07, "loss": 1.6477, "step": 25000 }, { "epoch": 2.96, "eval_accuracy": 0.7182484820177487, "eval_loss": 1.3909834623336792, "eval_runtime": 96.1141, "eval_samples_per_second": 176.124, "eval_steps_per_second": 22.016, "step": 25000 }, { "epoch": 3.0, "step": 25377, "total_flos": 1.2378168378261504e+17, "train_loss": 2.057705193860182, "train_runtime": 15314.6638, "train_samples_per_second": 26.512, "train_steps_per_second": 1.657 } ], "logging_steps": 500, "max_steps": 25377, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.2378168378261504e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }