{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 1757, "global_step": 35140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 23.659637451171875, "learning_rate": 9.97723392145703e-07, "loss": 3.8346, "step": 1757 }, { "epoch": 0.1, "eval_nli-pairs_loss": 2.8535287380218506, "eval_nli-pairs_runtime": 23.1384, "eval_nli-pairs_samples_per_second": 294.229, "eval_nli-pairs_steps_per_second": 18.411, "step": 1757 }, { "epoch": 0.1, "eval_scitail-pairs-pos_loss": 2.323117256164551, "eval_scitail-pairs-pos_runtime": 5.1803, "eval_scitail-pairs-pos_samples_per_second": 251.722, "eval_scitail-pairs-pos_steps_per_second": 15.829, "step": 1757 }, { "epoch": 0.1, "eval_qnli-contrastive_loss": 3.09728741645813, "eval_qnli-contrastive_runtime": 15.5151, "eval_qnli-contrastive_samples_per_second": 352.109, "eval_qnli-contrastive_steps_per_second": 22.043, "step": 1757 }, { "epoch": 0.2, "grad_norm": 13.776155471801758, "learning_rate": 1.9965850882185546e-06, "loss": 1.8532, "step": 3514 }, { "epoch": 0.2, "eval_nli-pairs_loss": 1.3508331775665283, "eval_nli-pairs_runtime": 22.8642, "eval_nli-pairs_samples_per_second": 297.758, "eval_nli-pairs_steps_per_second": 18.632, "step": 3514 }, { "epoch": 0.2, "eval_scitail-pairs-pos_loss": 0.9754649996757507, "eval_scitail-pairs-pos_runtime": 5.1924, "eval_scitail-pairs-pos_samples_per_second": 251.136, "eval_scitail-pairs-pos_steps_per_second": 15.792, "step": 3514 }, { "epoch": 0.2, "eval_qnli-contrastive_loss": 2.0602548122406006, "eval_qnli-contrastive_runtime": 15.6036, "eval_qnli-contrastive_samples_per_second": 350.111, "eval_qnli-contrastive_steps_per_second": 21.918, "step": 3514 }, { "epoch": 0.3, "grad_norm": 0.6704504489898682, "learning_rate": 2.99601593625498e-06, "loss": 1.2185, "step": 5271 }, { "epoch": 0.3, "eval_nli-pairs_loss": 0.9380640983581543, "eval_nli-pairs_runtime": 23.1102, "eval_nli-pairs_samples_per_second": 294.589, "eval_nli-pairs_steps_per_second": 18.433, "step": 5271 }, { "epoch": 0.3, "eval_scitail-pairs-pos_loss": 0.7407301664352417, "eval_scitail-pairs-pos_runtime": 5.2512, "eval_scitail-pairs-pos_samples_per_second": 248.322, "eval_scitail-pairs-pos_steps_per_second": 15.615, "step": 5271 }, { "epoch": 0.3, "eval_qnli-contrastive_loss": 1.2534083127975464, "eval_qnli-contrastive_runtime": 15.6475, "eval_qnli-contrastive_samples_per_second": 349.129, "eval_qnli-contrastive_steps_per_second": 21.857, "step": 5271 }, { "epoch": 0.4, "grad_norm": 26.072860717773438, "learning_rate": 3.99601593625498e-06, "loss": 0.9584, "step": 7028 }, { "epoch": 0.4, "eval_nli-pairs_loss": 0.749484658241272, "eval_nli-pairs_runtime": 23.0514, "eval_nli-pairs_samples_per_second": 295.34, "eval_nli-pairs_steps_per_second": 18.48, "step": 7028 }, { "epoch": 0.4, "eval_scitail-pairs-pos_loss": 0.661561131477356, "eval_scitail-pairs-pos_runtime": 5.2207, "eval_scitail-pairs-pos_samples_per_second": 249.774, "eval_scitail-pairs-pos_steps_per_second": 15.707, "step": 7028 }, { "epoch": 0.4, "eval_qnli-contrastive_loss": 0.5139556527137756, "eval_qnli-contrastive_runtime": 15.681, "eval_qnli-contrastive_samples_per_second": 348.384, "eval_qnli-contrastive_steps_per_second": 21.81, "step": 7028 }, { "epoch": 0.5, "grad_norm": 24.09697914123535, "learning_rate": 4.995446784291406e-06, "loss": 0.8157, "step": 8785 }, { "epoch": 0.5, "eval_nli-pairs_loss": 0.6549726724624634, "eval_nli-pairs_runtime": 23.2274, "eval_nli-pairs_samples_per_second": 293.102, "eval_nli-pairs_steps_per_second": 18.34, "step": 8785 }, { "epoch": 0.5, "eval_scitail-pairs-pos_loss": 0.6056841611862183, "eval_scitail-pairs-pos_runtime": 5.2473, "eval_scitail-pairs-pos_samples_per_second": 248.508, "eval_scitail-pairs-pos_steps_per_second": 15.627, "step": 8785 }, { "epoch": 0.5, "eval_qnli-contrastive_loss": 0.3295331299304962, "eval_qnli-contrastive_runtime": 15.7204, "eval_qnli-contrastive_samples_per_second": 347.511, "eval_qnli-contrastive_steps_per_second": 21.755, "step": 8785 }, { "epoch": 0.6, "grad_norm": 9.664803504943848, "learning_rate": 5.994877632327832e-06, "loss": 0.6698, "step": 10542 }, { "epoch": 0.6, "eval_nli-pairs_loss": 0.5809468626976013, "eval_nli-pairs_runtime": 22.9525, "eval_nli-pairs_samples_per_second": 296.612, "eval_nli-pairs_steps_per_second": 18.56, "step": 10542 }, { "epoch": 0.6, "eval_scitail-pairs-pos_loss": 0.5820835828781128, "eval_scitail-pairs-pos_runtime": 5.1829, "eval_scitail-pairs-pos_samples_per_second": 251.599, "eval_scitail-pairs-pos_steps_per_second": 15.821, "step": 10542 }, { "epoch": 0.6, "eval_qnli-contrastive_loss": 0.24226614832878113, "eval_qnli-contrastive_runtime": 15.6321, "eval_qnli-contrastive_samples_per_second": 349.473, "eval_qnli-contrastive_steps_per_second": 21.878, "step": 10542 }, { "epoch": 0.7, "grad_norm": 66.77753448486328, "learning_rate": 6.994877632327832e-06, "loss": 0.6497, "step": 12299 }, { "epoch": 0.7, "eval_nli-pairs_loss": 0.5178281664848328, "eval_nli-pairs_runtime": 23.0673, "eval_nli-pairs_samples_per_second": 295.136, "eval_nli-pairs_steps_per_second": 18.468, "step": 12299 }, { "epoch": 0.7, "eval_scitail-pairs-pos_loss": 0.504002571105957, "eval_scitail-pairs-pos_runtime": 5.1845, "eval_scitail-pairs-pos_samples_per_second": 251.52, "eval_scitail-pairs-pos_steps_per_second": 15.816, "step": 12299 }, { "epoch": 0.7, "eval_qnli-contrastive_loss": 0.24089547991752625, "eval_qnli-contrastive_runtime": 15.5228, "eval_qnli-contrastive_samples_per_second": 351.933, "eval_qnli-contrastive_steps_per_second": 22.032, "step": 12299 }, { "epoch": 0.8, "grad_norm": 0.6044542193412781, "learning_rate": 7.994308480364257e-06, "loss": 0.5737, "step": 14056 }, { "epoch": 0.8, "eval_nli-pairs_loss": 0.5019380450248718, "eval_nli-pairs_runtime": 23.0659, "eval_nli-pairs_samples_per_second": 295.154, "eval_nli-pairs_steps_per_second": 18.469, "step": 14056 }, { "epoch": 0.8, "eval_scitail-pairs-pos_loss": 0.49418017268180847, "eval_scitail-pairs-pos_runtime": 5.2457, "eval_scitail-pairs-pos_samples_per_second": 248.585, "eval_scitail-pairs-pos_steps_per_second": 15.632, "step": 14056 }, { "epoch": 0.8, "eval_qnli-contrastive_loss": 0.14995019137859344, "eval_qnli-contrastive_runtime": 15.7177, "eval_qnli-contrastive_samples_per_second": 347.57, "eval_qnli-contrastive_steps_per_second": 21.759, "step": 14056 }, { "epoch": 0.9, "grad_norm": 0.4454790949821472, "learning_rate": 8.993739328400684e-06, "loss": 0.5896, "step": 15813 }, { "epoch": 0.9, "eval_nli-pairs_loss": 0.4803747236728668, "eval_nli-pairs_runtime": 23.0746, "eval_nli-pairs_samples_per_second": 295.043, "eval_nli-pairs_steps_per_second": 18.462, "step": 15813 }, { "epoch": 0.9, "eval_scitail-pairs-pos_loss": 0.47568026185035706, "eval_scitail-pairs-pos_runtime": 5.2076, "eval_scitail-pairs-pos_samples_per_second": 250.402, "eval_scitail-pairs-pos_steps_per_second": 15.746, "step": 15813 }, { "epoch": 0.9, "eval_qnli-contrastive_loss": 0.14648529887199402, "eval_qnli-contrastive_runtime": 15.5997, "eval_qnli-contrastive_samples_per_second": 350.199, "eval_qnli-contrastive_steps_per_second": 21.924, "step": 15813 }, { "epoch": 1.0, "grad_norm": 196.14842224121094, "learning_rate": 9.993739328400683e-06, "loss": 0.5174, "step": 17570 }, { "epoch": 1.0, "eval_nli-pairs_loss": 0.4586646258831024, "eval_nli-pairs_runtime": 22.8967, "eval_nli-pairs_samples_per_second": 297.336, "eval_nli-pairs_steps_per_second": 18.605, "step": 17570 }, { "epoch": 1.0, "eval_scitail-pairs-pos_loss": 0.5253121256828308, "eval_scitail-pairs-pos_runtime": 5.1603, "eval_scitail-pairs-pos_samples_per_second": 252.699, "eval_scitail-pairs-pos_steps_per_second": 15.891, "step": 17570 }, { "epoch": 1.0, "eval_qnli-contrastive_loss": 0.0533733032643795, "eval_qnli-contrastive_runtime": 15.5083, "eval_qnli-contrastive_samples_per_second": 352.263, "eval_qnli-contrastive_steps_per_second": 22.053, "step": 17570 }, { "epoch": 1.1, "grad_norm": 18.839372634887695, "learning_rate": 9.75831232890717e-06, "loss": 0.5059, "step": 19327 }, { "epoch": 1.1, "eval_nli-pairs_loss": 0.45871272683143616, "eval_nli-pairs_runtime": 22.8984, "eval_nli-pairs_samples_per_second": 297.313, "eval_nli-pairs_steps_per_second": 18.604, "step": 19327 }, { "epoch": 1.1, "eval_scitail-pairs-pos_loss": 0.5492986440658569, "eval_scitail-pairs-pos_runtime": 5.1782, "eval_scitail-pairs-pos_samples_per_second": 251.824, "eval_scitail-pairs-pos_steps_per_second": 15.836, "step": 19327 }, { "epoch": 1.1, "eval_qnli-contrastive_loss": 0.027841920033097267, "eval_qnli-contrastive_runtime": 15.522, "eval_qnli-contrastive_samples_per_second": 351.952, "eval_qnli-contrastive_steps_per_second": 22.033, "step": 19327 }, { "epoch": 1.2, "grad_norm": 6.800241947174072, "learning_rate": 9.051905444616243e-06, "loss": 0.4654, "step": 21084 }, { "epoch": 1.2, "eval_nli-pairs_loss": 0.44151321053504944, "eval_nli-pairs_runtime": 23.1311, "eval_nli-pairs_samples_per_second": 294.323, "eval_nli-pairs_steps_per_second": 18.417, "step": 21084 }, { "epoch": 1.2, "eval_scitail-pairs-pos_loss": 0.4850437045097351, "eval_scitail-pairs-pos_runtime": 5.2939, "eval_scitail-pairs-pos_samples_per_second": 246.321, "eval_scitail-pairs-pos_steps_per_second": 15.49, "step": 21084 }, { "epoch": 1.2, "eval_qnli-contrastive_loss": 0.05170624330639839, "eval_qnli-contrastive_runtime": 15.7737, "eval_qnli-contrastive_samples_per_second": 346.336, "eval_qnli-contrastive_steps_per_second": 21.682, "step": 21084 }, { "epoch": 1.3, "grad_norm": 0.41899746656417847, "learning_rate": 7.948320938272786e-06, "loss": 0.4224, "step": 22841 }, { "epoch": 1.3, "eval_nli-pairs_loss": 0.39569494128227234, "eval_nli-pairs_runtime": 23.2638, "eval_nli-pairs_samples_per_second": 292.643, "eval_nli-pairs_steps_per_second": 18.312, "step": 22841 }, { "epoch": 1.3, "eval_scitail-pairs-pos_loss": 0.42922988533973694, "eval_scitail-pairs-pos_runtime": 5.2769, "eval_scitail-pairs-pos_samples_per_second": 247.114, "eval_scitail-pairs-pos_steps_per_second": 15.539, "step": 22841 }, { "epoch": 1.3, "eval_qnli-contrastive_loss": 0.0938122496008873, "eval_qnli-contrastive_runtime": 15.6681, "eval_qnli-contrastive_samples_per_second": 348.67, "eval_qnli-contrastive_steps_per_second": 21.828, "step": 22841 }, { "epoch": 1.4, "grad_norm": 3.0029168128967285, "learning_rate": 6.556983832253587e-06, "loss": 0.4125, "step": 24598 }, { "epoch": 1.4, "eval_nli-pairs_loss": 0.3794442415237427, "eval_nli-pairs_runtime": 23.2107, "eval_nli-pairs_samples_per_second": 293.313, "eval_nli-pairs_steps_per_second": 18.354, "step": 24598 }, { "epoch": 1.4, "eval_scitail-pairs-pos_loss": 0.4623956084251404, "eval_scitail-pairs-pos_runtime": 5.2884, "eval_scitail-pairs-pos_samples_per_second": 246.577, "eval_scitail-pairs-pos_steps_per_second": 15.506, "step": 24598 }, { "epoch": 1.4, "eval_qnli-contrastive_loss": 0.0838843286037445, "eval_qnli-contrastive_runtime": 15.7017, "eval_qnli-contrastive_samples_per_second": 347.924, "eval_qnli-contrastive_steps_per_second": 21.781, "step": 24598 }, { "epoch": 1.5, "grad_norm": 10.91913890838623, "learning_rate": 5.012516292320938e-06, "loss": 0.4072, "step": 26355 }, { "epoch": 1.5, "eval_nli-pairs_loss": 0.3877629041671753, "eval_nli-pairs_runtime": 23.1072, "eval_nli-pairs_samples_per_second": 294.627, "eval_nli-pairs_steps_per_second": 18.436, "step": 26355 }, { "epoch": 1.5, "eval_scitail-pairs-pos_loss": 0.4480924606323242, "eval_scitail-pairs-pos_runtime": 5.2741, "eval_scitail-pairs-pos_samples_per_second": 247.244, "eval_scitail-pairs-pos_steps_per_second": 15.548, "step": 26355 }, { "epoch": 1.5, "eval_qnli-contrastive_loss": 0.06811495870351791, "eval_qnli-contrastive_runtime": 15.7641, "eval_qnli-contrastive_samples_per_second": 346.546, "eval_qnli-contrastive_steps_per_second": 21.695, "step": 26355 }, { "epoch": 1.6, "grad_norm": 3.676146984100342, "learning_rate": 3.4668235704897813e-06, "loss": 0.3572, "step": 28112 }, { "epoch": 1.6, "eval_nli-pairs_loss": 0.3715905547142029, "eval_nli-pairs_runtime": 23.1744, "eval_nli-pairs_samples_per_second": 293.773, "eval_nli-pairs_steps_per_second": 18.382, "step": 28112 }, { "epoch": 1.6, "eval_scitail-pairs-pos_loss": 0.49534013867378235, "eval_scitail-pairs-pos_runtime": 5.2856, "eval_scitail-pairs-pos_samples_per_second": 246.708, "eval_scitail-pairs-pos_steps_per_second": 15.514, "step": 28112 }, { "epoch": 1.6, "eval_qnli-contrastive_loss": 0.06735851615667343, "eval_qnli-contrastive_runtime": 15.7308, "eval_qnli-contrastive_samples_per_second": 347.281, "eval_qnli-contrastive_steps_per_second": 21.741, "step": 28112 }, { "epoch": 1.7, "grad_norm": 229.6580047607422, "learning_rate": 2.072658211127134e-06, "loss": 0.371, "step": 29869 }, { "epoch": 1.7, "eval_nli-pairs_loss": 0.36217835545539856, "eval_nli-pairs_runtime": 23.1495, "eval_nli-pairs_samples_per_second": 294.089, "eval_nli-pairs_steps_per_second": 18.402, "step": 29869 }, { "epoch": 1.7, "eval_scitail-pairs-pos_loss": 0.47673526406288147, "eval_scitail-pairs-pos_runtime": 5.2158, "eval_scitail-pairs-pos_samples_per_second": 250.008, "eval_scitail-pairs-pos_steps_per_second": 15.721, "step": 29869 }, { "epoch": 1.7, "eval_qnli-contrastive_loss": 0.06000087782740593, "eval_qnli-contrastive_runtime": 15.6328, "eval_qnli-contrastive_samples_per_second": 349.458, "eval_qnli-contrastive_steps_per_second": 21.877, "step": 29869 }, { "epoch": 1.8, "grad_norm": 0.6022229194641113, "learning_rate": 9.638670801112644e-07, "loss": 0.3332, "step": 31626 }, { "epoch": 1.8, "eval_nli-pairs_loss": 0.3600439131259918, "eval_nli-pairs_runtime": 23.0874, "eval_nli-pairs_samples_per_second": 294.879, "eval_nli-pairs_steps_per_second": 18.452, "step": 31626 }, { "epoch": 1.8, "eval_scitail-pairs-pos_loss": 0.465911865234375, "eval_scitail-pairs-pos_runtime": 5.3369, "eval_scitail-pairs-pos_samples_per_second": 244.338, "eval_scitail-pairs-pos_steps_per_second": 15.365, "step": 31626 }, { "epoch": 1.8, "eval_qnli-contrastive_loss": 0.05613844096660614, "eval_qnli-contrastive_runtime": 15.7089, "eval_qnli-contrastive_samples_per_second": 347.764, "eval_qnli-contrastive_steps_per_second": 21.771, "step": 31626 }, { "epoch": 1.9, "grad_norm": 0.23106251657009125, "learning_rate": 2.4943593464921476e-07, "loss": 0.3695, "step": 33383 }, { "epoch": 1.9, "eval_nli-pairs_loss": 0.35667526721954346, "eval_nli-pairs_runtime": 23.1588, "eval_nli-pairs_samples_per_second": 293.971, "eval_nli-pairs_steps_per_second": 18.395, "step": 33383 }, { "epoch": 1.9, "eval_scitail-pairs-pos_loss": 0.4603894352912903, "eval_scitail-pairs-pos_runtime": 5.248, "eval_scitail-pairs-pos_samples_per_second": 248.476, "eval_scitail-pairs-pos_steps_per_second": 15.625, "step": 33383 }, { "epoch": 1.9, "eval_qnli-contrastive_loss": 0.06141861155629158, "eval_qnli-contrastive_runtime": 15.6709, "eval_qnli-contrastive_samples_per_second": 348.608, "eval_qnli-contrastive_steps_per_second": 21.824, "step": 33383 }, { "epoch": 2.0, "grad_norm": Infinity, "learning_rate": 2.5896487759191624e-11, "loss": 0.3315, "step": 35140 }, { "epoch": 2.0, "eval_nli-pairs_loss": 0.3597075045108795, "eval_nli-pairs_runtime": 23.1058, "eval_nli-pairs_samples_per_second": 294.645, "eval_nli-pairs_steps_per_second": 18.437, "step": 35140 }, { "epoch": 2.0, "eval_scitail-pairs-pos_loss": 0.47120198607444763, "eval_scitail-pairs-pos_runtime": 5.2532, "eval_scitail-pairs-pos_samples_per_second": 248.23, "eval_scitail-pairs-pos_steps_per_second": 15.61, "step": 35140 }, { "epoch": 2.0, "eval_qnli-contrastive_loss": 0.05398999899625778, "eval_qnli-contrastive_runtime": 15.7099, "eval_qnli-contrastive_samples_per_second": 347.743, "eval_qnli-contrastive_steps_per_second": 21.77, "step": 35140 } ], "logging_steps": 1757, "max_steps": 35140, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 17570, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }