bobox's picture
Training in progress, step 321, checkpoint
faa2ea3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.8122794636556105,
"eval_steps": 27,
"global_step": 321,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016937191249117856,
"grad_norm": 34.22002029418945,
"learning_rate": 6.818181818181818e-07,
"loss": 7.2372,
"step": 3
},
{
"epoch": 0.03387438249823571,
"grad_norm": 21.76839828491211,
"learning_rate": 1.3636363636363636e-06,
"loss": 6.855,
"step": 6
},
{
"epoch": 0.05081157374735357,
"grad_norm": 21.260774612426758,
"learning_rate": 2.0454545454545457e-06,
"loss": 7.4707,
"step": 9
},
{
"epoch": 0.06774876499647142,
"grad_norm": 16.885921478271484,
"learning_rate": 2.7272727272727272e-06,
"loss": 7.0187,
"step": 12
},
{
"epoch": 0.08468595624558928,
"grad_norm": 19.509899139404297,
"learning_rate": 3.409090909090909e-06,
"loss": 6.6756,
"step": 15
},
{
"epoch": 0.10162314749470713,
"grad_norm": 7.9427289962768555,
"learning_rate": 4.0909090909090915e-06,
"loss": 6.0155,
"step": 18
},
{
"epoch": 0.11856033874382499,
"grad_norm": 7.325345039367676,
"learning_rate": 4.772727272727273e-06,
"loss": 6.1644,
"step": 21
},
{
"epoch": 0.13549752999294284,
"grad_norm": 7.544689655303955,
"learning_rate": 5.4545454545454545e-06,
"loss": 6.2158,
"step": 24
},
{
"epoch": 0.1524347212420607,
"grad_norm": 5.141758918762207,
"learning_rate": 6.136363636363637e-06,
"loss": 6.1369,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.109375,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.9544724822044373,
"eval_VitaminC_cosine_ap": 0.5356492030729136,
"eval_VitaminC_cosine_f1": 0.6542553191489362,
"eval_VitaminC_cosine_f1_threshold": 0.7148199081420898,
"eval_VitaminC_cosine_precision": 0.48616600790513836,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 414.4264831542969,
"eval_VitaminC_dot_ap": 0.5108219546857565,
"eval_VitaminC_dot_f1": 0.6507936507936508,
"eval_VitaminC_dot_f1_threshold": 271.6522521972656,
"eval_VitaminC_dot_precision": 0.4823529411764706,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.55078125,
"eval_VitaminC_euclidean_accuracy_threshold": 6.519885063171387,
"eval_VitaminC_euclidean_ap": 0.5226419655984281,
"eval_VitaminC_euclidean_f1": 0.6505376344086021,
"eval_VitaminC_euclidean_f1_threshold": 15.194067001342773,
"eval_VitaminC_euclidean_precision": 0.4859437751004016,
"eval_VitaminC_euclidean_recall": 0.983739837398374,
"eval_VitaminC_manhattan_accuracy": 0.546875,
"eval_VitaminC_manhattan_accuracy_threshold": 149.20114135742188,
"eval_VitaminC_manhattan_ap": 0.5237451656134715,
"eval_VitaminC_manhattan_f1": 0.6542553191489362,
"eval_VitaminC_manhattan_f1_threshold": 259.007080078125,
"eval_VitaminC_manhattan_precision": 0.48616600790513836,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.5546875,
"eval_VitaminC_max_accuracy_threshold": 414.4264831542969,
"eval_VitaminC_max_ap": 0.5356492030729136,
"eval_VitaminC_max_f1": 0.6542553191489362,
"eval_VitaminC_max_f1_threshold": 271.6522521972656,
"eval_VitaminC_max_precision": 0.48616600790513836,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5356492030729136,
"eval_sts-test_pearson_cosine": 0.056062031998983373,
"eval_sts-test_pearson_dot": 0.2979259445723872,
"eval_sts-test_pearson_euclidean": 0.0498319208592713,
"eval_sts-test_pearson_manhattan": 0.07381429239121526,
"eval_sts-test_pearson_max": 0.2979259445723872,
"eval_sts-test_spearman_cosine": 0.1066788491614481,
"eval_sts-test_spearman_dot": 0.315952670306405,
"eval_sts-test_spearman_euclidean": 0.07303394554435191,
"eval_sts-test_spearman_manhattan": 0.09039525717692232,
"eval_sts-test_spearman_max": 0.315952670306405,
"eval_vitaminc-pairs_loss": 2.698580741882324,
"eval_vitaminc-pairs_runtime": 1.4747,
"eval_vitaminc-pairs_samples_per_second": 73.236,
"eval_vitaminc-pairs_steps_per_second": 1.356,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_negation-triplets_loss": 5.142906665802002,
"eval_negation-triplets_runtime": 0.2993,
"eval_negation-triplets_samples_per_second": 213.865,
"eval_negation-triplets_steps_per_second": 3.342,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_scitail-pairs-pos_loss": 1.9216958284378052,
"eval_scitail-pairs-pos_runtime": 0.3834,
"eval_scitail-pairs-pos_samples_per_second": 140.842,
"eval_scitail-pairs-pos_steps_per_second": 2.608,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_xsum-pairs_loss": 6.073049545288086,
"eval_xsum-pairs_runtime": 3.1587,
"eval_xsum-pairs_samples_per_second": 40.523,
"eval_xsum-pairs_steps_per_second": 0.633,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_sciq_pairs_loss": 0.3449864387512207,
"eval_sciq_pairs_runtime": 3.3747,
"eval_sciq_pairs_samples_per_second": 37.93,
"eval_sciq_pairs_steps_per_second": 0.593,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_qasc_pairs_loss": 3.2267842292785645,
"eval_qasc_pairs_runtime": 0.6576,
"eval_qasc_pairs_samples_per_second": 194.646,
"eval_qasc_pairs_steps_per_second": 3.041,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_openbookqa_pairs_loss": 4.405983924865723,
"eval_openbookqa_pairs_runtime": 0.6107,
"eval_openbookqa_pairs_samples_per_second": 209.594,
"eval_openbookqa_pairs_steps_per_second": 3.275,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_msmarco_pairs_loss": 6.937691688537598,
"eval_msmarco_pairs_runtime": 1.3091,
"eval_msmarco_pairs_samples_per_second": 97.779,
"eval_msmarco_pairs_steps_per_second": 1.528,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_nq_pairs_loss": 6.794108867645264,
"eval_nq_pairs_runtime": 2.3968,
"eval_nq_pairs_samples_per_second": 53.404,
"eval_nq_pairs_steps_per_second": 0.834,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_trivia_pairs_loss": 6.3355631828308105,
"eval_trivia_pairs_runtime": 4.4974,
"eval_trivia_pairs_samples_per_second": 28.461,
"eval_trivia_pairs_steps_per_second": 0.445,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_gooaq_pairs_loss": 6.405998706817627,
"eval_gooaq_pairs_runtime": 0.8745,
"eval_gooaq_pairs_samples_per_second": 146.37,
"eval_gooaq_pairs_steps_per_second": 2.287,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_paws-pos_loss": 2.2308223247528076,
"eval_paws-pos_runtime": 0.6998,
"eval_paws-pos_samples_per_second": 182.908,
"eval_paws-pos_steps_per_second": 2.858,
"step": 27
},
{
"epoch": 0.16937191249117856,
"grad_norm": 5.885251522064209,
"learning_rate": 6.818181818181818e-06,
"loss": 5.7653,
"step": 30
},
{
"epoch": 0.1863091037402964,
"grad_norm": 7.357480049133301,
"learning_rate": 7.500000000000001e-06,
"loss": 6.1259,
"step": 33
},
{
"epoch": 0.20324629498941427,
"grad_norm": 7.321795463562012,
"learning_rate": 8.181818181818183e-06,
"loss": 5.7539,
"step": 36
},
{
"epoch": 0.22018348623853212,
"grad_norm": 4.239792346954346,
"learning_rate": 8.863636363636365e-06,
"loss": 6.0131,
"step": 39
},
{
"epoch": 0.23712067748764998,
"grad_norm": 3.9554407596588135,
"learning_rate": 9.545454545454547e-06,
"loss": 6.0074,
"step": 42
},
{
"epoch": 0.25405786873676783,
"grad_norm": 4.406026840209961,
"learning_rate": 1.0227272727272729e-05,
"loss": 5.7125,
"step": 45
},
{
"epoch": 0.2709950599858857,
"grad_norm": 7.235893249511719,
"learning_rate": 1.0909090909090909e-05,
"loss": 5.5634,
"step": 48
},
{
"epoch": 0.28793225123500354,
"grad_norm": 5.330288410186768,
"learning_rate": 1.1590909090909093e-05,
"loss": 5.2924,
"step": 51
},
{
"epoch": 0.3048694424841214,
"grad_norm": 7.216403961181641,
"learning_rate": 1.2272727272727274e-05,
"loss": 5.2286,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.046875,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.54296875,
"eval_VitaminC_cosine_accuracy_threshold": 0.9328227043151855,
"eval_VitaminC_cosine_ap": 0.5212059026196154,
"eval_VitaminC_cosine_f1": 0.6576819407008085,
"eval_VitaminC_cosine_f1_threshold": 0.7373804450035095,
"eval_VitaminC_cosine_precision": 0.49193548387096775,
"eval_VitaminC_cosine_recall": 0.991869918699187,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 418.2774658203125,
"eval_VitaminC_dot_ap": 0.5160594099493883,
"eval_VitaminC_dot_f1": 0.6521739130434782,
"eval_VitaminC_dot_f1_threshold": 291.5081481933594,
"eval_VitaminC_dot_precision": 0.4897959183673469,
"eval_VitaminC_dot_recall": 0.975609756097561,
"eval_VitaminC_euclidean_accuracy": 0.5390625,
"eval_VitaminC_euclidean_accuracy_threshold": 8.120429039001465,
"eval_VitaminC_euclidean_ap": 0.5224837623095228,
"eval_VitaminC_euclidean_f1": 0.6576819407008085,
"eval_VitaminC_euclidean_f1_threshold": 14.879999160766602,
"eval_VitaminC_euclidean_precision": 0.49193548387096775,
"eval_VitaminC_euclidean_recall": 0.991869918699187,
"eval_VitaminC_manhattan_accuracy": 0.53515625,
"eval_VitaminC_manhattan_accuracy_threshold": 137.40658569335938,
"eval_VitaminC_manhattan_ap": 0.5186382518671783,
"eval_VitaminC_manhattan_f1": 0.6576086956521738,
"eval_VitaminC_manhattan_f1_threshold": 263.32452392578125,
"eval_VitaminC_manhattan_precision": 0.49387755102040815,
"eval_VitaminC_manhattan_recall": 0.983739837398374,
"eval_VitaminC_max_accuracy": 0.55078125,
"eval_VitaminC_max_accuracy_threshold": 418.2774658203125,
"eval_VitaminC_max_ap": 0.5224837623095228,
"eval_VitaminC_max_f1": 0.6576819407008085,
"eval_VitaminC_max_f1_threshold": 291.5081481933594,
"eval_VitaminC_max_precision": 0.49387755102040815,
"eval_VitaminC_max_recall": 0.991869918699187,
"eval_sequential_score": 0.5224837623095228,
"eval_sts-test_pearson_cosine": 0.14377091128453176,
"eval_sts-test_pearson_dot": 0.24728387094758872,
"eval_sts-test_pearson_euclidean": 0.14604155960515372,
"eval_sts-test_pearson_manhattan": 0.1446467532231986,
"eval_sts-test_pearson_max": 0.24728387094758872,
"eval_sts-test_spearman_cosine": 0.1968510434344728,
"eval_sts-test_spearman_dot": 0.29467218283745694,
"eval_sts-test_spearman_euclidean": 0.17218164683969664,
"eval_sts-test_spearman_manhattan": 0.17741843340856742,
"eval_sts-test_spearman_max": 0.29467218283745694,
"eval_vitaminc-pairs_loss": 2.664700746536255,
"eval_vitaminc-pairs_runtime": 1.4487,
"eval_vitaminc-pairs_samples_per_second": 74.551,
"eval_vitaminc-pairs_steps_per_second": 1.381,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_negation-triplets_loss": 4.6218037605285645,
"eval_negation-triplets_runtime": 0.2971,
"eval_negation-triplets_samples_per_second": 215.438,
"eval_negation-triplets_steps_per_second": 3.366,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_scitail-pairs-pos_loss": 1.2413936853408813,
"eval_scitail-pairs-pos_runtime": 0.372,
"eval_scitail-pairs-pos_samples_per_second": 145.175,
"eval_scitail-pairs-pos_steps_per_second": 2.688,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_xsum-pairs_loss": 5.249766826629639,
"eval_xsum-pairs_runtime": 3.1506,
"eval_xsum-pairs_samples_per_second": 40.627,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_sciq_pairs_loss": 0.2961578667163849,
"eval_sciq_pairs_runtime": 3.2909,
"eval_sciq_pairs_samples_per_second": 38.895,
"eval_sciq_pairs_steps_per_second": 0.608,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_qasc_pairs_loss": 2.530872344970703,
"eval_qasc_pairs_runtime": 0.6255,
"eval_qasc_pairs_samples_per_second": 204.63,
"eval_qasc_pairs_steps_per_second": 3.197,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_openbookqa_pairs_loss": 3.8855104446411133,
"eval_openbookqa_pairs_runtime": 0.5742,
"eval_openbookqa_pairs_samples_per_second": 222.914,
"eval_openbookqa_pairs_steps_per_second": 3.483,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_msmarco_pairs_loss": 5.246406555175781,
"eval_msmarco_pairs_runtime": 1.2872,
"eval_msmarco_pairs_samples_per_second": 99.442,
"eval_msmarco_pairs_steps_per_second": 1.554,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_nq_pairs_loss": 5.332630157470703,
"eval_nq_pairs_runtime": 2.3739,
"eval_nq_pairs_samples_per_second": 53.92,
"eval_nq_pairs_steps_per_second": 0.843,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_trivia_pairs_loss": 5.647429943084717,
"eval_trivia_pairs_runtime": 4.4729,
"eval_trivia_pairs_samples_per_second": 28.617,
"eval_trivia_pairs_steps_per_second": 0.447,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_gooaq_pairs_loss": 5.225871562957764,
"eval_gooaq_pairs_runtime": 0.8715,
"eval_gooaq_pairs_samples_per_second": 146.868,
"eval_gooaq_pairs_steps_per_second": 2.295,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_paws-pos_loss": 0.8335962891578674,
"eval_paws-pos_runtime": 0.6844,
"eval_paws-pos_samples_per_second": 187.036,
"eval_paws-pos_steps_per_second": 2.922,
"step": 54
},
{
"epoch": 0.32180663373323926,
"grad_norm": 6.847682952880859,
"learning_rate": 1.2954545454545455e-05,
"loss": 4.4811,
"step": 57
},
{
"epoch": 0.3387438249823571,
"grad_norm": 8.383002281188965,
"learning_rate": 1.3636363636363637e-05,
"loss": 4.4239,
"step": 60
},
{
"epoch": 0.35568101623147497,
"grad_norm": 7.014843463897705,
"learning_rate": 1.431818181818182e-05,
"loss": 4.0273,
"step": 63
},
{
"epoch": 0.3726182074805928,
"grad_norm": 5.9739885330200195,
"learning_rate": 1.5000000000000002e-05,
"loss": 3.4508,
"step": 66
},
{
"epoch": 0.3895553987297107,
"grad_norm": 11.202752113342285,
"learning_rate": 1.5681818181818182e-05,
"loss": 3.9702,
"step": 69
},
{
"epoch": 0.40649258997882853,
"grad_norm": 7.064818859100342,
"learning_rate": 1.6363636363636366e-05,
"loss": 3.5295,
"step": 72
},
{
"epoch": 0.4234297812279464,
"grad_norm": 5.912719249725342,
"learning_rate": 1.7045454545454546e-05,
"loss": 3.6395,
"step": 75
},
{
"epoch": 0.44036697247706424,
"grad_norm": 5.033207893371582,
"learning_rate": 1.772727272727273e-05,
"loss": 3.2398,
"step": 78
},
{
"epoch": 0.4573041637261821,
"grad_norm": 5.218384265899658,
"learning_rate": 1.840909090909091e-05,
"loss": 3.116,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.9041332006454468,
"eval_VitaminC_cosine_ap": 0.5292859731465609,
"eval_VitaminC_cosine_f1": 0.6542553191489362,
"eval_VitaminC_cosine_f1_threshold": 0.452939510345459,
"eval_VitaminC_cosine_precision": 0.48616600790513836,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 414.42559814453125,
"eval_VitaminC_dot_ap": 0.5222732504955002,
"eval_VitaminC_dot_f1": 0.6542553191489362,
"eval_VitaminC_dot_f1_threshold": 212.6934814453125,
"eval_VitaminC_dot_precision": 0.48616600790513836,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 9.18377685546875,
"eval_VitaminC_euclidean_ap": 0.5291787221346742,
"eval_VitaminC_euclidean_f1": 0.6542553191489362,
"eval_VitaminC_euclidean_f1_threshold": 22.683509826660156,
"eval_VitaminC_euclidean_precision": 0.48616600790513836,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 173.8212127685547,
"eval_VitaminC_manhattan_ap": 0.5305698453165033,
"eval_VitaminC_manhattan_f1": 0.6542553191489362,
"eval_VitaminC_manhattan_f1_threshold": 415.5366516113281,
"eval_VitaminC_manhattan_precision": 0.48616600790513836,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 414.42559814453125,
"eval_VitaminC_max_ap": 0.5305698453165033,
"eval_VitaminC_max_f1": 0.6542553191489362,
"eval_VitaminC_max_f1_threshold": 415.5366516113281,
"eval_VitaminC_max_precision": 0.48616600790513836,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5305698453165033,
"eval_sts-test_pearson_cosine": 0.45494716382349193,
"eval_sts-test_pearson_dot": 0.44837123659858896,
"eval_sts-test_pearson_euclidean": 0.4480861256491879,
"eval_sts-test_pearson_manhattan": 0.4417008219313264,
"eval_sts-test_pearson_max": 0.45494716382349193,
"eval_sts-test_spearman_cosine": 0.48921418507251446,
"eval_sts-test_spearman_dot": 0.46707725062744593,
"eval_sts-test_spearman_euclidean": 0.4610824798409968,
"eval_sts-test_spearman_manhattan": 0.46068648052845956,
"eval_sts-test_spearman_max": 0.48921418507251446,
"eval_vitaminc-pairs_loss": 2.5043575763702393,
"eval_vitaminc-pairs_runtime": 1.4778,
"eval_vitaminc-pairs_samples_per_second": 73.079,
"eval_vitaminc-pairs_steps_per_second": 1.353,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_negation-triplets_loss": 3.4229447841644287,
"eval_negation-triplets_runtime": 0.2991,
"eval_negation-triplets_samples_per_second": 213.954,
"eval_negation-triplets_steps_per_second": 3.343,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_scitail-pairs-pos_loss": 0.2784869372844696,
"eval_scitail-pairs-pos_runtime": 0.3633,
"eval_scitail-pairs-pos_samples_per_second": 148.649,
"eval_scitail-pairs-pos_steps_per_second": 2.753,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_xsum-pairs_loss": 2.428964614868164,
"eval_xsum-pairs_runtime": 3.1548,
"eval_xsum-pairs_samples_per_second": 40.573,
"eval_xsum-pairs_steps_per_second": 0.634,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_sciq_pairs_loss": 0.15256048738956451,
"eval_sciq_pairs_runtime": 3.2432,
"eval_sciq_pairs_samples_per_second": 39.467,
"eval_sciq_pairs_steps_per_second": 0.617,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_qasc_pairs_loss": 1.2902077436447144,
"eval_qasc_pairs_runtime": 0.6211,
"eval_qasc_pairs_samples_per_second": 206.085,
"eval_qasc_pairs_steps_per_second": 3.22,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_openbookqa_pairs_loss": 2.4784862995147705,
"eval_openbookqa_pairs_runtime": 0.5758,
"eval_openbookqa_pairs_samples_per_second": 222.308,
"eval_openbookqa_pairs_steps_per_second": 3.474,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_msmarco_pairs_loss": 2.967724084854126,
"eval_msmarco_pairs_runtime": 1.2944,
"eval_msmarco_pairs_samples_per_second": 98.885,
"eval_msmarco_pairs_steps_per_second": 1.545,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_nq_pairs_loss": 3.358661413192749,
"eval_nq_pairs_runtime": 2.3827,
"eval_nq_pairs_samples_per_second": 53.722,
"eval_nq_pairs_steps_per_second": 0.839,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_trivia_pairs_loss": 3.1391680240631104,
"eval_trivia_pairs_runtime": 4.4155,
"eval_trivia_pairs_samples_per_second": 28.989,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_gooaq_pairs_loss": 2.8774912357330322,
"eval_gooaq_pairs_runtime": 0.8746,
"eval_gooaq_pairs_samples_per_second": 146.346,
"eval_gooaq_pairs_steps_per_second": 2.287,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_paws-pos_loss": 0.19754411280155182,
"eval_paws-pos_runtime": 0.684,
"eval_paws-pos_samples_per_second": 187.141,
"eval_paws-pos_steps_per_second": 2.924,
"step": 81
},
{
"epoch": 0.47424135497529996,
"grad_norm": 5.149569988250732,
"learning_rate": 1.9090909090909094e-05,
"loss": 2.6049,
"step": 84
},
{
"epoch": 0.4911785462244178,
"grad_norm": 5.012928009033203,
"learning_rate": 1.9772727272727274e-05,
"loss": 2.7738,
"step": 87
},
{
"epoch": 0.5081157374735357,
"grad_norm": 4.880725383758545,
"learning_rate": 2.0454545454545457e-05,
"loss": 2.5416,
"step": 90
},
{
"epoch": 0.5250529287226535,
"grad_norm": 5.618528366088867,
"learning_rate": 2.113636363636364e-05,
"loss": 2.3913,
"step": 93
},
{
"epoch": 0.5419901199717714,
"grad_norm": 5.020515441894531,
"learning_rate": 2.1818181818181818e-05,
"loss": 2.3144,
"step": 96
},
{
"epoch": 0.5589273112208892,
"grad_norm": 4.818451404571533,
"learning_rate": 2.25e-05,
"loss": 2.1857,
"step": 99
},
{
"epoch": 0.5758645024700071,
"grad_norm": 5.094771385192871,
"learning_rate": 2.3181818181818185e-05,
"loss": 1.8881,
"step": 102
},
{
"epoch": 0.592801693719125,
"grad_norm": 3.795962333679199,
"learning_rate": 2.3863636363636365e-05,
"loss": 2.2699,
"step": 105
},
{
"epoch": 0.6097388849682428,
"grad_norm": 4.46245813369751,
"learning_rate": 2.454545454545455e-05,
"loss": 2.1425,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8830112218856812,
"eval_VitaminC_cosine_ap": 0.5302172957740995,
"eval_VitaminC_cosine_f1": 0.6558265582655827,
"eval_VitaminC_cosine_f1_threshold": 0.5253933668136597,
"eval_VitaminC_cosine_precision": 0.491869918699187,
"eval_VitaminC_cosine_recall": 0.983739837398374,
"eval_VitaminC_dot_accuracy": 0.5390625,
"eval_VitaminC_dot_accuracy_threshold": 427.5576171875,
"eval_VitaminC_dot_ap": 0.517120157327104,
"eval_VitaminC_dot_f1": 0.6542553191489362,
"eval_VitaminC_dot_f1_threshold": 175.80963134765625,
"eval_VitaminC_dot_precision": 0.48616600790513836,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5625,
"eval_VitaminC_euclidean_accuracy_threshold": 10.817148208618164,
"eval_VitaminC_euclidean_ap": 0.532255112376416,
"eval_VitaminC_euclidean_f1": 0.6558265582655827,
"eval_VitaminC_euclidean_f1_threshold": 21.10729217529297,
"eval_VitaminC_euclidean_precision": 0.491869918699187,
"eval_VitaminC_euclidean_recall": 0.983739837398374,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 224.70416259765625,
"eval_VitaminC_manhattan_ap": 0.5298930718604624,
"eval_VitaminC_manhattan_f1": 0.6558265582655827,
"eval_VitaminC_manhattan_f1_threshold": 415.3311767578125,
"eval_VitaminC_manhattan_precision": 0.491869918699187,
"eval_VitaminC_manhattan_recall": 0.983739837398374,
"eval_VitaminC_max_accuracy": 0.5625,
"eval_VitaminC_max_accuracy_threshold": 427.5576171875,
"eval_VitaminC_max_ap": 0.532255112376416,
"eval_VitaminC_max_f1": 0.6558265582655827,
"eval_VitaminC_max_f1_threshold": 415.3311767578125,
"eval_VitaminC_max_precision": 0.491869918699187,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.532255112376416,
"eval_sts-test_pearson_cosine": 0.755963151531783,
"eval_sts-test_pearson_dot": 0.7384823091540473,
"eval_sts-test_pearson_euclidean": 0.764089555623164,
"eval_sts-test_pearson_manhattan": 0.7670467479701304,
"eval_sts-test_pearson_max": 0.7670467479701304,
"eval_sts-test_spearman_cosine": 0.7806331583677342,
"eval_sts-test_spearman_dot": 0.7442842883778696,
"eval_sts-test_spearman_euclidean": 0.7674205303105437,
"eval_sts-test_spearman_manhattan": 0.7664974867050092,
"eval_sts-test_spearman_max": 0.7806331583677342,
"eval_vitaminc-pairs_loss": 2.721674919128418,
"eval_vitaminc-pairs_runtime": 1.4468,
"eval_vitaminc-pairs_samples_per_second": 74.65,
"eval_vitaminc-pairs_steps_per_second": 1.382,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_negation-triplets_loss": 2.338909387588501,
"eval_negation-triplets_runtime": 0.3017,
"eval_negation-triplets_samples_per_second": 212.101,
"eval_negation-triplets_steps_per_second": 3.314,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_scitail-pairs-pos_loss": 0.23291125893592834,
"eval_scitail-pairs-pos_runtime": 0.3664,
"eval_scitail-pairs-pos_samples_per_second": 147.385,
"eval_scitail-pairs-pos_steps_per_second": 2.729,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_xsum-pairs_loss": 1.2065516710281372,
"eval_xsum-pairs_runtime": 3.1488,
"eval_xsum-pairs_samples_per_second": 40.65,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_sciq_pairs_loss": 0.09487833082675934,
"eval_sciq_pairs_runtime": 3.2618,
"eval_sciq_pairs_samples_per_second": 39.242,
"eval_sciq_pairs_steps_per_second": 0.613,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_qasc_pairs_loss": 0.8461999297142029,
"eval_qasc_pairs_runtime": 0.6246,
"eval_qasc_pairs_samples_per_second": 204.93,
"eval_qasc_pairs_steps_per_second": 3.202,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_openbookqa_pairs_loss": 1.5739191770553589,
"eval_openbookqa_pairs_runtime": 0.5751,
"eval_openbookqa_pairs_samples_per_second": 222.568,
"eval_openbookqa_pairs_steps_per_second": 3.478,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_msmarco_pairs_loss": 1.6446179151535034,
"eval_msmarco_pairs_runtime": 1.2828,
"eval_msmarco_pairs_samples_per_second": 99.784,
"eval_msmarco_pairs_steps_per_second": 1.559,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_nq_pairs_loss": 2.364896535873413,
"eval_nq_pairs_runtime": 2.3802,
"eval_nq_pairs_samples_per_second": 53.777,
"eval_nq_pairs_steps_per_second": 0.84,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_trivia_pairs_loss": 1.7080069780349731,
"eval_trivia_pairs_runtime": 4.4372,
"eval_trivia_pairs_samples_per_second": 28.847,
"eval_trivia_pairs_steps_per_second": 0.451,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_gooaq_pairs_loss": 1.7924479246139526,
"eval_gooaq_pairs_runtime": 0.8761,
"eval_gooaq_pairs_samples_per_second": 146.094,
"eval_gooaq_pairs_steps_per_second": 2.283,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_paws-pos_loss": 0.08000019192695618,
"eval_paws-pos_runtime": 0.6839,
"eval_paws-pos_samples_per_second": 187.168,
"eval_paws-pos_steps_per_second": 2.924,
"step": 108
},
{
"epoch": 0.6266760762173607,
"grad_norm": 4.418070316314697,
"learning_rate": 2.5227272727272732e-05,
"loss": 2.1276,
"step": 111
},
{
"epoch": 0.6436132674664785,
"grad_norm": 4.3495259284973145,
"learning_rate": 2.590909090909091e-05,
"loss": 1.7531,
"step": 114
},
{
"epoch": 0.6605504587155964,
"grad_norm": 4.294332027435303,
"learning_rate": 2.6590909090909093e-05,
"loss": 2.0179,
"step": 117
},
{
"epoch": 0.6774876499647142,
"grad_norm": 3.4215610027313232,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.5305,
"step": 120
},
{
"epoch": 0.6944248412138321,
"grad_norm": 4.37844181060791,
"learning_rate": 2.7954545454545457e-05,
"loss": 1.6925,
"step": 123
},
{
"epoch": 0.7113620324629499,
"grad_norm": 4.019878387451172,
"learning_rate": 2.863636363636364e-05,
"loss": 1.5248,
"step": 126
},
{
"epoch": 0.7282992237120678,
"grad_norm": 4.662445068359375,
"learning_rate": 2.931818181818182e-05,
"loss": 1.523,
"step": 129
},
{
"epoch": 0.7452364149611856,
"grad_norm": 4.6323161125183105,
"learning_rate": 3.0000000000000004e-05,
"loss": 1.5474,
"step": 132
},
{
"epoch": 0.7621736062103035,
"grad_norm": 4.586575984954834,
"learning_rate": 3.068181818181819e-05,
"loss": 1.7221,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.56640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.8478574156761169,
"eval_VitaminC_cosine_ap": 0.5325579595957614,
"eval_VitaminC_cosine_f1": 0.6559999999999999,
"eval_VitaminC_cosine_f1_threshold": 0.35839784145355225,
"eval_VitaminC_cosine_precision": 0.4880952380952381,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 366.9839172363281,
"eval_VitaminC_dot_ap": 0.5326813797607027,
"eval_VitaminC_dot_f1": 0.6559999999999999,
"eval_VitaminC_dot_f1_threshold": 157.35829162597656,
"eval_VitaminC_dot_precision": 0.4880952380952381,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5625,
"eval_VitaminC_euclidean_accuracy_threshold": 12.044445037841797,
"eval_VitaminC_euclidean_ap": 0.5304103559932005,
"eval_VitaminC_euclidean_f1": 0.6542553191489362,
"eval_VitaminC_euclidean_f1_threshold": 24.461441040039062,
"eval_VitaminC_euclidean_precision": 0.48616600790513836,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5625,
"eval_VitaminC_manhattan_accuracy_threshold": 239.24815368652344,
"eval_VitaminC_manhattan_ap": 0.5314780667834758,
"eval_VitaminC_manhattan_f1": 0.6575342465753424,
"eval_VitaminC_manhattan_f1_threshold": 400.6834716796875,
"eval_VitaminC_manhattan_precision": 0.49586776859504134,
"eval_VitaminC_manhattan_recall": 0.975609756097561,
"eval_VitaminC_max_accuracy": 0.56640625,
"eval_VitaminC_max_accuracy_threshold": 366.9839172363281,
"eval_VitaminC_max_ap": 0.5326813797607027,
"eval_VitaminC_max_f1": 0.6575342465753424,
"eval_VitaminC_max_f1_threshold": 400.6834716796875,
"eval_VitaminC_max_precision": 0.49586776859504134,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5326813797607027,
"eval_sts-test_pearson_cosine": 0.7919597804368175,
"eval_sts-test_pearson_dot": 0.7994867531185785,
"eval_sts-test_pearson_euclidean": 0.8117960113303863,
"eval_sts-test_pearson_manhattan": 0.8144714466358016,
"eval_sts-test_pearson_max": 0.8144714466358016,
"eval_sts-test_spearman_cosine": 0.831478610786181,
"eval_sts-test_spearman_dot": 0.8192534746855707,
"eval_sts-test_spearman_euclidean": 0.8185577905406703,
"eval_sts-test_spearman_manhattan": 0.8154771593606782,
"eval_sts-test_spearman_max": 0.831478610786181,
"eval_vitaminc-pairs_loss": 2.852091073989868,
"eval_vitaminc-pairs_runtime": 1.4427,
"eval_vitaminc-pairs_samples_per_second": 74.858,
"eval_vitaminc-pairs_steps_per_second": 1.386,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_negation-triplets_loss": 2.074247121810913,
"eval_negation-triplets_runtime": 0.3,
"eval_negation-triplets_samples_per_second": 213.353,
"eval_negation-triplets_steps_per_second": 3.334,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_scitail-pairs-pos_loss": 0.2149849385023117,
"eval_scitail-pairs-pos_runtime": 0.3744,
"eval_scitail-pairs-pos_samples_per_second": 144.219,
"eval_scitail-pairs-pos_steps_per_second": 2.671,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_xsum-pairs_loss": 0.7706837058067322,
"eval_xsum-pairs_runtime": 3.1609,
"eval_xsum-pairs_samples_per_second": 40.495,
"eval_xsum-pairs_steps_per_second": 0.633,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_sciq_pairs_loss": 0.07513368874788284,
"eval_sciq_pairs_runtime": 3.2949,
"eval_sciq_pairs_samples_per_second": 38.848,
"eval_sciq_pairs_steps_per_second": 0.607,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_qasc_pairs_loss": 0.6355602741241455,
"eval_qasc_pairs_runtime": 0.6392,
"eval_qasc_pairs_samples_per_second": 200.246,
"eval_qasc_pairs_steps_per_second": 3.129,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_openbookqa_pairs_loss": 1.4014525413513184,
"eval_openbookqa_pairs_runtime": 0.622,
"eval_openbookqa_pairs_samples_per_second": 205.786,
"eval_openbookqa_pairs_steps_per_second": 3.215,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_msmarco_pairs_loss": 1.1524099111557007,
"eval_msmarco_pairs_runtime": 1.31,
"eval_msmarco_pairs_samples_per_second": 97.709,
"eval_msmarco_pairs_steps_per_second": 1.527,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_nq_pairs_loss": 1.7768574953079224,
"eval_nq_pairs_runtime": 2.3979,
"eval_nq_pairs_samples_per_second": 53.379,
"eval_nq_pairs_steps_per_second": 0.834,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_trivia_pairs_loss": 1.4495295286178589,
"eval_trivia_pairs_runtime": 4.4194,
"eval_trivia_pairs_samples_per_second": 28.964,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_gooaq_pairs_loss": 1.3955378532409668,
"eval_gooaq_pairs_runtime": 0.8788,
"eval_gooaq_pairs_samples_per_second": 145.649,
"eval_gooaq_pairs_steps_per_second": 2.276,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_paws-pos_loss": 0.06006813049316406,
"eval_paws-pos_runtime": 0.6896,
"eval_paws-pos_samples_per_second": 185.603,
"eval_paws-pos_steps_per_second": 2.9,
"step": 135
},
{
"epoch": 0.7791107974594214,
"grad_norm": 3.864208936691284,
"learning_rate": 3.1363636363636365e-05,
"loss": 1.5366,
"step": 138
},
{
"epoch": 0.7960479887085392,
"grad_norm": 3.837550640106201,
"learning_rate": 3.204545454545455e-05,
"loss": 1.3045,
"step": 141
},
{
"epoch": 0.8129851799576571,
"grad_norm": 3.5258102416992188,
"learning_rate": 3.272727272727273e-05,
"loss": 1.1999,
"step": 144
},
{
"epoch": 0.8299223712067749,
"grad_norm": 3.4431183338165283,
"learning_rate": 3.340909090909091e-05,
"loss": 1.3483,
"step": 147
},
{
"epoch": 0.8468595624558928,
"grad_norm": 3.6455864906311035,
"learning_rate": 3.409090909090909e-05,
"loss": 1.2009,
"step": 150
},
{
"epoch": 0.8637967537050106,
"grad_norm": 4.508525371551514,
"learning_rate": 3.4772727272727276e-05,
"loss": 1.4495,
"step": 153
},
{
"epoch": 0.8807339449541285,
"grad_norm": 3.0432400703430176,
"learning_rate": 3.545454545454546e-05,
"loss": 1.2329,
"step": 156
},
{
"epoch": 0.8976711362032463,
"grad_norm": 3.0190365314483643,
"learning_rate": 3.613636363636364e-05,
"loss": 1.1905,
"step": 159
},
{
"epoch": 0.9146083274523642,
"grad_norm": 3.74668288230896,
"learning_rate": 3.681818181818182e-05,
"loss": 1.277,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8101799488067627,
"eval_VitaminC_cosine_ap": 0.5298515171639175,
"eval_VitaminC_cosine_f1": 0.6542553191489362,
"eval_VitaminC_cosine_f1_threshold": 0.345889687538147,
"eval_VitaminC_cosine_precision": 0.48616600790513836,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 373.5804443359375,
"eval_VitaminC_dot_ap": 0.5310954683437364,
"eval_VitaminC_dot_f1": 0.6542553191489362,
"eval_VitaminC_dot_f1_threshold": 155.41326904296875,
"eval_VitaminC_dot_precision": 0.48616600790513836,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.57421875,
"eval_VitaminC_euclidean_accuracy_threshold": 13.60124683380127,
"eval_VitaminC_euclidean_ap": 0.5286057955992807,
"eval_VitaminC_euclidean_f1": 0.6577540106951871,
"eval_VitaminC_euclidean_f1_threshold": 22.904512405395508,
"eval_VitaminC_euclidean_precision": 0.4900398406374502,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 262.37322998046875,
"eval_VitaminC_manhattan_ap": 0.5253560845853567,
"eval_VitaminC_manhattan_f1": 0.6559999999999999,
"eval_VitaminC_manhattan_f1_threshold": 465.94549560546875,
"eval_VitaminC_manhattan_precision": 0.4880952380952381,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.57421875,
"eval_VitaminC_max_accuracy_threshold": 373.5804443359375,
"eval_VitaminC_max_ap": 0.5310954683437364,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 465.94549560546875,
"eval_VitaminC_max_precision": 0.4900398406374502,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5310954683437364,
"eval_sts-test_pearson_cosine": 0.8067612938723231,
"eval_sts-test_pearson_dot": 0.8217874837658639,
"eval_sts-test_pearson_euclidean": 0.827948115812785,
"eval_sts-test_pearson_manhattan": 0.8261527694953693,
"eval_sts-test_pearson_max": 0.827948115812785,
"eval_sts-test_spearman_cosine": 0.8547777638284432,
"eval_sts-test_spearman_dot": 0.8498786150097738,
"eval_sts-test_spearman_euclidean": 0.8373845860667446,
"eval_sts-test_spearman_manhattan": 0.8324507067477893,
"eval_sts-test_spearman_max": 0.8547777638284432,
"eval_vitaminc-pairs_loss": 2.776399612426758,
"eval_vitaminc-pairs_runtime": 1.4503,
"eval_vitaminc-pairs_samples_per_second": 74.467,
"eval_vitaminc-pairs_steps_per_second": 1.379,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_negation-triplets_loss": 2.005451202392578,
"eval_negation-triplets_runtime": 0.2981,
"eval_negation-triplets_samples_per_second": 214.709,
"eval_negation-triplets_steps_per_second": 3.355,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_scitail-pairs-pos_loss": 0.19877880811691284,
"eval_scitail-pairs-pos_runtime": 0.3623,
"eval_scitail-pairs-pos_samples_per_second": 149.043,
"eval_scitail-pairs-pos_steps_per_second": 2.76,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_xsum-pairs_loss": 0.5586928725242615,
"eval_xsum-pairs_runtime": 3.1466,
"eval_xsum-pairs_samples_per_second": 40.679,
"eval_xsum-pairs_steps_per_second": 0.636,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_sciq_pairs_loss": 0.06038254499435425,
"eval_sciq_pairs_runtime": 3.4092,
"eval_sciq_pairs_samples_per_second": 37.545,
"eval_sciq_pairs_steps_per_second": 0.587,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_qasc_pairs_loss": 0.49434012174606323,
"eval_qasc_pairs_runtime": 0.6342,
"eval_qasc_pairs_samples_per_second": 201.832,
"eval_qasc_pairs_steps_per_second": 3.154,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_openbookqa_pairs_loss": 1.1903400421142578,
"eval_openbookqa_pairs_runtime": 0.5754,
"eval_openbookqa_pairs_samples_per_second": 222.449,
"eval_openbookqa_pairs_steps_per_second": 3.476,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_msmarco_pairs_loss": 0.8656420707702637,
"eval_msmarco_pairs_runtime": 1.2858,
"eval_msmarco_pairs_samples_per_second": 99.547,
"eval_msmarco_pairs_steps_per_second": 1.555,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_nq_pairs_loss": 1.1553651094436646,
"eval_nq_pairs_runtime": 2.3754,
"eval_nq_pairs_samples_per_second": 53.885,
"eval_nq_pairs_steps_per_second": 0.842,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_trivia_pairs_loss": 1.2928619384765625,
"eval_trivia_pairs_runtime": 4.4084,
"eval_trivia_pairs_samples_per_second": 29.035,
"eval_trivia_pairs_steps_per_second": 0.454,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_gooaq_pairs_loss": 1.1580811738967896,
"eval_gooaq_pairs_runtime": 0.8731,
"eval_gooaq_pairs_samples_per_second": 146.607,
"eval_gooaq_pairs_steps_per_second": 2.291,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_paws-pos_loss": 0.052534349262714386,
"eval_paws-pos_runtime": 0.6835,
"eval_paws-pos_samples_per_second": 187.258,
"eval_paws-pos_steps_per_second": 2.926,
"step": 162
},
{
"epoch": 0.9315455187014821,
"grad_norm": 4.7817864418029785,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.339,
"step": 165
},
{
"epoch": 0.9484827099505999,
"grad_norm": 4.000570774078369,
"learning_rate": 3.818181818181819e-05,
"loss": 1.1535,
"step": 168
},
{
"epoch": 0.9654199011997178,
"grad_norm": 3.5971670150756836,
"learning_rate": 3.8863636363636364e-05,
"loss": 1.1643,
"step": 171
},
{
"epoch": 0.9823570924488356,
"grad_norm": 3.6582131385803223,
"learning_rate": 3.954545454545455e-05,
"loss": 1.2221,
"step": 174
},
{
"epoch": 0.9992942836979535,
"grad_norm": 4.0953898429870605,
"learning_rate": 3.9999477905707075e-05,
"loss": 1.0974,
"step": 177
},
{
"epoch": 1.0162314749470713,
"grad_norm": 4.092026233673096,
"learning_rate": 3.999164730903481e-05,
"loss": 1.0984,
"step": 180
},
{
"epoch": 1.0331686661961892,
"grad_norm": 3.6480906009674072,
"learning_rate": 3.997442539262898e-05,
"loss": 1.0543,
"step": 183
},
{
"epoch": 1.050105857445307,
"grad_norm": 3.433056592941284,
"learning_rate": 3.99478242943326e-05,
"loss": 1.0994,
"step": 186
},
{
"epoch": 1.067043048694425,
"grad_norm": 3.507981777191162,
"learning_rate": 3.991186276234698e-05,
"loss": 1.0621,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.578125,
"eval_VitaminC_cosine_accuracy_threshold": 0.7840081453323364,
"eval_VitaminC_cosine_ap": 0.5400770399437144,
"eval_VitaminC_cosine_f1": 0.6577540106951871,
"eval_VitaminC_cosine_f1_threshold": 0.39448243379592896,
"eval_VitaminC_cosine_precision": 0.4900398406374502,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 323.20281982421875,
"eval_VitaminC_dot_ap": 0.5420016101916201,
"eval_VitaminC_dot_f1": 0.6575342465753424,
"eval_VitaminC_dot_f1_threshold": 198.04354858398438,
"eval_VitaminC_dot_precision": 0.49586776859504134,
"eval_VitaminC_dot_recall": 0.975609756097561,
"eval_VitaminC_euclidean_accuracy": 0.5859375,
"eval_VitaminC_euclidean_accuracy_threshold": 13.84214973449707,
"eval_VitaminC_euclidean_ap": 0.5392157650683609,
"eval_VitaminC_euclidean_f1": 0.6577540106951871,
"eval_VitaminC_euclidean_f1_threshold": 22.595678329467773,
"eval_VitaminC_euclidean_precision": 0.4900398406374502,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5703125,
"eval_VitaminC_manhattan_accuracy_threshold": 275.1253356933594,
"eval_VitaminC_manhattan_ap": 0.5341380380767263,
"eval_VitaminC_manhattan_f1": 0.6576819407008085,
"eval_VitaminC_manhattan_f1_threshold": 457.04986572265625,
"eval_VitaminC_manhattan_precision": 0.49193548387096775,
"eval_VitaminC_manhattan_recall": 0.991869918699187,
"eval_VitaminC_max_accuracy": 0.5859375,
"eval_VitaminC_max_accuracy_threshold": 323.20281982421875,
"eval_VitaminC_max_ap": 0.5420016101916201,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 457.04986572265625,
"eval_VitaminC_max_precision": 0.49586776859504134,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5420016101916201,
"eval_sts-test_pearson_cosine": 0.8193410747427454,
"eval_sts-test_pearson_dot": 0.8275444476338831,
"eval_sts-test_pearson_euclidean": 0.8464528142983967,
"eval_sts-test_pearson_manhattan": 0.8440476980962159,
"eval_sts-test_pearson_max": 0.8464528142983967,
"eval_sts-test_spearman_cosine": 0.8680272706642878,
"eval_sts-test_spearman_dot": 0.8555529342729671,
"eval_sts-test_spearman_euclidean": 0.8542457068859202,
"eval_sts-test_spearman_manhattan": 0.8510265117511795,
"eval_sts-test_spearman_max": 0.8680272706642878,
"eval_vitaminc-pairs_loss": 2.6755428314208984,
"eval_vitaminc-pairs_runtime": 1.4509,
"eval_vitaminc-pairs_samples_per_second": 74.437,
"eval_vitaminc-pairs_steps_per_second": 1.378,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_negation-triplets_loss": 1.9071491956710815,
"eval_negation-triplets_runtime": 0.3051,
"eval_negation-triplets_samples_per_second": 209.756,
"eval_negation-triplets_steps_per_second": 3.277,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_scitail-pairs-pos_loss": 0.18539850413799286,
"eval_scitail-pairs-pos_runtime": 0.4199,
"eval_scitail-pairs-pos_samples_per_second": 128.604,
"eval_scitail-pairs-pos_steps_per_second": 2.382,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_xsum-pairs_loss": 0.38365328311920166,
"eval_xsum-pairs_runtime": 3.1907,
"eval_xsum-pairs_samples_per_second": 40.116,
"eval_xsum-pairs_steps_per_second": 0.627,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_sciq_pairs_loss": 0.05558515340089798,
"eval_sciq_pairs_runtime": 3.2891,
"eval_sciq_pairs_samples_per_second": 38.917,
"eval_sciq_pairs_steps_per_second": 0.608,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_qasc_pairs_loss": 0.40469691157341003,
"eval_qasc_pairs_runtime": 0.6267,
"eval_qasc_pairs_samples_per_second": 204.245,
"eval_qasc_pairs_steps_per_second": 3.191,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_openbookqa_pairs_loss": 1.0837312936782837,
"eval_openbookqa_pairs_runtime": 0.5765,
"eval_openbookqa_pairs_samples_per_second": 222.02,
"eval_openbookqa_pairs_steps_per_second": 3.469,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_msmarco_pairs_loss": 0.6897398233413696,
"eval_msmarco_pairs_runtime": 1.2918,
"eval_msmarco_pairs_samples_per_second": 99.089,
"eval_msmarco_pairs_steps_per_second": 1.548,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_nq_pairs_loss": 0.9603796601295471,
"eval_nq_pairs_runtime": 2.3975,
"eval_nq_pairs_samples_per_second": 53.39,
"eval_nq_pairs_steps_per_second": 0.834,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_trivia_pairs_loss": 1.200446605682373,
"eval_trivia_pairs_runtime": 4.4582,
"eval_trivia_pairs_samples_per_second": 28.711,
"eval_trivia_pairs_steps_per_second": 0.449,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_gooaq_pairs_loss": 1.0353316068649292,
"eval_gooaq_pairs_runtime": 0.8765,
"eval_gooaq_pairs_samples_per_second": 146.042,
"eval_gooaq_pairs_steps_per_second": 2.282,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_paws-pos_loss": 0.042069558054208755,
"eval_paws-pos_runtime": 0.6909,
"eval_paws-pos_samples_per_second": 185.263,
"eval_paws-pos_steps_per_second": 2.895,
"step": 189
},
{
"epoch": 1.0839802399435428,
"grad_norm": 2.979419469833374,
"learning_rate": 3.986656614201813e-05,
"loss": 0.8724,
"step": 192
},
{
"epoch": 1.1009174311926606,
"grad_norm": 2.835219144821167,
"learning_rate": 3.981196635797361e-05,
"loss": 0.9381,
"step": 195
},
{
"epoch": 1.1178546224417785,
"grad_norm": 3.6650869846343994,
"learning_rate": 3.974810189162238e-05,
"loss": 0.9617,
"step": 198
},
{
"epoch": 1.1347918136908963,
"grad_norm": 4.188896656036377,
"learning_rate": 3.967501775403343e-05,
"loss": 1.0139,
"step": 201
},
{
"epoch": 1.1517290049400142,
"grad_norm": 3.1624915599823,
"learning_rate": 3.959276545421244e-05,
"loss": 1.1073,
"step": 204
},
{
"epoch": 1.168666196189132,
"grad_norm": 3.245002508163452,
"learning_rate": 3.950140296279871e-05,
"loss": 0.8365,
"step": 207
},
{
"epoch": 1.18560338743825,
"grad_norm": 4.376185894012451,
"learning_rate": 3.9400994671208e-05,
"loss": 1.1012,
"step": 210
},
{
"epoch": 1.2025405786873677,
"grad_norm": 3.236583948135376,
"learning_rate": 3.9291611346250066e-05,
"loss": 1.0016,
"step": 213
},
{
"epoch": 1.2194777699364856,
"grad_norm": 3.7601733207702637,
"learning_rate": 3.9173330080252904e-05,
"loss": 1.0957,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7863086462020874,
"eval_VitaminC_cosine_ap": 0.538511783260847,
"eval_VitaminC_cosine_f1": 0.6577540106951871,
"eval_VitaminC_cosine_f1_threshold": 0.4006580412387848,
"eval_VitaminC_cosine_precision": 0.4900398406374502,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.57421875,
"eval_VitaminC_dot_accuracy_threshold": 323.53277587890625,
"eval_VitaminC_dot_ap": 0.5304994537787167,
"eval_VitaminC_dot_f1": 0.6577540106951871,
"eval_VitaminC_dot_f1_threshold": 166.45921325683594,
"eval_VitaminC_dot_precision": 0.4900398406374502,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 13.631423950195312,
"eval_VitaminC_euclidean_ap": 0.5363284984763951,
"eval_VitaminC_euclidean_f1": 0.6542553191489362,
"eval_VitaminC_euclidean_f1_threshold": 25.392715454101562,
"eval_VitaminC_euclidean_precision": 0.48616600790513836,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 283.5897216796875,
"eval_VitaminC_manhattan_ap": 0.5327191155331534,
"eval_VitaminC_manhattan_f1": 0.6559999999999999,
"eval_VitaminC_manhattan_f1_threshold": 491.0370178222656,
"eval_VitaminC_manhattan_precision": 0.4880952380952381,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 323.53277587890625,
"eval_VitaminC_max_ap": 0.538511783260847,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 491.0370178222656,
"eval_VitaminC_max_precision": 0.4900398406374502,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.538511783260847,
"eval_sts-test_pearson_cosine": 0.8156684919084325,
"eval_sts-test_pearson_dot": 0.8230786053133633,
"eval_sts-test_pearson_euclidean": 0.845348828865422,
"eval_sts-test_pearson_manhattan": 0.8432655375716184,
"eval_sts-test_pearson_max": 0.845348828865422,
"eval_sts-test_spearman_cosine": 0.8655524539841267,
"eval_sts-test_spearman_dot": 0.8507196659909223,
"eval_sts-test_spearman_euclidean": 0.8547050804103192,
"eval_sts-test_spearman_manhattan": 0.8508668230591436,
"eval_sts-test_spearman_max": 0.8655524539841267,
"eval_vitaminc-pairs_loss": 2.5465524196624756,
"eval_vitaminc-pairs_runtime": 1.4425,
"eval_vitaminc-pairs_samples_per_second": 74.869,
"eval_vitaminc-pairs_steps_per_second": 1.386,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_negation-triplets_loss": 1.9161474704742432,
"eval_negation-triplets_runtime": 0.2994,
"eval_negation-triplets_samples_per_second": 213.785,
"eval_negation-triplets_steps_per_second": 3.34,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_scitail-pairs-pos_loss": 0.19009728729724884,
"eval_scitail-pairs-pos_runtime": 0.3745,
"eval_scitail-pairs-pos_samples_per_second": 144.203,
"eval_scitail-pairs-pos_steps_per_second": 2.67,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_xsum-pairs_loss": 0.35912859439849854,
"eval_xsum-pairs_runtime": 3.1543,
"eval_xsum-pairs_samples_per_second": 40.58,
"eval_xsum-pairs_steps_per_second": 0.634,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_sciq_pairs_loss": 0.05168920382857323,
"eval_sciq_pairs_runtime": 3.2561,
"eval_sciq_pairs_samples_per_second": 39.31,
"eval_sciq_pairs_steps_per_second": 0.614,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_qasc_pairs_loss": 0.30753791332244873,
"eval_qasc_pairs_runtime": 0.6201,
"eval_qasc_pairs_samples_per_second": 206.418,
"eval_qasc_pairs_steps_per_second": 3.225,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_openbookqa_pairs_loss": 0.9365726113319397,
"eval_openbookqa_pairs_runtime": 0.5832,
"eval_openbookqa_pairs_samples_per_second": 219.496,
"eval_openbookqa_pairs_steps_per_second": 3.43,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_msmarco_pairs_loss": 0.5819053053855896,
"eval_msmarco_pairs_runtime": 1.2858,
"eval_msmarco_pairs_samples_per_second": 99.551,
"eval_msmarco_pairs_steps_per_second": 1.555,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_nq_pairs_loss": 0.8172401785850525,
"eval_nq_pairs_runtime": 2.3809,
"eval_nq_pairs_samples_per_second": 53.761,
"eval_nq_pairs_steps_per_second": 0.84,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_trivia_pairs_loss": 1.1411677598953247,
"eval_trivia_pairs_runtime": 4.4162,
"eval_trivia_pairs_samples_per_second": 28.984,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_gooaq_pairs_loss": 0.9686058759689331,
"eval_gooaq_pairs_runtime": 0.8788,
"eval_gooaq_pairs_samples_per_second": 145.645,
"eval_gooaq_pairs_steps_per_second": 2.276,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_paws-pos_loss": 0.03953952714800835,
"eval_paws-pos_runtime": 0.708,
"eval_paws-pos_samples_per_second": 180.782,
"eval_paws-pos_steps_per_second": 2.825,
"step": 216
},
{
"epoch": 1.2364149611856035,
"grad_norm": 3.566471576690674,
"learning_rate": 3.904623423672881e-05,
"loss": 1.1273,
"step": 219
},
{
"epoch": 1.2533521524347213,
"grad_norm": 4.086460590362549,
"learning_rate": 3.891041339162053e-05,
"loss": 1.2568,
"step": 222
},
{
"epoch": 1.2702893436838392,
"grad_norm": 3.2877376079559326,
"learning_rate": 3.876596327016904e-05,
"loss": 0.873,
"step": 225
},
{
"epoch": 1.287226534932957,
"grad_norm": 3.383211851119995,
"learning_rate": 3.861298567944728e-05,
"loss": 1.0003,
"step": 228
},
{
"epoch": 1.3041637261820749,
"grad_norm": 3.8474605083465576,
"learning_rate": 3.8451588436607487e-05,
"loss": 1.142,
"step": 231
},
{
"epoch": 1.3211009174311927,
"grad_norm": 3.027008533477783,
"learning_rate": 3.8281885292892706e-05,
"loss": 0.807,
"step": 234
},
{
"epoch": 1.3380381086803106,
"grad_norm": 2.9607250690460205,
"learning_rate": 3.810399585346599e-05,
"loss": 1.0231,
"step": 237
},
{
"epoch": 1.3549752999294284,
"grad_norm": 2.511488676071167,
"learning_rate": 3.791804549311382e-05,
"loss": 0.797,
"step": 240
},
{
"epoch": 1.3719124911785463,
"grad_norm": 2.603672504425049,
"learning_rate": 3.7724165267883146e-05,
"loss": 0.8473,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.578125,
"eval_VitaminC_cosine_accuracy_threshold": 0.7651997804641724,
"eval_VitaminC_cosine_ap": 0.5427753322056709,
"eval_VitaminC_cosine_f1": 0.6595174262734584,
"eval_VitaminC_cosine_f1_threshold": 0.38563254475593567,
"eval_VitaminC_cosine_precision": 0.492,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5703125,
"eval_VitaminC_dot_accuracy_threshold": 330.23577880859375,
"eval_VitaminC_dot_ap": 0.5507967714924796,
"eval_VitaminC_dot_f1": 0.6595174262734584,
"eval_VitaminC_dot_f1_threshold": 160.55694580078125,
"eval_VitaminC_dot_precision": 0.492,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 13.033781051635742,
"eval_VitaminC_euclidean_ap": 0.5406935655135654,
"eval_VitaminC_euclidean_f1": 0.6576819407008085,
"eval_VitaminC_euclidean_f1_threshold": 22.224994659423828,
"eval_VitaminC_euclidean_precision": 0.49193548387096775,
"eval_VitaminC_euclidean_recall": 0.991869918699187,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 274.7045593261719,
"eval_VitaminC_manhattan_ap": 0.5366045405118165,
"eval_VitaminC_manhattan_f1": 0.6577540106951871,
"eval_VitaminC_manhattan_f1_threshold": 475.4096374511719,
"eval_VitaminC_manhattan_precision": 0.4900398406374502,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 330.23577880859375,
"eval_VitaminC_max_ap": 0.5507967714924796,
"eval_VitaminC_max_f1": 0.6595174262734584,
"eval_VitaminC_max_f1_threshold": 475.4096374511719,
"eval_VitaminC_max_precision": 0.492,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5507967714924796,
"eval_sts-test_pearson_cosine": 0.8204982036322743,
"eval_sts-test_pearson_dot": 0.8243481169631539,
"eval_sts-test_pearson_euclidean": 0.8495098083065487,
"eval_sts-test_pearson_manhattan": 0.8491539225772841,
"eval_sts-test_pearson_max": 0.8495098083065487,
"eval_sts-test_spearman_cosine": 0.8687444375928703,
"eval_sts-test_spearman_dot": 0.8509044179305871,
"eval_sts-test_spearman_euclidean": 0.8563313271350431,
"eval_sts-test_spearman_manhattan": 0.8563900467437737,
"eval_sts-test_spearman_max": 0.8687444375928703,
"eval_vitaminc-pairs_loss": 2.5139691829681396,
"eval_vitaminc-pairs_runtime": 1.449,
"eval_vitaminc-pairs_samples_per_second": 74.533,
"eval_vitaminc-pairs_steps_per_second": 1.38,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_negation-triplets_loss": 1.8629425764083862,
"eval_negation-triplets_runtime": 0.3014,
"eval_negation-triplets_samples_per_second": 212.31,
"eval_negation-triplets_steps_per_second": 3.317,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_scitail-pairs-pos_loss": 0.17502877116203308,
"eval_scitail-pairs-pos_runtime": 0.3707,
"eval_scitail-pairs-pos_samples_per_second": 145.673,
"eval_scitail-pairs-pos_steps_per_second": 2.698,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_xsum-pairs_loss": 0.2802315950393677,
"eval_xsum-pairs_runtime": 3.1565,
"eval_xsum-pairs_samples_per_second": 40.551,
"eval_xsum-pairs_steps_per_second": 0.634,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_sciq_pairs_loss": 0.046695925295352936,
"eval_sciq_pairs_runtime": 3.2866,
"eval_sciq_pairs_samples_per_second": 38.946,
"eval_sciq_pairs_steps_per_second": 0.609,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_qasc_pairs_loss": 0.2354799211025238,
"eval_qasc_pairs_runtime": 0.6228,
"eval_qasc_pairs_samples_per_second": 205.533,
"eval_qasc_pairs_steps_per_second": 3.211,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_openbookqa_pairs_loss": 0.8562020659446716,
"eval_openbookqa_pairs_runtime": 0.5764,
"eval_openbookqa_pairs_samples_per_second": 222.058,
"eval_openbookqa_pairs_steps_per_second": 3.47,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_msmarco_pairs_loss": 0.5559017658233643,
"eval_msmarco_pairs_runtime": 1.2826,
"eval_msmarco_pairs_samples_per_second": 99.801,
"eval_msmarco_pairs_steps_per_second": 1.559,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_nq_pairs_loss": 0.743526041507721,
"eval_nq_pairs_runtime": 2.3784,
"eval_nq_pairs_samples_per_second": 53.817,
"eval_nq_pairs_steps_per_second": 0.841,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_trivia_pairs_loss": 1.106662392616272,
"eval_trivia_pairs_runtime": 4.4193,
"eval_trivia_pairs_samples_per_second": 28.964,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_gooaq_pairs_loss": 0.8928955793380737,
"eval_gooaq_pairs_runtime": 0.8831,
"eval_gooaq_pairs_samples_per_second": 144.944,
"eval_gooaq_pairs_steps_per_second": 2.265,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_paws-pos_loss": 0.03428014740347862,
"eval_paws-pos_runtime": 0.6872,
"eval_paws-pos_samples_per_second": 186.261,
"eval_paws-pos_steps_per_second": 2.91,
"step": 243
},
{
"epoch": 1.3888496824276642,
"grad_norm": 4.478828430175781,
"learning_rate": 3.752249182271433e-05,
"loss": 0.9531,
"step": 246
},
{
"epoch": 1.405786873676782,
"grad_norm": 3.3206863403320312,
"learning_rate": 3.731316729513507e-05,
"loss": 0.9023,
"step": 249
},
{
"epoch": 1.4227240649258999,
"grad_norm": 3.4713878631591797,
"learning_rate": 3.7096339215083274e-05,
"loss": 0.8922,
"step": 252
},
{
"epoch": 1.4396612561750177,
"grad_norm": 3.4212491512298584,
"learning_rate": 3.687216040092931e-05,
"loss": 0.9874,
"step": 255
},
{
"epoch": 1.4565984474241356,
"grad_norm": 3.398963689804077,
"learning_rate": 3.6640788851771084e-05,
"loss": 0.8508,
"step": 258
},
{
"epoch": 1.4735356386732534,
"grad_norm": 3.350128650665283,
"learning_rate": 3.64023876360778e-05,
"loss": 0.7149,
"step": 261
},
{
"epoch": 1.4904728299223713,
"grad_norm": 3.438978433609009,
"learning_rate": 3.615712477676081e-05,
"loss": 0.894,
"step": 264
},
{
"epoch": 1.5074100211714891,
"grad_norm": 3.1700806617736816,
"learning_rate": 3.5905173132752725e-05,
"loss": 0.867,
"step": 267
},
{
"epoch": 1.524347212420607,
"grad_norm": 3.1567916870117188,
"learning_rate": 3.5646710277178006e-05,
"loss": 0.7493,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.58203125,
"eval_VitaminC_cosine_accuracy_threshold": 0.7990785241127014,
"eval_VitaminC_cosine_ap": 0.5489113961762149,
"eval_VitaminC_cosine_f1": 0.6595174262734584,
"eval_VitaminC_cosine_f1_threshold": 0.3687684237957001,
"eval_VitaminC_cosine_precision": 0.492,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.57421875,
"eval_VitaminC_dot_accuracy_threshold": 328.30560302734375,
"eval_VitaminC_dot_ap": 0.5498735151014204,
"eval_VitaminC_dot_f1": 0.6595174262734584,
"eval_VitaminC_dot_f1_threshold": 153.01849365234375,
"eval_VitaminC_dot_precision": 0.492,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 12.773557662963867,
"eval_VitaminC_euclidean_ap": 0.5426159894851803,
"eval_VitaminC_euclidean_f1": 0.6559999999999999,
"eval_VitaminC_euclidean_f1_threshold": 23.71053123474121,
"eval_VitaminC_euclidean_precision": 0.4880952380952381,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.58203125,
"eval_VitaminC_manhattan_accuracy_threshold": 272.04931640625,
"eval_VitaminC_manhattan_ap": 0.5396432749419082,
"eval_VitaminC_manhattan_f1": 0.6577540106951871,
"eval_VitaminC_manhattan_f1_threshold": 494.33001708984375,
"eval_VitaminC_manhattan_precision": 0.4900398406374502,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.58203125,
"eval_VitaminC_max_accuracy_threshold": 328.30560302734375,
"eval_VitaminC_max_ap": 0.5498735151014204,
"eval_VitaminC_max_f1": 0.6595174262734584,
"eval_VitaminC_max_f1_threshold": 494.33001708984375,
"eval_VitaminC_max_precision": 0.492,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5498735151014204,
"eval_sts-test_pearson_cosine": 0.8213785723785002,
"eval_sts-test_pearson_dot": 0.8169840312248031,
"eval_sts-test_pearson_euclidean": 0.8549065829936804,
"eval_sts-test_pearson_manhattan": 0.8559014033008101,
"eval_sts-test_pearson_max": 0.8559014033008101,
"eval_sts-test_spearman_cosine": 0.871560114440785,
"eval_sts-test_spearman_dot": 0.8412461164335756,
"eval_sts-test_spearman_euclidean": 0.8616554770242205,
"eval_sts-test_spearman_manhattan": 0.86344749922969,
"eval_sts-test_spearman_max": 0.871560114440785,
"eval_vitaminc-pairs_loss": 2.5574047565460205,
"eval_vitaminc-pairs_runtime": 1.4466,
"eval_vitaminc-pairs_samples_per_second": 74.658,
"eval_vitaminc-pairs_steps_per_second": 1.383,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_negation-triplets_loss": 1.853515386581421,
"eval_negation-triplets_runtime": 0.2992,
"eval_negation-triplets_samples_per_second": 213.896,
"eval_negation-triplets_steps_per_second": 3.342,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_scitail-pairs-pos_loss": 0.1692524254322052,
"eval_scitail-pairs-pos_runtime": 0.3739,
"eval_scitail-pairs-pos_samples_per_second": 144.426,
"eval_scitail-pairs-pos_steps_per_second": 2.675,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_xsum-pairs_loss": 0.22170975804328918,
"eval_xsum-pairs_runtime": 3.1517,
"eval_xsum-pairs_samples_per_second": 40.613,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_sciq_pairs_loss": 0.04346679896116257,
"eval_sciq_pairs_runtime": 3.2686,
"eval_sciq_pairs_samples_per_second": 39.16,
"eval_sciq_pairs_steps_per_second": 0.612,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_qasc_pairs_loss": 0.24427936971187592,
"eval_qasc_pairs_runtime": 0.6217,
"eval_qasc_pairs_samples_per_second": 205.897,
"eval_qasc_pairs_steps_per_second": 3.217,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_openbookqa_pairs_loss": 0.7998915910720825,
"eval_openbookqa_pairs_runtime": 0.576,
"eval_openbookqa_pairs_samples_per_second": 222.206,
"eval_openbookqa_pairs_steps_per_second": 3.472,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_msmarco_pairs_loss": 0.5027381777763367,
"eval_msmarco_pairs_runtime": 1.2901,
"eval_msmarco_pairs_samples_per_second": 99.216,
"eval_msmarco_pairs_steps_per_second": 1.55,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_nq_pairs_loss": 0.6529555916786194,
"eval_nq_pairs_runtime": 2.3842,
"eval_nq_pairs_samples_per_second": 53.687,
"eval_nq_pairs_steps_per_second": 0.839,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_trivia_pairs_loss": 1.0634211301803589,
"eval_trivia_pairs_runtime": 4.4089,
"eval_trivia_pairs_samples_per_second": 29.032,
"eval_trivia_pairs_steps_per_second": 0.454,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_gooaq_pairs_loss": 0.800453245639801,
"eval_gooaq_pairs_runtime": 0.8705,
"eval_gooaq_pairs_samples_per_second": 147.034,
"eval_gooaq_pairs_steps_per_second": 2.297,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_paws-pos_loss": 0.031901415437459946,
"eval_paws-pos_runtime": 0.6828,
"eval_paws-pos_samples_per_second": 187.456,
"eval_paws-pos_steps_per_second": 2.929,
"step": 270
},
{
"epoch": 1.5412844036697249,
"grad_norm": 3.258525848388672,
"learning_rate": 3.5381918372201175e-05,
"loss": 0.7974,
"step": 273
},
{
"epoch": 1.5582215949188427,
"grad_norm": 2.9689552783966064,
"learning_rate": 3.5110984040640627e-05,
"loss": 0.797,
"step": 276
},
{
"epoch": 1.5751587861679606,
"grad_norm": 3.50411057472229,
"learning_rate": 3.483409823443864e-05,
"loss": 0.6749,
"step": 279
},
{
"epoch": 1.5920959774170784,
"grad_norm": 2.840614080429077,
"learning_rate": 3.4551456100080266e-05,
"loss": 0.9325,
"step": 282
},
{
"epoch": 1.6090331686661963,
"grad_norm": 2.934267044067383,
"learning_rate": 3.426325684105594e-05,
"loss": 0.8418,
"step": 285
},
{
"epoch": 1.6259703599153141,
"grad_norm": 3.5037455558776855,
"learning_rate": 3.396970357746474e-05,
"loss": 1.0135,
"step": 288
},
{
"epoch": 1.642907551164432,
"grad_norm": 3.349975109100342,
"learning_rate": 3.3671003202857315e-05,
"loss": 0.6961,
"step": 291
},
{
"epoch": 1.6598447424135498,
"grad_norm": 3.207557439804077,
"learning_rate": 3.336736623841924e-05,
"loss": 0.9361,
"step": 294
},
{
"epoch": 1.6767819336626677,
"grad_norm": 2.0259296894073486,
"learning_rate": 3.305900668459766e-05,
"loss": 0.6747,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7887165546417236,
"eval_VitaminC_cosine_ap": 0.5443802154749287,
"eval_VitaminC_cosine_f1": 0.6595174262734584,
"eval_VitaminC_cosine_f1_threshold": 0.35189926624298096,
"eval_VitaminC_cosine_precision": 0.492,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 335.016357421875,
"eval_VitaminC_dot_ap": 0.5460930199557891,
"eval_VitaminC_dot_f1": 0.6594594594594595,
"eval_VitaminC_dot_f1_threshold": 158.6214599609375,
"eval_VitaminC_dot_precision": 0.4939271255060729,
"eval_VitaminC_dot_recall": 0.991869918699187,
"eval_VitaminC_euclidean_accuracy": 0.57421875,
"eval_VitaminC_euclidean_accuracy_threshold": 13.359209060668945,
"eval_VitaminC_euclidean_ap": 0.5420558119789205,
"eval_VitaminC_euclidean_f1": 0.6577540106951871,
"eval_VitaminC_euclidean_f1_threshold": 23.44475746154785,
"eval_VitaminC_euclidean_precision": 0.4900398406374502,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.578125,
"eval_VitaminC_manhattan_accuracy_threshold": 309.7850646972656,
"eval_VitaminC_manhattan_ap": 0.5398712022586767,
"eval_VitaminC_manhattan_f1": 0.6595174262734584,
"eval_VitaminC_manhattan_f1_threshold": 486.6765441894531,
"eval_VitaminC_manhattan_precision": 0.492,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 335.016357421875,
"eval_VitaminC_max_ap": 0.5460930199557891,
"eval_VitaminC_max_f1": 0.6595174262734584,
"eval_VitaminC_max_f1_threshold": 486.6765441894531,
"eval_VitaminC_max_precision": 0.4939271255060729,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5460930199557891,
"eval_sts-test_pearson_cosine": 0.8332392980969607,
"eval_sts-test_pearson_dot": 0.8346600863241642,
"eval_sts-test_pearson_euclidean": 0.8653211336269704,
"eval_sts-test_pearson_manhattan": 0.8653335270474869,
"eval_sts-test_pearson_max": 0.8653335270474869,
"eval_sts-test_spearman_cosine": 0.8786841635561152,
"eval_sts-test_spearman_dot": 0.8596876540389535,
"eval_sts-test_spearman_euclidean": 0.8687344122938186,
"eval_sts-test_spearman_manhattan": 0.8687734393508408,
"eval_sts-test_spearman_max": 0.8786841635561152,
"eval_vitaminc-pairs_loss": 2.4870808124542236,
"eval_vitaminc-pairs_runtime": 1.4506,
"eval_vitaminc-pairs_samples_per_second": 74.451,
"eval_vitaminc-pairs_steps_per_second": 1.379,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_negation-triplets_loss": 1.7349412441253662,
"eval_negation-triplets_runtime": 0.2993,
"eval_negation-triplets_samples_per_second": 213.838,
"eval_negation-triplets_steps_per_second": 3.341,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_scitail-pairs-pos_loss": 0.15961770713329315,
"eval_scitail-pairs-pos_runtime": 0.3704,
"eval_scitail-pairs-pos_samples_per_second": 145.808,
"eval_scitail-pairs-pos_steps_per_second": 2.7,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_xsum-pairs_loss": 0.22417353093624115,
"eval_xsum-pairs_runtime": 3.1629,
"eval_xsum-pairs_samples_per_second": 40.469,
"eval_xsum-pairs_steps_per_second": 0.632,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_sciq_pairs_loss": 0.03957323729991913,
"eval_sciq_pairs_runtime": 3.2788,
"eval_sciq_pairs_samples_per_second": 39.039,
"eval_sciq_pairs_steps_per_second": 0.61,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_qasc_pairs_loss": 0.19627788662910461,
"eval_qasc_pairs_runtime": 0.6246,
"eval_qasc_pairs_samples_per_second": 204.945,
"eval_qasc_pairs_steps_per_second": 3.202,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_openbookqa_pairs_loss": 0.7668256163597107,
"eval_openbookqa_pairs_runtime": 0.5769,
"eval_openbookqa_pairs_samples_per_second": 221.888,
"eval_openbookqa_pairs_steps_per_second": 3.467,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_msmarco_pairs_loss": 0.5024800300598145,
"eval_msmarco_pairs_runtime": 1.287,
"eval_msmarco_pairs_samples_per_second": 99.457,
"eval_msmarco_pairs_steps_per_second": 1.554,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_nq_pairs_loss": 0.6426529288291931,
"eval_nq_pairs_runtime": 2.3694,
"eval_nq_pairs_samples_per_second": 54.023,
"eval_nq_pairs_steps_per_second": 0.844,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_trivia_pairs_loss": 0.9762344360351562,
"eval_trivia_pairs_runtime": 4.4202,
"eval_trivia_pairs_samples_per_second": 28.958,
"eval_trivia_pairs_steps_per_second": 0.452,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_gooaq_pairs_loss": 0.7546207904815674,
"eval_gooaq_pairs_runtime": 0.8779,
"eval_gooaq_pairs_samples_per_second": 145.803,
"eval_gooaq_pairs_steps_per_second": 2.278,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_paws-pos_loss": 0.029145879670977592,
"eval_paws-pos_runtime": 0.6938,
"eval_paws-pos_samples_per_second": 184.484,
"eval_paws-pos_steps_per_second": 2.883,
"step": 297
},
{
"epoch": 1.6937191249117856,
"grad_norm": 2.766063928604126,
"learning_rate": 3.274614187027587e-05,
"loss": 0.7786,
"step": 300
},
{
"epoch": 1.7106563161609034,
"grad_norm": 3.1933176517486572,
"learning_rate": 3.2428992299601946e-05,
"loss": 0.7171,
"step": 303
},
{
"epoch": 1.7275935074100213,
"grad_norm": 3.0088443756103516,
"learning_rate": 3.2107781496579536e-05,
"loss": 0.6627,
"step": 306
},
{
"epoch": 1.7445306986591391,
"grad_norm": 3.13895845413208,
"learning_rate": 3.178273584753023e-05,
"loss": 0.6711,
"step": 309
},
{
"epoch": 1.761467889908257,
"grad_norm": 3.34114933013916,
"learning_rate": 3.145408444153868e-05,
"loss": 0.9076,
"step": 312
},
{
"epoch": 1.7784050811573748,
"grad_norm": 2.5035502910614014,
"learning_rate": 3.1122058908992746e-05,
"loss": 0.7414,
"step": 315
},
{
"epoch": 1.7953422724064927,
"grad_norm": 2.284698247909546,
"learning_rate": 3.078689325833264e-05,
"loss": 0.582,
"step": 318
},
{
"epoch": 1.8122794636556105,
"grad_norm": 2.643444538116455,
"learning_rate": 3.044882371112396e-05,
"loss": 0.6068,
"step": 321
}
],
"logging_steps": 3,
"max_steps": 531,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 107,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 160,
"trial_name": null,
"trial_params": null
}