{ "best_metric": 0.7325463743676223, "best_model_checkpoint": "omarmomen/tf_babylm_1/finetune/qnli/checkpoint-3800", "epoch": 6.11353711790393, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "eval_accuracy": 0.6141732335090637, "eval_f1": 0.6941747572815534, "eval_loss": 0.6516156196594238, "eval_mcc": 0.22499476038642752, "eval_runtime": 3.0135, "eval_samples_per_second": 758.595, "eval_steps_per_second": 94.907, "step": 200 }, { "epoch": 0.58, "eval_accuracy": 0.6146106719970703, "eval_f1": 0.6876993973768167, "eval_loss": 0.6433870196342468, "eval_mcc": 0.22324602394367388, "eval_runtime": 2.9954, "eval_samples_per_second": 763.173, "eval_steps_per_second": 95.48, "step": 400 }, { "epoch": 0.73, "learning_rate": 4.636098981077147e-05, "loss": 0.6617, "step": 500 }, { "epoch": 0.87, "eval_accuracy": 0.6132983565330505, "eval_f1": 0.7005420054200542, "eval_loss": 0.6730934977531433, "eval_mcc": 0.22735948653592145, "eval_runtime": 3.0047, "eval_samples_per_second": 760.815, "eval_steps_per_second": 95.185, "step": 600 }, { "epoch": 1.16, "eval_accuracy": 0.616797924041748, "eval_f1": 0.7135382603008502, "eval_loss": 0.7076134085655212, "eval_mcc": 0.24718358845250435, "eval_runtime": 3.0234, "eval_samples_per_second": 756.093, "eval_steps_per_second": 94.594, "step": 800 }, { "epoch": 1.46, "learning_rate": 4.272197962154294e-05, "loss": 0.6087, "step": 1000 }, { "epoch": 1.46, "eval_accuracy": 0.6307961344718933, "eval_f1": 0.7163978494623656, "eval_loss": 0.7313140630722046, "eval_mcc": 0.2710164987662102, "eval_runtime": 3.0006, "eval_samples_per_second": 761.857, "eval_steps_per_second": 95.315, "step": 1000 }, { "epoch": 1.75, "eval_accuracy": 0.616797924041748, "eval_f1": 0.7183279742765273, "eval_loss": 0.778058648109436, "eval_mcc": 0.2556594651569668, "eval_runtime": 3.0192, "eval_samples_per_second": 757.142, "eval_steps_per_second": 94.726, "step": 1200 }, { "epoch": 2.04, "eval_accuracy": 0.6482939720153809, "eval_f1": 0.7280108254397835, "eval_loss": 0.8302698135375977, "eval_mcc": 0.30996246947541667, "eval_runtime": 3.0083, "eval_samples_per_second": 759.898, "eval_steps_per_second": 95.07, "step": 1400 }, { "epoch": 2.18, "learning_rate": 3.9082969432314415e-05, "loss": 0.5517, "step": 1500 }, { "epoch": 2.33, "eval_accuracy": 0.6132983565330505, "eval_f1": 0.7182919056724028, "eval_loss": 0.9632509350776672, "eval_mcc": 0.25168815647322607, "eval_runtime": 3.0009, "eval_samples_per_second": 761.777, "eval_steps_per_second": 95.305, "step": 1600 }, { "epoch": 2.62, "eval_accuracy": 0.612860918045044, "eval_f1": 0.7193149381541389, "eval_loss": 0.9746847748756409, "eval_mcc": 0.253685558926296, "eval_runtime": 3.0098, "eval_samples_per_second": 759.523, "eval_steps_per_second": 95.023, "step": 1800 }, { "epoch": 2.91, "learning_rate": 3.544395924308588e-05, "loss": 0.4786, "step": 2000 }, { "epoch": 2.91, "eval_accuracy": 0.6141732335090637, "eval_f1": 0.7207093096896771, "eval_loss": 0.9755086302757263, "eval_mcc": 0.25849446751016225, "eval_runtime": 3.0063, "eval_samples_per_second": 760.407, "eval_steps_per_second": 95.134, "step": 2000 }, { "epoch": 3.2, "eval_accuracy": 0.6609798669815063, "eval_f1": 0.7322970639032815, "eval_loss": 0.9067291021347046, "eval_mcc": 0.33185170807279407, "eval_runtime": 3.0009, "eval_samples_per_second": 761.769, "eval_steps_per_second": 95.304, "step": 2200 }, { "epoch": 3.49, "eval_accuracy": 0.6303586959838867, "eval_f1": 0.7248453272549658, "eval_loss": 1.0302561521530151, "eval_mcc": 0.28386334628294807, "eval_runtime": 3.0233, "eval_samples_per_second": 756.116, "eval_steps_per_second": 94.597, "step": 2400 }, { "epoch": 3.64, "learning_rate": 3.1804949053857355e-05, "loss": 0.3795, "step": 2500 }, { "epoch": 3.78, "eval_accuracy": 0.6272965669631958, "eval_f1": 0.7233766233766233, "eval_loss": 1.073946237564087, "eval_mcc": 0.2776317996888772, "eval_runtime": 3.0269, "eval_samples_per_second": 755.222, "eval_steps_per_second": 94.485, "step": 2600 }, { "epoch": 4.08, "eval_accuracy": 0.6242344975471497, "eval_f1": 0.7193727540019601, "eval_loss": 1.393174648284912, "eval_mcc": 0.26651346871760156, "eval_runtime": 3.0247, "eval_samples_per_second": 755.772, "eval_steps_per_second": 94.554, "step": 2800 }, { "epoch": 4.37, "learning_rate": 2.816593886462882e-05, "loss": 0.3087, "step": 3000 }, { "epoch": 4.37, "eval_accuracy": 0.6299212574958801, "eval_f1": 0.7247885491216656, "eval_loss": 1.3153369426727295, "eval_mcc": 0.28329167906840397, "eval_runtime": 3.0213, "eval_samples_per_second": 756.639, "eval_steps_per_second": 94.663, "step": 3000 }, { "epoch": 4.66, "eval_accuracy": 0.6356080770492554, "eval_f1": 0.7273322422258592, "eval_loss": 1.3374916315078735, "eval_mcc": 0.29432211381962475, "eval_runtime": 3.0207, "eval_samples_per_second": 756.78, "eval_steps_per_second": 94.68, "step": 3200 }, { "epoch": 4.95, "eval_accuracy": 0.6281715035438538, "eval_f1": 0.723846653671215, "eval_loss": 1.2929773330688477, "eval_mcc": 0.279522957371612, "eval_runtime": 3.0073, "eval_samples_per_second": 760.149, "eval_steps_per_second": 95.102, "step": 3400 }, { "epoch": 5.09, "learning_rate": 2.452692867540029e-05, "loss": 0.261, "step": 3500 }, { "epoch": 5.24, "eval_accuracy": 0.6552931070327759, "eval_f1": 0.7293956043956044, "eval_loss": 1.3341577053070068, "eval_mcc": 0.3208142617256846, "eval_runtime": 3.0069, "eval_samples_per_second": 760.245, "eval_steps_per_second": 95.114, "step": 3600 }, { "epoch": 5.53, "eval_accuracy": 0.6531058549880981, "eval_f1": 0.7325463743676223, "eval_loss": 1.4561353921890259, "eval_mcc": 0.32254298037112755, "eval_runtime": 3.0074, "eval_samples_per_second": 760.119, "eval_steps_per_second": 95.098, "step": 3800 }, { "epoch": 5.82, "learning_rate": 2.088791848617176e-05, "loss": 0.1808, "step": 4000 }, { "epoch": 5.82, "eval_accuracy": 0.6579177379608154, "eval_f1": 0.7292243767313019, "eval_loss": 1.3927106857299805, "eval_mcc": 0.3241513466674666, "eval_runtime": 3.0083, "eval_samples_per_second": 759.895, "eval_steps_per_second": 95.07, "step": 4000 }, { "epoch": 6.11, "eval_accuracy": 0.6482939720153809, "eval_f1": 0.7269021739130435, "eval_loss": 1.718584656715393, "eval_mcc": 0.30843932453221495, "eval_runtime": 3.0225, "eval_samples_per_second": 756.317, "eval_steps_per_second": 94.622, "step": 4200 }, { "epoch": 6.11, "step": 4200, "total_flos": 7367156844493824.0, "train_loss": 0.4156740043276832, "train_runtime": 776.4968, "train_samples_per_second": 565.579, "train_steps_per_second": 8.847 } ], "max_steps": 6870, "num_train_epochs": 10, "total_flos": 7367156844493824.0, "trial_name": null, "trial_params": null }