{ "best_metric": 0.8307692307692308, "best_model_checkpoint": "omarmomen/tf_babylm_1/finetune/sst2/checkpoint-1600", "epoch": 4.556962025316456, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "eval_accuracy": 0.7657480239868164, "eval_f1": 0.7733333333333333, "eval_loss": 0.48952749371528625, "eval_mcc": 0.533119535626159, "eval_runtime": 0.6794, "eval_samples_per_second": 747.755, "eval_steps_per_second": 94.205, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.7834645509719849, "eval_f1": 0.8007246376811594, "eval_loss": 0.4489835798740387, "eval_mcc": 0.576754658800836, "eval_runtime": 0.6711, "eval_samples_per_second": 756.956, "eval_steps_per_second": 95.364, "step": 400 }, { "epoch": 0.63, "learning_rate": 4.683544303797468e-05, "loss": 0.4517, "step": 500 }, { "epoch": 0.76, "eval_accuracy": 0.8129921555519104, "eval_f1": 0.8183556405353728, "eval_loss": 0.4774584472179413, "eval_mcc": 0.6274832725618316, "eval_runtime": 0.6731, "eval_samples_per_second": 754.687, "eval_steps_per_second": 95.079, "step": 600 }, { "epoch": 1.01, "eval_accuracy": 0.8228346705436707, "eval_f1": 0.8170731707317073, "eval_loss": 0.4105101227760315, "eval_mcc": 0.6465702369056265, "eval_runtime": 0.6717, "eval_samples_per_second": 756.27, "eval_steps_per_second": 95.278, "step": 800 }, { "epoch": 1.27, "learning_rate": 4.367088607594937e-05, "loss": 0.2923, "step": 1000 }, { "epoch": 1.27, "eval_accuracy": 0.8149606585502625, "eval_f1": 0.8239700374531835, "eval_loss": 0.4857898950576782, "eval_mcc": 0.6339364104084189, "eval_runtime": 0.6723, "eval_samples_per_second": 755.671, "eval_steps_per_second": 95.203, "step": 1000 }, { "epoch": 1.52, "eval_accuracy": 0.8169291615486145, "eval_f1": 0.8228571428571428, "eval_loss": 0.5239109992980957, "eval_mcc": 0.6357405398605986, "eval_runtime": 0.6727, "eval_samples_per_second": 755.204, "eval_steps_per_second": 95.144, "step": 1200 }, { "epoch": 1.77, "eval_accuracy": 0.7637795209884644, "eval_f1": 0.7902097902097902, "eval_loss": 0.6100760698318481, "eval_mcc": 0.5467773556337732, "eval_runtime": 0.6727, "eval_samples_per_second": 755.151, "eval_steps_per_second": 95.137, "step": 1400 }, { "epoch": 1.9, "learning_rate": 4.050632911392405e-05, "loss": 0.2322, "step": 1500 }, { "epoch": 2.03, "eval_accuracy": 0.8267716765403748, "eval_f1": 0.8307692307692308, "eval_loss": 0.6272916197776794, "eval_mcc": 0.6546078105862819, "eval_runtime": 0.6722, "eval_samples_per_second": 755.698, "eval_steps_per_second": 95.206, "step": 1600 }, { "epoch": 2.28, "eval_accuracy": 0.7992125749588013, "eval_f1": 0.8068181818181818, "eval_loss": 0.6427187919616699, "eval_mcc": 0.6008122892984797, "eval_runtime": 0.6732, "eval_samples_per_second": 754.579, "eval_steps_per_second": 95.065, "step": 1800 }, { "epoch": 2.53, "learning_rate": 3.7341772151898736e-05, "loss": 0.1644, "step": 2000 }, { "epoch": 2.53, "eval_accuracy": 0.8169291615486145, "eval_f1": 0.8180039138943248, "eval_loss": 0.5454850196838379, "eval_mcc": 0.6340038022931133, "eval_runtime": 0.6736, "eval_samples_per_second": 754.184, "eval_steps_per_second": 95.015, "step": 2000 }, { "epoch": 2.78, "eval_accuracy": 0.8149606585502625, "eval_f1": 0.8104838709677419, "eval_loss": 0.5786112546920776, "eval_mcc": 0.6303470266599184, "eval_runtime": 0.6734, "eval_samples_per_second": 754.357, "eval_steps_per_second": 95.037, "step": 2200 }, { "epoch": 3.04, "eval_accuracy": 0.8188976645469666, "eval_f1": 0.8250950570342205, "eval_loss": 0.7192121148109436, "eval_mcc": 0.6398874815952302, "eval_runtime": 0.6726, "eval_samples_per_second": 755.237, "eval_steps_per_second": 95.148, "step": 2400 }, { "epoch": 3.16, "learning_rate": 3.4177215189873416e-05, "loss": 0.1477, "step": 2500 }, { "epoch": 3.29, "eval_accuracy": 0.8051180839538574, "eval_f1": 0.8107074569789675, "eval_loss": 0.7842402458190918, "eval_mcc": 0.6117037813256844, "eval_runtime": 0.6735, "eval_samples_per_second": 754.282, "eval_steps_per_second": 95.028, "step": 2600 }, { "epoch": 3.54, "eval_accuracy": 0.7854330539703369, "eval_f1": 0.7970204841713222, "eval_loss": 0.7084320783615112, "eval_mcc": 0.575362828713563, "eval_runtime": 0.6735, "eval_samples_per_second": 754.266, "eval_steps_per_second": 95.026, "step": 2800 }, { "epoch": 3.8, "learning_rate": 3.10126582278481e-05, "loss": 0.1101, "step": 3000 }, { "epoch": 3.8, "eval_accuracy": 0.7775590419769287, "eval_f1": 0.7941712204007286, "eval_loss": 0.8787282109260559, "eval_mcc": 0.5635308581623699, "eval_runtime": 0.6726, "eval_samples_per_second": 755.3, "eval_steps_per_second": 95.156, "step": 3000 }, { "epoch": 4.05, "eval_accuracy": 0.7775590419769287, "eval_f1": 0.7911275415896488, "eval_loss": 0.9535109996795654, "eval_mcc": 0.5606954034738957, "eval_runtime": 0.674, "eval_samples_per_second": 753.699, "eval_steps_per_second": 94.954, "step": 3200 }, { "epoch": 4.3, "eval_accuracy": 0.789370059967041, "eval_f1": 0.7914230019493177, "eval_loss": 0.9281810522079468, "eval_mcc": 0.5789992073276542, "eval_runtime": 0.6733, "eval_samples_per_second": 754.472, "eval_steps_per_second": 95.052, "step": 3400 }, { "epoch": 4.43, "learning_rate": 2.7848101265822786e-05, "loss": 0.0878, "step": 3500 }, { "epoch": 4.56, "eval_accuracy": 0.7992125749588013, "eval_f1": 0.7992125984251969, "eval_loss": 0.8580671548843384, "eval_mcc": 0.5984499728745253, "eval_runtime": 0.6728, "eval_samples_per_second": 755.083, "eval_steps_per_second": 95.129, "step": 3600 }, { "epoch": 4.56, "step": 3600, "total_flos": 6318390507405312.0, "train_loss": 0.2085385314623515, "train_runtime": 617.8306, "train_samples_per_second": 817.829, "train_steps_per_second": 12.787 } ], "max_steps": 7900, "num_train_epochs": 10, "total_flos": 6318390507405312.0, "trial_name": null, "trial_params": null }