{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 6260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 4.920127795527157e-05, "loss": 0.7778, "step": 100 }, { "epoch": 0.32, "learning_rate": 4.840255591054313e-05, "loss": 0.5964, "step": 200 }, { "epoch": 0.48, "learning_rate": 4.76038338658147e-05, "loss": 0.5901, "step": 300 }, { "epoch": 0.64, "learning_rate": 4.680511182108626e-05, "loss": 0.4494, "step": 400 }, { "epoch": 0.8, "learning_rate": 4.600638977635783e-05, "loss": 0.4166, "step": 500 }, { "epoch": 0.96, "learning_rate": 4.520766773162939e-05, "loss": 0.4516, "step": 600 }, { "epoch": 1.0, "eval_accuracy": 0.8979708054137165, "eval_f1": 0.44450345193839613, "eval_loss": 0.40465056896209717, "eval_precision": 0.4332298136645963, "eval_recall": 0.4563794983642312, "eval_runtime": 68.5329, "eval_samples_per_second": 146.017, "eval_steps_per_second": 36.508, "step": 626 }, { "epoch": 1.12, "learning_rate": 4.440894568690096e-05, "loss": 0.4155, "step": 700 }, { "epoch": 1.28, "learning_rate": 4.361022364217253e-05, "loss": 0.3645, "step": 800 }, { "epoch": 1.44, "learning_rate": 4.2811501597444096e-05, "loss": 0.3712, "step": 900 }, { "epoch": 1.6, "learning_rate": 4.201277955271566e-05, "loss": 0.3669, "step": 1000 }, { "epoch": 1.76, "learning_rate": 4.1214057507987225e-05, "loss": 0.3424, "step": 1100 }, { "epoch": 1.92, "learning_rate": 4.041533546325879e-05, "loss": 0.3677, "step": 1200 }, { "epoch": 2.0, "eval_accuracy": 0.9192688065906114, "eval_f1": 0.5293376983127676, "eval_loss": 0.2773844301700592, "eval_precision": 0.4918109499298081, "eval_recall": 0.5730643402399127, "eval_runtime": 67.6554, "eval_samples_per_second": 147.911, "eval_steps_per_second": 36.982, "step": 1252 }, { "epoch": 2.08, "learning_rate": 3.9616613418530355e-05, "loss": 0.3039, "step": 1300 }, { "epoch": 2.24, "learning_rate": 3.8817891373801916e-05, "loss": 0.2599, "step": 1400 }, { "epoch": 2.4, "learning_rate": 3.8019169329073485e-05, "loss": 0.3243, "step": 1500 }, { "epoch": 2.56, "learning_rate": 3.722044728434505e-05, "loss": 0.2701, "step": 1600 }, { "epoch": 2.72, "learning_rate": 3.6421725239616614e-05, "loss": 0.2634, "step": 1700 }, { "epoch": 2.88, "learning_rate": 3.562300319488818e-05, "loss": 0.2892, "step": 1800 }, { "epoch": 3.0, "eval_accuracy": 0.9383648753820163, "eval_f1": 0.6352631578947368, "eval_loss": 0.21329015493392944, "eval_precision": 0.6139369277721262, "eval_recall": 0.6581243184296619, "eval_runtime": 67.955, "eval_samples_per_second": 147.259, "eval_steps_per_second": 36.818, "step": 1878 }, { "epoch": 3.04, "learning_rate": 3.482428115015975e-05, "loss": 0.315, "step": 1900 }, { "epoch": 3.19, "learning_rate": 3.402555910543131e-05, "loss": 0.242, "step": 2000 }, { "epoch": 3.35, "learning_rate": 3.322683706070287e-05, "loss": 0.2366, "step": 2100 }, { "epoch": 3.51, "learning_rate": 3.242811501597444e-05, "loss": 0.2233, "step": 2200 }, { "epoch": 3.67, "learning_rate": 3.162939297124601e-05, "loss": 0.233, "step": 2300 }, { "epoch": 3.83, "learning_rate": 3.083067092651757e-05, "loss": 0.2469, "step": 2400 }, { "epoch": 3.99, "learning_rate": 3.003194888178914e-05, "loss": 0.2736, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.9488050720373569, "eval_f1": 0.6536661466458659, "eval_loss": 0.17724330723285675, "eval_precision": 0.6247514910536779, "eval_recall": 0.6853871319520175, "eval_runtime": 68.1206, "eval_samples_per_second": 146.901, "eval_steps_per_second": 36.729, "step": 2504 }, { "epoch": 4.15, "learning_rate": 2.9233226837060707e-05, "loss": 0.1691, "step": 2600 }, { "epoch": 4.31, "learning_rate": 2.843450479233227e-05, "loss": 0.1941, "step": 2700 }, { "epoch": 4.47, "learning_rate": 2.7635782747603834e-05, "loss": 0.1891, "step": 2800 }, { "epoch": 4.63, "learning_rate": 2.68370607028754e-05, "loss": 0.2037, "step": 2900 }, { "epoch": 4.79, "learning_rate": 2.6038338658146967e-05, "loss": 0.2222, "step": 3000 }, { "epoch": 4.95, "learning_rate": 2.523961661341853e-05, "loss": 0.221, "step": 3100 }, { "epoch": 5.0, "eval_accuracy": 0.9560372809931474, "eval_f1": 0.6772486772486773, "eval_loss": 0.15026314556598663, "eval_precision": 0.6295081967213115, "eval_recall": 0.732824427480916, "eval_runtime": 68.8869, "eval_samples_per_second": 145.267, "eval_steps_per_second": 36.32, "step": 3130 }, { "epoch": 5.11, "learning_rate": 2.44408945686901e-05, "loss": 0.1524, "step": 3200 }, { "epoch": 5.27, "learning_rate": 2.364217252396166e-05, "loss": 0.1575, "step": 3300 }, { "epoch": 5.43, "learning_rate": 2.284345047923323e-05, "loss": 0.1606, "step": 3400 }, { "epoch": 5.59, "learning_rate": 2.2044728434504794e-05, "loss": 0.1314, "step": 3500 }, { "epoch": 5.75, "learning_rate": 2.124600638977636e-05, "loss": 0.1845, "step": 3600 }, { "epoch": 5.91, "learning_rate": 2.0447284345047924e-05, "loss": 0.1569, "step": 3700 }, { "epoch": 6.0, "eval_accuracy": 0.9622824168106149, "eval_f1": 0.7409068261086198, "eval_loss": 0.1283087134361267, "eval_precision": 0.6821100917431193, "eval_recall": 0.8107960741548528, "eval_runtime": 69.0068, "eval_samples_per_second": 145.015, "eval_steps_per_second": 36.257, "step": 3756 }, { "epoch": 6.07, "learning_rate": 1.964856230031949e-05, "loss": 0.1495, "step": 3800 }, { "epoch": 6.23, "learning_rate": 1.8849840255591057e-05, "loss": 0.1309, "step": 3900 }, { "epoch": 6.39, "learning_rate": 1.805111821086262e-05, "loss": 0.131, "step": 4000 }, { "epoch": 6.55, "learning_rate": 1.7252396166134186e-05, "loss": 0.1177, "step": 4100 }, { "epoch": 6.71, "learning_rate": 1.645367412140575e-05, "loss": 0.1046, "step": 4200 }, { "epoch": 6.87, "learning_rate": 1.565495207667732e-05, "loss": 0.1534, "step": 4300 }, { "epoch": 7.0, "eval_accuracy": 0.9707674493650462, "eval_f1": 0.7749154306531356, "eval_loss": 0.09951327741146088, "eval_precision": 0.7411647585863613, "eval_recall": 0.811886586695747, "eval_runtime": 67.5487, "eval_samples_per_second": 148.145, "eval_steps_per_second": 37.04, "step": 4382 }, { "epoch": 7.03, "learning_rate": 1.485623003194888e-05, "loss": 0.1147, "step": 4400 }, { "epoch": 7.19, "learning_rate": 1.4057507987220447e-05, "loss": 0.1158, "step": 4500 }, { "epoch": 7.35, "learning_rate": 1.3258785942492014e-05, "loss": 0.0993, "step": 4600 }, { "epoch": 7.51, "learning_rate": 1.2460063897763578e-05, "loss": 0.1288, "step": 4700 }, { "epoch": 7.67, "learning_rate": 1.1661341853035145e-05, "loss": 0.0874, "step": 4800 }, { "epoch": 7.83, "learning_rate": 1.086261980830671e-05, "loss": 0.105, "step": 4900 }, { "epoch": 7.99, "learning_rate": 1.0063897763578276e-05, "loss": 0.089, "step": 5000 }, { "epoch": 8.0, "eval_accuracy": 0.9760065298684535, "eval_f1": 0.8010457516339871, "eval_loss": 0.08459383249282837, "eval_precision": 0.7694625816172778, "eval_recall": 0.8353326063249727, "eval_runtime": 67.5952, "eval_samples_per_second": 148.043, "eval_steps_per_second": 37.014, "step": 5008 }, { "epoch": 8.15, "learning_rate": 9.265175718849841e-06, "loss": 0.0766, "step": 5100 }, { "epoch": 8.31, "learning_rate": 8.466453674121406e-06, "loss": 0.0929, "step": 5200 }, { "epoch": 8.47, "learning_rate": 7.66773162939297e-06, "loss": 0.089, "step": 5300 }, { "epoch": 8.63, "learning_rate": 6.869009584664538e-06, "loss": 0.0946, "step": 5400 }, { "epoch": 8.79, "learning_rate": 6.070287539936103e-06, "loss": 0.0757, "step": 5500 }, { "epoch": 8.95, "learning_rate": 5.2715654952076674e-06, "loss": 0.0923, "step": 5600 }, { "epoch": 9.0, "eval_accuracy": 0.9789108027562119, "eval_f1": 0.828852119958635, "eval_loss": 0.07430661469697952, "eval_precision": 0.788102261553589, "eval_recall": 0.8740458015267175, "eval_runtime": 67.8851, "eval_samples_per_second": 147.411, "eval_steps_per_second": 36.856, "step": 5634 }, { "epoch": 9.11, "learning_rate": 4.472843450479233e-06, "loss": 0.0578, "step": 5700 }, { "epoch": 9.27, "learning_rate": 3.6741214057507987e-06, "loss": 0.0664, "step": 5800 }, { "epoch": 9.42, "learning_rate": 2.8753993610223644e-06, "loss": 0.0616, "step": 5900 }, { "epoch": 9.58, "learning_rate": 2.0766773162939296e-06, "loss": 0.0759, "step": 6000 }, { "epoch": 9.74, "learning_rate": 1.2779552715654952e-06, "loss": 0.0769, "step": 6100 }, { "epoch": 9.9, "learning_rate": 4.792332268370607e-07, "loss": 0.0711, "step": 6200 }, { "epoch": 10.0, "eval_accuracy": 0.9819289686983922, "eval_f1": 0.8444211629125196, "eval_loss": 0.06683139503002167, "eval_precision": 0.8125, "eval_recall": 0.8789531079607416, "eval_runtime": 67.6072, "eval_samples_per_second": 148.017, "eval_steps_per_second": 37.008, "step": 6260 }, { "epoch": 10.0, "step": 6260, "total_flos": 1.307859275810304e+16, "train_loss": 0.217802461039144, "train_runtime": 2284.8138, "train_samples_per_second": 43.798, "train_steps_per_second": 2.74 } ], "max_steps": 6260, "num_train_epochs": 10, "total_flos": 1.307859275810304e+16, "trial_name": null, "trial_params": null }