{ "best_metric": 0.8716280849435623, "best_model_checkpoint": "best_model_big/checkpoint-2968", "epoch": 5.0, "eval_steps": 500, "global_step": 3710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1347708894878706, "grad_norm": 18.93534278869629, "learning_rate": 1.946091644204852e-05, "loss": 0.5038, "step": 100 }, { "epoch": 0.2695417789757412, "grad_norm": 7.889501571655273, "learning_rate": 1.8921832884097035e-05, "loss": 0.4015, "step": 200 }, { "epoch": 0.40431266846361186, "grad_norm": 17.194637298583984, "learning_rate": 1.8382749326145554e-05, "loss": 0.359, "step": 300 }, { "epoch": 0.5390835579514824, "grad_norm": 10.652689933776855, "learning_rate": 1.7843665768194072e-05, "loss": 0.3494, "step": 400 }, { "epoch": 0.6738544474393531, "grad_norm": 12.343999862670898, "learning_rate": 1.7304582210242588e-05, "loss": 0.357, "step": 500 }, { "epoch": 0.8086253369272237, "grad_norm": 5.575014114379883, "learning_rate": 1.6765498652291106e-05, "loss": 0.3446, "step": 600 }, { "epoch": 0.9433962264150944, "grad_norm": 10.024672508239746, "learning_rate": 1.6226415094339625e-05, "loss": 0.3307, "step": 700 }, { "epoch": 1.0, "eval_accuracy": 0.8470863462356444, "eval_confusion_matrix": [ [ 1759, 519 ], [ 200, 2224 ] ], "eval_f1": 0.8608476872459842, "eval_loss": 0.3607315421104431, "eval_precision": 0.8107911046299672, "eval_recall": 0.9174917491749175, "eval_runtime": 22.8032, "eval_samples_per_second": 206.199, "eval_steps_per_second": 4.298, "step": 742 }, { "epoch": 1.0781671159029649, "grad_norm": 8.227458000183105, "learning_rate": 1.5687331536388143e-05, "loss": 0.301, "step": 800 }, { "epoch": 1.2129380053908356, "grad_norm": 13.138360023498535, "learning_rate": 1.5148247978436658e-05, "loss": 0.261, "step": 900 }, { "epoch": 1.3477088948787062, "grad_norm": 21.908050537109375, "learning_rate": 1.4609164420485175e-05, "loss": 0.2568, "step": 1000 }, { "epoch": 1.482479784366577, "grad_norm": 65.63346862792969, "learning_rate": 1.4070080862533696e-05, "loss": 0.2525, "step": 1100 }, { "epoch": 1.6172506738544474, "grad_norm": 10.492274284362793, "learning_rate": 1.3530997304582212e-05, "loss": 0.2738, "step": 1200 }, { "epoch": 1.7520215633423182, "grad_norm": 4.424431800842285, "learning_rate": 1.299191374663073e-05, "loss": 0.2612, "step": 1300 }, { "epoch": 1.8867924528301887, "grad_norm": 5.688779830932617, "learning_rate": 1.2452830188679246e-05, "loss": 0.2821, "step": 1400 }, { "epoch": 2.0, "eval_accuracy": 0.8517652062951936, "eval_confusion_matrix": [ [ 1750, 528 ], [ 169, 2255 ] ], "eval_f1": 0.8661417322834646, "eval_loss": 0.3994266390800476, "eval_precision": 0.8102766798418972, "eval_recall": 0.9302805280528053, "eval_runtime": 22.8205, "eval_samples_per_second": 206.043, "eval_steps_per_second": 4.294, "step": 1484 }, { "epoch": 2.0215633423180592, "grad_norm": 17.876014709472656, "learning_rate": 1.1913746630727763e-05, "loss": 0.2494, "step": 1500 }, { "epoch": 2.1563342318059298, "grad_norm": 30.222442626953125, "learning_rate": 1.1374663072776282e-05, "loss": 0.2105, "step": 1600 }, { "epoch": 2.2911051212938007, "grad_norm": 8.314850807189941, "learning_rate": 1.0835579514824798e-05, "loss": 0.2062, "step": 1700 }, { "epoch": 2.4258760107816713, "grad_norm": 14.823955535888672, "learning_rate": 1.0296495956873315e-05, "loss": 0.1949, "step": 1800 }, { "epoch": 2.560646900269542, "grad_norm": 15.158774375915527, "learning_rate": 9.757412398921834e-06, "loss": 0.2077, "step": 1900 }, { "epoch": 2.6954177897574123, "grad_norm": 3.280972719192505, "learning_rate": 9.21832884097035e-06, "loss": 0.2029, "step": 2000 }, { "epoch": 2.830188679245283, "grad_norm": 23.08829116821289, "learning_rate": 8.67924528301887e-06, "loss": 0.2147, "step": 2100 }, { "epoch": 2.964959568733154, "grad_norm": 6.822205543518066, "learning_rate": 8.140161725067386e-06, "loss": 0.2226, "step": 2200 }, { "epoch": 3.0, "eval_accuracy": 0.8555933645257338, "eval_confusion_matrix": [ [ 1752, 526 ], [ 153, 2271 ] ], "eval_f1": 0.8699482857690097, "eval_loss": 0.482412725687027, "eval_precision": 0.8119413657490168, "eval_recall": 0.9368811881188119, "eval_runtime": 22.8014, "eval_samples_per_second": 206.215, "eval_steps_per_second": 4.298, "step": 2226 }, { "epoch": 3.0997304582210243, "grad_norm": 21.149120330810547, "learning_rate": 7.601078167115904e-06, "loss": 0.1726, "step": 2300 }, { "epoch": 3.234501347708895, "grad_norm": 6.035734176635742, "learning_rate": 7.061994609164421e-06, "loss": 0.1614, "step": 2400 }, { "epoch": 3.3692722371967654, "grad_norm": 9.38839340209961, "learning_rate": 6.522911051212939e-06, "loss": 0.1648, "step": 2500 }, { "epoch": 3.5040431266846364, "grad_norm": 7.1731486320495605, "learning_rate": 5.983827493261456e-06, "loss": 0.1678, "step": 2600 }, { "epoch": 3.638814016172507, "grad_norm": 29.55657958984375, "learning_rate": 5.444743935309974e-06, "loss": 0.1492, "step": 2700 }, { "epoch": 3.7735849056603774, "grad_norm": 15.47530746459961, "learning_rate": 4.905660377358491e-06, "loss": 0.1707, "step": 2800 }, { "epoch": 3.908355795148248, "grad_norm": 8.083237648010254, "learning_rate": 4.366576819407008e-06, "loss": 0.1727, "step": 2900 }, { "epoch": 4.0, "eval_accuracy": 0.8572947681837516, "eval_confusion_matrix": [ [ 1753, 525 ], [ 146, 2278 ] ], "eval_f1": 0.8716280849435623, "eval_loss": 0.5570098161697388, "eval_precision": 0.8127006778451659, "eval_recall": 0.9397689768976898, "eval_runtime": 22.7535, "eval_samples_per_second": 206.649, "eval_steps_per_second": 4.307, "step": 2968 }, { "epoch": 4.0431266846361185, "grad_norm": 8.28526782989502, "learning_rate": 3.827493261455526e-06, "loss": 0.155, "step": 3000 }, { "epoch": 4.177897574123989, "grad_norm": 8.550823211669922, "learning_rate": 3.2884097035040433e-06, "loss": 0.1385, "step": 3100 }, { "epoch": 4.3126684636118595, "grad_norm": 7.248845100402832, "learning_rate": 2.749326145552561e-06, "loss": 0.1245, "step": 3200 }, { "epoch": 4.44743935309973, "grad_norm": 4.223452091217041, "learning_rate": 2.2102425876010783e-06, "loss": 0.1316, "step": 3300 }, { "epoch": 4.5822102425876015, "grad_norm": 26.39322853088379, "learning_rate": 1.6711590296495958e-06, "loss": 0.1347, "step": 3400 }, { "epoch": 4.716981132075472, "grad_norm": 9.451475143432617, "learning_rate": 1.1320754716981133e-06, "loss": 0.1373, "step": 3500 }, { "epoch": 4.8517520215633425, "grad_norm": 7.989397048950195, "learning_rate": 5.929919137466308e-07, "loss": 0.127, "step": 3600 }, { "epoch": 4.986522911051213, "grad_norm": 8.661871910095215, "learning_rate": 5.3908355795148254e-08, "loss": 0.1288, "step": 3700 }, { "epoch": 5.0, "eval_accuracy": 0.8538919608677159, "eval_confusion_matrix": [ [ 1721, 557 ], [ 130, 2294 ] ], "eval_f1": 0.8697630331753554, "eval_loss": 0.6569812893867493, "eval_precision": 0.8046299544019643, "eval_recall": 0.9463696369636964, "eval_runtime": 22.4622, "eval_samples_per_second": 209.329, "eval_steps_per_second": 4.363, "step": 3710 } ], "logging_steps": 100, "max_steps": 3710, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.8848225720991744e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }