|
{ |
|
"best_metric": 0.8716280849435623, |
|
"best_model_checkpoint": "best_model_big/checkpoint-2968", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 3710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1347708894878706, |
|
"grad_norm": 18.93534278869629, |
|
"learning_rate": 1.946091644204852e-05, |
|
"loss": 0.5038, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2695417789757412, |
|
"grad_norm": 7.889501571655273, |
|
"learning_rate": 1.8921832884097035e-05, |
|
"loss": 0.4015, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40431266846361186, |
|
"grad_norm": 17.194637298583984, |
|
"learning_rate": 1.8382749326145554e-05, |
|
"loss": 0.359, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5390835579514824, |
|
"grad_norm": 10.652689933776855, |
|
"learning_rate": 1.7843665768194072e-05, |
|
"loss": 0.3494, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6738544474393531, |
|
"grad_norm": 12.343999862670898, |
|
"learning_rate": 1.7304582210242588e-05, |
|
"loss": 0.357, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8086253369272237, |
|
"grad_norm": 5.575014114379883, |
|
"learning_rate": 1.6765498652291106e-05, |
|
"loss": 0.3446, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9433962264150944, |
|
"grad_norm": 10.024672508239746, |
|
"learning_rate": 1.6226415094339625e-05, |
|
"loss": 0.3307, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8470863462356444, |
|
"eval_confusion_matrix": [ |
|
[ |
|
1759, |
|
519 |
|
], |
|
[ |
|
200, |
|
2224 |
|
] |
|
], |
|
"eval_f1": 0.8608476872459842, |
|
"eval_loss": 0.3607315421104431, |
|
"eval_precision": 0.8107911046299672, |
|
"eval_recall": 0.9174917491749175, |
|
"eval_runtime": 22.8032, |
|
"eval_samples_per_second": 206.199, |
|
"eval_steps_per_second": 4.298, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.0781671159029649, |
|
"grad_norm": 8.227458000183105, |
|
"learning_rate": 1.5687331536388143e-05, |
|
"loss": 0.301, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.2129380053908356, |
|
"grad_norm": 13.138360023498535, |
|
"learning_rate": 1.5148247978436658e-05, |
|
"loss": 0.261, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3477088948787062, |
|
"grad_norm": 21.908050537109375, |
|
"learning_rate": 1.4609164420485175e-05, |
|
"loss": 0.2568, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.482479784366577, |
|
"grad_norm": 65.63346862792969, |
|
"learning_rate": 1.4070080862533696e-05, |
|
"loss": 0.2525, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6172506738544474, |
|
"grad_norm": 10.492274284362793, |
|
"learning_rate": 1.3530997304582212e-05, |
|
"loss": 0.2738, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7520215633423182, |
|
"grad_norm": 4.424431800842285, |
|
"learning_rate": 1.299191374663073e-05, |
|
"loss": 0.2612, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.8867924528301887, |
|
"grad_norm": 5.688779830932617, |
|
"learning_rate": 1.2452830188679246e-05, |
|
"loss": 0.2821, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8517652062951936, |
|
"eval_confusion_matrix": [ |
|
[ |
|
1750, |
|
528 |
|
], |
|
[ |
|
169, |
|
2255 |
|
] |
|
], |
|
"eval_f1": 0.8661417322834646, |
|
"eval_loss": 0.3994266390800476, |
|
"eval_precision": 0.8102766798418972, |
|
"eval_recall": 0.9302805280528053, |
|
"eval_runtime": 22.8205, |
|
"eval_samples_per_second": 206.043, |
|
"eval_steps_per_second": 4.294, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 2.0215633423180592, |
|
"grad_norm": 17.876014709472656, |
|
"learning_rate": 1.1913746630727763e-05, |
|
"loss": 0.2494, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1563342318059298, |
|
"grad_norm": 30.222442626953125, |
|
"learning_rate": 1.1374663072776282e-05, |
|
"loss": 0.2105, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.2911051212938007, |
|
"grad_norm": 8.314850807189941, |
|
"learning_rate": 1.0835579514824798e-05, |
|
"loss": 0.2062, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4258760107816713, |
|
"grad_norm": 14.823955535888672, |
|
"learning_rate": 1.0296495956873315e-05, |
|
"loss": 0.1949, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.560646900269542, |
|
"grad_norm": 15.158774375915527, |
|
"learning_rate": 9.757412398921834e-06, |
|
"loss": 0.2077, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.6954177897574123, |
|
"grad_norm": 3.280972719192505, |
|
"learning_rate": 9.21832884097035e-06, |
|
"loss": 0.2029, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.830188679245283, |
|
"grad_norm": 23.08829116821289, |
|
"learning_rate": 8.67924528301887e-06, |
|
"loss": 0.2147, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.964959568733154, |
|
"grad_norm": 6.822205543518066, |
|
"learning_rate": 8.140161725067386e-06, |
|
"loss": 0.2226, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8555933645257338, |
|
"eval_confusion_matrix": [ |
|
[ |
|
1752, |
|
526 |
|
], |
|
[ |
|
153, |
|
2271 |
|
] |
|
], |
|
"eval_f1": 0.8699482857690097, |
|
"eval_loss": 0.482412725687027, |
|
"eval_precision": 0.8119413657490168, |
|
"eval_recall": 0.9368811881188119, |
|
"eval_runtime": 22.8014, |
|
"eval_samples_per_second": 206.215, |
|
"eval_steps_per_second": 4.298, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 3.0997304582210243, |
|
"grad_norm": 21.149120330810547, |
|
"learning_rate": 7.601078167115904e-06, |
|
"loss": 0.1726, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.234501347708895, |
|
"grad_norm": 6.035734176635742, |
|
"learning_rate": 7.061994609164421e-06, |
|
"loss": 0.1614, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.3692722371967654, |
|
"grad_norm": 9.38839340209961, |
|
"learning_rate": 6.522911051212939e-06, |
|
"loss": 0.1648, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.5040431266846364, |
|
"grad_norm": 7.1731486320495605, |
|
"learning_rate": 5.983827493261456e-06, |
|
"loss": 0.1678, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.638814016172507, |
|
"grad_norm": 29.55657958984375, |
|
"learning_rate": 5.444743935309974e-06, |
|
"loss": 0.1492, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.7735849056603774, |
|
"grad_norm": 15.47530746459961, |
|
"learning_rate": 4.905660377358491e-06, |
|
"loss": 0.1707, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.908355795148248, |
|
"grad_norm": 8.083237648010254, |
|
"learning_rate": 4.366576819407008e-06, |
|
"loss": 0.1727, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8572947681837516, |
|
"eval_confusion_matrix": [ |
|
[ |
|
1753, |
|
525 |
|
], |
|
[ |
|
146, |
|
2278 |
|
] |
|
], |
|
"eval_f1": 0.8716280849435623, |
|
"eval_loss": 0.5570098161697388, |
|
"eval_precision": 0.8127006778451659, |
|
"eval_recall": 0.9397689768976898, |
|
"eval_runtime": 22.7535, |
|
"eval_samples_per_second": 206.649, |
|
"eval_steps_per_second": 4.307, |
|
"step": 2968 |
|
}, |
|
{ |
|
"epoch": 4.0431266846361185, |
|
"grad_norm": 8.28526782989502, |
|
"learning_rate": 3.827493261455526e-06, |
|
"loss": 0.155, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.177897574123989, |
|
"grad_norm": 8.550823211669922, |
|
"learning_rate": 3.2884097035040433e-06, |
|
"loss": 0.1385, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.3126684636118595, |
|
"grad_norm": 7.248845100402832, |
|
"learning_rate": 2.749326145552561e-06, |
|
"loss": 0.1245, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.44743935309973, |
|
"grad_norm": 4.223452091217041, |
|
"learning_rate": 2.2102425876010783e-06, |
|
"loss": 0.1316, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.5822102425876015, |
|
"grad_norm": 26.39322853088379, |
|
"learning_rate": 1.6711590296495958e-06, |
|
"loss": 0.1347, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.716981132075472, |
|
"grad_norm": 9.451475143432617, |
|
"learning_rate": 1.1320754716981133e-06, |
|
"loss": 0.1373, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.8517520215633425, |
|
"grad_norm": 7.989397048950195, |
|
"learning_rate": 5.929919137466308e-07, |
|
"loss": 0.127, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.986522911051213, |
|
"grad_norm": 8.661871910095215, |
|
"learning_rate": 5.3908355795148254e-08, |
|
"loss": 0.1288, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8538919608677159, |
|
"eval_confusion_matrix": [ |
|
[ |
|
1721, |
|
557 |
|
], |
|
[ |
|
130, |
|
2294 |
|
] |
|
], |
|
"eval_f1": 0.8697630331753554, |
|
"eval_loss": 0.6569812893867493, |
|
"eval_precision": 0.8046299544019643, |
|
"eval_recall": 0.9463696369636964, |
|
"eval_runtime": 22.4622, |
|
"eval_samples_per_second": 209.329, |
|
"eval_steps_per_second": 4.363, |
|
"step": 3710 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 3710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8848225720991744e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|