bert-model / trainer_state.json
AnnLo's picture
Upload folder using huggingface_hub
f85878e verified
{
"best_metric": 0.8716280849435623,
"best_model_checkpoint": "best_model_big/checkpoint-2968",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 3710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1347708894878706,
"grad_norm": 18.93534278869629,
"learning_rate": 1.946091644204852e-05,
"loss": 0.5038,
"step": 100
},
{
"epoch": 0.2695417789757412,
"grad_norm": 7.889501571655273,
"learning_rate": 1.8921832884097035e-05,
"loss": 0.4015,
"step": 200
},
{
"epoch": 0.40431266846361186,
"grad_norm": 17.194637298583984,
"learning_rate": 1.8382749326145554e-05,
"loss": 0.359,
"step": 300
},
{
"epoch": 0.5390835579514824,
"grad_norm": 10.652689933776855,
"learning_rate": 1.7843665768194072e-05,
"loss": 0.3494,
"step": 400
},
{
"epoch": 0.6738544474393531,
"grad_norm": 12.343999862670898,
"learning_rate": 1.7304582210242588e-05,
"loss": 0.357,
"step": 500
},
{
"epoch": 0.8086253369272237,
"grad_norm": 5.575014114379883,
"learning_rate": 1.6765498652291106e-05,
"loss": 0.3446,
"step": 600
},
{
"epoch": 0.9433962264150944,
"grad_norm": 10.024672508239746,
"learning_rate": 1.6226415094339625e-05,
"loss": 0.3307,
"step": 700
},
{
"epoch": 1.0,
"eval_accuracy": 0.8470863462356444,
"eval_confusion_matrix": [
[
1759,
519
],
[
200,
2224
]
],
"eval_f1": 0.8608476872459842,
"eval_loss": 0.3607315421104431,
"eval_precision": 0.8107911046299672,
"eval_recall": 0.9174917491749175,
"eval_runtime": 22.8032,
"eval_samples_per_second": 206.199,
"eval_steps_per_second": 4.298,
"step": 742
},
{
"epoch": 1.0781671159029649,
"grad_norm": 8.227458000183105,
"learning_rate": 1.5687331536388143e-05,
"loss": 0.301,
"step": 800
},
{
"epoch": 1.2129380053908356,
"grad_norm": 13.138360023498535,
"learning_rate": 1.5148247978436658e-05,
"loss": 0.261,
"step": 900
},
{
"epoch": 1.3477088948787062,
"grad_norm": 21.908050537109375,
"learning_rate": 1.4609164420485175e-05,
"loss": 0.2568,
"step": 1000
},
{
"epoch": 1.482479784366577,
"grad_norm": 65.63346862792969,
"learning_rate": 1.4070080862533696e-05,
"loss": 0.2525,
"step": 1100
},
{
"epoch": 1.6172506738544474,
"grad_norm": 10.492274284362793,
"learning_rate": 1.3530997304582212e-05,
"loss": 0.2738,
"step": 1200
},
{
"epoch": 1.7520215633423182,
"grad_norm": 4.424431800842285,
"learning_rate": 1.299191374663073e-05,
"loss": 0.2612,
"step": 1300
},
{
"epoch": 1.8867924528301887,
"grad_norm": 5.688779830932617,
"learning_rate": 1.2452830188679246e-05,
"loss": 0.2821,
"step": 1400
},
{
"epoch": 2.0,
"eval_accuracy": 0.8517652062951936,
"eval_confusion_matrix": [
[
1750,
528
],
[
169,
2255
]
],
"eval_f1": 0.8661417322834646,
"eval_loss": 0.3994266390800476,
"eval_precision": 0.8102766798418972,
"eval_recall": 0.9302805280528053,
"eval_runtime": 22.8205,
"eval_samples_per_second": 206.043,
"eval_steps_per_second": 4.294,
"step": 1484
},
{
"epoch": 2.0215633423180592,
"grad_norm": 17.876014709472656,
"learning_rate": 1.1913746630727763e-05,
"loss": 0.2494,
"step": 1500
},
{
"epoch": 2.1563342318059298,
"grad_norm": 30.222442626953125,
"learning_rate": 1.1374663072776282e-05,
"loss": 0.2105,
"step": 1600
},
{
"epoch": 2.2911051212938007,
"grad_norm": 8.314850807189941,
"learning_rate": 1.0835579514824798e-05,
"loss": 0.2062,
"step": 1700
},
{
"epoch": 2.4258760107816713,
"grad_norm": 14.823955535888672,
"learning_rate": 1.0296495956873315e-05,
"loss": 0.1949,
"step": 1800
},
{
"epoch": 2.560646900269542,
"grad_norm": 15.158774375915527,
"learning_rate": 9.757412398921834e-06,
"loss": 0.2077,
"step": 1900
},
{
"epoch": 2.6954177897574123,
"grad_norm": 3.280972719192505,
"learning_rate": 9.21832884097035e-06,
"loss": 0.2029,
"step": 2000
},
{
"epoch": 2.830188679245283,
"grad_norm": 23.08829116821289,
"learning_rate": 8.67924528301887e-06,
"loss": 0.2147,
"step": 2100
},
{
"epoch": 2.964959568733154,
"grad_norm": 6.822205543518066,
"learning_rate": 8.140161725067386e-06,
"loss": 0.2226,
"step": 2200
},
{
"epoch": 3.0,
"eval_accuracy": 0.8555933645257338,
"eval_confusion_matrix": [
[
1752,
526
],
[
153,
2271
]
],
"eval_f1": 0.8699482857690097,
"eval_loss": 0.482412725687027,
"eval_precision": 0.8119413657490168,
"eval_recall": 0.9368811881188119,
"eval_runtime": 22.8014,
"eval_samples_per_second": 206.215,
"eval_steps_per_second": 4.298,
"step": 2226
},
{
"epoch": 3.0997304582210243,
"grad_norm": 21.149120330810547,
"learning_rate": 7.601078167115904e-06,
"loss": 0.1726,
"step": 2300
},
{
"epoch": 3.234501347708895,
"grad_norm": 6.035734176635742,
"learning_rate": 7.061994609164421e-06,
"loss": 0.1614,
"step": 2400
},
{
"epoch": 3.3692722371967654,
"grad_norm": 9.38839340209961,
"learning_rate": 6.522911051212939e-06,
"loss": 0.1648,
"step": 2500
},
{
"epoch": 3.5040431266846364,
"grad_norm": 7.1731486320495605,
"learning_rate": 5.983827493261456e-06,
"loss": 0.1678,
"step": 2600
},
{
"epoch": 3.638814016172507,
"grad_norm": 29.55657958984375,
"learning_rate": 5.444743935309974e-06,
"loss": 0.1492,
"step": 2700
},
{
"epoch": 3.7735849056603774,
"grad_norm": 15.47530746459961,
"learning_rate": 4.905660377358491e-06,
"loss": 0.1707,
"step": 2800
},
{
"epoch": 3.908355795148248,
"grad_norm": 8.083237648010254,
"learning_rate": 4.366576819407008e-06,
"loss": 0.1727,
"step": 2900
},
{
"epoch": 4.0,
"eval_accuracy": 0.8572947681837516,
"eval_confusion_matrix": [
[
1753,
525
],
[
146,
2278
]
],
"eval_f1": 0.8716280849435623,
"eval_loss": 0.5570098161697388,
"eval_precision": 0.8127006778451659,
"eval_recall": 0.9397689768976898,
"eval_runtime": 22.7535,
"eval_samples_per_second": 206.649,
"eval_steps_per_second": 4.307,
"step": 2968
},
{
"epoch": 4.0431266846361185,
"grad_norm": 8.28526782989502,
"learning_rate": 3.827493261455526e-06,
"loss": 0.155,
"step": 3000
},
{
"epoch": 4.177897574123989,
"grad_norm": 8.550823211669922,
"learning_rate": 3.2884097035040433e-06,
"loss": 0.1385,
"step": 3100
},
{
"epoch": 4.3126684636118595,
"grad_norm": 7.248845100402832,
"learning_rate": 2.749326145552561e-06,
"loss": 0.1245,
"step": 3200
},
{
"epoch": 4.44743935309973,
"grad_norm": 4.223452091217041,
"learning_rate": 2.2102425876010783e-06,
"loss": 0.1316,
"step": 3300
},
{
"epoch": 4.5822102425876015,
"grad_norm": 26.39322853088379,
"learning_rate": 1.6711590296495958e-06,
"loss": 0.1347,
"step": 3400
},
{
"epoch": 4.716981132075472,
"grad_norm": 9.451475143432617,
"learning_rate": 1.1320754716981133e-06,
"loss": 0.1373,
"step": 3500
},
{
"epoch": 4.8517520215633425,
"grad_norm": 7.989397048950195,
"learning_rate": 5.929919137466308e-07,
"loss": 0.127,
"step": 3600
},
{
"epoch": 4.986522911051213,
"grad_norm": 8.661871910095215,
"learning_rate": 5.3908355795148254e-08,
"loss": 0.1288,
"step": 3700
},
{
"epoch": 5.0,
"eval_accuracy": 0.8538919608677159,
"eval_confusion_matrix": [
[
1721,
557
],
[
130,
2294
]
],
"eval_f1": 0.8697630331753554,
"eval_loss": 0.6569812893867493,
"eval_precision": 0.8046299544019643,
"eval_recall": 0.9463696369636964,
"eval_runtime": 22.4622,
"eval_samples_per_second": 209.329,
"eval_steps_per_second": 4.363,
"step": 3710
}
],
"logging_steps": 100,
"max_steps": 3710,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.8848225720991744e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}