camembertv2-base-ftb-ner / trainer_state.json
wissamantoun's picture
Upload folder using huggingface_hub
e43f405 verified
{
"best_metric": 0.935483870967742,
"best_model_checkpoint": "/scratch/camembertv2/runs/results/ftb_ner/camembertv2-base-bf16-p2-17000/max_seq_length-192-gradient_accumulation_steps-2-precision-fp32-learning_rate-5.000000000000001e-05-epochs-8-lr_scheduler-linear-warmup_steps-0.1/SEED-1337/checkpoint-4326",
"epoch": 8.0,
"eval_steps": 500,
"global_step": 4944,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16181229773462782,
"grad_norm": 9.89955997467041,
"learning_rate": 1.0101010101010103e-05,
"loss": 1.8738,
"step": 100
},
{
"epoch": 0.32362459546925565,
"grad_norm": 2.3764805793762207,
"learning_rate": 2.0202020202020206e-05,
"loss": 0.6979,
"step": 200
},
{
"epoch": 0.4854368932038835,
"grad_norm": 1.3664543628692627,
"learning_rate": 3.030303030303031e-05,
"loss": 0.5111,
"step": 300
},
{
"epoch": 0.6472491909385113,
"grad_norm": 0.6372264623641968,
"learning_rate": 4.040404040404041e-05,
"loss": 0.2666,
"step": 400
},
{
"epoch": 0.8090614886731392,
"grad_norm": 0.5098221302032471,
"learning_rate": 4.9943807597212865e-05,
"loss": 0.1199,
"step": 500
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.5974541902542114,
"learning_rate": 4.8819959541470004e-05,
"loss": 0.0775,
"step": 600
},
{
"epoch": 1.0,
"eval_accuracy": 0.9852635038895584,
"eval_f1": 0.7820512820512822,
"eval_loss": 0.0750068947672844,
"eval_precision": 0.7514492753623189,
"eval_recall": 0.815251572327044,
"eval_runtime": 3.2799,
"eval_samples_per_second": 376.537,
"eval_steps_per_second": 47.258,
"step": 618
},
{
"epoch": 1.132686084142395,
"grad_norm": 0.15989889204502106,
"learning_rate": 4.7696111485727136e-05,
"loss": 0.0648,
"step": 700
},
{
"epoch": 1.2944983818770226,
"grad_norm": 0.28292131423950195,
"learning_rate": 4.6572263429984275e-05,
"loss": 0.0555,
"step": 800
},
{
"epoch": 1.4563106796116505,
"grad_norm": 0.09367953985929489,
"learning_rate": 4.544841537424141e-05,
"loss": 0.0485,
"step": 900
},
{
"epoch": 1.6181229773462782,
"grad_norm": 0.3826428949832916,
"learning_rate": 4.4324567318498546e-05,
"loss": 0.0401,
"step": 1000
},
{
"epoch": 1.779935275080906,
"grad_norm": 0.18068315088748932,
"learning_rate": 4.3200719262755685e-05,
"loss": 0.0369,
"step": 1100
},
{
"epoch": 1.941747572815534,
"grad_norm": 0.23946309089660645,
"learning_rate": 4.207687120701282e-05,
"loss": 0.0387,
"step": 1200
},
{
"epoch": 2.0,
"eval_accuracy": 0.9903582776377781,
"eval_f1": 0.8799067236688691,
"eval_loss": 0.04682581126689911,
"eval_precision": 0.8700999231360492,
"eval_recall": 0.889937106918239,
"eval_runtime": 2.8072,
"eval_samples_per_second": 439.943,
"eval_steps_per_second": 55.215,
"step": 1236
},
{
"epoch": 2.103559870550162,
"grad_norm": 0.8596442937850952,
"learning_rate": 4.0953023151269956e-05,
"loss": 0.0285,
"step": 1300
},
{
"epoch": 2.26537216828479,
"grad_norm": 0.03754520043730736,
"learning_rate": 3.9829175095527095e-05,
"loss": 0.0322,
"step": 1400
},
{
"epoch": 2.4271844660194173,
"grad_norm": 0.6684575080871582,
"learning_rate": 3.870532703978423e-05,
"loss": 0.023,
"step": 1500
},
{
"epoch": 2.588996763754045,
"grad_norm": 0.03833441436290741,
"learning_rate": 3.758147898404136e-05,
"loss": 0.0268,
"step": 1600
},
{
"epoch": 2.750809061488673,
"grad_norm": 0.3890291452407837,
"learning_rate": 3.6457630928298505e-05,
"loss": 0.0217,
"step": 1700
},
{
"epoch": 2.912621359223301,
"grad_norm": 0.4564450681209564,
"learning_rate": 3.533378287255564e-05,
"loss": 0.0295,
"step": 1800
},
{
"epoch": 3.0,
"eval_accuracy": 0.9906869727183083,
"eval_f1": 0.8855799373040752,
"eval_loss": 0.039505813270807266,
"eval_precision": 0.8828125,
"eval_recall": 0.8883647798742138,
"eval_runtime": 2.8133,
"eval_samples_per_second": 438.979,
"eval_steps_per_second": 55.095,
"step": 1854
},
{
"epoch": 3.074433656957929,
"grad_norm": 0.027059998363256454,
"learning_rate": 3.420993481681277e-05,
"loss": 0.0166,
"step": 1900
},
{
"epoch": 3.236245954692557,
"grad_norm": 0.030333412811160088,
"learning_rate": 3.308608676106991e-05,
"loss": 0.0174,
"step": 2000
},
{
"epoch": 3.3980582524271843,
"grad_norm": 0.13804250955581665,
"learning_rate": 3.196223870532705e-05,
"loss": 0.0153,
"step": 2100
},
{
"epoch": 3.559870550161812,
"grad_norm": 0.2849176824092865,
"learning_rate": 3.083839064958418e-05,
"loss": 0.0152,
"step": 2200
},
{
"epoch": 3.72168284789644,
"grad_norm": 0.14825651049613953,
"learning_rate": 2.971454259384132e-05,
"loss": 0.0171,
"step": 2300
},
{
"epoch": 3.883495145631068,
"grad_norm": 0.045380860567092896,
"learning_rate": 2.8590694538098453e-05,
"loss": 0.0255,
"step": 2400
},
{
"epoch": 4.0,
"eval_accuracy": 0.9920565355538512,
"eval_f1": 0.8999999999999999,
"eval_loss": 0.03599809855222702,
"eval_precision": 0.9014195583596214,
"eval_recall": 0.8985849056603774,
"eval_runtime": 2.8186,
"eval_samples_per_second": 438.161,
"eval_steps_per_second": 54.992,
"step": 2472
},
{
"epoch": 4.0453074433656955,
"grad_norm": 0.5658661723136902,
"learning_rate": 2.746684648235559e-05,
"loss": 0.0228,
"step": 2500
},
{
"epoch": 4.207119741100324,
"grad_norm": 0.11415175348520279,
"learning_rate": 2.6342998426612728e-05,
"loss": 0.0162,
"step": 2600
},
{
"epoch": 4.368932038834951,
"grad_norm": 0.1993759125471115,
"learning_rate": 2.5219150370869863e-05,
"loss": 0.0135,
"step": 2700
},
{
"epoch": 4.53074433656958,
"grad_norm": 0.11497118324041367,
"learning_rate": 2.4095302315127e-05,
"loss": 0.0159,
"step": 2800
},
{
"epoch": 4.692556634304207,
"grad_norm": 0.2147281914949417,
"learning_rate": 2.2971454259384134e-05,
"loss": 0.0156,
"step": 2900
},
{
"epoch": 4.854368932038835,
"grad_norm": 0.1083710715174675,
"learning_rate": 2.1847606203641273e-05,
"loss": 0.0094,
"step": 3000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9922756656075381,
"eval_f1": 0.9050980392156862,
"eval_loss": 0.03369523212313652,
"eval_precision": 0.9029733959311425,
"eval_recall": 0.9072327044025157,
"eval_runtime": 2.8037,
"eval_samples_per_second": 440.494,
"eval_steps_per_second": 55.285,
"step": 3090
},
{
"epoch": 5.016181229773463,
"grad_norm": 0.013677417300641537,
"learning_rate": 2.072375814789841e-05,
"loss": 0.016,
"step": 3100
},
{
"epoch": 5.17799352750809,
"grad_norm": 0.08207657188177109,
"learning_rate": 1.9599910092155544e-05,
"loss": 0.0133,
"step": 3200
},
{
"epoch": 5.339805825242719,
"grad_norm": 0.02103651873767376,
"learning_rate": 1.847606203641268e-05,
"loss": 0.0092,
"step": 3300
},
{
"epoch": 5.501618122977346,
"grad_norm": 1.4357458353042603,
"learning_rate": 1.735221398066982e-05,
"loss": 0.0122,
"step": 3400
},
{
"epoch": 5.663430420711974,
"grad_norm": 0.16999904811382294,
"learning_rate": 1.622836592492695e-05,
"loss": 0.0086,
"step": 3500
},
{
"epoch": 5.825242718446602,
"grad_norm": 0.09043747931718826,
"learning_rate": 1.510451786918409e-05,
"loss": 0.0093,
"step": 3600
},
{
"epoch": 5.9870550161812295,
"grad_norm": 0.06608462333679199,
"learning_rate": 1.3980669813441227e-05,
"loss": 0.0067,
"step": 3700
},
{
"epoch": 6.0,
"eval_accuracy": 0.9932617508491289,
"eval_f1": 0.9301960784313724,
"eval_loss": 0.033360060304403305,
"eval_precision": 0.9280125195618153,
"eval_recall": 0.9323899371069182,
"eval_runtime": 2.8189,
"eval_samples_per_second": 438.116,
"eval_steps_per_second": 54.986,
"step": 3708
},
{
"epoch": 6.148867313915858,
"grad_norm": 0.2284722775220871,
"learning_rate": 1.285682175769836e-05,
"loss": 0.0107,
"step": 3800
},
{
"epoch": 6.310679611650485,
"grad_norm": 0.02673812210559845,
"learning_rate": 1.1732973701955498e-05,
"loss": 0.0052,
"step": 3900
},
{
"epoch": 6.472491909385114,
"grad_norm": 0.33707210421562195,
"learning_rate": 1.0609125646212633e-05,
"loss": 0.0072,
"step": 4000
},
{
"epoch": 6.634304207119741,
"grad_norm": 0.0059865182265639305,
"learning_rate": 9.48527759046977e-06,
"loss": 0.0049,
"step": 4100
},
{
"epoch": 6.796116504854369,
"grad_norm": 0.2759881615638733,
"learning_rate": 8.361429534726907e-06,
"loss": 0.016,
"step": 4200
},
{
"epoch": 6.957928802588997,
"grad_norm": 0.18257270753383636,
"learning_rate": 7.237581478984042e-06,
"loss": 0.0069,
"step": 4300
},
{
"epoch": 7.0,
"eval_accuracy": 0.9937000109565027,
"eval_f1": 0.935483870967742,
"eval_loss": 0.0347304567694664,
"eval_precision": 0.9362204724409449,
"eval_recall": 0.934748427672956,
"eval_runtime": 2.8106,
"eval_samples_per_second": 439.402,
"eval_steps_per_second": 55.148,
"step": 4326
},
{
"epoch": 7.119741100323624,
"grad_norm": 0.007623529061675072,
"learning_rate": 6.113733423241179e-06,
"loss": 0.0046,
"step": 4400
},
{
"epoch": 7.281553398058253,
"grad_norm": 0.043167050927877426,
"learning_rate": 4.989885367498316e-06,
"loss": 0.009,
"step": 4500
},
{
"epoch": 7.44336569579288,
"grad_norm": 0.009674232453107834,
"learning_rate": 3.866037311755451e-06,
"loss": 0.0046,
"step": 4600
},
{
"epoch": 7.605177993527509,
"grad_norm": 0.05575043708086014,
"learning_rate": 2.742189256012588e-06,
"loss": 0.0052,
"step": 4700
},
{
"epoch": 7.766990291262136,
"grad_norm": 0.006715767551213503,
"learning_rate": 1.6183412002697239e-06,
"loss": 0.0044,
"step": 4800
},
{
"epoch": 7.9288025889967635,
"grad_norm": 0.009280543774366379,
"learning_rate": 4.9449314452686e-07,
"loss": 0.0054,
"step": 4900
},
{
"epoch": 8.0,
"eval_accuracy": 0.9936726196997918,
"eval_f1": 0.93401413982718,
"eval_loss": 0.03279910609126091,
"eval_precision": 0.9332810047095761,
"eval_recall": 0.934748427672956,
"eval_runtime": 2.829,
"eval_samples_per_second": 436.551,
"eval_steps_per_second": 54.79,
"step": 4944
},
{
"epoch": 8.0,
"step": 4944,
"total_flos": 2833132740217920.0,
"train_loss": 0.08807948804957774,
"train_runtime": 679.3683,
"train_samples_per_second": 116.355,
"train_steps_per_second": 7.277
}
],
"logging_steps": 100,
"max_steps": 4944,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2833132740217920.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}