|
{ |
|
"best_metric": 0.935483870967742, |
|
"best_model_checkpoint": "/scratch/camembertv2/runs/results/ftb_ner/camembertv2-base-bf16-p2-17000/max_seq_length-192-gradient_accumulation_steps-2-precision-fp32-learning_rate-5.000000000000001e-05-epochs-8-lr_scheduler-linear-warmup_steps-0.1/SEED-1337/checkpoint-4326", |
|
"epoch": 8.0, |
|
"eval_steps": 500, |
|
"global_step": 4944, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16181229773462782, |
|
"grad_norm": 9.89955997467041, |
|
"learning_rate": 1.0101010101010103e-05, |
|
"loss": 1.8738, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32362459546925565, |
|
"grad_norm": 2.3764805793762207, |
|
"learning_rate": 2.0202020202020206e-05, |
|
"loss": 0.6979, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4854368932038835, |
|
"grad_norm": 1.3664543628692627, |
|
"learning_rate": 3.030303030303031e-05, |
|
"loss": 0.5111, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6472491909385113, |
|
"grad_norm": 0.6372264623641968, |
|
"learning_rate": 4.040404040404041e-05, |
|
"loss": 0.2666, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8090614886731392, |
|
"grad_norm": 0.5098221302032471, |
|
"learning_rate": 4.9943807597212865e-05, |
|
"loss": 0.1199, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.970873786407767, |
|
"grad_norm": 0.5974541902542114, |
|
"learning_rate": 4.8819959541470004e-05, |
|
"loss": 0.0775, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9852635038895584, |
|
"eval_f1": 0.7820512820512822, |
|
"eval_loss": 0.0750068947672844, |
|
"eval_precision": 0.7514492753623189, |
|
"eval_recall": 0.815251572327044, |
|
"eval_runtime": 3.2799, |
|
"eval_samples_per_second": 376.537, |
|
"eval_steps_per_second": 47.258, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.132686084142395, |
|
"grad_norm": 0.15989889204502106, |
|
"learning_rate": 4.7696111485727136e-05, |
|
"loss": 0.0648, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2944983818770226, |
|
"grad_norm": 0.28292131423950195, |
|
"learning_rate": 4.6572263429984275e-05, |
|
"loss": 0.0555, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.4563106796116505, |
|
"grad_norm": 0.09367953985929489, |
|
"learning_rate": 4.544841537424141e-05, |
|
"loss": 0.0485, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.6181229773462782, |
|
"grad_norm": 0.3826428949832916, |
|
"learning_rate": 4.4324567318498546e-05, |
|
"loss": 0.0401, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.779935275080906, |
|
"grad_norm": 0.18068315088748932, |
|
"learning_rate": 4.3200719262755685e-05, |
|
"loss": 0.0369, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.941747572815534, |
|
"grad_norm": 0.23946309089660645, |
|
"learning_rate": 4.207687120701282e-05, |
|
"loss": 0.0387, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9903582776377781, |
|
"eval_f1": 0.8799067236688691, |
|
"eval_loss": 0.04682581126689911, |
|
"eval_precision": 0.8700999231360492, |
|
"eval_recall": 0.889937106918239, |
|
"eval_runtime": 2.8072, |
|
"eval_samples_per_second": 439.943, |
|
"eval_steps_per_second": 55.215, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.103559870550162, |
|
"grad_norm": 0.8596442937850952, |
|
"learning_rate": 4.0953023151269956e-05, |
|
"loss": 0.0285, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.26537216828479, |
|
"grad_norm": 0.03754520043730736, |
|
"learning_rate": 3.9829175095527095e-05, |
|
"loss": 0.0322, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4271844660194173, |
|
"grad_norm": 0.6684575080871582, |
|
"learning_rate": 3.870532703978423e-05, |
|
"loss": 0.023, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.588996763754045, |
|
"grad_norm": 0.03833441436290741, |
|
"learning_rate": 3.758147898404136e-05, |
|
"loss": 0.0268, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.750809061488673, |
|
"grad_norm": 0.3890291452407837, |
|
"learning_rate": 3.6457630928298505e-05, |
|
"loss": 0.0217, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.912621359223301, |
|
"grad_norm": 0.4564450681209564, |
|
"learning_rate": 3.533378287255564e-05, |
|
"loss": 0.0295, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9906869727183083, |
|
"eval_f1": 0.8855799373040752, |
|
"eval_loss": 0.039505813270807266, |
|
"eval_precision": 0.8828125, |
|
"eval_recall": 0.8883647798742138, |
|
"eval_runtime": 2.8133, |
|
"eval_samples_per_second": 438.979, |
|
"eval_steps_per_second": 55.095, |
|
"step": 1854 |
|
}, |
|
{ |
|
"epoch": 3.074433656957929, |
|
"grad_norm": 0.027059998363256454, |
|
"learning_rate": 3.420993481681277e-05, |
|
"loss": 0.0166, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.236245954692557, |
|
"grad_norm": 0.030333412811160088, |
|
"learning_rate": 3.308608676106991e-05, |
|
"loss": 0.0174, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.3980582524271843, |
|
"grad_norm": 0.13804250955581665, |
|
"learning_rate": 3.196223870532705e-05, |
|
"loss": 0.0153, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.559870550161812, |
|
"grad_norm": 0.2849176824092865, |
|
"learning_rate": 3.083839064958418e-05, |
|
"loss": 0.0152, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.72168284789644, |
|
"grad_norm": 0.14825651049613953, |
|
"learning_rate": 2.971454259384132e-05, |
|
"loss": 0.0171, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.883495145631068, |
|
"grad_norm": 0.045380860567092896, |
|
"learning_rate": 2.8590694538098453e-05, |
|
"loss": 0.0255, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9920565355538512, |
|
"eval_f1": 0.8999999999999999, |
|
"eval_loss": 0.03599809855222702, |
|
"eval_precision": 0.9014195583596214, |
|
"eval_recall": 0.8985849056603774, |
|
"eval_runtime": 2.8186, |
|
"eval_samples_per_second": 438.161, |
|
"eval_steps_per_second": 54.992, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 4.0453074433656955, |
|
"grad_norm": 0.5658661723136902, |
|
"learning_rate": 2.746684648235559e-05, |
|
"loss": 0.0228, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 4.207119741100324, |
|
"grad_norm": 0.11415175348520279, |
|
"learning_rate": 2.6342998426612728e-05, |
|
"loss": 0.0162, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 4.368932038834951, |
|
"grad_norm": 0.1993759125471115, |
|
"learning_rate": 2.5219150370869863e-05, |
|
"loss": 0.0135, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.53074433656958, |
|
"grad_norm": 0.11497118324041367, |
|
"learning_rate": 2.4095302315127e-05, |
|
"loss": 0.0159, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.692556634304207, |
|
"grad_norm": 0.2147281914949417, |
|
"learning_rate": 2.2971454259384134e-05, |
|
"loss": 0.0156, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.854368932038835, |
|
"grad_norm": 0.1083710715174675, |
|
"learning_rate": 2.1847606203641273e-05, |
|
"loss": 0.0094, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9922756656075381, |
|
"eval_f1": 0.9050980392156862, |
|
"eval_loss": 0.03369523212313652, |
|
"eval_precision": 0.9029733959311425, |
|
"eval_recall": 0.9072327044025157, |
|
"eval_runtime": 2.8037, |
|
"eval_samples_per_second": 440.494, |
|
"eval_steps_per_second": 55.285, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 5.016181229773463, |
|
"grad_norm": 0.013677417300641537, |
|
"learning_rate": 2.072375814789841e-05, |
|
"loss": 0.016, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 5.17799352750809, |
|
"grad_norm": 0.08207657188177109, |
|
"learning_rate": 1.9599910092155544e-05, |
|
"loss": 0.0133, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 5.339805825242719, |
|
"grad_norm": 0.02103651873767376, |
|
"learning_rate": 1.847606203641268e-05, |
|
"loss": 0.0092, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 5.501618122977346, |
|
"grad_norm": 1.4357458353042603, |
|
"learning_rate": 1.735221398066982e-05, |
|
"loss": 0.0122, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 5.663430420711974, |
|
"grad_norm": 0.16999904811382294, |
|
"learning_rate": 1.622836592492695e-05, |
|
"loss": 0.0086, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.825242718446602, |
|
"grad_norm": 0.09043747931718826, |
|
"learning_rate": 1.510451786918409e-05, |
|
"loss": 0.0093, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 5.9870550161812295, |
|
"grad_norm": 0.06608462333679199, |
|
"learning_rate": 1.3980669813441227e-05, |
|
"loss": 0.0067, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9932617508491289, |
|
"eval_f1": 0.9301960784313724, |
|
"eval_loss": 0.033360060304403305, |
|
"eval_precision": 0.9280125195618153, |
|
"eval_recall": 0.9323899371069182, |
|
"eval_runtime": 2.8189, |
|
"eval_samples_per_second": 438.116, |
|
"eval_steps_per_second": 54.986, |
|
"step": 3708 |
|
}, |
|
{ |
|
"epoch": 6.148867313915858, |
|
"grad_norm": 0.2284722775220871, |
|
"learning_rate": 1.285682175769836e-05, |
|
"loss": 0.0107, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 6.310679611650485, |
|
"grad_norm": 0.02673812210559845, |
|
"learning_rate": 1.1732973701955498e-05, |
|
"loss": 0.0052, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 6.472491909385114, |
|
"grad_norm": 0.33707210421562195, |
|
"learning_rate": 1.0609125646212633e-05, |
|
"loss": 0.0072, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 6.634304207119741, |
|
"grad_norm": 0.0059865182265639305, |
|
"learning_rate": 9.48527759046977e-06, |
|
"loss": 0.0049, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 6.796116504854369, |
|
"grad_norm": 0.2759881615638733, |
|
"learning_rate": 8.361429534726907e-06, |
|
"loss": 0.016, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 6.957928802588997, |
|
"grad_norm": 0.18257270753383636, |
|
"learning_rate": 7.237581478984042e-06, |
|
"loss": 0.0069, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9937000109565027, |
|
"eval_f1": 0.935483870967742, |
|
"eval_loss": 0.0347304567694664, |
|
"eval_precision": 0.9362204724409449, |
|
"eval_recall": 0.934748427672956, |
|
"eval_runtime": 2.8106, |
|
"eval_samples_per_second": 439.402, |
|
"eval_steps_per_second": 55.148, |
|
"step": 4326 |
|
}, |
|
{ |
|
"epoch": 7.119741100323624, |
|
"grad_norm": 0.007623529061675072, |
|
"learning_rate": 6.113733423241179e-06, |
|
"loss": 0.0046, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 7.281553398058253, |
|
"grad_norm": 0.043167050927877426, |
|
"learning_rate": 4.989885367498316e-06, |
|
"loss": 0.009, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 7.44336569579288, |
|
"grad_norm": 0.009674232453107834, |
|
"learning_rate": 3.866037311755451e-06, |
|
"loss": 0.0046, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 7.605177993527509, |
|
"grad_norm": 0.05575043708086014, |
|
"learning_rate": 2.742189256012588e-06, |
|
"loss": 0.0052, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 7.766990291262136, |
|
"grad_norm": 0.006715767551213503, |
|
"learning_rate": 1.6183412002697239e-06, |
|
"loss": 0.0044, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 7.9288025889967635, |
|
"grad_norm": 0.009280543774366379, |
|
"learning_rate": 4.9449314452686e-07, |
|
"loss": 0.0054, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9936726196997918, |
|
"eval_f1": 0.93401413982718, |
|
"eval_loss": 0.03279910609126091, |
|
"eval_precision": 0.9332810047095761, |
|
"eval_recall": 0.934748427672956, |
|
"eval_runtime": 2.829, |
|
"eval_samples_per_second": 436.551, |
|
"eval_steps_per_second": 54.79, |
|
"step": 4944 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 4944, |
|
"total_flos": 2833132740217920.0, |
|
"train_loss": 0.08807948804957774, |
|
"train_runtime": 679.3683, |
|
"train_samples_per_second": 116.355, |
|
"train_steps_per_second": 7.277 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4944, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2833132740217920.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|