|
{ |
|
"best_metric": 0.20017504692077637, |
|
"best_model_checkpoint": "retr00h/deberta-v3-xsmall-NER-FINETUNED/checkpoint-21750", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 21750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.853134870529175, |
|
"learning_rate": 5.747126436781609e-06, |
|
"loss": 3.0474, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 6.231101989746094, |
|
"learning_rate": 1.1494252873563218e-05, |
|
"loss": 1.6994, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.9581143856048584, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 1.3816, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 4.630077362060547, |
|
"learning_rate": 2.2988505747126437e-05, |
|
"loss": 1.1601, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7779691278743173, |
|
"eval_f1": 0.16480717205426432, |
|
"eval_loss": 0.9353219270706177, |
|
"eval_precision": 0.255042170883755, |
|
"eval_recall": 0.12173634003325651, |
|
"eval_runtime": 79.6748, |
|
"eval_samples_per_second": 109.206, |
|
"eval_steps_per_second": 6.828, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 4.457046985626221, |
|
"learning_rate": 2.8735632183908045e-05, |
|
"loss": 0.9478, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 4.775448799133301, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 0.8116, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 5.369730472564697, |
|
"learning_rate": 4.0229885057471265e-05, |
|
"loss": 0.6936, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 4.07623815536499, |
|
"learning_rate": 4.597701149425287e-05, |
|
"loss": 0.6135, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8369943324621483, |
|
"eval_f1": 0.3358227912411726, |
|
"eval_loss": 0.5102753043174744, |
|
"eval_precision": 0.4002825999192572, |
|
"eval_recall": 0.2892441436447971, |
|
"eval_runtime": 71.0073, |
|
"eval_samples_per_second": 122.537, |
|
"eval_steps_per_second": 7.661, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 5.637992858886719, |
|
"learning_rate": 4.99908321555821e-05, |
|
"loss": 0.5371, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 3.2398834228515625, |
|
"learning_rate": 4.982803524033569e-05, |
|
"loss": 0.4694, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 1.9868240356445312, |
|
"learning_rate": 4.9463034836859035e-05, |
|
"loss": 0.4455, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 4.583148002624512, |
|
"learning_rate": 4.8898803566790296e-05, |
|
"loss": 0.4219, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 5.9840850830078125, |
|
"learning_rate": 4.813993661979598e-05, |
|
"loss": 0.398, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8726373171368517, |
|
"eval_f1": 0.4454696078749878, |
|
"eval_loss": 0.36235153675079346, |
|
"eval_precision": 0.5025840266832826, |
|
"eval_recall": 0.4000116689518364, |
|
"eval_runtime": 70.9398, |
|
"eval_samples_per_second": 122.653, |
|
"eval_steps_per_second": 7.668, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 3.646019458770752, |
|
"learning_rate": 4.7192614329611715e-05, |
|
"loss": 0.3553, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 3.0514578819274902, |
|
"learning_rate": 4.606455184041622e-05, |
|
"loss": 0.331, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 3.9326324462890625, |
|
"learning_rate": 4.4764936273463734e-05, |
|
"loss": 0.3217, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"grad_norm": 4.370193958282471, |
|
"learning_rate": 4.3304351905699714e-05, |
|
"loss": 0.3176, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8849798379121097, |
|
"eval_f1": 0.5237382906107038, |
|
"eval_loss": 0.3116133511066437, |
|
"eval_precision": 0.5214888940305414, |
|
"eval_recall": 0.5260071764053794, |
|
"eval_runtime": 71.2115, |
|
"eval_samples_per_second": 122.185, |
|
"eval_steps_per_second": 7.639, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"grad_norm": 3.159623146057129, |
|
"learning_rate": 4.169469396971739e-05, |
|
"loss": 0.2854, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"grad_norm": 2.860635757446289, |
|
"learning_rate": 3.994907177708181e-05, |
|
"loss": 0.272, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 3.7297463417053223, |
|
"learning_rate": 3.808170195400064e-05, |
|
"loss": 0.2613, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"grad_norm": 2.5407516956329346, |
|
"learning_rate": 3.6107792658847595e-05, |
|
"loss": 0.2517, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9037705346052107, |
|
"eval_f1": 0.5800842276864557, |
|
"eval_loss": 0.2615918517112732, |
|
"eval_precision": 0.6010322177040975, |
|
"eval_recall": 0.5605472738411272, |
|
"eval_runtime": 70.3111, |
|
"eval_samples_per_second": 123.75, |
|
"eval_steps_per_second": 7.737, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"grad_norm": 3.3271915912628174, |
|
"learning_rate": 3.404341972448928e-05, |
|
"loss": 0.247, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"grad_norm": 1.9302195310592651, |
|
"learning_rate": 3.1905395734132326e-05, |
|
"loss": 0.2211, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"grad_norm": 1.3263697624206543, |
|
"learning_rate": 2.9711133096957962e-05, |
|
"loss": 0.2132, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 2.6910924911499023, |
|
"learning_rate": 2.7478502238677862e-05, |
|
"loss": 0.2143, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"grad_norm": 4.239352703094482, |
|
"learning_rate": 2.5225686061930326e-05, |
|
"loss": 0.2085, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9127478723656762, |
|
"eval_f1": 0.6213380291938935, |
|
"eval_loss": 0.23839746415615082, |
|
"eval_precision": 0.6199395945867456, |
|
"eval_recall": 0.6227427871291461, |
|
"eval_runtime": 70.8696, |
|
"eval_samples_per_second": 122.775, |
|
"eval_steps_per_second": 7.676, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 2.263456106185913, |
|
"learning_rate": 2.2971031861814223e-05, |
|
"loss": 0.1895, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"grad_norm": 3.127639055252075, |
|
"learning_rate": 2.073290190258459e-05, |
|
"loss": 0.1891, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 6.135817527770996, |
|
"learning_rate": 1.852952387243698e-05, |
|
"loss": 0.1729, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 2.980323076248169, |
|
"learning_rate": 1.6378842434300746e-05, |
|
"loss": 0.1752, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9185261764294775, |
|
"eval_f1": 0.6522748506554175, |
|
"eval_loss": 0.22249895334243774, |
|
"eval_precision": 0.633583587712785, |
|
"eval_recall": 0.6721024533971236, |
|
"eval_runtime": 71.7327, |
|
"eval_samples_per_second": 121.298, |
|
"eval_steps_per_second": 7.584, |
|
"step": 15225 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"grad_norm": 3.3994078636169434, |
|
"learning_rate": 1.4298373081635322e-05, |
|
"loss": 0.1721, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"grad_norm": 2.7240686416625977, |
|
"learning_rate": 1.2305059489451364e-05, |
|
"loss": 0.1549, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"grad_norm": 2.529590606689453, |
|
"learning_rate": 1.041513552231265e-05, |
|
"loss": 0.1577, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"grad_norm": 2.3119211196899414, |
|
"learning_rate": 8.643993023147797e-06, |
|
"loss": 0.1513, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.92717385284445, |
|
"eval_f1": 0.6904867478151416, |
|
"eval_loss": 0.20956499874591827, |
|
"eval_precision": 0.6773587023629118, |
|
"eval_recall": 0.7041337261880452, |
|
"eval_runtime": 69.3099, |
|
"eval_samples_per_second": 125.538, |
|
"eval_steps_per_second": 7.849, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"grad_norm": 1.2401313781738281, |
|
"learning_rate": 7.00605645962078e-06, |
|
"loss": 0.1493, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"grad_norm": 4.699456691741943, |
|
"learning_rate": 5.51466544896021e-06, |
|
"loss": 0.1456, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"grad_norm": 1.5753775835037231, |
|
"learning_rate": 4.181966117984099e-06, |
|
"loss": 0.1389, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"grad_norm": 0.9824960231781006, |
|
"learning_rate": 3.0188121831012023e-06, |
|
"loss": 0.1399, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"grad_norm": 1.2070369720458984, |
|
"learning_rate": 2.0346765559094567e-06, |
|
"loss": 0.1365, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9298507029698532, |
|
"eval_f1": 0.7024456131275685, |
|
"eval_loss": 0.20172493159770966, |
|
"eval_precision": 0.6921421492283732, |
|
"eval_recall": 0.7130604743428921, |
|
"eval_runtime": 71.3537, |
|
"eval_samples_per_second": 121.942, |
|
"eval_steps_per_second": 7.624, |
|
"step": 19575 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 2.9319190979003906, |
|
"learning_rate": 1.2375741942894869e-06, |
|
"loss": 0.1354, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"grad_norm": 2.670553684234619, |
|
"learning_rate": 6.339968273062741e-07, |
|
"loss": 0.1298, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"grad_norm": 1.6918872594833374, |
|
"learning_rate": 2.288600855298306e-07, |
|
"loss": 0.1291, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"grad_norm": 1.8715825080871582, |
|
"learning_rate": 2.546346735399219e-08, |
|
"loss": 0.1314, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9306313410359806, |
|
"eval_f1": 0.7046287809349221, |
|
"eval_loss": 0.20017504692077637, |
|
"eval_precision": 0.6920804613869742, |
|
"eval_recall": 0.7176405379386797, |
|
"eval_runtime": 70.9745, |
|
"eval_samples_per_second": 122.593, |
|
"eval_steps_per_second": 7.665, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 21750, |
|
"total_flos": 3852992789501856.0, |
|
"train_loss": 0.4320273476085444, |
|
"train_runtime": 4787.7797, |
|
"train_samples_per_second": 72.685, |
|
"train_steps_per_second": 4.543 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9306313410359806, |
|
"eval_f1": 0.7046287809349221, |
|
"eval_loss": 0.20017504692077637, |
|
"eval_precision": 0.6920804613869742, |
|
"eval_recall": 0.7176405379386797, |
|
"eval_runtime": 71.7164, |
|
"eval_samples_per_second": 121.325, |
|
"eval_steps_per_second": 7.585, |
|
"step": 21750 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3852992789501856.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|