|
{ |
|
"best_metric": 0.267339825630188, |
|
"best_model_checkpoint": "./checkpoint-xlm-v-base/checkpoint-62000", |
|
"epoch": 3.032583672746591, |
|
"eval_steps": 1000, |
|
"global_step": 68500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 46.0612678527832, |
|
"learning_rate": 1.9873510587163855e-05, |
|
"loss": 0.943, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_LOC_f1": 0.6007898672649095, |
|
"eval_ORG_f1": 0.5205864729691723, |
|
"eval_PER_f1": 0.6537866457692849, |
|
"eval_loss": 0.5755352973937988, |
|
"eval_overall_accuracy": 0.813493507822672, |
|
"eval_overall_f1": 0.5959796923883924, |
|
"eval_overall_precision": 0.571592844668358, |
|
"eval_overall_recall": 0.622540194436182, |
|
"eval_runtime": 909.7639, |
|
"eval_samples_per_second": 72.217, |
|
"eval_steps_per_second": 0.282, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 5.543514251708984, |
|
"learning_rate": 1.974702117432771e-05, |
|
"loss": 0.5825, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_LOC_f1": 0.6761863812260971, |
|
"eval_ORG_f1": 0.573783382357224, |
|
"eval_PER_f1": 0.7231398018028106, |
|
"eval_loss": 0.5157074332237244, |
|
"eval_overall_accuracy": 0.844698117312631, |
|
"eval_overall_f1": 0.664661446599853, |
|
"eval_overall_precision": 0.690302943433134, |
|
"eval_overall_recall": 0.640856646367237, |
|
"eval_runtime": 884.4485, |
|
"eval_samples_per_second": 74.284, |
|
"eval_steps_per_second": 0.291, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 30.680952072143555, |
|
"learning_rate": 1.9620531761491565e-05, |
|
"loss": 0.5153, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_LOC_f1": 0.6448332585613877, |
|
"eval_ORG_f1": 0.5780655943179445, |
|
"eval_PER_f1": 0.749264457627936, |
|
"eval_loss": 0.4841216504573822, |
|
"eval_overall_accuracy": 0.8415159100197845, |
|
"eval_overall_f1": 0.6599932560127353, |
|
"eval_overall_precision": 0.6612013701212998, |
|
"eval_overall_recall": 0.6587895486638383, |
|
"eval_runtime": 887.5751, |
|
"eval_samples_per_second": 74.022, |
|
"eval_steps_per_second": 0.29, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 14.460062026977539, |
|
"learning_rate": 1.949404234865542e-05, |
|
"loss": 0.4744, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_LOC_f1": 0.7135048963789569, |
|
"eval_ORG_f1": 0.6044746860257756, |
|
"eval_PER_f1": 0.7562344421814889, |
|
"eval_loss": 0.4284209907054901, |
|
"eval_overall_accuracy": 0.8650805108611886, |
|
"eval_overall_f1": 0.6945276906141412, |
|
"eval_overall_precision": 0.6951840204528166, |
|
"eval_overall_recall": 0.6938725989010129, |
|
"eval_runtime": 884.4505, |
|
"eval_samples_per_second": 74.283, |
|
"eval_steps_per_second": 0.291, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.3655357360839844, |
|
"learning_rate": 1.9367552935819272e-05, |
|
"loss": 0.4385, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_LOC_f1": 0.7295629535257298, |
|
"eval_ORG_f1": 0.6200475216886777, |
|
"eval_PER_f1": 0.7408250910983861, |
|
"eval_loss": 0.4239448308944702, |
|
"eval_overall_accuracy": 0.8635053351958905, |
|
"eval_overall_f1": 0.7025319684063077, |
|
"eval_overall_precision": 0.7043199823770524, |
|
"eval_overall_recall": 0.7007530096904989, |
|
"eval_runtime": 907.5971, |
|
"eval_samples_per_second": 72.389, |
|
"eval_steps_per_second": 0.283, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 9.867854118347168, |
|
"learning_rate": 1.924106352298313e-05, |
|
"loss": 0.4279, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_LOC_f1": 0.7370099725835874, |
|
"eval_ORG_f1": 0.6403813434199981, |
|
"eval_PER_f1": 0.7776639577500056, |
|
"eval_loss": 0.38233184814453125, |
|
"eval_overall_accuracy": 0.8816618781055326, |
|
"eval_overall_f1": 0.7238370468534203, |
|
"eval_overall_precision": 0.7385234418271267, |
|
"eval_overall_recall": 0.7097233746105797, |
|
"eval_runtime": 948.2573, |
|
"eval_samples_per_second": 69.285, |
|
"eval_steps_per_second": 0.271, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 25.877347946166992, |
|
"learning_rate": 1.9114574110146982e-05, |
|
"loss": 0.4099, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_LOC_f1": 0.7575684397708062, |
|
"eval_ORG_f1": 0.630874803840732, |
|
"eval_PER_f1": 0.7838224767358626, |
|
"eval_loss": 0.38040244579315186, |
|
"eval_overall_accuracy": 0.883136741379065, |
|
"eval_overall_f1": 0.7288917006049582, |
|
"eval_overall_precision": 0.7364569017865703, |
|
"eval_overall_recall": 0.7214803450381201, |
|
"eval_runtime": 907.1265, |
|
"eval_samples_per_second": 72.427, |
|
"eval_steps_per_second": 0.283, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 30.637121200561523, |
|
"learning_rate": 1.8988084697310836e-05, |
|
"loss": 0.3874, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_LOC_f1": 0.7474734456723695, |
|
"eval_ORG_f1": 0.6407748343462335, |
|
"eval_PER_f1": 0.7854664027017585, |
|
"eval_loss": 0.37021398544311523, |
|
"eval_overall_accuracy": 0.8872800498308584, |
|
"eval_overall_f1": 0.7295473133392094, |
|
"eval_overall_precision": 0.74022719948437, |
|
"eval_overall_recall": 0.7191712196878376, |
|
"eval_runtime": 884.9913, |
|
"eval_samples_per_second": 74.238, |
|
"eval_steps_per_second": 0.29, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 20.109619140625, |
|
"learning_rate": 1.8861595284474693e-05, |
|
"loss": 0.3841, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_LOC_f1": 0.7684967782745274, |
|
"eval_ORG_f1": 0.655326947582435, |
|
"eval_PER_f1": 0.789217873159736, |
|
"eval_loss": 0.3808096945285797, |
|
"eval_overall_accuracy": 0.8879015799879489, |
|
"eval_overall_f1": 0.743771496693436, |
|
"eval_overall_precision": 0.76602787456446, |
|
"eval_overall_recall": 0.7227718897255663, |
|
"eval_runtime": 885.0879, |
|
"eval_samples_per_second": 74.23, |
|
"eval_steps_per_second": 0.29, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 10.265982627868652, |
|
"learning_rate": 1.8735105871638546e-05, |
|
"loss": 0.3764, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_LOC_f1": 0.7831821749367751, |
|
"eval_ORG_f1": 0.6622161847467495, |
|
"eval_PER_f1": 0.7948864849077164, |
|
"eval_loss": 0.34247785806655884, |
|
"eval_overall_accuracy": 0.8911041208737209, |
|
"eval_overall_f1": 0.7509862429761675, |
|
"eval_overall_precision": 0.7436605881991772, |
|
"eval_overall_recall": 0.7584576608168825, |
|
"eval_runtime": 885.3889, |
|
"eval_samples_per_second": 74.205, |
|
"eval_steps_per_second": 0.29, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.401586055755615, |
|
"learning_rate": 1.86086164588024e-05, |
|
"loss": 0.3564, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_LOC_f1": 0.789610444706057, |
|
"eval_ORG_f1": 0.6701892389389907, |
|
"eval_PER_f1": 0.8054954166474735, |
|
"eval_loss": 0.35062676668167114, |
|
"eval_overall_accuracy": 0.8928101093201735, |
|
"eval_overall_f1": 0.7598466310260445, |
|
"eval_overall_precision": 0.7565214692509428, |
|
"eval_overall_recall": 0.7632011522144121, |
|
"eval_runtime": 885.0018, |
|
"eval_samples_per_second": 74.237, |
|
"eval_steps_per_second": 0.29, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.0011909008026123, |
|
"learning_rate": 1.8482127045966253e-05, |
|
"loss": 0.3484, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_LOC_f1": 0.7712024123633622, |
|
"eval_ORG_f1": 0.6809512535185331, |
|
"eval_PER_f1": 0.8100081183474653, |
|
"eval_loss": 0.37064051628112793, |
|
"eval_overall_accuracy": 0.8851477337194005, |
|
"eval_overall_f1": 0.758193057536852, |
|
"eval_overall_precision": 0.7375241450255696, |
|
"eval_overall_recall": 0.7800538534996947, |
|
"eval_runtime": 950.4008, |
|
"eval_samples_per_second": 69.129, |
|
"eval_steps_per_second": 0.27, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 13.891754150390625, |
|
"learning_rate": 1.8355637633130106e-05, |
|
"loss": 0.3563, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_LOC_f1": 0.7934942596408595, |
|
"eval_ORG_f1": 0.6686377545091862, |
|
"eval_PER_f1": 0.8152987398240509, |
|
"eval_loss": 0.3389296531677246, |
|
"eval_overall_accuracy": 0.8935766857734662, |
|
"eval_overall_f1": 0.7638085016673694, |
|
"eval_overall_precision": 0.7483700877298401, |
|
"eval_overall_recall": 0.7798973026284891, |
|
"eval_runtime": 970.0425, |
|
"eval_samples_per_second": 67.729, |
|
"eval_steps_per_second": 0.265, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 19.86951446533203, |
|
"learning_rate": 1.8229148220293963e-05, |
|
"loss": 0.3396, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_LOC_f1": 0.7943446440452429, |
|
"eval_ORG_f1": 0.6860717813631874, |
|
"eval_PER_f1": 0.808841180333809, |
|
"eval_loss": 0.33801111578941345, |
|
"eval_overall_accuracy": 0.8965501348456104, |
|
"eval_overall_f1": 0.767065352823492, |
|
"eval_overall_precision": 0.770362767931157, |
|
"eval_overall_recall": 0.7637960455249934, |
|
"eval_runtime": 892.9487, |
|
"eval_samples_per_second": 73.576, |
|
"eval_steps_per_second": 0.288, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 6.008892059326172, |
|
"learning_rate": 1.8102658807457817e-05, |
|
"loss": 0.3513, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_LOC_f1": 0.796750172086523, |
|
"eval_ORG_f1": 0.6869723599718148, |
|
"eval_PER_f1": 0.8222321051851345, |
|
"eval_loss": 0.3108769357204437, |
|
"eval_overall_accuracy": 0.900560597156957, |
|
"eval_overall_f1": 0.773478314631055, |
|
"eval_overall_precision": 0.7637860773210824, |
|
"eval_overall_recall": 0.7834196972306151, |
|
"eval_runtime": 885.3039, |
|
"eval_samples_per_second": 74.212, |
|
"eval_steps_per_second": 0.29, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 36.22490692138672, |
|
"learning_rate": 1.7976169394621674e-05, |
|
"loss": 0.3332, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_LOC_f1": 0.8066277986085525, |
|
"eval_ORG_f1": 0.6983725665012176, |
|
"eval_PER_f1": 0.8332057011578333, |
|
"eval_loss": 0.31944143772125244, |
|
"eval_overall_accuracy": 0.9018524580941595, |
|
"eval_overall_f1": 0.7838363709114303, |
|
"eval_overall_precision": 0.7856547450536315, |
|
"eval_overall_recall": 0.7820263944768853, |
|
"eval_runtime": 885.7636, |
|
"eval_samples_per_second": 74.173, |
|
"eval_steps_per_second": 0.29, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 12.729876518249512, |
|
"learning_rate": 1.7849679981785527e-05, |
|
"loss": 0.3324, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_LOC_f1": 0.8091387580118001, |
|
"eval_ORG_f1": 0.7010727593385756, |
|
"eval_PER_f1": 0.8321347376235326, |
|
"eval_loss": 0.3180248737335205, |
|
"eval_overall_accuracy": 0.9008195115735422, |
|
"eval_overall_f1": 0.7850356248119096, |
|
"eval_overall_precision": 0.7839540384210074, |
|
"eval_overall_recall": 0.7861201997589117, |
|
"eval_runtime": 884.7821, |
|
"eval_samples_per_second": 74.256, |
|
"eval_steps_per_second": 0.29, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.6387939453125, |
|
"learning_rate": 1.772319056894938e-05, |
|
"loss": 0.3385, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_LOC_f1": 0.7763382604086921, |
|
"eval_ORG_f1": 0.6755694544225161, |
|
"eval_PER_f1": 0.8256535378180551, |
|
"eval_loss": 0.33690232038497925, |
|
"eval_overall_accuracy": 0.8964328777930731, |
|
"eval_overall_f1": 0.7651045588831604, |
|
"eval_overall_precision": 0.755082284607938, |
|
"eval_overall_recall": 0.7753964650813282, |
|
"eval_runtime": 890.8661, |
|
"eval_samples_per_second": 73.748, |
|
"eval_steps_per_second": 0.288, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 9.216795921325684, |
|
"learning_rate": 1.7596701156113234e-05, |
|
"loss": 0.3267, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_LOC_f1": 0.8015086633534421, |
|
"eval_ORG_f1": 0.70105107410561, |
|
"eval_PER_f1": 0.8364990020855292, |
|
"eval_loss": 0.3270108103752136, |
|
"eval_overall_accuracy": 0.9019283701744147, |
|
"eval_overall_f1": 0.7856358496296815, |
|
"eval_overall_precision": 0.7816849022099619, |
|
"eval_overall_recall": 0.789626939273917, |
|
"eval_runtime": 950.9161, |
|
"eval_samples_per_second": 69.091, |
|
"eval_steps_per_second": 0.27, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 10.781560897827148, |
|
"learning_rate": 1.747021174327709e-05, |
|
"loss": 0.3233, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_LOC_f1": 0.8056958546374753, |
|
"eval_ORG_f1": 0.6917519315097098, |
|
"eval_PER_f1": 0.8345942771968653, |
|
"eval_loss": 0.30491939187049866, |
|
"eval_overall_accuracy": 0.9033815442821598, |
|
"eval_overall_f1": 0.783789045768533, |
|
"eval_overall_precision": 0.7798233379823338, |
|
"eval_overall_recall": 0.7877952940808116, |
|
"eval_runtime": 917.217, |
|
"eval_samples_per_second": 71.63, |
|
"eval_steps_per_second": 0.28, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 15.803696632385254, |
|
"learning_rate": 1.7343722330440944e-05, |
|
"loss": 0.3112, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_LOC_f1": 0.8185129588612579, |
|
"eval_ORG_f1": 0.7198663442992582, |
|
"eval_PER_f1": 0.8407628833170422, |
|
"eval_loss": 0.3230852782726288, |
|
"eval_overall_accuracy": 0.9054094812832666, |
|
"eval_overall_f1": 0.7978603713205666, |
|
"eval_overall_precision": 0.7936613663999752, |
|
"eval_overall_recall": 0.8021040437090032, |
|
"eval_runtime": 886.1519, |
|
"eval_samples_per_second": 74.141, |
|
"eval_steps_per_second": 0.29, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.6037917137145996, |
|
"learning_rate": 1.7217232917604798e-05, |
|
"loss": 0.3256, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_LOC_f1": 0.8135399708226737, |
|
"eval_ORG_f1": 0.7164829968607569, |
|
"eval_PER_f1": 0.8380035321776889, |
|
"eval_loss": 0.3068985044956207, |
|
"eval_overall_accuracy": 0.9048909746636654, |
|
"eval_overall_f1": 0.7934551505253751, |
|
"eval_overall_precision": 0.781072268142868, |
|
"eval_overall_recall": 0.806236986708831, |
|
"eval_runtime": 885.0947, |
|
"eval_samples_per_second": 74.229, |
|
"eval_steps_per_second": 0.29, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 20.057506561279297, |
|
"learning_rate": 1.709074350476865e-05, |
|
"loss": 0.2931, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_LOC_f1": 0.8090675101559834, |
|
"eval_ORG_f1": 0.7063356635658061, |
|
"eval_PER_f1": 0.8313781014023732, |
|
"eval_loss": 0.3428688645362854, |
|
"eval_overall_accuracy": 0.8993378704357015, |
|
"eval_overall_f1": 0.7878607280931577, |
|
"eval_overall_precision": 0.779137676333938, |
|
"eval_overall_recall": 0.7967813140880129, |
|
"eval_runtime": 913.4969, |
|
"eval_samples_per_second": 71.921, |
|
"eval_steps_per_second": 0.281, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 10.215392112731934, |
|
"learning_rate": 1.6964254091932504e-05, |
|
"loss": 0.2936, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_LOC_f1": 0.8279096724337782, |
|
"eval_ORG_f1": 0.7213703143205346, |
|
"eval_PER_f1": 0.8399587047500638, |
|
"eval_loss": 0.3276561200618744, |
|
"eval_overall_accuracy": 0.9048035402140856, |
|
"eval_overall_f1": 0.8012747941254013, |
|
"eval_overall_precision": 0.7938388261504187, |
|
"eval_overall_recall": 0.8088513862579645, |
|
"eval_runtime": 888.4065, |
|
"eval_samples_per_second": 73.953, |
|
"eval_steps_per_second": 0.289, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 13.276248931884766, |
|
"learning_rate": 1.683776467909636e-05, |
|
"loss": 0.2797, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_LOC_f1": 0.8137454037894649, |
|
"eval_ORG_f1": 0.7157125069541951, |
|
"eval_PER_f1": 0.8422392139071162, |
|
"eval_loss": 0.30677318572998047, |
|
"eval_overall_accuracy": 0.9050170429398038, |
|
"eval_overall_f1": 0.7953167244565965, |
|
"eval_overall_precision": 0.7764588335296837, |
|
"eval_overall_recall": 0.8151134211061885, |
|
"eval_runtime": 923.9779, |
|
"eval_samples_per_second": 71.106, |
|
"eval_steps_per_second": 0.278, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.08989755064249039, |
|
"learning_rate": 1.6711275266260215e-05, |
|
"loss": 0.2792, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_LOC_f1": 0.8237387838615486, |
|
"eval_ORG_f1": 0.717827626918536, |
|
"eval_PER_f1": 0.8449089917750373, |
|
"eval_loss": 0.3350381851196289, |
|
"eval_overall_accuracy": 0.9065115620198306, |
|
"eval_overall_f1": 0.7990867935095444, |
|
"eval_overall_precision": 0.7941203753807146, |
|
"eval_overall_recall": 0.8041157224039952, |
|
"eval_runtime": 950.4133, |
|
"eval_samples_per_second": 69.128, |
|
"eval_steps_per_second": 0.27, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 32.182918548583984, |
|
"learning_rate": 1.658478585342407e-05, |
|
"loss": 0.2698, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_LOC_f1": 0.8267147626869356, |
|
"eval_ORG_f1": 0.7322118816415036, |
|
"eval_PER_f1": 0.8463502705378134, |
|
"eval_loss": 0.3303050696849823, |
|
"eval_overall_accuracy": 0.9053356025623038, |
|
"eval_overall_f1": 0.8061476513209491, |
|
"eval_overall_precision": 0.803686040812516, |
|
"eval_overall_recall": 0.8086243874947164, |
|
"eval_runtime": 903.1604, |
|
"eval_samples_per_second": 72.745, |
|
"eval_steps_per_second": 0.285, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 27.704275131225586, |
|
"learning_rate": 1.6458296440587925e-05, |
|
"loss": 0.2846, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_LOC_f1": 0.8198407012516331, |
|
"eval_ORG_f1": 0.7170252756930114, |
|
"eval_PER_f1": 0.8497716275494053, |
|
"eval_loss": 0.3040228486061096, |
|
"eval_overall_accuracy": 0.9088763588770705, |
|
"eval_overall_f1": 0.7998951624404026, |
|
"eval_overall_precision": 0.7879266486958503, |
|
"eval_overall_recall": 0.8122328850760054, |
|
"eval_runtime": 883.6437, |
|
"eval_samples_per_second": 74.351, |
|
"eval_steps_per_second": 0.291, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.532094120979309, |
|
"learning_rate": 1.633180702775178e-05, |
|
"loss": 0.2765, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_LOC_f1": 0.8226323815533471, |
|
"eval_ORG_f1": 0.7322743544720759, |
|
"eval_PER_f1": 0.8458698818030955, |
|
"eval_loss": 0.3010263741016388, |
|
"eval_overall_accuracy": 0.9093941877102408, |
|
"eval_overall_f1": 0.8048208514659728, |
|
"eval_overall_precision": 0.799766577265244, |
|
"eval_overall_recall": 0.8099394148128435, |
|
"eval_runtime": 883.3843, |
|
"eval_samples_per_second": 74.373, |
|
"eval_steps_per_second": 0.291, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 2.7648439407348633, |
|
"learning_rate": 1.6205317614915632e-05, |
|
"loss": 0.2758, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_LOC_f1": 0.8158232882579698, |
|
"eval_ORG_f1": 0.7211553763726063, |
|
"eval_PER_f1": 0.8392668350824088, |
|
"eval_loss": 0.2979504466056824, |
|
"eval_overall_accuracy": 0.907270682822384, |
|
"eval_overall_f1": 0.7967142515352101, |
|
"eval_overall_precision": 0.7737616641463505, |
|
"eval_overall_recall": 0.8210701817555615, |
|
"eval_runtime": 884.2556, |
|
"eval_samples_per_second": 74.3, |
|
"eval_steps_per_second": 0.291, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 2.9498727321624756, |
|
"learning_rate": 1.607882820207949e-05, |
|
"loss": 0.2745, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_LOC_f1": 0.8269012485811577, |
|
"eval_ORG_f1": 0.7277582167305856, |
|
"eval_PER_f1": 0.856517895595802, |
|
"eval_loss": 0.2944641709327698, |
|
"eval_overall_accuracy": 0.9108385505943848, |
|
"eval_overall_f1": 0.807803496021649, |
|
"eval_overall_precision": 0.7947313807024321, |
|
"eval_overall_recall": 0.8213128356059302, |
|
"eval_runtime": 883.9066, |
|
"eval_samples_per_second": 74.329, |
|
"eval_steps_per_second": 0.291, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 11.60289192199707, |
|
"learning_rate": 1.5952338789243342e-05, |
|
"loss": 0.2645, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_LOC_f1": 0.8305319969159598, |
|
"eval_ORG_f1": 0.7228604829282057, |
|
"eval_PER_f1": 0.8315148384875288, |
|
"eval_loss": 0.32325080037117004, |
|
"eval_overall_accuracy": 0.9048618298471388, |
|
"eval_overall_f1": 0.7998450483255535, |
|
"eval_overall_precision": 0.7917570997998328, |
|
"eval_overall_recall": 0.8080999420761776, |
|
"eval_runtime": 933.3011, |
|
"eval_samples_per_second": 70.395, |
|
"eval_steps_per_second": 0.275, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 42.618431091308594, |
|
"learning_rate": 1.5825849376407196e-05, |
|
"loss": 0.2779, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_LOC_f1": 0.8264125401549256, |
|
"eval_ORG_f1": 0.7424042624042624, |
|
"eval_PER_f1": 0.8601716304896517, |
|
"eval_loss": 0.2943771183490753, |
|
"eval_overall_accuracy": 0.9127221190857203, |
|
"eval_overall_f1": 0.8132353632361465, |
|
"eval_overall_precision": 0.8138473840171838, |
|
"eval_overall_recall": 0.8126242622540194, |
|
"eval_runtime": 953.7502, |
|
"eval_samples_per_second": 68.886, |
|
"eval_steps_per_second": 0.269, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 15.319729804992676, |
|
"learning_rate": 1.569935996357105e-05, |
|
"loss": 0.2709, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_LOC_f1": 0.832774509183695, |
|
"eval_ORG_f1": 0.7316936984844457, |
|
"eval_PER_f1": 0.8539732494099136, |
|
"eval_loss": 0.2914768159389496, |
|
"eval_overall_accuracy": 0.9130203451152948, |
|
"eval_overall_f1": 0.8107029247351679, |
|
"eval_overall_precision": 0.7998217523118878, |
|
"eval_overall_recall": 0.8218842462858306, |
|
"eval_runtime": 894.9182, |
|
"eval_samples_per_second": 73.415, |
|
"eval_steps_per_second": 0.287, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.931920051574707, |
|
"learning_rate": 1.5572870550734906e-05, |
|
"loss": 0.2631, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_LOC_f1": 0.8323614548810673, |
|
"eval_ORG_f1": 0.7279775567457282, |
|
"eval_PER_f1": 0.8522675037838443, |
|
"eval_loss": 0.3124816417694092, |
|
"eval_overall_accuracy": 0.9096829247297835, |
|
"eval_overall_f1": 0.8079843932416348, |
|
"eval_overall_precision": 0.7857095311702623, |
|
"eval_overall_recall": 0.8315590901263366, |
|
"eval_runtime": 886.0992, |
|
"eval_samples_per_second": 74.145, |
|
"eval_steps_per_second": 0.29, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 2.4540863037109375, |
|
"learning_rate": 1.544638113789876e-05, |
|
"loss": 0.2684, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_LOC_f1": 0.8353469255313396, |
|
"eval_ORG_f1": 0.743517370545253, |
|
"eval_PER_f1": 0.8544175455688603, |
|
"eval_loss": 0.31003931164741516, |
|
"eval_overall_accuracy": 0.9140255023922472, |
|
"eval_overall_f1": 0.8147700607298496, |
|
"eval_overall_precision": 0.8114867383067271, |
|
"eval_overall_recall": 0.8180800601155346, |
|
"eval_runtime": 886.9365, |
|
"eval_samples_per_second": 74.075, |
|
"eval_steps_per_second": 0.29, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 1.3480443954467773, |
|
"learning_rate": 1.5319891725062616e-05, |
|
"loss": 0.2546, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_LOC_f1": 0.8268920250802105, |
|
"eval_ORG_f1": 0.7359205250232403, |
|
"eval_PER_f1": 0.8566762684569846, |
|
"eval_loss": 0.3172565698623657, |
|
"eval_overall_accuracy": 0.9102766656432092, |
|
"eval_overall_f1": 0.8115168704156479, |
|
"eval_overall_precision": 0.8111456076827428, |
|
"eval_overall_recall": 0.8118884731593531, |
|
"eval_runtime": 886.1348, |
|
"eval_samples_per_second": 74.142, |
|
"eval_steps_per_second": 0.29, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 2.66180419921875, |
|
"learning_rate": 1.5193402312226468e-05, |
|
"loss": 0.2642, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_LOC_f1": 0.8459391601383606, |
|
"eval_ORG_f1": 0.7362593503366764, |
|
"eval_PER_f1": 0.8645872824401172, |
|
"eval_loss": 0.2804827094078064, |
|
"eval_overall_accuracy": 0.913356527184997, |
|
"eval_overall_f1": 0.8191614534186092, |
|
"eval_overall_precision": 0.8128202954617264, |
|
"eval_overall_recall": 0.8256023294769635, |
|
"eval_runtime": 901.1127, |
|
"eval_samples_per_second": 72.91, |
|
"eval_steps_per_second": 0.285, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 47.826175689697266, |
|
"learning_rate": 1.5066912899390323e-05, |
|
"loss": 0.2776, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_LOC_f1": 0.8417204029165086, |
|
"eval_ORG_f1": 0.7461313828771049, |
|
"eval_PER_f1": 0.8621870343195805, |
|
"eval_loss": 0.2955803871154785, |
|
"eval_overall_accuracy": 0.9141780043391887, |
|
"eval_overall_f1": 0.8213932893138981, |
|
"eval_overall_precision": 0.8195021231836067, |
|
"eval_overall_recall": 0.8232932041266809, |
|
"eval_runtime": 949.1162, |
|
"eval_samples_per_second": 69.222, |
|
"eval_steps_per_second": 0.271, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 14.713150024414062, |
|
"learning_rate": 1.4940423486554176e-05, |
|
"loss": 0.2616, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_LOC_f1": 0.8312933303965682, |
|
"eval_ORG_f1": 0.738299968952903, |
|
"eval_PER_f1": 0.8513267743278481, |
|
"eval_loss": 0.29292425513267517, |
|
"eval_overall_accuracy": 0.9104176452208262, |
|
"eval_overall_f1": 0.8118819476942669, |
|
"eval_overall_precision": 0.796034420507883, |
|
"eval_overall_recall": 0.8283732798973026, |
|
"eval_runtime": 908.7427, |
|
"eval_samples_per_second": 72.298, |
|
"eval_steps_per_second": 0.283, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 16.184900283813477, |
|
"learning_rate": 1.481393407371803e-05, |
|
"loss": 0.2701, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_LOC_f1": 0.8392716598242965, |
|
"eval_ORG_f1": 0.74401776384535, |
|
"eval_PER_f1": 0.8639952804501724, |
|
"eval_loss": 0.2767677903175354, |
|
"eval_overall_accuracy": 0.9163774213073009, |
|
"eval_overall_f1": 0.8195078963845922, |
|
"eval_overall_precision": 0.8094401856885441, |
|
"eval_overall_recall": 0.8298292029995147, |
|
"eval_runtime": 885.7415, |
|
"eval_samples_per_second": 74.175, |
|
"eval_steps_per_second": 0.29, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 16.436620712280273, |
|
"learning_rate": 1.4687444660881885e-05, |
|
"loss": 0.2669, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_LOC_f1": 0.8361988121287902, |
|
"eval_ORG_f1": 0.7500968409804315, |
|
"eval_PER_f1": 0.8611851501962505, |
|
"eval_loss": 0.29421770572662354, |
|
"eval_overall_accuracy": 0.9147995344962793, |
|
"eval_overall_f1": 0.819935938895562, |
|
"eval_overall_precision": 0.8067546477976939, |
|
"eval_overall_recall": 0.833555113734208, |
|
"eval_runtime": 886.1039, |
|
"eval_samples_per_second": 74.145, |
|
"eval_steps_per_second": 0.29, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 4.060434341430664, |
|
"learning_rate": 1.456095524804574e-05, |
|
"loss": 0.2422, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_LOC_f1": 0.8396598172309967, |
|
"eval_ORG_f1": 0.752934357339516, |
|
"eval_PER_f1": 0.8587656968190062, |
|
"eval_loss": 0.29513150453567505, |
|
"eval_overall_accuracy": 0.9147182001245772, |
|
"eval_overall_f1": 0.8206534155814486, |
|
"eval_overall_precision": 0.8120531232517684, |
|
"eval_overall_recall": 0.8294378258215007, |
|
"eval_runtime": 885.9872, |
|
"eval_samples_per_second": 74.155, |
|
"eval_steps_per_second": 0.29, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 4.084081172943115, |
|
"learning_rate": 1.4434465835209595e-05, |
|
"loss": 0.2616, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_LOC_f1": 0.8452747626229368, |
|
"eval_ORG_f1": 0.7507735621040889, |
|
"eval_PER_f1": 0.8679754713527367, |
|
"eval_loss": 0.29186713695526123, |
|
"eval_overall_accuracy": 0.915251618045657, |
|
"eval_overall_f1": 0.8252959748971241, |
|
"eval_overall_precision": 0.820965230928905, |
|
"eval_overall_recall": 0.8296726521283091, |
|
"eval_runtime": 886.49, |
|
"eval_samples_per_second": 74.113, |
|
"eval_steps_per_second": 0.29, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 12.051443099975586, |
|
"learning_rate": 1.4307976422373449e-05, |
|
"loss": 0.2449, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_LOC_f1": 0.8420882739030321, |
|
"eval_ORG_f1": 0.7511743283897188, |
|
"eval_PER_f1": 0.8660442600276625, |
|
"eval_loss": 0.28106340765953064, |
|
"eval_overall_accuracy": 0.9165787238772637, |
|
"eval_overall_f1": 0.8232545031821703, |
|
"eval_overall_precision": 0.823325400056368, |
|
"eval_overall_recall": 0.8231836185168371, |
|
"eval_runtime": 931.6188, |
|
"eval_samples_per_second": 70.522, |
|
"eval_steps_per_second": 0.276, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 16.095355987548828, |
|
"learning_rate": 1.4181487009537302e-05, |
|
"loss": 0.2379, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_LOC_f1": 0.8334692878701362, |
|
"eval_ORG_f1": 0.7499450670182377, |
|
"eval_PER_f1": 0.8676157711285138, |
|
"eval_loss": 0.2910194396972656, |
|
"eval_overall_accuracy": 0.914796145564125, |
|
"eval_overall_f1": 0.821648434727601, |
|
"eval_overall_precision": 0.8099514821518198, |
|
"eval_overall_recall": 0.8336881819747327, |
|
"eval_runtime": 955.4465, |
|
"eval_samples_per_second": 68.764, |
|
"eval_steps_per_second": 0.269, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 6.1998419761657715, |
|
"learning_rate": 1.4054997596701157e-05, |
|
"loss": 0.2128, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_LOC_f1": 0.8394146138221968, |
|
"eval_ORG_f1": 0.7394133361546803, |
|
"eval_PER_f1": 0.864682724271338, |
|
"eval_loss": 0.30839666724205017, |
|
"eval_overall_accuracy": 0.9148042790012952, |
|
"eval_overall_f1": 0.8188789651986448, |
|
"eval_overall_precision": 0.8056906504249807, |
|
"eval_overall_recall": 0.8325062228971304, |
|
"eval_runtime": 901.3734, |
|
"eval_samples_per_second": 72.889, |
|
"eval_steps_per_second": 0.285, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 5.403193950653076, |
|
"learning_rate": 1.3928508183865012e-05, |
|
"loss": 0.2237, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_LOC_f1": 0.8372631513660468, |
|
"eval_ORG_f1": 0.7524156839779593, |
|
"eval_PER_f1": 0.8648470673721019, |
|
"eval_loss": 0.3043561279773712, |
|
"eval_overall_accuracy": 0.9152800850757528, |
|
"eval_overall_f1": 0.8220857007666829, |
|
"eval_overall_precision": 0.8082709895080826, |
|
"eval_overall_recall": 0.836380856959469, |
|
"eval_runtime": 884.5053, |
|
"eval_samples_per_second": 74.279, |
|
"eval_steps_per_second": 0.291, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 13.765303611755371, |
|
"learning_rate": 1.3802018771028867e-05, |
|
"loss": 0.2246, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_LOC_f1": 0.8349439826902872, |
|
"eval_ORG_f1": 0.7424747298710351, |
|
"eval_PER_f1": 0.859314059653789, |
|
"eval_loss": 0.28388652205467224, |
|
"eval_overall_accuracy": 0.9144118406578324, |
|
"eval_overall_f1": 0.817645207294658, |
|
"eval_overall_precision": 0.8118686576378439, |
|
"eval_overall_recall": 0.8235045478028086, |
|
"eval_runtime": 886.1908, |
|
"eval_samples_per_second": 74.138, |
|
"eval_steps_per_second": 0.29, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 26.609722137451172, |
|
"learning_rate": 1.3675529358192721e-05, |
|
"loss": 0.2231, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_LOC_f1": 0.8453938301706774, |
|
"eval_ORG_f1": 0.7530178399743618, |
|
"eval_PER_f1": 0.8633811603243918, |
|
"eval_loss": 0.30370599031448364, |
|
"eval_overall_accuracy": 0.9166329467917318, |
|
"eval_overall_f1": 0.8246776205110672, |
|
"eval_overall_precision": 0.8187134051793966, |
|
"eval_overall_recall": 0.8307293705089469, |
|
"eval_runtime": 885.5464, |
|
"eval_samples_per_second": 74.191, |
|
"eval_steps_per_second": 0.29, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 18.287857055664062, |
|
"learning_rate": 1.3549039945356574e-05, |
|
"loss": 0.2156, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_LOC_f1": 0.8369090369642839, |
|
"eval_ORG_f1": 0.7570827451034141, |
|
"eval_PER_f1": 0.8699436414871374, |
|
"eval_loss": 0.2922073304653168, |
|
"eval_overall_accuracy": 0.916367932297269, |
|
"eval_overall_f1": 0.8256348807545127, |
|
"eval_overall_precision": 0.8155410977732979, |
|
"eval_overall_recall": 0.8359816522378947, |
|
"eval_runtime": 885.263, |
|
"eval_samples_per_second": 74.215, |
|
"eval_steps_per_second": 0.29, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 35.76387405395508, |
|
"learning_rate": 1.3422550532520428e-05, |
|
"loss": 0.2279, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_LOC_f1": 0.8493380871850663, |
|
"eval_ORG_f1": 0.7652859960552268, |
|
"eval_PER_f1": 0.8658015544747966, |
|
"eval_loss": 0.30765289068222046, |
|
"eval_overall_accuracy": 0.9169528619870936, |
|
"eval_overall_f1": 0.8303520832274882, |
|
"eval_overall_precision": 0.8291563575626546, |
|
"eval_overall_recall": 0.8315512625827762, |
|
"eval_runtime": 939.0823, |
|
"eval_samples_per_second": 69.962, |
|
"eval_steps_per_second": 0.274, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 12.871335983276367, |
|
"learning_rate": 1.3296061119684283e-05, |
|
"loss": 0.2192, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_LOC_f1": 0.8450333357909482, |
|
"eval_ORG_f1": 0.7589152754918096, |
|
"eval_PER_f1": 0.8706159740642501, |
|
"eval_loss": 0.29916831851005554, |
|
"eval_overall_accuracy": 0.9182454007107268, |
|
"eval_overall_f1": 0.8283387559440156, |
|
"eval_overall_precision": 0.8151347746682732, |
|
"eval_overall_recall": 0.8419775506050691, |
|
"eval_runtime": 927.2359, |
|
"eval_samples_per_second": 70.856, |
|
"eval_steps_per_second": 0.277, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 12.074441909790039, |
|
"learning_rate": 1.3169571706848138e-05, |
|
"loss": 0.2199, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_LOC_f1": 0.845725804758205, |
|
"eval_ORG_f1": 0.7583743578767123, |
|
"eval_PER_f1": 0.8723780235920504, |
|
"eval_loss": 0.29886308312416077, |
|
"eval_overall_accuracy": 0.9192946141056846, |
|
"eval_overall_f1": 0.8293654188671028, |
|
"eval_overall_precision": 0.8241158649684679, |
|
"eval_overall_recall": 0.8346822800068883, |
|
"eval_runtime": 890.1649, |
|
"eval_samples_per_second": 73.807, |
|
"eval_steps_per_second": 0.289, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"grad_norm": 23.149980545043945, |
|
"learning_rate": 1.3043082294011993e-05, |
|
"loss": 0.2255, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_LOC_f1": 0.8466151994355207, |
|
"eval_ORG_f1": 0.7544473410506125, |
|
"eval_PER_f1": 0.870403734801872, |
|
"eval_loss": 0.2841680943965912, |
|
"eval_overall_accuracy": 0.9179688638469395, |
|
"eval_overall_f1": 0.8283511691203761, |
|
"eval_overall_precision": 0.8183908572825472, |
|
"eval_overall_recall": 0.8385569140692268, |
|
"eval_runtime": 885.5261, |
|
"eval_samples_per_second": 74.193, |
|
"eval_steps_per_second": 0.29, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 9.740825653076172, |
|
"learning_rate": 1.2916592881175847e-05, |
|
"loss": 0.2166, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_LOC_f1": 0.8564384031559538, |
|
"eval_ORG_f1": 0.7616027673681177, |
|
"eval_PER_f1": 0.8733214429549507, |
|
"eval_loss": 0.2920551300048828, |
|
"eval_overall_accuracy": 0.9202286038073975, |
|
"eval_overall_f1": 0.8339421536254372, |
|
"eval_overall_precision": 0.8307068573159461, |
|
"eval_overall_recall": 0.8372027490332984, |
|
"eval_runtime": 885.9705, |
|
"eval_samples_per_second": 74.156, |
|
"eval_steps_per_second": 0.29, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 7.81465482711792, |
|
"learning_rate": 1.27901034683397e-05, |
|
"loss": 0.2195, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_LOC_f1": 0.8524520572659642, |
|
"eval_ORG_f1": 0.7654914529914529, |
|
"eval_PER_f1": 0.8711133515111243, |
|
"eval_loss": 0.2894265651702881, |
|
"eval_overall_accuracy": 0.9196877302355783, |
|
"eval_overall_f1": 0.8334080883643471, |
|
"eval_overall_precision": 0.8305412821928031, |
|
"eval_overall_recall": 0.836294753980306, |
|
"eval_runtime": 886.104, |
|
"eval_samples_per_second": 74.145, |
|
"eval_steps_per_second": 0.29, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 4.749297618865967, |
|
"learning_rate": 1.2663614055503555e-05, |
|
"loss": 0.2198, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_LOC_f1": 0.8479476339833629, |
|
"eval_ORG_f1": 0.7569071497897121, |
|
"eval_PER_f1": 0.8715211159515157, |
|
"eval_loss": 0.2978798449039459, |
|
"eval_overall_accuracy": 0.917242276793067, |
|
"eval_overall_f1": 0.8290111404616975, |
|
"eval_overall_precision": 0.8113011029852536, |
|
"eval_overall_recall": 0.847511623902187, |
|
"eval_runtime": 912.438, |
|
"eval_samples_per_second": 72.005, |
|
"eval_steps_per_second": 0.282, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 8.958308219909668, |
|
"learning_rate": 1.2537124642667409e-05, |
|
"loss": 0.2186, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_LOC_f1": 0.8410615339749197, |
|
"eval_ORG_f1": 0.7623071419893903, |
|
"eval_PER_f1": 0.8698487455846391, |
|
"eval_loss": 0.2916683554649353, |
|
"eval_overall_accuracy": 0.9165563569250457, |
|
"eval_overall_f1": 0.828324104278644, |
|
"eval_overall_precision": 0.8190848632805027, |
|
"eval_overall_recall": 0.8377741597131988, |
|
"eval_runtime": 949.686, |
|
"eval_samples_per_second": 69.181, |
|
"eval_steps_per_second": 0.271, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 2.5494885444641113, |
|
"learning_rate": 1.2410635229831265e-05, |
|
"loss": 0.2105, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_LOC_f1": 0.8486577670408396, |
|
"eval_ORG_f1": 0.7658020018726344, |
|
"eval_PER_f1": 0.8764428548203543, |
|
"eval_loss": 0.28897759318351746, |
|
"eval_overall_accuracy": 0.9185185486423599, |
|
"eval_overall_f1": 0.8335781872027352, |
|
"eval_overall_precision": 0.8256759558603319, |
|
"eval_overall_recall": 0.8416331386884168, |
|
"eval_runtime": 906.7935, |
|
"eval_samples_per_second": 72.453, |
|
"eval_steps_per_second": 0.283, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 2.0383992195129395, |
|
"learning_rate": 1.2284145816995119e-05, |
|
"loss": 0.2117, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_LOC_f1": 0.8508689748097309, |
|
"eval_ORG_f1": 0.7658278739306382, |
|
"eval_PER_f1": 0.8731916232956014, |
|
"eval_loss": 0.28091031312942505, |
|
"eval_overall_accuracy": 0.9196301183889559, |
|
"eval_overall_f1": 0.833681650059079, |
|
"eval_overall_precision": 0.8200686015431561, |
|
"eval_overall_recall": 0.8477542777525557, |
|
"eval_runtime": 885.0192, |
|
"eval_samples_per_second": 74.236, |
|
"eval_steps_per_second": 0.29, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.6501883268356323, |
|
"learning_rate": 1.2157656404158972e-05, |
|
"loss": 0.1994, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_LOC_f1": 0.8548620423851409, |
|
"eval_ORG_f1": 0.7666146057733736, |
|
"eval_PER_f1": 0.8740409497434966, |
|
"eval_loss": 0.267339825630188, |
|
"eval_overall_accuracy": 0.9214282857900041, |
|
"eval_overall_f1": 0.835940143844595, |
|
"eval_overall_precision": 0.8308500027062763, |
|
"eval_overall_recall": 0.8410930381827575, |
|
"eval_runtime": 886.6936, |
|
"eval_samples_per_second": 74.095, |
|
"eval_steps_per_second": 0.29, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 3.6416823863983154, |
|
"learning_rate": 1.2031166991322827e-05, |
|
"loss": 0.2075, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_LOC_f1": 0.8514531524204939, |
|
"eval_ORG_f1": 0.7741699019900634, |
|
"eval_PER_f1": 0.8762775257778839, |
|
"eval_loss": 0.2862880229949951, |
|
"eval_overall_accuracy": 0.9219467924096053, |
|
"eval_overall_f1": 0.8376946930582835, |
|
"eval_overall_precision": 0.8390993269298734, |
|
"eval_overall_recall": 0.836294753980306, |
|
"eval_runtime": 883.1341, |
|
"eval_samples_per_second": 74.394, |
|
"eval_steps_per_second": 0.291, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 3.767646312713623, |
|
"learning_rate": 1.1904677578486681e-05, |
|
"loss": 0.2144, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_LOC_f1": 0.8542264412564663, |
|
"eval_ORG_f1": 0.762882333169584, |
|
"eval_PER_f1": 0.8737852991218755, |
|
"eval_loss": 0.2919914424419403, |
|
"eval_overall_accuracy": 0.9192736027263282, |
|
"eval_overall_f1": 0.8335330637616842, |
|
"eval_overall_precision": 0.8234296712697055, |
|
"eval_overall_recall": 0.8438874712337774, |
|
"eval_runtime": 882.9236, |
|
"eval_samples_per_second": 74.412, |
|
"eval_steps_per_second": 0.291, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 1.2373511791229248, |
|
"learning_rate": 1.1778188165650538e-05, |
|
"loss": 0.2107, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_LOC_f1": 0.8536771728748805, |
|
"eval_ORG_f1": 0.7691218130311614, |
|
"eval_PER_f1": 0.8777075297286194, |
|
"eval_loss": 0.2956686019897461, |
|
"eval_overall_accuracy": 0.92124189452152, |
|
"eval_overall_f1": 0.8368564609614728, |
|
"eval_overall_precision": 0.8285232067510548, |
|
"eval_overall_recall": 0.84535904942311, |
|
"eval_runtime": 930.5121, |
|
"eval_samples_per_second": 70.606, |
|
"eval_steps_per_second": 0.276, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 13.250840187072754, |
|
"learning_rate": 1.1651698752814391e-05, |
|
"loss": 0.2133, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_LOC_f1": 0.8533000763334159, |
|
"eval_ORG_f1": 0.7736413979491799, |
|
"eval_PER_f1": 0.8740390436699679, |
|
"eval_loss": 0.2793155908584595, |
|
"eval_overall_accuracy": 0.9226618570941534, |
|
"eval_overall_f1": 0.8369070216139791, |
|
"eval_overall_precision": 0.8351781983723613, |
|
"eval_overall_recall": 0.8386430170483898, |
|
"eval_runtime": 953.5624, |
|
"eval_samples_per_second": 68.9, |
|
"eval_steps_per_second": 0.27, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"grad_norm": 25.819507598876953, |
|
"learning_rate": 1.1525209339978245e-05, |
|
"loss": 0.2112, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_LOC_f1": 0.8548619072433559, |
|
"eval_ORG_f1": 0.7661784507158363, |
|
"eval_PER_f1": 0.8776364551402296, |
|
"eval_loss": 0.2820794880390167, |
|
"eval_overall_accuracy": 0.9220823496957755, |
|
"eval_overall_f1": 0.8374375390381013, |
|
"eval_overall_precision": 0.8353114340451381, |
|
"eval_overall_recall": 0.8395744947320631, |
|
"eval_runtime": 899.3376, |
|
"eval_samples_per_second": 73.054, |
|
"eval_steps_per_second": 0.286, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 13.493629455566406, |
|
"learning_rate": 1.13987199271421e-05, |
|
"loss": 0.1983, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_LOC_f1": 0.8558231253148143, |
|
"eval_ORG_f1": 0.7679850431851696, |
|
"eval_PER_f1": 0.8760919620026149, |
|
"eval_loss": 0.2852949798107147, |
|
"eval_overall_accuracy": 0.9224734324663767, |
|
"eval_overall_f1": 0.8365820844153812, |
|
"eval_overall_precision": 0.8359935591789517, |
|
"eval_overall_recall": 0.8371714388590572, |
|
"eval_runtime": 881.1865, |
|
"eval_samples_per_second": 74.559, |
|
"eval_steps_per_second": 0.292, |
|
"step": 68000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 158116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"total_flos": 2.1809439865622904e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|