diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -10,1659 +10,1659 @@ "log_history": [ { "epoch": 1.0, - "grad_norm": 4.555638790130615, + "grad_norm": 2.7315826416015625, "learning_rate": 4.9500000000000004e-05, - "loss": 0.3502, + "loss": 0.3629, "step": 106 }, { "epoch": 1.0, - "eval_LOCATION_f1": 0.751592356687898, + "eval_LOCATION_f1": 0.8, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.6941176470588235, - "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7305389221556886, + "eval_LOCATION_precision": 0.7692307692307693, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.6941176470588235, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6630434782608695, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.8976897689768978, + "eval_ORGANIZATION_precision": 0.6210526315789474, + "eval_ORGANIZATION_recall": 0.7866666666666666, + "eval_PERSON_f1": 0.8888888888888888, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.85, + "eval_PERSON_precision": 0.8343558282208589, "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.5423728813559322, + "eval_QUANTITY_f1": 0.5396825396825397, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.4444444444444444, - "eval_QUANTITY_recall": 0.6956521739130435, - "eval_TIME_f1": 0.6779661016949152, + "eval_QUANTITY_precision": 0.425, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.721311475409836, "eval_TIME_number": 26, - "eval_TIME_precision": 0.6060606060606061, - "eval_TIME_recall": 0.7692307692307693, - "eval_loss": 0.16481448709964752, - "eval_overall_accuracy": 0.9522141606204654, - "eval_overall_f1": 0.7838926174496645, - "eval_overall_precision": 0.7192118226600985, - "eval_overall_recall": 0.8613569321533924, - "eval_runtime": 0.8792, - "eval_samples_per_second": 212.683, - "eval_steps_per_second": 3.412, + "eval_TIME_precision": 0.6285714285714286, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.1598740667104721, + "eval_overall_accuracy": 0.9482111583687766, + "eval_overall_f1": 0.7839999999999999, + "eval_overall_precision": 0.7153284671532847, + "eval_overall_recall": 0.8672566371681416, + "eval_runtime": 0.3626, + "eval_samples_per_second": 515.737, + "eval_steps_per_second": 8.274, "step": 106 }, { "epoch": 2.0, - "grad_norm": 3.0741477012634277, + "grad_norm": 1.3484944105148315, "learning_rate": 4.9e-05, - "loss": 0.1178, + "loss": 0.128, "step": 212 }, { "epoch": 2.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.7922077922077924, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7100000000000001, + "eval_LOCATION_precision": 0.7439024390243902, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.782122905027933, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.568, - "eval_ORGANIZATION_recall": 0.9466666666666667, - "eval_PERSON_f1": 0.9047619047619047, + "eval_ORGANIZATION_precision": 0.6730769230769231, + "eval_ORGANIZATION_recall": 0.9333333333333333, + "eval_PERSON_f1": 0.8993288590604027, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8807947019867549, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.5614035087719297, + "eval_PERSON_precision": 0.864516129032258, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.6545454545454547, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.47058823529411764, - "eval_QUANTITY_recall": 0.6956521739130435, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.5625, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8076923076923077, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.14857080578804016, - "eval_overall_accuracy": 0.9537152864648486, - "eval_overall_f1": 0.8079470198675497, - "eval_overall_precision": 0.7331730769230769, - "eval_overall_recall": 0.8997050147492626, - "eval_runtime": 0.9505, - "eval_samples_per_second": 196.731, - "eval_steps_per_second": 3.156, + "eval_TIME_precision": 0.8076923076923077, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.13682067394256592, + "eval_overall_accuracy": 0.9567175381536153, + "eval_overall_f1": 0.8238482384823848, + "eval_overall_precision": 0.7619047619047619, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3665, + "eval_samples_per_second": 510.258, + "eval_steps_per_second": 8.186, "step": 212 }, { "epoch": 3.0, - "grad_norm": 2.83402419090271, + "grad_norm": 2.2102744579315186, "learning_rate": 4.85e-05, - "loss": 0.0781, + "loss": 0.0841, "step": 318 }, { "epoch": 3.0, - "eval_LOCATION_f1": 0.8299319727891157, + "eval_LOCATION_f1": 0.8028169014084506, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8133333333333334, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7777777777777778, + "eval_LOCATION_precision": 0.8142857142857143, + "eval_LOCATION_recall": 0.7916666666666666, + "eval_ORGANIZATION_f1": 0.7810650887573964, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6666666666666666, - "eval_ORGANIZATION_recall": 0.9333333333333333, - "eval_PERSON_f1": 0.8933333333333333, + "eval_ORGANIZATION_precision": 0.7021276595744681, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8918032786885245, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8535031847133758, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.8395061728395061, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6415094339622641, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, + "eval_QUANTITY_precision": 0.5666666666666667, "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.8333333333333333, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.15726295113563538, - "eval_overall_accuracy": 0.9552164123092319, - "eval_overall_f1": 0.8363136176066025, - "eval_overall_precision": 0.7835051546391752, - "eval_overall_recall": 0.8967551622418879, - "eval_runtime": 0.8869, - "eval_samples_per_second": 210.857, - "eval_steps_per_second": 3.383, + "eval_TIME_precision": 0.9090909090909091, + "eval_TIME_recall": 0.7692307692307693, + "eval_loss": 0.1551402360200882, + "eval_overall_accuracy": 0.9547160370277709, + "eval_overall_f1": 0.8256624825662482, + "eval_overall_precision": 0.783068783068783, + "eval_overall_recall": 0.8731563421828908, + "eval_runtime": 0.3569, + "eval_samples_per_second": 523.957, + "eval_steps_per_second": 8.406, "step": 318 }, { "epoch": 4.0, - "grad_norm": 2.320517063140869, + "grad_norm": 8.499090194702148, "learning_rate": 4.8e-05, - "loss": 0.0572, + "loss": 0.0533, "step": 424 }, { "epoch": 4.0, - "eval_LOCATION_f1": 0.816326530612245, + "eval_LOCATION_f1": 0.8053691275167787, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8, + "eval_LOCATION_precision": 0.7792207792207793, "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7651006711409396, + "eval_ORGANIZATION_f1": 0.7719298245614036, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7702702702702703, - "eval_ORGANIZATION_recall": 0.76, - "eval_PERSON_f1": 0.9019607843137255, + "eval_ORGANIZATION_precision": 0.6875, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8778877887788779, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8466257668711656, - "eval_PERSON_recall": 0.965034965034965, - "eval_QUANTITY_f1": 0.6415094339622641, + "eval_PERSON_precision": 0.83125, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.6666666666666667, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.5666666666666667, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.7368421052631579, + "eval_QUANTITY_precision": 0.5806451612903226, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.7540983606557377, "eval_TIME_number": 26, - "eval_TIME_precision": 0.6774193548387096, - "eval_TIME_recall": 0.8076923076923077, - "eval_loss": 0.17222364246845245, - "eval_overall_accuracy": 0.957968476357268, - "eval_overall_f1": 0.8230337078651685, - "eval_overall_precision": 0.7855227882037533, - "eval_overall_recall": 0.8643067846607669, - "eval_runtime": 0.9001, - "eval_samples_per_second": 207.754, - "eval_steps_per_second": 3.333, + "eval_TIME_precision": 0.6571428571428571, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.17204634845256805, + "eval_overall_accuracy": 0.9552164123092319, + "eval_overall_f1": 0.8130081300813008, + "eval_overall_precision": 0.7518796992481203, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3715, + "eval_samples_per_second": 503.365, + "eval_steps_per_second": 8.075, "step": 424 }, { "epoch": 5.0, - "grad_norm": 0.44582077860832214, + "grad_norm": 10.976715087890625, "learning_rate": 4.75e-05, "loss": 0.0359, "step": 530 }, { "epoch": 5.0, - "eval_LOCATION_f1": 0.8258064516129032, + "eval_LOCATION_f1": 0.8079470198675496, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7710843373493976, - "eval_LOCATION_recall": 0.8888888888888888, - "eval_ORGANIZATION_f1": 0.7951807228915663, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.7784431137724551, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7252747252747253, - "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.9152542372881356, + "eval_ORGANIZATION_precision": 0.7065217391304348, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.9049180327868852, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8881578947368421, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7450980392156864, + "eval_PERSON_precision": 0.8518518518518519, + "eval_PERSON_recall": 0.965034965034965, + "eval_QUANTITY_f1": 0.6785714285714286, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6785714285714286, + "eval_QUANTITY_precision": 0.5757575757575758, "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8799999999999999, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.9166666666666666, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.15475203096866608, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8535564853556487, - "eval_overall_precision": 0.8095238095238095, - "eval_overall_recall": 0.9026548672566371, - "eval_runtime": 0.9396, - "eval_samples_per_second": 199.021, - "eval_steps_per_second": 3.193, + "eval_loss": 0.20843125879764557, + "eval_overall_accuracy": 0.9564673505128847, + "eval_overall_f1": 0.8310626702997276, + "eval_overall_precision": 0.7721518987341772, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3572, + "eval_samples_per_second": 523.556, + "eval_steps_per_second": 8.399, "step": 530 }, { "epoch": 6.0, - "grad_norm": 0.24779094755649567, + "grad_norm": 0.3230496346950531, "learning_rate": 4.7e-05, - "loss": 0.0236, + "loss": 0.0256, "step": 636 }, { "epoch": 6.0, - "eval_LOCATION_f1": 0.7808219178082192, + "eval_LOCATION_f1": 0.816326530612245, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7702702702702703, - "eval_LOCATION_recall": 0.7916666666666666, - "eval_ORGANIZATION_f1": 0.7564102564102564, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.7597765363128492, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7283950617283951, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.888888888888889, + "eval_ORGANIZATION_precision": 0.6538461538461539, + "eval_ORGANIZATION_recall": 0.9066666666666666, + "eval_PERSON_f1": 0.9006622516556291, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8571428571428571, - "eval_PERSON_recall": 0.9230769230769231, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6666666666666667, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8148148148148148, + "eval_QUANTITY_precision": 0.5588235294117647, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.7368421052631579, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7857142857142857, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.18202847242355347, - "eval_overall_accuracy": 0.9604703527645734, - "eval_overall_f1": 0.8193456614509247, - "eval_overall_precision": 0.7912087912087912, - "eval_overall_recall": 0.8495575221238938, - "eval_runtime": 0.9076, - "eval_samples_per_second": 206.029, - "eval_steps_per_second": 3.305, + "eval_TIME_precision": 0.6774193548387096, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.2402852177619934, + "eval_overall_accuracy": 0.9539654741055792, + "eval_overall_f1": 0.8194070080862534, + "eval_overall_precision": 0.7543424317617866, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3663, + "eval_samples_per_second": 510.497, + "eval_steps_per_second": 8.19, "step": 636 }, { "epoch": 7.0, - "grad_norm": 0.0966544821858406, + "grad_norm": 0.021450912579894066, "learning_rate": 4.6500000000000005e-05, - "loss": 0.0164, + "loss": 0.0188, "step": 742 }, { "epoch": 7.0, - "eval_LOCATION_f1": 0.8181818181818181, + "eval_LOCATION_f1": 0.8133333333333332, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7682926829268293, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.744186046511628, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.7624309392265193, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6597938144329897, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9072847682119206, + "eval_ORGANIZATION_precision": 0.6509433962264151, + "eval_ORGANIZATION_recall": 0.92, + "eval_PERSON_f1": 0.8926174496644295, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8616352201257862, - "eval_PERSON_recall": 0.958041958041958, - "eval_QUANTITY_f1": 0.7547169811320754, + "eval_PERSON_precision": 0.8580645161290322, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.6923076923076923, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.8695652173913043, - "eval_TIME_f1": 0.7719298245614036, + "eval_QUANTITY_precision": 0.6206896551724138, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.7118644067796609, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7096774193548387, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.2388230413198471, - "eval_overall_accuracy": 0.9567175381536153, - "eval_overall_f1": 0.8292682926829268, - "eval_overall_precision": 0.7669172932330827, - "eval_overall_recall": 0.9026548672566371, - "eval_runtime": 0.9186, - "eval_samples_per_second": 203.564, - "eval_steps_per_second": 3.266, + "eval_TIME_precision": 0.6363636363636364, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.22656874358654022, + "eval_overall_accuracy": 0.9569677257943457, + "eval_overall_f1": 0.8162162162162162, + "eval_overall_precision": 0.7531172069825436, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.356, + "eval_samples_per_second": 525.333, + "eval_steps_per_second": 8.428, "step": 742 }, { "epoch": 8.0, - "grad_norm": 6.724938869476318, + "grad_norm": 0.026271946728229523, "learning_rate": 4.600000000000001e-05, - "loss": 0.0131, + "loss": 0.0134, "step": 848 }, { "epoch": 8.0, - "eval_LOCATION_f1": 0.8157894736842106, + "eval_LOCATION_f1": 0.7891156462585034, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.775, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7341772151898734, + "eval_LOCATION_precision": 0.7733333333333333, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.7530864197530864, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6987951807228916, - "eval_ORGANIZATION_recall": 0.7733333333333333, - "eval_PERSON_f1": 0.8754208754208753, + "eval_ORGANIZATION_precision": 0.7011494252873564, + "eval_ORGANIZATION_recall": 0.8133333333333334, + "eval_PERSON_f1": 0.9066666666666667, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8441558441558441, - "eval_PERSON_recall": 0.9090909090909091, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8662420382165605, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6415094339622641, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.6779661016949152, + "eval_QUANTITY_precision": 0.5666666666666667, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.7719298245614036, "eval_TIME_number": 26, - "eval_TIME_precision": 0.6060606060606061, - "eval_TIME_recall": 0.7692307692307693, - "eval_loss": 0.21797136962413788, - "eval_overall_accuracy": 0.9582186639979985, - "eval_overall_f1": 0.8022284122562674, - "eval_overall_precision": 0.7598944591029023, - "eval_overall_recall": 0.8495575221238938, - "eval_runtime": 0.906, - "eval_samples_per_second": 206.4, - "eval_steps_per_second": 3.311, + "eval_TIME_precision": 0.7096774193548387, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.24617096781730652, + "eval_overall_accuracy": 0.9572179134350763, + "eval_overall_f1": 0.8178025034770514, + "eval_overall_precision": 0.7736842105263158, + "eval_overall_recall": 0.8672566371681416, + "eval_runtime": 0.3558, + "eval_samples_per_second": 525.577, + "eval_steps_per_second": 8.432, "step": 848 }, { "epoch": 9.0, - "grad_norm": 1.854614019393921, + "grad_norm": 0.6316163539886475, "learning_rate": 4.55e-05, - "loss": 0.0091, + "loss": 0.0135, "step": 954 }, { "epoch": 9.0, - "eval_LOCATION_f1": 0.8435374149659863, + "eval_LOCATION_f1": 0.7972027972027971, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8266666666666667, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.788235294117647, + "eval_LOCATION_precision": 0.8028169014084507, + "eval_LOCATION_recall": 0.7916666666666666, + "eval_ORGANIZATION_f1": 0.8098159509202455, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7052631578947368, - "eval_ORGANIZATION_recall": 0.8933333333333333, - "eval_PERSON_f1": 0.8837209302325583, + "eval_ORGANIZATION_precision": 0.75, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8829431438127091, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8417721518987342, - "eval_PERSON_recall": 0.9300699300699301, + "eval_PERSON_precision": 0.8461538461538461, + "eval_PERSON_recall": 0.9230769230769231, "eval_QUANTITY_f1": 0.6923076923076923, "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6206896551724138, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.7777777777777779, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.75, - "eval_TIME_recall": 0.8076923076923077, - "eval_loss": 0.26150327920913696, - "eval_overall_accuracy": 0.9589692269201902, - "eval_overall_f1": 0.8314917127071824, - "eval_overall_precision": 0.7818181818181819, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9105, - "eval_samples_per_second": 205.388, - "eval_steps_per_second": 3.295, + "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.2374754250049591, + "eval_overall_accuracy": 0.9592194145609206, + "eval_overall_f1": 0.829817158931083, + "eval_overall_precision": 0.793010752688172, + "eval_overall_recall": 0.8702064896755162, + "eval_runtime": 0.3585, + "eval_samples_per_second": 521.613, + "eval_steps_per_second": 8.368, "step": 954 }, { "epoch": 10.0, - "grad_norm": 1.278331995010376, + "grad_norm": 0.20594312250614166, "learning_rate": 4.5e-05, - "loss": 0.0099, + "loss": 0.0082, "step": 1060 }, { "epoch": 10.0, - "eval_LOCATION_f1": 0.816326530612245, + "eval_LOCATION_f1": 0.7973856209150327, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7500000000000001, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.7904191616766466, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7058823529411765, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.9183673469387755, + "eval_ORGANIZATION_precision": 0.717391304347826, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8933333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8940397350993378, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8535031847133758, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.6415094339622641, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8214285714285715, + "eval_QUANTITY_precision": 0.5666666666666667, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7666666666666667, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.24217499792575836, - "eval_overall_accuracy": 0.9619714786089567, - "eval_overall_f1": 0.8349788434414668, - "eval_overall_precision": 0.8, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9185, - "eval_samples_per_second": 203.602, - "eval_steps_per_second": 3.266, + "eval_loss": 0.27111825346946716, + "eval_overall_accuracy": 0.9574681010758068, + "eval_overall_f1": 0.8269230769230769, + "eval_overall_precision": 0.7737789203084833, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3597, + "eval_samples_per_second": 519.941, + "eval_steps_per_second": 8.341, "step": 1060 }, { "epoch": 11.0, - "grad_norm": 0.09772255271673203, + "grad_norm": 0.010851857252418995, "learning_rate": 4.4500000000000004e-05, - "loss": 0.0059, + "loss": 0.0079, "step": 1166 }, { "epoch": 11.0, - "eval_LOCATION_f1": 0.7972972972972973, + "eval_LOCATION_f1": 0.7837837837837838, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7763157894736842, - "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7619047619047621, + "eval_LOCATION_precision": 0.7631578947368421, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.7878787878787877, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6881720430107527, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.907849829351536, + "eval_ORGANIZATION_precision": 0.7222222222222222, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8866666666666667, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6792452830188679, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, + "eval_QUANTITY_precision": 0.6, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_TIME_f1": 0.7636363636363636, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.2546893060207367, - "eval_overall_accuracy": 0.9589692269201902, - "eval_overall_f1": 0.8307692307692307, - "eval_overall_precision": 0.7898936170212766, + "eval_TIME_precision": 0.7241379310344828, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.27776339650154114, + "eval_overall_accuracy": 0.9574681010758068, + "eval_overall_f1": 0.825, + "eval_overall_precision": 0.7795275590551181, "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9121, - "eval_samples_per_second": 205.026, - "eval_steps_per_second": 3.289, + "eval_runtime": 0.3586, + "eval_samples_per_second": 521.408, + "eval_steps_per_second": 8.365, "step": 1166 }, { "epoch": 12.0, - "grad_norm": 0.023314962163567543, + "grad_norm": 4.9953837394714355, "learning_rate": 4.4000000000000006e-05, - "loss": 0.0062, + "loss": 0.0105, "step": 1272 }, { "epoch": 12.0, - "eval_LOCATION_f1": 0.7945205479452055, + "eval_LOCATION_f1": 0.7916666666666666, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7837837837837838, - "eval_LOCATION_recall": 0.8055555555555556, - "eval_ORGANIZATION_f1": 0.8074534161490684, + "eval_LOCATION_precision": 0.7916666666666666, + "eval_LOCATION_recall": 0.7916666666666666, + "eval_ORGANIZATION_f1": 0.8101265822784811, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7558139534883721, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.8829431438127091, + "eval_ORGANIZATION_precision": 0.7710843373493976, + "eval_ORGANIZATION_recall": 0.8533333333333334, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8461538461538461, - "eval_PERSON_recall": 0.9230769230769231, - "eval_QUANTITY_f1": 0.6538461538461539, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.5862068965517241, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8363636363636363, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.7857142857142856, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7931034482758621, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.2780083119869232, - "eval_overall_accuracy": 0.9602201651238429, - "eval_overall_f1": 0.8274894810659187, - "eval_overall_precision": 0.7887700534759359, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9675, - "eval_samples_per_second": 193.282, - "eval_steps_per_second": 3.101, + "eval_TIME_precision": 0.7333333333333333, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.28447094559669495, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.8373408769448375, + "eval_overall_precision": 0.8043478260869565, + "eval_overall_recall": 0.8731563421828908, + "eval_runtime": 0.3599, + "eval_samples_per_second": 519.532, + "eval_steps_per_second": 8.335, "step": 1272 }, { "epoch": 13.0, - "grad_norm": 3.4945220947265625, + "grad_norm": 0.2857362926006317, "learning_rate": 4.35e-05, - "loss": 0.0037, + "loss": 0.0103, "step": 1378 }, { "epoch": 13.0, - "eval_LOCATION_f1": 0.782051282051282, + "eval_LOCATION_f1": 0.7891156462585034, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7261904761904762, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7417218543046358, + "eval_LOCATION_precision": 0.7733333333333333, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.8170731707317072, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7368421052631579, - "eval_ORGANIZATION_recall": 0.7466666666666667, - "eval_PERSON_f1": 0.9163879598662208, + "eval_ORGANIZATION_precision": 0.7528089887640449, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.8926174496644295, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8782051282051282, - "eval_PERSON_recall": 0.958041958041958, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8580645161290322, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.7037037037037037, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8727272727272727, + "eval_QUANTITY_precision": 0.6129032258064516, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.9019607843137256, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8275862068965517, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.2757226526737213, - "eval_overall_accuracy": 0.9627220415311484, - "eval_overall_f1": 0.8326300984528833, - "eval_overall_precision": 0.7956989247311828, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9689, - "eval_samples_per_second": 193.003, - "eval_steps_per_second": 3.096, + "eval_TIME_precision": 0.92, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.2690028250217438, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8403361344537816, + "eval_overall_precision": 0.8, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3608, + "eval_samples_per_second": 518.338, + "eval_steps_per_second": 8.316, "step": 1378 }, { "epoch": 14.0, - "grad_norm": 0.025552373379468918, + "grad_norm": 0.11683321744203568, "learning_rate": 4.3e-05, - "loss": 0.005, + "loss": 0.0045, "step": 1484 }, { "epoch": 14.0, - "eval_LOCATION_f1": 0.8243243243243243, + "eval_LOCATION_f1": 0.7887323943661971, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8026315789473685, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7341772151898734, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.7777777777777778, + "eval_ORGANIZATION_f1": 0.809248554913295, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6987951807228916, - "eval_ORGANIZATION_recall": 0.7733333333333333, - "eval_PERSON_f1": 0.9023569023569024, + "eval_ORGANIZATION_precision": 0.7142857142857143, + "eval_ORGANIZATION_recall": 0.9333333333333333, + "eval_PERSON_f1": 0.8933333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8701298701298701, + "eval_PERSON_precision": 0.8535031847133758, "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7169811320754716, + "eval_QUANTITY_f1": 0.6545454545454547, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6333333333333333, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.5625, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.7636363636363636, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.280028760433197, - "eval_overall_accuracy": 0.9594696022016512, - "eval_overall_f1": 0.8305084745762712, - "eval_overall_precision": 0.7967479674796748, - "eval_overall_recall": 0.8672566371681416, - "eval_runtime": 0.9073, - "eval_samples_per_second": 206.107, - "eval_steps_per_second": 3.307, + "eval_TIME_precision": 0.7241379310344828, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.304815411567688, + "eval_overall_accuracy": 0.957968476357268, + "eval_overall_f1": 0.8248275862068966, + "eval_overall_precision": 0.7746113989637305, + "eval_overall_recall": 0.8820058997050148, + "eval_runtime": 0.3586, + "eval_samples_per_second": 521.439, + "eval_steps_per_second": 8.365, "step": 1484 }, { "epoch": 15.0, - "grad_norm": 0.192918598651886, + "grad_norm": 0.15561413764953613, "learning_rate": 4.25e-05, - "loss": 0.0058, + "loss": 0.005, "step": 1590 }, { "epoch": 15.0, - "eval_LOCATION_f1": 0.8356164383561645, + "eval_LOCATION_f1": 0.816326530612245, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8243243243243243, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7784431137724551, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8000000000000002, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7065217391304348, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.8963210702341137, + "eval_ORGANIZATION_precision": 0.775, + "eval_ORGANIZATION_recall": 0.8266666666666667, + "eval_PERSON_f1": 0.9054054054054055, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8589743589743589, + "eval_PERSON_precision": 0.8758169934640523, "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6666666666666666, + "eval_QUANTITY_f1": 0.6792452830188679, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.64, - "eval_QUANTITY_recall": 0.6956521739130435, - "eval_TIME_f1": 0.8070175438596492, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7419354838709677, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.2716998755931854, - "eval_overall_accuracy": 0.9629722291718789, - "eval_overall_f1": 0.8340306834030684, - "eval_overall_precision": 0.791005291005291, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.91, - "eval_samples_per_second": 205.503, - "eval_steps_per_second": 3.297, + "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.27857497334480286, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.8397163120567377, + "eval_overall_precision": 0.8087431693989071, + "eval_overall_recall": 0.8731563421828908, + "eval_runtime": 0.3743, + "eval_samples_per_second": 499.656, + "eval_steps_per_second": 8.016, "step": 1590 }, { "epoch": 16.0, - "grad_norm": 0.09364385157823563, + "grad_norm": 0.24679070711135864, "learning_rate": 4.2e-05, - "loss": 0.0046, + "loss": 0.0041, "step": 1696 }, { "epoch": 16.0, - "eval_LOCATION_f1": 0.816326530612245, + "eval_LOCATION_f1": 0.7866666666666666, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.735632183908046, + "eval_LOCATION_precision": 0.7564102564102564, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8170731707317072, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6464646464646465, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.8993288590604027, + "eval_ORGANIZATION_precision": 0.7528089887640449, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.8903654485049833, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.864516129032258, + "eval_PERSON_precision": 0.8481012658227848, "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6545454545454547, + "eval_QUANTITY_f1": 0.7169811320754716, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.5625, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.7931034482758621, + "eval_QUANTITY_precision": 0.6333333333333333, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.71875, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.30642953515052795, - "eval_overall_accuracy": 0.9539654741055792, - "eval_overall_f1": 0.8169398907103824, - "eval_overall_precision": 0.7608142493638677, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9036, - "eval_samples_per_second": 206.952, - "eval_steps_per_second": 3.32, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.30642324686050415, + "eval_overall_accuracy": 0.9572179134350763, + "eval_overall_f1": 0.8349514563106795, + "eval_overall_precision": 0.7879581151832461, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3572, + "eval_samples_per_second": 523.51, + "eval_steps_per_second": 8.399, "step": 1696 }, { "epoch": 17.0, - "grad_norm": 4.5664825439453125, + "grad_norm": 0.17553383111953735, "learning_rate": 4.15e-05, - "loss": 0.0063, + "loss": 0.0061, "step": 1802 }, { "epoch": 17.0, - "eval_LOCATION_f1": 0.7763157894736842, + "eval_LOCATION_f1": 0.7733333333333334, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7375, - "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7922077922077922, + "eval_LOCATION_precision": 0.7435897435897436, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.7701863354037267, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7721518987341772, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.8940397350993377, + "eval_ORGANIZATION_precision": 0.7209302325581395, + "eval_ORGANIZATION_recall": 0.8266666666666667, + "eval_PERSON_f1": 0.8910891089108911, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8490566037735849, + "eval_PERSON_precision": 0.84375, "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.6792452830188679, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_precision": 0.6428571428571429, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.7857142857142857, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.28576722741127014, - "eval_overall_accuracy": 0.9617212909682261, - "eval_overall_f1": 0.8263305322128851, - "eval_overall_precision": 0.7866666666666666, + "eval_loss": 0.300820916891098, + "eval_overall_accuracy": 0.9574681010758068, + "eval_overall_f1": 0.8205841446453408, + "eval_overall_precision": 0.7763157894736842, "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9045, - "eval_samples_per_second": 206.753, - "eval_steps_per_second": 3.317, + "eval_runtime": 0.3595, + "eval_samples_per_second": 520.18, + "eval_steps_per_second": 8.345, "step": 1802 }, { "epoch": 18.0, - "grad_norm": 0.04714202508330345, + "grad_norm": 0.004105593077838421, "learning_rate": 4.1e-05, - "loss": 0.0033, + "loss": 0.0036, "step": 1908 }, { "epoch": 18.0, - "eval_LOCATION_f1": 0.8, + "eval_LOCATION_f1": 0.8133333333333332, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7692307692307693, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.779874213836478, + "eval_LOCATION_precision": 0.782051282051282, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8129032258064516, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7380952380952381, - "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.9006622516556291, + "eval_ORGANIZATION_precision": 0.7875, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.8993288590604027, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8553459119496856, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.864516129032258, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8421052631578947, + "eval_QUANTITY_precision": 0.6551724137931034, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7741935483870968, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.30405738949775696, - "eval_overall_accuracy": 0.9619714786089567, - "eval_overall_f1": 0.8333333333333335, - "eval_overall_precision": 0.7874015748031497, - "eval_overall_recall": 0.8849557522123894, - "eval_runtime": 0.903, - "eval_samples_per_second": 207.088, - "eval_steps_per_second": 3.322, + "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.29750457406044006, + "eval_overall_accuracy": 0.9627220415311484, + "eval_overall_f1": 0.8434414668547249, + "eval_overall_precision": 0.8081081081081081, + "eval_overall_recall": 0.8820058997050148, + "eval_runtime": 0.3588, + "eval_samples_per_second": 521.143, + "eval_steps_per_second": 8.361, "step": 1908 }, { "epoch": 19.0, - "grad_norm": 0.008477783761918545, + "grad_norm": 0.007032826077193022, "learning_rate": 4.05e-05, - "loss": 0.0033, + "loss": 0.0025, "step": 2014 }, { "epoch": 19.0, - "eval_LOCATION_f1": 0.8413793103448277, + "eval_LOCATION_f1": 0.8513513513513513, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8356164383561644, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8101265822784811, + "eval_LOCATION_precision": 0.8289473684210527, + "eval_LOCATION_recall": 0.875, + "eval_ORGANIZATION_f1": 0.834355828220859, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7710843373493976, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.8979591836734694, + "eval_ORGANIZATION_precision": 0.7727272727272727, + "eval_ORGANIZATION_recall": 0.9066666666666666, + "eval_PERSON_f1": 0.8970099667774087, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8741721854304636, - "eval_PERSON_recall": 0.9230769230769231, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8544303797468354, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.76, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8148148148148148, + "eval_QUANTITY_precision": 0.7037037037037037, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_precision": 0.8461538461538461, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3013346493244171, - "eval_overall_accuracy": 0.960720540405304, - "eval_overall_f1": 0.844950213371266, - "eval_overall_precision": 0.8159340659340659, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9229, - "eval_samples_per_second": 202.623, - "eval_steps_per_second": 3.251, + "eval_loss": 0.3171651363372803, + "eval_overall_accuracy": 0.9622216662496873, + "eval_overall_f1": 0.8599439775910364, + "eval_overall_precision": 0.8186666666666667, + "eval_overall_recall": 0.9056047197640118, + "eval_runtime": 0.3602, + "eval_samples_per_second": 519.212, + "eval_steps_per_second": 8.33, "step": 2014 }, { "epoch": 20.0, - "grad_norm": 0.0049218675121665, + "grad_norm": 1.055253505706787, "learning_rate": 4e-05, - "loss": 0.0036, + "loss": 0.0039, "step": 2120 }, { "epoch": 20.0, - "eval_LOCATION_f1": 0.8219178082191781, + "eval_LOCATION_f1": 0.8187919463087249, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8108108108108109, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.8, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8098159509202455, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7333333333333333, + "eval_ORGANIZATION_precision": 0.75, "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.9023569023569024, + "eval_PERSON_f1": 0.8970099667774087, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8701298701298701, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6538461538461539, + "eval_PERSON_precision": 0.8544303797468354, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.76, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.5862068965517241, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.7037037037037037, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.8461538461538461, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.2967695891857147, - "eval_overall_accuracy": 0.9609707280460346, - "eval_overall_f1": 0.8387096774193548, - "eval_overall_precision": 0.7994652406417112, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9818, - "eval_samples_per_second": 190.457, - "eval_steps_per_second": 3.055, + "eval_loss": 0.3097495436668396, + "eval_overall_accuracy": 0.9622216662496873, + "eval_overall_f1": 0.8475524475524475, + "eval_overall_precision": 0.8058510638297872, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3569, + "eval_samples_per_second": 524.015, + "eval_steps_per_second": 8.407, "step": 2120 }, { "epoch": 21.0, - "grad_norm": 0.03995713219046593, + "grad_norm": 0.4885401129722595, "learning_rate": 3.9500000000000005e-05, - "loss": 0.0038, + "loss": 0.0031, "step": 2226 }, { "epoch": 21.0, - "eval_LOCATION_f1": 0.7625899280575541, + "eval_LOCATION_f1": 0.781456953642384, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7910447761194029, - "eval_LOCATION_recall": 0.7361111111111112, - "eval_ORGANIZATION_f1": 0.7374999999999999, + "eval_LOCATION_precision": 0.7468354430379747, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.7974683544303797, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6941176470588235, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.9047619047619047, + "eval_ORGANIZATION_precision": 0.7590361445783133, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.8940397350993377, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8807947019867549, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7058823529411765, + "eval_PERSON_precision": 0.8490566037735849, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6428571428571429, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6551724137931034, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.2894110381603241, - "eval_overall_accuracy": 0.9577182887165374, - "eval_overall_f1": 0.8177905308464849, - "eval_overall_precision": 0.7960893854748603, - "eval_overall_recall": 0.8407079646017699, - "eval_runtime": 0.9338, - "eval_samples_per_second": 200.266, - "eval_steps_per_second": 3.213, + "eval_loss": 0.32485198974609375, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.83008356545961, + "eval_overall_precision": 0.7862796833773087, + "eval_overall_recall": 0.8790560471976401, + "eval_runtime": 0.3689, + "eval_samples_per_second": 506.94, + "eval_steps_per_second": 8.133, "step": 2226 }, { "epoch": 22.0, - "grad_norm": 0.001287546823732555, + "grad_norm": 0.03123115934431553, "learning_rate": 3.9000000000000006e-05, - "loss": 0.004, + "loss": 0.0037, "step": 2332 }, { "epoch": 22.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.7972972972972973, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7051282051282051, + "eval_LOCATION_precision": 0.7763157894736842, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8263473053892216, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6790123456790124, - "eval_ORGANIZATION_recall": 0.7333333333333333, - "eval_PERSON_f1": 0.8986486486486487, + "eval_ORGANIZATION_precision": 0.75, + "eval_ORGANIZATION_recall": 0.92, + "eval_PERSON_f1": 0.8910891089108911, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.869281045751634, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.6792452830188679, + "eval_PERSON_precision": 0.84375, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.6551724137931034, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.29990673065185547, - "eval_overall_accuracy": 0.9554665999499625, - "eval_overall_f1": 0.8203677510608203, - "eval_overall_precision": 0.7880434782608695, - "eval_overall_recall": 0.855457227138643, - "eval_runtime": 0.9065, - "eval_samples_per_second": 206.288, - "eval_steps_per_second": 3.309, + "eval_loss": 0.3308863937854767, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8386206896551724, + "eval_overall_precision": 0.7875647668393783, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3591, + "eval_samples_per_second": 520.739, + "eval_steps_per_second": 8.354, "step": 2332 }, { "epoch": 23.0, - "grad_norm": 0.01974656991660595, + "grad_norm": 0.8348985314369202, "learning_rate": 3.85e-05, - "loss": 0.003, + "loss": 0.0015, "step": 2438 }, { "epoch": 23.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.8079470198675496, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8074534161490684, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8176100628930819, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7558139534883721, + "eval_ORGANIZATION_precision": 0.7738095238095238, "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9084745762711864, + "eval_PERSON_f1": 0.8933333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, + "eval_PERSON_precision": 0.8535031847133758, "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6792452830188679, + "eval_QUANTITY_f1": 0.7692307692307693, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6896551724137931, + "eval_QUANTITY_recall": 0.8695652173913043, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3059156537055969, - "eval_overall_accuracy": 0.9622216662496873, - "eval_overall_f1": 0.8443197755960731, - "eval_overall_precision": 0.8048128342245989, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9125, - "eval_samples_per_second": 204.922, - "eval_steps_per_second": 3.288, + "eval_loss": 0.32765087485313416, + "eval_overall_accuracy": 0.9609707280460346, + "eval_overall_f1": 0.8423988842398884, + "eval_overall_precision": 0.798941798941799, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3569, + "eval_samples_per_second": 523.957, + "eval_steps_per_second": 8.406, "step": 2438 }, { "epoch": 24.0, - "grad_norm": 2.60111141204834, + "grad_norm": 0.006436940282583237, "learning_rate": 3.8e-05, - "loss": 0.0038, + "loss": 0.0027, "step": 2544 }, { "epoch": 24.0, - "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_f1": 0.7837837837837838, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.782051282051282, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7471264367816093, + "eval_LOCATION_precision": 0.7631578947368421, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.8076923076923077, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6565656565656566, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9152542372881356, + "eval_ORGANIZATION_precision": 0.7777777777777778, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.8933333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8881578947368421, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.6792452830188679, + "eval_PERSON_precision": 0.8535031847133758, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7586206896551724, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3071446716785431, - "eval_overall_accuracy": 0.9582186639979985, - "eval_overall_f1": 0.828060522696011, - "eval_overall_precision": 0.7757731958762887, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9115, - "eval_samples_per_second": 205.145, - "eval_steps_per_second": 3.291, + "eval_loss": 0.3232249617576599, + "eval_overall_accuracy": 0.9609707280460346, + "eval_overall_f1": 0.8345120226308345, + "eval_overall_precision": 0.8016304347826086, + "eval_overall_recall": 0.8702064896755162, + "eval_runtime": 0.3705, + "eval_samples_per_second": 504.719, + "eval_steps_per_second": 8.097, "step": 2544 }, { "epoch": 25.0, - "grad_norm": 5.347325801849365, + "grad_norm": 0.9054812788963318, "learning_rate": 3.7500000000000003e-05, - "loss": 0.0039, + "loss": 0.0035, "step": 2650 }, { "epoch": 25.0, - "eval_LOCATION_f1": 0.7792207792207793, + "eval_LOCATION_f1": 0.8129032258064516, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7317073170731707, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.729559748427673, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.875, + "eval_ORGANIZATION_f1": 0.8227848101265823, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6904761904761905, - "eval_ORGANIZATION_recall": 0.7733333333333333, - "eval_PERSON_f1": 0.8993288590604027, + "eval_ORGANIZATION_precision": 0.7831325301204819, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.8926174496644295, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.864516129032258, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6792452830188679, + "eval_PERSON_precision": 0.8580645161290322, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.6428571428571429, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_precision": 0.5454545454545454, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8148148148148148, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3066267967224121, - "eval_overall_accuracy": 0.9572179134350763, - "eval_overall_f1": 0.8133704735376045, - "eval_overall_precision": 0.7704485488126649, - "eval_overall_recall": 0.8613569321533924, - "eval_runtime": 0.909, - "eval_samples_per_second": 205.719, - "eval_steps_per_second": 3.3, + "eval_loss": 0.35135143995285034, + "eval_overall_accuracy": 0.957968476357268, + "eval_overall_f1": 0.8337950138504155, + "eval_overall_precision": 0.7859007832898173, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3594, + "eval_samples_per_second": 520.314, + "eval_steps_per_second": 8.347, "step": 2650 }, { "epoch": 26.0, - "grad_norm": 0.00547376973554492, + "grad_norm": 0.010295592248439789, "learning_rate": 3.7e-05, - "loss": 0.0031, + "loss": 0.0028, "step": 2756 }, { "epoch": 26.0, - "eval_LOCATION_f1": 0.8137931034482759, + "eval_LOCATION_f1": 0.8129032258064516, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8082191780821918, - "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7701863354037267, + "eval_LOCATION_precision": 0.7590361445783133, + "eval_LOCATION_recall": 0.875, + "eval_ORGANIZATION_f1": 0.7831325301204819, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7209302325581395, - "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.8837209302325583, + "eval_ORGANIZATION_precision": 0.7142857142857143, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.8979591836734694, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8417721518987342, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8741721854304636, + "eval_PERSON_recall": 0.9230769230769231, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, + "eval_QUANTITY_precision": 0.6428571428571429, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8679245283018868, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8518518518518519, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3245824873447418, - "eval_overall_accuracy": 0.9597197898423818, - "eval_overall_f1": 0.8286516853932584, - "eval_overall_precision": 0.7908847184986595, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9241, - "eval_samples_per_second": 202.35, - "eval_steps_per_second": 3.246, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3200477361679077, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8344923504867873, + "eval_overall_precision": 0.7894736842105263, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3624, + "eval_samples_per_second": 516.068, + "eval_steps_per_second": 8.279, "step": 2756 }, { "epoch": 27.0, - "grad_norm": 0.009228230454027653, + "grad_norm": 0.0034800188150256872, "learning_rate": 3.65e-05, - "loss": 0.0026, + "loss": 0.0023, "step": 2862 }, { "epoch": 27.0, - "eval_LOCATION_f1": 0.8219178082191781, + "eval_LOCATION_f1": 0.8108108108108109, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8108108108108109, + "eval_LOCATION_precision": 0.7894736842105263, "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7407407407407408, + "eval_ORGANIZATION_f1": 0.7831325301204819, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6896551724137931, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.903654485049834, + "eval_ORGANIZATION_precision": 0.7142857142857143, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.8837209302325583, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8607594936708861, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.7169811320754716, + "eval_PERSON_precision": 0.8417721518987342, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.6538461538461539, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6333333333333333, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.5862068965517241, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.32465118169784546, - "eval_overall_accuracy": 0.9584688516387291, - "eval_overall_f1": 0.8324022346368715, - "eval_overall_precision": 0.7904509283819628, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.8984, - "eval_samples_per_second": 208.15, - "eval_steps_per_second": 3.339, + "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3428550064563751, + "eval_overall_accuracy": 0.9569677257943457, + "eval_overall_f1": 0.8238557558945908, + "eval_overall_precision": 0.7774869109947644, + "eval_overall_recall": 0.8761061946902655, + "eval_runtime": 0.3592, + "eval_samples_per_second": 520.564, + "eval_steps_per_second": 8.351, "step": 2862 }, { "epoch": 28.0, - "grad_norm": 3.7562034130096436, + "grad_norm": 0.001987306633964181, "learning_rate": 3.6e-05, - "loss": 0.0041, + "loss": 0.0031, "step": 2968 }, { "epoch": 28.0, - "eval_LOCATION_f1": 0.8082191780821918, + "eval_LOCATION_f1": 0.8026315789473685, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7972972972972973, - "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7469879518072289, + "eval_LOCATION_precision": 0.7625, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.7777777777777777, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6813186813186813, - "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7241379310344828, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.8983606557377047, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.845679012345679, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8070175438596492, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3042037785053253, - "eval_overall_accuracy": 0.9592194145609206, - "eval_overall_f1": 0.8286516853932584, - "eval_overall_precision": 0.7908847184986595, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9162, - "eval_samples_per_second": 204.094, - "eval_steps_per_second": 3.274, + "eval_TIME_precision": 0.7419354838709677, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3218757212162018, + "eval_overall_accuracy": 0.9599699774831123, + "eval_overall_f1": 0.828060522696011, + "eval_overall_precision": 0.7757731958762887, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3703, + "eval_samples_per_second": 504.962, + "eval_steps_per_second": 8.101, "step": 2968 }, { "epoch": 29.0, - "grad_norm": 5.308003902435303, + "grad_norm": 0.021353095769882202, "learning_rate": 3.55e-05, - "loss": 0.0034, + "loss": 0.0028, "step": 3074 }, { "epoch": 29.0, - "eval_LOCATION_f1": 0.8413793103448277, + "eval_LOCATION_f1": 0.8057553956834532, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8356164383561644, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7975460122699386, + "eval_LOCATION_precision": 0.835820895522388, + "eval_LOCATION_recall": 0.7777777777777778, + "eval_ORGANIZATION_f1": 0.8076923076923077, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7386363636363636, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9146757679180888, + "eval_ORGANIZATION_precision": 0.7777777777777778, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.8800000000000001, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8933333333333333, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8407643312101911, + "eval_PERSON_recall": 0.9230769230769231, + "eval_QUANTITY_f1": 0.6415094339622641, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.5666666666666667, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3232886493206024, - "eval_overall_accuracy": 0.9609707280460346, - "eval_overall_f1": 0.851063829787234, - "eval_overall_precision": 0.819672131147541, - "eval_overall_recall": 0.8849557522123894, - "eval_runtime": 0.9849, - "eval_samples_per_second": 189.867, - "eval_steps_per_second": 3.046, + "eval_loss": 0.35811248421669006, + "eval_overall_accuracy": 0.9562171628721541, + "eval_overall_f1": 0.8273894436519258, + "eval_overall_precision": 0.8011049723756906, + "eval_overall_recall": 0.855457227138643, + "eval_runtime": 0.3735, + "eval_samples_per_second": 500.719, + "eval_steps_per_second": 8.033, "step": 3074 }, { "epoch": 30.0, - "grad_norm": 0.0021002369467169046, + "grad_norm": 0.0009840091224759817, "learning_rate": 3.5e-05, - "loss": 0.0027, + "loss": 0.0031, "step": 3180 }, { "epoch": 30.0, - "eval_LOCATION_f1": 0.8356164383561645, + "eval_LOCATION_f1": 0.8, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8243243243243243, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.732919254658385, + "eval_LOCATION_precision": 0.7692307692307693, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.830188679245283, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.686046511627907, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.9121621621621621, + "eval_ORGANIZATION_precision": 0.7857142857142857, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9163879598662208, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8823529411764706, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7272727272727273, + "eval_PERSON_precision": 0.8782051282051282, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.625, - "eval_QUANTITY_recall": 0.8695652173913043, - "eval_TIME_f1": 0.7586206896551724, + "eval_QUANTITY_precision": 0.6551724137931034, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.6875, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.34255892038345337, - "eval_overall_accuracy": 0.9577182887165374, - "eval_overall_f1": 0.8296089385474861, - "eval_overall_precision": 0.7877984084880637, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9051, - "eval_samples_per_second": 206.618, - "eval_steps_per_second": 3.315, + "eval_loss": 0.3134097754955292, + "eval_overall_accuracy": 0.9619714786089567, + "eval_overall_f1": 0.8503496503496504, + "eval_overall_precision": 0.8085106382978723, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3742, + "eval_samples_per_second": 499.673, + "eval_steps_per_second": 8.016, "step": 3180 }, { "epoch": 31.0, - "grad_norm": 0.00587083725258708, + "grad_norm": 1.1641939878463745, "learning_rate": 3.45e-05, - "loss": 0.0049, + "loss": 0.0035, "step": 3286 }, { "epoch": 31.0, - "eval_LOCATION_f1": 0.8219178082191781, + "eval_LOCATION_f1": 0.751592356687898, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8108108108108109, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7305389221556886, + "eval_LOCATION_precision": 0.6941176470588235, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8387096774193549, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6630434782608695, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.9152542372881356, + "eval_ORGANIZATION_precision": 0.8125, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.9133333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8881578947368421, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8726114649681529, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6792452830188679, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, + "eval_QUANTITY_precision": 0.6, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.7857142857142857, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.31757453083992004, - "eval_overall_accuracy": 0.9584688516387291, - "eval_overall_f1": 0.8314606741573034, - "eval_overall_precision": 0.7935656836461126, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9036, - "eval_samples_per_second": 206.959, - "eval_steps_per_second": 3.32, + "eval_loss": 0.31325557827949524, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8372739916550764, + "eval_overall_precision": 0.7921052631578948, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3607, + "eval_samples_per_second": 518.406, + "eval_steps_per_second": 8.317, "step": 3286 }, { "epoch": 32.0, - "grad_norm": 0.03227420523762703, + "grad_norm": 0.0497661791741848, "learning_rate": 3.4000000000000007e-05, - "loss": 0.0027, + "loss": 0.0028, "step": 3392 }, { "epoch": 32.0, - "eval_LOCATION_f1": 0.8243243243243243, + "eval_LOCATION_f1": 0.802721088435374, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8026315789473685, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7625, + "eval_LOCATION_precision": 0.7866666666666666, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8214285714285714, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7176470588235294, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.9215017064846417, + "eval_ORGANIZATION_precision": 0.7419354838709677, + "eval_ORGANIZATION_recall": 0.92, + "eval_PERSON_f1": 0.9, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.9, + "eval_PERSON_precision": 0.8598726114649682, "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_QUANTITY_f1": 0.7755102040816326, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8846153846153846, + "eval_QUANTITY_precision": 0.7307692307692307, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8846153846153846, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.29548466205596924, - "eval_overall_accuracy": 0.9627220415311484, - "eval_overall_f1": 0.8473609129814551, - "eval_overall_precision": 0.8204419889502762, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9155, - "eval_samples_per_second": 204.26, - "eval_steps_per_second": 3.277, + "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.32023829221725464, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.8491620111731844, + "eval_overall_precision": 0.8063660477453581, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3634, + "eval_samples_per_second": 514.539, + "eval_steps_per_second": 8.255, "step": 3392 }, { "epoch": 33.0, - "grad_norm": 0.016770539805293083, + "grad_norm": 0.0015366391744464636, "learning_rate": 3.35e-05, - "loss": 0.0017, + "loss": 0.0013, "step": 3498 }, { "epoch": 33.0, - "eval_LOCATION_f1": 0.8, + "eval_LOCATION_f1": 0.8219178082191781, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7692307692307693, + "eval_LOCATION_precision": 0.8108108108108109, "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7974683544303797, + "eval_ORGANIZATION_f1": 0.7898089171974522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7590361445783133, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9054054054054055, + "eval_ORGANIZATION_precision": 0.7560975609756098, + "eval_ORGANIZATION_recall": 0.8266666666666667, + "eval_PERSON_f1": 0.8926174496644295, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8758169934640523, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6666666666666667, + "eval_PERSON_precision": 0.8580645161290322, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.6923076923076923, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.5806451612903226, + "eval_QUANTITY_precision": 0.6206896551724138, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.8461538461538461, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3393411338329315, - "eval_overall_accuracy": 0.9592194145609206, - "eval_overall_f1": 0.8354430379746836, - "eval_overall_precision": 0.7983870967741935, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9082, - "eval_samples_per_second": 205.908, - "eval_steps_per_second": 3.303, + "eval_loss": 0.3315739035606384, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.8368794326241135, + "eval_overall_precision": 0.8060109289617486, + "eval_overall_recall": 0.8702064896755162, + "eval_runtime": 0.3575, + "eval_samples_per_second": 523.126, + "eval_steps_per_second": 8.392, "step": 3498 }, { "epoch": 34.0, - "grad_norm": 0.003996172454208136, + "grad_norm": 0.05237346515059471, "learning_rate": 3.3e-05, - "loss": 0.0017, + "loss": 0.0018, "step": 3604 }, { "epoch": 34.0, - "eval_LOCATION_f1": 0.8243243243243243, + "eval_LOCATION_f1": 0.8571428571428572, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8026315789473685, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8227848101265823, + "eval_LOCATION_precision": 0.84, + "eval_LOCATION_recall": 0.875, + "eval_ORGANIZATION_f1": 0.8048780487804879, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7831325301204819, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.910958904109589, + "eval_ORGANIZATION_precision": 0.7415730337078652, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8970099667774087, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8926174496644296, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7555555555555555, + "eval_PERSON_precision": 0.8544303797468354, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.7169811320754716, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7727272727272727, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.6333333333333333, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3279131352901459, - "eval_overall_accuracy": 0.9644733550162622, - "eval_overall_f1": 0.8575539568345324, - "eval_overall_precision": 0.8370786516853933, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.9098, - "eval_samples_per_second": 205.55, - "eval_steps_per_second": 3.298, + "eval_loss": 0.36692577600479126, + "eval_overall_accuracy": 0.9622216662496873, + "eval_overall_f1": 0.8495821727019498, + "eval_overall_precision": 0.8047493403693932, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3644, + "eval_samples_per_second": 513.118, + "eval_steps_per_second": 8.232, "step": 3604 }, { "epoch": 35.0, - "grad_norm": 0.0200356375426054, + "grad_norm": 0.002315772697329521, "learning_rate": 3.2500000000000004e-05, - "loss": 0.002, + "loss": 0.0022, "step": 3710 }, { "epoch": 35.0, - "eval_LOCATION_f1": 0.8108108108108109, + "eval_LOCATION_f1": 0.8378378378378377, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7894736842105263, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.8, + "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7333333333333333, + "eval_ORGANIZATION_precision": 0.7674418604651163, "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.9016949152542374, + "eval_PERSON_f1": 0.91156462585034, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.875, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.6923076923076923, + "eval_PERSON_precision": 0.8874172185430463, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6206896551724138, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.6551724137931034, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.9230769230769231, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.33025962114334106, - "eval_overall_accuracy": 0.9597197898423818, - "eval_overall_f1": 0.8398876404494382, - "eval_overall_precision": 0.8016085790884718, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9088, - "eval_samples_per_second": 205.758, - "eval_steps_per_second": 3.301, + "eval_TIME_precision": 0.9230769230769231, + "eval_TIME_recall": 0.9230769230769231, + "eval_loss": 0.3101156949996948, + "eval_overall_accuracy": 0.9649737302977233, + "eval_overall_f1": 0.8628005657708627, + "eval_overall_precision": 0.8288043478260869, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3638, + "eval_samples_per_second": 514.005, + "eval_steps_per_second": 8.246, "step": 3710 }, { "epoch": 36.0, - "grad_norm": 0.04783305525779724, + "grad_norm": 0.0013785570627078414, "learning_rate": 3.2000000000000005e-05, - "loss": 0.0012, + "loss": 0.002, "step": 3816 }, { "epoch": 36.0, - "eval_LOCATION_f1": 0.8344370860927153, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7974683544303798, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7875000000000001, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7411764705882353, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9010238907849829, + "eval_ORGANIZATION_precision": 0.7674418604651163, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8933333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.88, - "eval_PERSON_recall": 0.9230769230769231, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8535031847133758, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.6923076923076923, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_precision": 0.6206896551724138, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.33322834968566895, - "eval_overall_accuracy": 0.9637227920940705, - "eval_overall_f1": 0.8441926345609065, - "eval_overall_precision": 0.8119891008174387, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.9959, - "eval_samples_per_second": 187.774, - "eval_steps_per_second": 3.012, + "eval_loss": 0.3521296977996826, + "eval_overall_accuracy": 0.9592194145609206, + "eval_overall_f1": 0.8435754189944135, + "eval_overall_precision": 0.8010610079575596, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3743, + "eval_samples_per_second": 499.566, + "eval_steps_per_second": 8.014, "step": 3816 }, { "epoch": 37.0, - "grad_norm": 0.0011351928114891052, + "grad_norm": 0.002585264155641198, "learning_rate": 3.15e-05, - "loss": 0.001, + "loss": 0.0019, "step": 3922 }, { "epoch": 37.0, - "eval_LOCATION_f1": 0.816326530612245, + "eval_LOCATION_f1": 0.8211920529801324, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7710843373493976, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8148148148148148, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7032967032967034, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.910958904109589, + "eval_ORGANIZATION_precision": 0.7586206896551724, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8956228956228957, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8926174496644296, + "eval_PERSON_precision": 0.8636363636363636, "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.72, + "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8235294117647058, + "eval_QUANTITY_precision": 0.6551724137931034, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.84, - "eval_TIME_recall": 0.8076923076923077, - "eval_loss": 0.33810704946517944, - "eval_overall_accuracy": 0.9604703527645734, - "eval_overall_f1": 0.8385269121813032, - "eval_overall_precision": 0.8065395095367848, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9038, - "eval_samples_per_second": 206.905, - "eval_steps_per_second": 3.319, + "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.34240958094596863, + "eval_overall_accuracy": 0.9587190392794596, + "eval_overall_f1": 0.8435754189944135, + "eval_overall_precision": 0.8010610079575596, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3616, + "eval_samples_per_second": 517.085, + "eval_steps_per_second": 8.295, "step": 3922 }, { "epoch": 38.0, - "grad_norm": 12.923250198364258, + "grad_norm": 4.613161563873291, "learning_rate": 3.1e-05, - "loss": 0.0021, + "loss": 0.0012, "step": 4028 }, { "epoch": 38.0, - "eval_LOCATION_f1": 0.816326530612245, + "eval_LOCATION_f1": 0.8104575163398693, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7721518987341773, + "eval_LOCATION_precision": 0.7654320987654321, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.825, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7349397590361446, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.9158249158249158, + "eval_ORGANIZATION_precision": 0.7764705882352941, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8926174496644295, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8831168831168831, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.7058823529411765, + "eval_PERSON_precision": 0.8580645161290322, + "eval_PERSON_recall": 0.9300699300699301, + "eval_QUANTITY_f1": 0.6923076923076923, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_precision": 0.6206896551724138, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.7857142857142857, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3416782021522522, - "eval_overall_accuracy": 0.960720540405304, - "eval_overall_f1": 0.8425531914893618, - "eval_overall_precision": 0.8114754098360656, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9052, - "eval_samples_per_second": 206.591, - "eval_steps_per_second": 3.314, + "eval_loss": 0.3519201874732971, + "eval_overall_accuracy": 0.957968476357268, + "eval_overall_f1": 0.8396094839609484, + "eval_overall_precision": 0.7962962962962963, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3618, + "eval_samples_per_second": 516.793, + "eval_steps_per_second": 8.291, "step": 4028 }, { "epoch": 39.0, - "grad_norm": 0.05247601494193077, + "grad_norm": 0.0017957445234060287, "learning_rate": 3.05e-05, - "loss": 0.0026, + "loss": 0.0012, "step": 4134 }, { "epoch": 39.0, - "eval_LOCATION_f1": 0.7891156462585034, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7733333333333333, - "eval_LOCATION_recall": 0.8055555555555556, - "eval_ORGANIZATION_f1": 0.7974683544303797, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.825, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7590361445783133, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.8963210702341137, + "eval_ORGANIZATION_precision": 0.7764705882352941, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8926174496644295, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8589743589743589, - "eval_PERSON_recall": 0.9370629370629371, + "eval_PERSON_precision": 0.8580645161290322, + "eval_PERSON_recall": 0.9300699300699301, "eval_QUANTITY_f1": 0.7169811320754716, "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6333333333333333, "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.830188679245283, + "eval_TIME_f1": 0.7777777777777779, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.37723416090011597, + "eval_TIME_precision": 0.75, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.3654736876487732, "eval_overall_accuracy": 0.9587190392794596, - "eval_overall_f1": 0.8338028169014085, - "eval_overall_precision": 0.7978436657681941, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9093, - "eval_samples_per_second": 205.644, - "eval_steps_per_second": 3.299, + "eval_overall_f1": 0.8419580419580419, + "eval_overall_precision": 0.800531914893617, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3617, + "eval_samples_per_second": 517.022, + "eval_steps_per_second": 8.294, "step": 4134 }, { "epoch": 40.0, - "grad_norm": 0.0038078853394836187, + "grad_norm": 0.001596860121935606, "learning_rate": 3e-05, - "loss": 0.0021, + "loss": 0.001, "step": 4240 }, { "epoch": 40.0, - "eval_LOCATION_f1": 0.7891156462585034, + "eval_LOCATION_f1": 0.8, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7733333333333333, + "eval_LOCATION_precision": 0.7945205479452054, "eval_LOCATION_recall": 0.8055555555555556, - "eval_ORGANIZATION_f1": 0.7894736842105264, + "eval_ORGANIZATION_f1": 0.7948717948717949, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7792207792207793, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.9146757679180888, + "eval_ORGANIZATION_precision": 0.7654320987654321, + "eval_ORGANIZATION_recall": 0.8266666666666667, + "eval_PERSON_f1": 0.9090909090909092, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8933333333333333, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7272727272727272, + "eval_PERSON_precision": 0.8766233766233766, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.7755102040816326, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7619047619047619, - "eval_QUANTITY_recall": 0.6956521739130435, - "eval_TIME_f1": 0.8, + "eval_QUANTITY_precision": 0.7307692307692307, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7586206896551724, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.331974595785141, - "eval_overall_accuracy": 0.960720540405304, - "eval_overall_f1": 0.8393632416787264, - "eval_overall_precision": 0.8238636363636364, - "eval_overall_recall": 0.855457227138643, - "eval_runtime": 0.902, - "eval_samples_per_second": 207.323, - "eval_steps_per_second": 3.326, + "eval_loss": 0.34077638387680054, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.8457142857142858, + "eval_overall_precision": 0.8199445983379502, + "eval_overall_recall": 0.8731563421828908, + "eval_runtime": 0.3642, + "eval_samples_per_second": 513.453, + "eval_steps_per_second": 8.237, "step": 4240 }, { "epoch": 41.0, - "grad_norm": 0.0014568913029506803, + "grad_norm": 0.0034711004700511694, "learning_rate": 2.95e-05, - "loss": 0.0013, + "loss": 0.0025, "step": 4346 }, { "epoch": 41.0, - "eval_LOCATION_f1": 0.8187919463087249, + "eval_LOCATION_f1": 0.8157894736842106, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7922077922077922, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7425149700598803, + "eval_LOCATION_precision": 0.775, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.7898089171974522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6739130434782609, + "eval_ORGANIZATION_precision": 0.7560975609756098, "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.9090909090909092, + "eval_PERSON_f1": 0.912751677852349, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8766233766233766, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.76, + "eval_PERSON_precision": 0.8774193548387097, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7037037037037037, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8627450980392156, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8, "eval_TIME_number": 26, - "eval_TIME_precision": 0.88, + "eval_TIME_precision": 0.7586206896551724, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3371780812740326, - "eval_overall_accuracy": 0.9619714786089567, - "eval_overall_f1": 0.8375350140056023, - "eval_overall_precision": 0.7973333333333333, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9175, - "eval_samples_per_second": 203.81, - "eval_steps_per_second": 3.27, + "eval_loss": 0.340951144695282, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8415147265077138, + "eval_overall_precision": 0.8021390374331551, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.361, + "eval_samples_per_second": 518.055, + "eval_steps_per_second": 8.311, "step": 4346 }, { "epoch": 42.0, - "grad_norm": 0.01008665468543768, + "grad_norm": 0.02040654979646206, "learning_rate": 2.9e-05, - "loss": 0.0013, + "loss": 0.0012, "step": 4452 }, { "epoch": 42.0, - "eval_LOCATION_f1": 0.7837837837837838, + "eval_LOCATION_f1": 0.8322147651006712, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7631578947368421, - "eval_LOCATION_recall": 0.8055555555555556, - "eval_ORGANIZATION_f1": 0.779874213836478, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8025477707006369, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7380952380952381, - "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.9023569023569024, + "eval_ORGANIZATION_precision": 0.7682926829268293, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8701298701298701, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.76, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.7037037037037037, + "eval_QUANTITY_recall": 0.8260869565217391, + "eval_TIME_f1": 0.8076923076923077, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3549456298351288, - "eval_overall_accuracy": 0.9602201651238429, - "eval_overall_f1": 0.8333333333333334, - "eval_overall_precision": 0.7994579945799458, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9061, - "eval_samples_per_second": 206.389, - "eval_steps_per_second": 3.311, + "eval_TIME_precision": 0.8076923076923077, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.3626217842102051, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8486562942008488, + "eval_overall_precision": 0.8152173913043478, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3631, + "eval_samples_per_second": 515.064, + "eval_steps_per_second": 8.263, "step": 4452 }, { "epoch": 43.0, - "grad_norm": 0.0012029644567519426, + "grad_norm": 0.001348801888525486, "learning_rate": 2.8499999999999998e-05, - "loss": 0.0003, + "loss": 0.003, "step": 4558 }, { "epoch": 43.0, - "eval_LOCATION_f1": 0.8, + "eval_LOCATION_f1": 0.816326530612245, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7469879518072289, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7682926829268292, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.7951807228915663, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7078651685393258, - "eval_ORGANIZATION_recall": 0.84, + "eval_ORGANIZATION_precision": 0.7252747252747253, + "eval_ORGANIZATION_recall": 0.88, "eval_PERSON_f1": 0.9060402684563759, "eval_PERSON_number": 143, "eval_PERSON_precision": 0.8709677419354839, @@ -1671,25 +1671,25 @@ "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6428571428571429, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8846153846153846, + "eval_TIME_f1": 0.7924528301886792, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8846153846153846, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.36216506361961365, - "eval_overall_accuracy": 0.9604703527645734, - "eval_overall_f1": 0.836111111111111, - "eval_overall_precision": 0.7900262467191601, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9049, - "eval_samples_per_second": 206.658, - "eval_steps_per_second": 3.315, + "eval_TIME_precision": 0.7777777777777778, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.3544086813926697, + "eval_overall_accuracy": 0.9587190392794596, + "eval_overall_f1": 0.8391608391608392, + "eval_overall_precision": 0.7978723404255319, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3593, + "eval_samples_per_second": 520.389, + "eval_steps_per_second": 8.348, "step": 4558 }, { "epoch": 44.0, - "grad_norm": 0.04154082387685776, + "grad_norm": 4.9029974937438965, "learning_rate": 2.8000000000000003e-05, - "loss": 0.0032, + "loss": 0.0027, "step": 4664 }, { @@ -1698,13 +1698,13 @@ "eval_LOCATION_number": 72, "eval_LOCATION_precision": 0.7625, "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7619047619047621, + "eval_ORGANIZATION_f1": 0.7999999999999999, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6881720430107527, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9060402684563759, + "eval_ORGANIZATION_precision": 0.7157894736842105, + "eval_ORGANIZATION_recall": 0.9066666666666666, + "eval_PERSON_f1": 0.8940397350993377, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8709677419354839, + "eval_PERSON_precision": 0.8490566037735849, "eval_PERSON_recall": 0.9440559440559441, "eval_QUANTITY_f1": 0.7307692307692308, "eval_QUANTITY_number": 23, @@ -1714,111 +1714,111 @@ "eval_TIME_number": 26, "eval_TIME_precision": 0.7857142857142857, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.35855501890182495, - "eval_overall_accuracy": 0.957968476357268, - "eval_overall_f1": 0.8314917127071824, - "eval_overall_precision": 0.7818181818181819, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9082, - "eval_samples_per_second": 205.905, - "eval_steps_per_second": 3.303, + "eval_loss": 0.3703295886516571, + "eval_overall_accuracy": 0.9587190392794596, + "eval_overall_f1": 0.8356164383561644, + "eval_overall_precision": 0.7800511508951407, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3623, + "eval_samples_per_second": 516.147, + "eval_steps_per_second": 8.28, "step": 4664 }, { "epoch": 45.0, - "grad_norm": 0.002047579735517502, + "grad_norm": 0.2568044662475586, "learning_rate": 2.7500000000000004e-05, - "loss": 0.001, + "loss": 0.0015, "step": 4770 }, { "epoch": 45.0, - "eval_LOCATION_f1": 0.8133333333333332, + "eval_LOCATION_f1": 0.8108108108108109, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.782051282051282, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.7950310559006211, + "eval_LOCATION_precision": 0.7894736842105263, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8354430379746836, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7441860465116279, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9158249158249158, + "eval_ORGANIZATION_precision": 0.7951807228915663, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8910891089108911, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8831168831168831, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.7307692307692308, + "eval_PERSON_precision": 0.84375, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6551724137931034, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8076923076923077, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.36997854709625244, - "eval_overall_accuracy": 0.9619714786089567, - "eval_overall_f1": 0.847124824684432, - "eval_overall_precision": 0.8074866310160428, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9062, - "eval_samples_per_second": 206.36, - "eval_steps_per_second": 3.311, + "eval_TIME_precision": 0.8076923076923077, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.34526732563972473, + "eval_overall_accuracy": 0.9604703527645734, + "eval_overall_f1": 0.8438818565400844, + "eval_overall_precision": 0.8064516129032258, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3579, + "eval_samples_per_second": 522.551, + "eval_steps_per_second": 8.383, "step": 4770 }, { "epoch": 46.0, - "grad_norm": 0.0037038603331893682, + "grad_norm": 0.0012396867386996746, "learning_rate": 2.7000000000000002e-05, - "loss": 0.0034, + "loss": 0.0018, "step": 4876 }, { "epoch": 46.0, - "eval_LOCATION_f1": 0.8333333333333334, + "eval_LOCATION_f1": 0.816326530612245, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8333333333333334, + "eval_LOCATION_precision": 0.8, "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7924528301886793, + "eval_ORGANIZATION_f1": 0.8170731707317072, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.75, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9121621621621621, + "eval_ORGANIZATION_precision": 0.7528089887640449, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9060402684563759, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8823529411764706, + "eval_PERSON_precision": 0.8709677419354839, "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7307692307692308, + "eval_QUANTITY_f1": 0.693877551020408, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6551724137931034, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6538461538461539, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8235294117647058, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.33895859122276306, - "eval_overall_accuracy": 0.9602201651238429, - "eval_overall_f1": 0.8494318181818181, - "eval_overall_precision": 0.8191780821917808, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9121, - "eval_samples_per_second": 205.028, - "eval_steps_per_second": 3.289, + "eval_TIME_precision": 0.84, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.3448871970176697, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.846262341325811, + "eval_overall_precision": 0.8108108108108109, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3635, + "eval_samples_per_second": 514.485, + "eval_steps_per_second": 8.254, "step": 4876 }, { "epoch": 47.0, - "grad_norm": 0.0005634190747514367, + "grad_norm": 1.6906110048294067, "learning_rate": 2.6500000000000004e-05, - "loss": 0.0018, + "loss": 0.0028, "step": 4982 }, { "epoch": 47.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.8137931034482759, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7564102564102564, + "eval_LOCATION_precision": 0.8082191780821918, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8148148148148148, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7283950617283951, - "eval_ORGANIZATION_recall": 0.7866666666666666, + "eval_ORGANIZATION_precision": 0.7586206896551724, + "eval_ORGANIZATION_recall": 0.88, "eval_PERSON_f1": 0.9060402684563759, "eval_PERSON_number": 143, "eval_PERSON_precision": 0.8709677419354839, @@ -1827,236 +1827,236 @@ "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3375060260295868, - "eval_overall_accuracy": 0.9632224168126094, - "eval_overall_f1": 0.8373408769448375, - "eval_overall_precision": 0.8043478260869565, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9199, - "eval_samples_per_second": 203.274, - "eval_steps_per_second": 3.261, + "eval_loss": 0.37111541628837585, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.8474576271186441, + "eval_overall_precision": 0.8130081300813008, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3591, + "eval_samples_per_second": 520.775, + "eval_steps_per_second": 8.355, "step": 4982 }, { "epoch": 48.0, - "grad_norm": 0.0028315193485468626, + "grad_norm": 0.029315035790205002, "learning_rate": 2.6000000000000002e-05, - "loss": 0.0017, + "loss": 0.0014, "step": 5088 }, { "epoch": 48.0, - "eval_LOCATION_f1": 0.802721088435374, + "eval_LOCATION_f1": 0.8251748251748252, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7866666666666666, + "eval_LOCATION_precision": 0.8309859154929577, "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7974683544303797, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7590361445783133, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9183673469387755, + "eval_ORGANIZATION_precision": 0.7674418604651163, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9090909090909092, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8940397350993378, + "eval_PERSON_precision": 0.8766233766233766, "eval_PERSON_recall": 0.9440559440559441, "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.33102044463157654, + "eval_loss": 0.3521229922771454, "eval_overall_accuracy": 0.9632224168126094, - "eval_overall_f1": 0.8473609129814551, - "eval_overall_precision": 0.8204419889502762, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9017, - "eval_samples_per_second": 207.378, - "eval_steps_per_second": 3.327, + "eval_overall_f1": 0.8522727272727273, + "eval_overall_precision": 0.821917808219178, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3694, + "eval_samples_per_second": 506.192, + "eval_steps_per_second": 8.121, "step": 5088 }, { "epoch": 49.0, - "grad_norm": 0.009939000010490417, + "grad_norm": 0.02477916330099106, "learning_rate": 2.5500000000000003e-05, - "loss": 0.0014, + "loss": 0.0023, "step": 5194 }, { "epoch": 49.0, - "eval_LOCATION_f1": 0.8299319727891157, + "eval_LOCATION_f1": 0.8187919463087249, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8133333333333334, + "eval_LOCATION_precision": 0.7922077922077922, "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8074534161490684, + "eval_ORGANIZATION_f1": 0.8129032258064516, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7558139534883721, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9121621621621621, + "eval_ORGANIZATION_precision": 0.7875, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.8874172185430464, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8823529411764706, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7450980392156864, + "eval_PERSON_precision": 0.8427672955974843, + "eval_PERSON_recall": 0.9370629370629371, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6785714285714286, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8627450980392156, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.88, + "eval_TIME_precision": 0.7857142857142857, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.32123759388923645, - "eval_overall_accuracy": 0.9624718538904178, - "eval_overall_f1": 0.8555240793201133, - "eval_overall_precision": 0.8228882833787466, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9187, - "eval_samples_per_second": 203.539, - "eval_steps_per_second": 3.265, + "eval_loss": 0.36508724093437195, + "eval_overall_accuracy": 0.9592194145609206, + "eval_overall_f1": 0.8394366197183099, + "eval_overall_precision": 0.8032345013477089, + "eval_overall_recall": 0.8790560471976401, + "eval_runtime": 0.3605, + "eval_samples_per_second": 518.761, + "eval_steps_per_second": 8.322, "step": 5194 }, { "epoch": 50.0, - "grad_norm": 0.7588798403739929, + "grad_norm": 0.017422163859009743, "learning_rate": 2.5e-05, - "loss": 0.0011, + "loss": 0.0027, "step": 5300 }, { "epoch": 50.0, - "eval_LOCATION_f1": 0.802721088435374, + "eval_LOCATION_f1": 0.8187919463087249, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7866666666666666, - "eval_LOCATION_recall": 0.8194444444444444, - "eval_ORGANIZATION_f1": 0.7924528301886793, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.75, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9152542372881356, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9096989966555185, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8881578947368421, - "eval_PERSON_recall": 0.9440559440559441, + "eval_PERSON_precision": 0.8717948717948718, + "eval_PERSON_recall": 0.951048951048951, "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_TIME_f1": 0.9230769230769231, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3109734356403351, - "eval_overall_accuracy": 0.9622216662496873, - "eval_overall_f1": 0.8453900709219858, - "eval_overall_precision": 0.8142076502732241, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.9093, - "eval_samples_per_second": 205.655, - "eval_steps_per_second": 3.299, + "eval_TIME_precision": 0.9230769230769231, + "eval_TIME_recall": 0.9230769230769231, + "eval_loss": 0.3326910436153412, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8607594936708861, + "eval_overall_precision": 0.8225806451612904, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.3722, + "eval_samples_per_second": 502.368, + "eval_steps_per_second": 8.059, "step": 5300 }, { "epoch": 51.0, - "grad_norm": 0.0009810138726606965, + "grad_norm": 0.00688315462321043, "learning_rate": 2.45e-05, - "loss": 0.0005, + "loss": 0.0009, "step": 5406 }, { "epoch": 51.0, - "eval_LOCATION_f1": 0.8299319727891157, + "eval_LOCATION_f1": 0.8137931034482759, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8133333333333334, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.825, + "eval_LOCATION_precision": 0.8082191780821918, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8387096774193549, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7764705882352941, - "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.9246575342465754, + "eval_ORGANIZATION_precision": 0.8125, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.912751677852349, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.9060402684563759, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7058823529411765, + "eval_PERSON_precision": 0.8774193548387097, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.7083333333333334, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6428571428571429, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.68, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.9230769230769231, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.30816784501075745, + "eval_TIME_precision": 0.9230769230769231, + "eval_TIME_recall": 0.9230769230769231, + "eval_loss": 0.33154383301734924, "eval_overall_accuracy": 0.9659744808606455, - "eval_overall_f1": 0.8607954545454546, - "eval_overall_precision": 0.8301369863013699, - "eval_overall_recall": 0.8938053097345132, - "eval_runtime": 0.914, - "eval_samples_per_second": 204.601, - "eval_steps_per_second": 3.282, + "eval_overall_f1": 0.8624641833810888, + "eval_overall_precision": 0.8384401114206128, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.358, + "eval_samples_per_second": 522.281, + "eval_steps_per_second": 8.379, "step": 5406 }, { "epoch": 52.0, - "grad_norm": 0.0026644645258784294, + "grad_norm": 0.0005034964415244758, "learning_rate": 2.4e-05, - "loss": 0.0003, + "loss": 0.001, "step": 5512 }, { "epoch": 52.0, - "eval_LOCATION_f1": 0.8157894736842106, + "eval_LOCATION_f1": 0.7945205479452055, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.775, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8024691358024691, + "eval_LOCATION_precision": 0.7837837837837838, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7471264367816092, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9183673469387755, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.912751677852349, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8940397350993378, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.693877551020408, + "eval_PERSON_precision": 0.8774193548387097, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6538461538461539, + "eval_QUANTITY_precision": 0.6296296296296297, "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8518518518518519, + "eval_TIME_f1": 0.9056603773584906, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3176332414150238, - "eval_overall_accuracy": 0.9647235426569928, - "eval_overall_f1": 0.849507735583685, - "eval_overall_precision": 0.8118279569892473, + "eval_TIME_precision": 0.8888888888888888, + "eval_TIME_recall": 0.9230769230769231, + "eval_loss": 0.3466086983680725, + "eval_overall_accuracy": 0.960720540405304, + "eval_overall_f1": 0.8531073446327683, + "eval_overall_precision": 0.8184281842818428, "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9767, - "eval_samples_per_second": 191.467, - "eval_steps_per_second": 3.072, + "eval_runtime": 0.3715, + "eval_samples_per_second": 503.309, + "eval_steps_per_second": 8.074, "step": 5512 }, { "epoch": 53.0, - "grad_norm": 0.0603233277797699, + "grad_norm": 0.021436743438243866, "learning_rate": 2.35e-05, - "loss": 0.0004, + "loss": 0.0014, "step": 5618 }, { "epoch": 53.0, - "eval_LOCATION_f1": 0.8356164383561645, + "eval_LOCATION_f1": 0.8108108108108109, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8243243243243243, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8121212121212122, + "eval_LOCATION_precision": 0.7894736842105263, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8271604938271604, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7444444444444445, + "eval_ORGANIZATION_precision": 0.7701149425287356, "eval_ORGANIZATION_recall": 0.8933333333333333, - "eval_PERSON_f1": 0.9215017064846417, + "eval_PERSON_f1": 0.9133333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.9, - "eval_PERSON_recall": 0.9440559440559441, + "eval_PERSON_precision": 0.8726114649681529, + "eval_PERSON_recall": 0.958041958041958, "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, "eval_QUANTITY_precision": 0.6296296296296297, @@ -2065,333 +2065,333 @@ "eval_TIME_number": 26, "eval_TIME_precision": 0.8214285714285714, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3314068019390106, - "eval_overall_accuracy": 0.9647235426569928, - "eval_overall_f1": 0.8559322033898304, - "eval_overall_precision": 0.8211382113821138, - "eval_overall_recall": 0.8938053097345132, - "eval_runtime": 0.9085, - "eval_samples_per_second": 205.826, - "eval_steps_per_second": 3.302, + "eval_loss": 0.32782742381095886, + "eval_overall_accuracy": 0.9609707280460346, + "eval_overall_f1": 0.8515406162464986, + "eval_overall_precision": 0.8106666666666666, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3599, + "eval_samples_per_second": 519.612, + "eval_steps_per_second": 8.336, "step": 5618 }, { "epoch": 54.0, - "grad_norm": 0.00044817946036346257, + "grad_norm": 0.0009922637837007642, "learning_rate": 2.3000000000000003e-05, - "loss": 0.0002, + "loss": 0.0007, "step": 5724 }, { "epoch": 54.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.8289473684210527, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8050314465408804, + "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_recall": 0.875, + "eval_ORGANIZATION_f1": 0.7976190476190477, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7619047619047619, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.925170068027211, + "eval_ORGANIZATION_precision": 0.7204301075268817, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.8910891089108911, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.9006622516556292, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.693877551020408, + "eval_PERSON_precision": 0.84375, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6538461538461539, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3241828978061676, - "eval_overall_accuracy": 0.9657242932199149, - "eval_overall_f1": 0.8543140028288544, - "eval_overall_precision": 0.8206521739130435, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9117, - "eval_samples_per_second": 205.114, - "eval_steps_per_second": 3.291, + "eval_loss": 0.36466336250305176, + "eval_overall_accuracy": 0.9589692269201902, + "eval_overall_f1": 0.8406593406593406, + "eval_overall_precision": 0.7866323907455013, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.36, + "eval_samples_per_second": 519.408, + "eval_steps_per_second": 8.333, "step": 5724 }, { "epoch": 55.0, - "grad_norm": 0.0010649035684764385, + "grad_norm": 0.003993849270045757, "learning_rate": 2.25e-05, - "loss": 0.0005, + "loss": 0.0017, "step": 5830 }, { "epoch": 55.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.7947019867549668, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7875000000000001, + "eval_LOCATION_precision": 0.759493670886076, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.7948717948717949, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7411764705882353, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9220338983050849, + "eval_ORGANIZATION_precision": 0.7654320987654321, + "eval_ORGANIZATION_recall": 0.8266666666666667, + "eval_PERSON_f1": 0.9072847682119206, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8947368421052632, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.68, + "eval_PERSON_precision": 0.8616352201257862, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6274509803921569, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6296296296296297, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.5714285714285714, + "eval_QUANTITY_recall": 0.6956521739130435, + "eval_TIME_f1": 0.7857142857142856, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.33954718708992004, - "eval_overall_accuracy": 0.9627220415311484, - "eval_overall_f1": 0.849507735583685, - "eval_overall_precision": 0.8118279569892473, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9361, - "eval_samples_per_second": 199.766, - "eval_steps_per_second": 3.205, + "eval_TIME_precision": 0.7333333333333333, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3635990619659424, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8296089385474861, + "eval_overall_precision": 0.7877984084880637, + "eval_overall_recall": 0.8761061946902655, + "eval_runtime": 0.3593, + "eval_samples_per_second": 520.494, + "eval_steps_per_second": 8.35, "step": 5830 }, { "epoch": 56.0, - "grad_norm": 0.0006854601670056581, + "grad_norm": 0.0005980022251605988, "learning_rate": 2.2000000000000003e-05, - "loss": 0.0001, + "loss": 0.0004, "step": 5936 }, { "epoch": 56.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.778523489932886, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7826086956521738, + "eval_LOCATION_precision": 0.7532467532467533, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.8516129032258064, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7325581395348837, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.925170068027211, + "eval_ORGANIZATION_precision": 0.825, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9042904290429041, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.9006622516556292, - "eval_PERSON_recall": 0.951048951048951, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6792452830188679, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8235294117647058, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3326149582862854, - "eval_overall_accuracy": 0.9647235426569928, - "eval_overall_f1": 0.8519040902679831, - "eval_overall_precision": 0.8162162162162162, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9055, - "eval_samples_per_second": 206.509, - "eval_steps_per_second": 3.313, + "eval_TIME_precision": 0.84, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.3733396530151367, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.8438818565400844, + "eval_overall_precision": 0.8064516129032258, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.3652, + "eval_samples_per_second": 512.007, + "eval_steps_per_second": 8.214, "step": 5936 }, { "epoch": 57.0, - "grad_norm": 0.0004171407490503043, + "grad_norm": 0.0007011191919445992, "learning_rate": 2.15e-05, - "loss": 0.0008, + "loss": 0.0009, "step": 6042 }, { "epoch": 57.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.8079470198675496, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7950310559006211, + "eval_LOCATION_precision": 0.7721518987341772, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.858974358974359, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7441860465116279, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9090909090909092, + "eval_ORGANIZATION_precision": 0.8271604938271605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.8970099667774087, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8766233766233766, + "eval_PERSON_precision": 0.8544303797468354, "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.68, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6296296296296297, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8679245283018868, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8518518518518519, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3388373553752899, - "eval_overall_accuracy": 0.9639729797348011, - "eval_overall_f1": 0.8455056179775281, - "eval_overall_precision": 0.806970509383378, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9161, - "eval_samples_per_second": 204.134, - "eval_steps_per_second": 3.275, + "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3640208840370178, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.849929873772791, + "eval_overall_precision": 0.8101604278074866, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3606, + "eval_samples_per_second": 518.592, + "eval_steps_per_second": 8.32, "step": 6042 }, { "epoch": 58.0, - "grad_norm": 0.000792400271166116, + "grad_norm": 0.0015956854913383722, "learning_rate": 2.1e-05, - "loss": 0.0005, + "loss": 0.0003, "step": 6148 }, { "epoch": 58.0, - "eval_LOCATION_f1": 0.8344370860927153, + "eval_LOCATION_f1": 0.7973856209150327, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7974683544303798, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.8121212121212122, + "eval_LOCATION_precision": 0.7530864197530864, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8220858895705522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7444444444444445, + "eval_ORGANIZATION_precision": 0.7613636363636364, "eval_ORGANIZATION_recall": 0.8933333333333333, - "eval_PERSON_f1": 0.8993288590604027, + "eval_PERSON_f1": 0.9023569023569024, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.864516129032258, + "eval_PERSON_precision": 0.8701298701298701, "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.693877551020408, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6538461538461539, - "eval_QUANTITY_recall": 0.7391304347826086, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, "eval_TIME_f1": 0.8518518518518519, "eval_TIME_number": 26, "eval_TIME_precision": 0.8214285714285714, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3484926223754883, - "eval_overall_accuracy": 0.9622216662496873, - "eval_overall_f1": 0.8479776847977685, - "eval_overall_precision": 0.8042328042328042, - "eval_overall_recall": 0.8967551622418879, - "eval_runtime": 0.9061, - "eval_samples_per_second": 206.382, - "eval_steps_per_second": 3.311, + "eval_loss": 0.3814009130001068, + "eval_overall_accuracy": 0.9582186639979985, + "eval_overall_f1": 0.8440111420612812, + "eval_overall_precision": 0.7994722955145118, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3599, + "eval_samples_per_second": 519.642, + "eval_steps_per_second": 8.337, "step": 6148 }, { "epoch": 59.0, - "grad_norm": 0.0005213640397414565, + "grad_norm": 0.0003817932156380266, "learning_rate": 2.05e-05, - "loss": 0.0004, + "loss": 0.0005, "step": 6254 }, { "epoch": 59.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.802721088435374, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7950310559006211, + "eval_LOCATION_precision": 0.7866666666666666, + "eval_LOCATION_recall": 0.8194444444444444, + "eval_ORGANIZATION_f1": 0.8441558441558442, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7441860465116279, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9215017064846417, + "eval_ORGANIZATION_precision": 0.8227848101265823, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.9096989966555185, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.9, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.6666666666666666, + "eval_PERSON_precision": 0.8717948717948718, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6071428571428571, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8627450980392156, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.888888888888889, "eval_TIME_number": 26, - "eval_TIME_precision": 0.88, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.34474921226501465, - "eval_overall_accuracy": 0.9627220415311484, - "eval_overall_f1": 0.8486562942008488, - "eval_overall_precision": 0.8152173913043478, - "eval_overall_recall": 0.8849557522123894, - "eval_runtime": 0.9086, - "eval_samples_per_second": 205.807, - "eval_steps_per_second": 3.302, + "eval_TIME_precision": 0.8571428571428571, + "eval_TIME_recall": 0.9230769230769231, + "eval_loss": 0.3739457130432129, + "eval_overall_accuracy": 0.9619714786089567, + "eval_overall_f1": 0.8567375886524823, + "eval_overall_precision": 0.825136612021858, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3596, + "eval_samples_per_second": 520.068, + "eval_steps_per_second": 8.343, "step": 6254 }, { "epoch": 60.0, - "grad_norm": 0.0031596734188497066, + "grad_norm": 0.00037515757139772177, "learning_rate": 2e-05, - "loss": 0.0017, + "loss": 0.0005, "step": 6360 }, { "epoch": 60.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.8219178082191781, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.783625730994152, + "eval_LOCATION_precision": 0.8108108108108109, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8354430379746836, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.6979166666666666, - "eval_ORGANIZATION_recall": 0.8933333333333333, - "eval_PERSON_f1": 0.8993288590604027, + "eval_ORGANIZATION_precision": 0.7951807228915663, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9006622516556291, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.864516129032258, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.6666666666666667, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.5806451612903226, + "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.8679245283018868, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3943552076816559, - "eval_overall_accuracy": 0.9559669752314236, - "eval_overall_f1": 0.8363136176066025, - "eval_overall_precision": 0.7835051546391752, - "eval_overall_recall": 0.8967551622418879, - "eval_runtime": 0.9068, - "eval_samples_per_second": 206.222, - "eval_steps_per_second": 3.308, + "eval_TIME_precision": 0.8518518518518519, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3766981363296509, + "eval_overall_accuracy": 0.9604703527645734, + "eval_overall_f1": 0.8547249647390691, + "eval_overall_precision": 0.8189189189189189, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3687, + "eval_samples_per_second": 507.159, + "eval_steps_per_second": 8.136, "step": 6360 }, { "epoch": 61.0, - "grad_norm": 0.0038910319563001394, + "grad_norm": 0.00191974185872823, "learning_rate": 1.9500000000000003e-05, - "loss": 0.0002, + "loss": 0.0008, "step": 6466 }, { "epoch": 61.0, - "eval_LOCATION_f1": 0.8322147651006712, + "eval_LOCATION_f1": 0.816326530612245, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8051948051948052, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8333333333333334, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7333333333333333, - "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.8963210702341137, + "eval_ORGANIZATION_precision": 0.8024691358024691, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.903654485049834, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8589743589743589, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.8607594936708861, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_precision": 0.6428571428571429, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_TIME_f1": 0.8214285714285715, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.38600656390190125, - "eval_overall_accuracy": 0.9599699774831123, - "eval_overall_f1": 0.8447552447552448, - "eval_overall_precision": 0.8031914893617021, + "eval_TIME_precision": 0.7666666666666667, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.36986875534057617, + "eval_overall_accuracy": 0.9622216662496873, + "eval_overall_f1": 0.849507735583685, + "eval_overall_precision": 0.8118279569892473, "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9064, - "eval_samples_per_second": 206.303, - "eval_steps_per_second": 3.31, + "eval_runtime": 0.3616, + "eval_samples_per_second": 517.178, + "eval_steps_per_second": 8.297, "step": 6466 }, { "epoch": 62.0, - "grad_norm": 0.008979488164186478, + "grad_norm": 1.7503353357315063, "learning_rate": 1.9e-05, - "loss": 0.0001, + "loss": 0.0016, "step": 6572 }, { @@ -2400,583 +2400,583 @@ "eval_LOCATION_number": 72, "eval_LOCATION_precision": 0.7922077922077922, "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8176100628930819, + "eval_ORGANIZATION_f1": 0.825, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7738095238095238, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9047619047619047, + "eval_ORGANIZATION_precision": 0.7764705882352941, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8838709677419354, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8807947019867549, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.8203592814371258, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8679245283018868, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3512948453426361, - "eval_overall_accuracy": 0.9629722291718789, - "eval_overall_f1": 0.8477951635846371, - "eval_overall_precision": 0.8186813186813187, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.9032, - "eval_samples_per_second": 207.035, - "eval_steps_per_second": 3.321, + "eval_TIME_precision": 0.8518518518518519, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.34973686933517456, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.8437067773167358, + "eval_overall_precision": 0.7942708333333334, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.364, + "eval_samples_per_second": 513.749, + "eval_steps_per_second": 8.242, "step": 6572 }, { "epoch": 63.0, - "grad_norm": 0.00276816263794899, + "grad_norm": 0.0005617731949314475, "learning_rate": 1.85e-05, - "loss": 0.0013, + "loss": 0.0004, "step": 6678 }, { "epoch": 63.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.8299319727891157, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7904191616766466, + "eval_LOCATION_precision": 0.8133333333333334, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.834355828220859, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.717391304347826, - "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.9060402684563759, + "eval_ORGANIZATION_precision": 0.7727272727272727, + "eval_ORGANIZATION_recall": 0.9066666666666666, + "eval_PERSON_f1": 0.9210526315789473, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8709677419354839, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.8695652173913043, + "eval_PERSON_recall": 0.9790209790209791, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.7796610169491526, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3540952205657959, - "eval_overall_accuracy": 0.9629722291718789, - "eval_overall_f1": 0.8440111420612812, - "eval_overall_precision": 0.7994722955145118, - "eval_overall_recall": 0.8938053097345132, - "eval_runtime": 0.9089, - "eval_samples_per_second": 205.74, - "eval_steps_per_second": 3.301, + "eval_TIME_precision": 0.696969696969697, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3623858094215393, + "eval_overall_accuracy": 0.9617212909682261, + "eval_overall_f1": 0.856353591160221, + "eval_overall_precision": 0.8051948051948052, + "eval_overall_recall": 0.9144542772861357, + "eval_runtime": 0.3601, + "eval_samples_per_second": 519.319, + "eval_steps_per_second": 8.331, "step": 6678 }, { "epoch": 64.0, - "grad_norm": 0.010789873078465462, + "grad_norm": 0.04498714208602905, "learning_rate": 1.8e-05, - "loss": 0.0007, + "loss": 0.0009, "step": 6784 }, { "epoch": 64.0, - "eval_LOCATION_f1": 0.779874213836478, + "eval_LOCATION_f1": 0.8435374149659863, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7126436781609196, + "eval_LOCATION_precision": 0.8266666666666667, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7848101265822784, + "eval_ORGANIZATION_f1": 0.8374999999999999, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7469879518072289, - "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.9090909090909092, + "eval_ORGANIZATION_precision": 0.788235294117647, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9006622516556291, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8766233766233766, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7058823529411765, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, "eval_TIME_f1": 0.8518518518518519, "eval_TIME_number": 26, "eval_TIME_precision": 0.8214285714285714, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.34456735849380493, - "eval_overall_accuracy": 0.9619714786089567, - "eval_overall_f1": 0.8344923504867873, - "eval_overall_precision": 0.7894736842105263, - "eval_overall_recall": 0.8849557522123894, - "eval_runtime": 0.9096, - "eval_samples_per_second": 205.587, - "eval_steps_per_second": 3.298, + "eval_loss": 0.37043052911758423, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8583450210378682, + "eval_overall_precision": 0.8181818181818182, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.36, + "eval_samples_per_second": 519.423, + "eval_steps_per_second": 8.333, "step": 6784 }, { "epoch": 65.0, - "grad_norm": 0.0003090931277256459, + "grad_norm": 0.0003102279151789844, "learning_rate": 1.75e-05, - "loss": 0.0008, + "loss": 0.001, "step": 6890 }, { "epoch": 65.0, - "eval_LOCATION_f1": 0.8181818181818181, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7682926829268293, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.782051282051282, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8220858895705522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7530864197530864, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7613636363636364, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9144736842105262, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.8633540372670807, + "eval_PERSON_recall": 0.972027972027972, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, + "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_TIME_f1": 0.8679245283018868, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3443866968154907, - "eval_overall_accuracy": 0.9629722291718789, - "eval_overall_f1": 0.8429985855728431, - "eval_overall_precision": 0.8097826086956522, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.906, - "eval_samples_per_second": 206.397, - "eval_steps_per_second": 3.311, + "eval_TIME_precision": 0.8518518518518519, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3582930862903595, + "eval_overall_accuracy": 0.96347260445334, + "eval_overall_f1": 0.8583333333333333, + "eval_overall_precision": 0.8110236220472441, + "eval_overall_recall": 0.911504424778761, + "eval_runtime": 0.3574, + "eval_samples_per_second": 523.156, + "eval_steps_per_second": 8.393, "step": 6890 }, { "epoch": 66.0, - "grad_norm": 0.008000018075108528, + "grad_norm": 0.023364154621958733, "learning_rate": 1.7000000000000003e-05, - "loss": 0.0007, + "loss": 0.0001, "step": 6996 }, { "epoch": 66.0, - "eval_LOCATION_f1": 0.823529411764706, + "eval_LOCATION_f1": 0.7894736842105262, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7777777777777778, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7564102564102564, + "eval_LOCATION_precision": 0.75, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8387096774193549, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7283950617283951, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.9121621621621621, + "eval_ORGANIZATION_precision": 0.8125, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.9139072847682119, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8823529411764706, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.8679245283018868, + "eval_PERSON_recall": 0.965034965034965, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6666666666666666, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.8461538461538461, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3399989902973175, + "eval_loss": 0.35251596570014954, "eval_overall_accuracy": 0.9627220415311484, - "eval_overall_f1": 0.8385269121813032, - "eval_overall_precision": 0.8065395095367848, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9215, - "eval_samples_per_second": 202.925, - "eval_steps_per_second": 3.255, + "eval_overall_f1": 0.8523206751054851, + "eval_overall_precision": 0.8145161290322581, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3673, + "eval_samples_per_second": 509.16, + "eval_steps_per_second": 8.168, "step": 6996 }, { "epoch": 67.0, - "grad_norm": 0.00037926252116449177, + "grad_norm": 0.0011218636063858867, "learning_rate": 1.65e-05, - "loss": 0.0004, + "loss": 0.0015, "step": 7102 }, { "epoch": 67.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.823529411764706, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_precision": 0.7777777777777778, "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.8148148148148148, + "eval_ORGANIZATION_f1": 0.8121212121212122, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7586206896551724, - "eval_ORGANIZATION_recall": 0.88, - "eval_PERSON_f1": 0.9090909090909092, - "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8766233766233766, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_ORGANIZATION_precision": 0.7444444444444445, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9078947368421052, + "eval_PERSON_number": 143, + "eval_PERSON_precision": 0.8571428571428571, + "eval_PERSON_recall": 0.965034965034965, + "eval_QUANTITY_f1": 0.72, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, + "eval_QUANTITY_precision": 0.6666666666666666, "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_TIME_f1": 0.8679245283018868, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3691372275352478, - "eval_overall_accuracy": 0.96347260445334, - "eval_overall_f1": 0.853932584269663, - "eval_overall_precision": 0.8150134048257373, - "eval_overall_recall": 0.8967551622418879, - "eval_runtime": 0.9131, - "eval_samples_per_second": 204.802, - "eval_steps_per_second": 3.286, + "eval_TIME_precision": 0.8518518518518519, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.37641996145248413, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8524137931034482, + "eval_overall_precision": 0.8005181347150259, + "eval_overall_recall": 0.911504424778761, + "eval_runtime": 0.3613, + "eval_samples_per_second": 517.565, + "eval_steps_per_second": 8.303, "step": 7102 }, { "epoch": 68.0, - "grad_norm": 0.0003724870621226728, + "grad_norm": 0.0007424333016388118, "learning_rate": 1.6000000000000003e-05, - "loss": 0.0012, + "loss": 0.0006, "step": 7208 }, { "epoch": 68.0, - "eval_LOCATION_f1": 0.8322147651006712, + "eval_LOCATION_f1": 0.8356164383561645, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8051948051948052, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8280254777070064, + "eval_LOCATION_precision": 0.8243243243243243, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.7878787878787877, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7926829268292683, + "eval_ORGANIZATION_precision": 0.7222222222222222, "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9121621621621621, + "eval_PERSON_f1": 0.9133333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8823529411764706, - "eval_PERSON_recall": 0.9440559440559441, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.8726114649681529, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.7058823529411765, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, - "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8627450980392156, + "eval_QUANTITY_precision": 0.6428571428571429, + "eval_QUANTITY_recall": 0.782608695652174, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.88, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.33306267857551575, - "eval_overall_accuracy": 0.9654741055791843, - "eval_overall_f1": 0.8587731811697574, - "eval_overall_precision": 0.8314917127071824, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9838, - "eval_samples_per_second": 190.074, - "eval_steps_per_second": 3.049, + "eval_loss": 0.3496319651603699, + "eval_overall_accuracy": 0.9594696022016512, + "eval_overall_f1": 0.8475524475524475, + "eval_overall_precision": 0.8058510638297872, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3711, + "eval_samples_per_second": 503.904, + "eval_steps_per_second": 8.084, "step": 7208 }, { "epoch": 69.0, - "grad_norm": 0.0002074290969176218, + "grad_norm": 0.008528614416718483, "learning_rate": 1.55e-05, - "loss": 0.0003, + "loss": 0.0008, "step": 7314 }, { "epoch": 69.0, - "eval_LOCATION_f1": 0.8344370860927153, + "eval_LOCATION_f1": 0.8187919463087249, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7974683544303798, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.8170731707317072, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8000000000000002, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7528089887640449, - "eval_ORGANIZATION_recall": 0.8933333333333333, - "eval_PERSON_f1": 0.9054054054054055, + "eval_ORGANIZATION_precision": 0.775, + "eval_ORGANIZATION_recall": 0.8266666666666667, + "eval_PERSON_f1": 0.8970099667774087, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8758169934640523, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8544303797468354, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.693877551020408, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.6538461538461539, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.373975545167923, - "eval_overall_accuracy": 0.96347260445334, - "eval_overall_f1": 0.8551336146272857, - "eval_overall_precision": 0.8172043010752689, - "eval_overall_recall": 0.8967551622418879, - "eval_runtime": 0.9156, - "eval_samples_per_second": 204.238, - "eval_steps_per_second": 3.277, + "eval_loss": 0.34753233194351196, + "eval_overall_accuracy": 0.9594696022016512, + "eval_overall_f1": 0.8401697312588402, + "eval_overall_precision": 0.8070652173913043, + "eval_overall_recall": 0.8761061946902655, + "eval_runtime": 0.3588, + "eval_samples_per_second": 521.137, + "eval_steps_per_second": 8.36, "step": 7314 }, { "epoch": 70.0, - "grad_norm": 0.00042664690408855677, + "grad_norm": 0.014132479205727577, "learning_rate": 1.5e-05, - "loss": 0.0001, + "loss": 0.0, "step": 7420 }, { "epoch": 70.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.8356164383561645, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8025477707006369, + "eval_LOCATION_precision": 0.8243243243243243, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8121212121212122, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7682926829268293, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9054054054054055, + "eval_ORGANIZATION_precision": 0.7444444444444445, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.8844884488448846, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8758169934640523, + "eval_PERSON_precision": 0.8375, "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8461538461538461, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8214285714285715, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.37100422382354736, - "eval_overall_accuracy": 0.96347260445334, - "eval_overall_f1": 0.8494318181818181, - "eval_overall_precision": 0.8191780821917808, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9101, - "eval_samples_per_second": 205.482, - "eval_steps_per_second": 3.297, + "eval_TIME_precision": 0.7666666666666667, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3662912845611572, + "eval_overall_accuracy": 0.9592194145609206, + "eval_overall_f1": 0.8388888888888889, + "eval_overall_precision": 0.7926509186351706, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.4097, + "eval_samples_per_second": 456.444, + "eval_steps_per_second": 7.323, "step": 7420 }, { "epoch": 71.0, - "grad_norm": 0.0007093641324900091, + "grad_norm": 0.0005066086887381971, "learning_rate": 1.45e-05, "loss": 0.0002, "step": 7526 }, { "epoch": 71.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8055555555555556, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8101265822784811, + "eval_LOCATION_precision": 0.8055555555555556, + "eval_LOCATION_recall": 0.8055555555555556, + "eval_ORGANIZATION_f1": 0.8170731707317072, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7710843373493976, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.91156462585034, + "eval_ORGANIZATION_precision": 0.7528089887640449, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8874172185430463, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7083333333333334, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.68, + "eval_QUANTITY_precision": 0.6296296296296297, "eval_QUANTITY_recall": 0.7391304347826086, - "eval_TIME_f1": 0.8461538461538461, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8461538461538461, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3477381467819214, - "eval_overall_accuracy": 0.9647235426569928, - "eval_overall_f1": 0.8518518518518517, - "eval_overall_precision": 0.8236914600550964, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9072, - "eval_samples_per_second": 206.129, - "eval_steps_per_second": 3.307, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3715399205684662, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.849507735583685, + "eval_overall_precision": 0.8118279569892473, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3561, + "eval_samples_per_second": 525.071, + "eval_steps_per_second": 8.424, "step": 7526 }, { "epoch": 72.0, - "grad_norm": 0.00021990617096889764, + "grad_norm": 0.00028915383154526353, "learning_rate": 1.4000000000000001e-05, - "loss": 0.0002, + "loss": 0.0001, "step": 7632 }, { "epoch": 72.0, - "eval_LOCATION_f1": 0.823529411764706, + "eval_LOCATION_f1": 0.8435374149659863, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7777777777777778, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7950310559006211, + "eval_LOCATION_precision": 0.8266666666666667, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8072289156626506, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7441860465116279, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7362637362637363, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9163879598662208, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8782051282051282, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8214285714285715, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.36287015676498413, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8478873239436621, - "eval_overall_precision": 0.8113207547169812, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9141, - "eval_samples_per_second": 204.572, - "eval_steps_per_second": 3.282, + "eval_TIME_precision": 0.7666666666666667, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3853207230567932, + "eval_overall_accuracy": 0.9587190392794596, + "eval_overall_f1": 0.8523676880222841, + "eval_overall_precision": 0.8073878627968337, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.362, + "eval_samples_per_second": 516.586, + "eval_steps_per_second": 8.287, "step": 7632 }, { "epoch": 73.0, - "grad_norm": 0.00021961626771371812, + "grad_norm": 0.0008943734574131668, "learning_rate": 1.3500000000000001e-05, - "loss": 0.0002, + "loss": 0.0007, "step": 7738 }, { "epoch": 73.0, - "eval_LOCATION_f1": 0.8181818181818181, + "eval_LOCATION_f1": 0.8275862068965517, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7682926829268293, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7643312101910827, + "eval_LOCATION_precision": 0.821917808219178, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7317073170731707, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.91156462585034, + "eval_ORGANIZATION_precision": 0.7674418604651163, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9060402684563759, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8874172185430463, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8709677419354839, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.8461538461538461, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3637426495552063, - "eval_overall_accuracy": 0.9639729797348011, - "eval_overall_f1": 0.8413597733711049, - "eval_overall_precision": 0.8092643051771117, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.909, - "eval_samples_per_second": 205.714, - "eval_steps_per_second": 3.3, + "eval_loss": 0.35061854124069214, + "eval_overall_accuracy": 0.9599699774831123, + "eval_overall_f1": 0.8498583569405098, + "eval_overall_precision": 0.8174386920980926, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.357, + "eval_samples_per_second": 523.747, + "eval_steps_per_second": 8.402, "step": 7738 }, { "epoch": 74.0, - "grad_norm": 0.00018741752137430012, + "grad_norm": 0.0004187956510577351, "learning_rate": 1.3000000000000001e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 7844 }, { "epoch": 74.0, - "eval_LOCATION_f1": 0.8181818181818181, + "eval_LOCATION_f1": 0.8322147651006712, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7682926829268293, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.751592356687898, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8095238095238095, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7195121951219512, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.9047619047619047, + "eval_ORGANIZATION_precision": 0.7311827956989247, + "eval_ORGANIZATION_recall": 0.9066666666666666, + "eval_PERSON_f1": 0.9163879598662208, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8807947019867549, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8782051282051282, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6538461538461539, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.5862068965517241, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8148148148148148, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_precision": 0.7857142857142857, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3637796938419342, - "eval_overall_accuracy": 0.9632224168126094, - "eval_overall_f1": 0.8356940509915013, - "eval_overall_precision": 0.8038147138964578, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9145, - "eval_samples_per_second": 204.477, - "eval_steps_per_second": 3.28, + "eval_loss": 0.4027647078037262, + "eval_overall_accuracy": 0.9584688516387291, + "eval_overall_f1": 0.847645429362881, + "eval_overall_precision": 0.7989556135770235, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.3573, + "eval_samples_per_second": 523.424, + "eval_steps_per_second": 8.397, "step": 7844 }, { "epoch": 75.0, - "grad_norm": 0.0007877243915572762, + "grad_norm": 0.0005983862793073058, "learning_rate": 1.25e-05, - "loss": 0.0001, + "loss": 0.0, "step": 7950 }, { "epoch": 75.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.816326530612245, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7919463087248321, + "eval_LOCATION_precision": 0.8, + "eval_LOCATION_recall": 0.8333333333333334, + "eval_ORGANIZATION_f1": 0.825, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7972972972972973, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7764705882352941, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9096989966555185, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8717948717948718, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3778810501098633, - "eval_overall_accuracy": 0.9649737302977233, - "eval_overall_f1": 0.8493543758967002, - "eval_overall_precision": 0.8268156424581006, - "eval_overall_recall": 0.8731563421828908, - "eval_runtime": 0.9077, - "eval_samples_per_second": 206.004, - "eval_steps_per_second": 3.305, + "eval_loss": 0.3929165005683899, + "eval_overall_accuracy": 0.960720540405304, + "eval_overall_f1": 0.8478873239436621, + "eval_overall_precision": 0.8113207547169812, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3576, + "eval_samples_per_second": 522.919, + "eval_steps_per_second": 8.389, "step": 7950 }, { "epoch": 76.0, - "grad_norm": 0.005742947105318308, + "grad_norm": 0.00021465642203111202, "learning_rate": 1.2e-05, - "loss": 0.0001, + "loss": 0.0002, "step": 8056 }, { "epoch": 76.0, - "eval_LOCATION_f1": 0.8289473684210527, + "eval_LOCATION_f1": 0.8181818181818181, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7875, + "eval_LOCATION_precision": 0.7682926829268293, "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7894736842105264, + "eval_ORGANIZATION_f1": 0.8271604938271604, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7792207792207793, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.9054054054054055, + "eval_ORGANIZATION_precision": 0.7701149425287356, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8758169934640523, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.830188679245283, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8148148148148148, - "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.3797122836112976, - "eval_overall_accuracy": 0.9644733550162622, - "eval_overall_f1": 0.8461538461538463, - "eval_overall_precision": 0.8181818181818182, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9011, - "eval_samples_per_second": 207.522, - "eval_steps_per_second": 3.329, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.4091399908065796, + "eval_overall_accuracy": 0.960720540405304, + "eval_overall_f1": 0.8527777777777777, + "eval_overall_precision": 0.8057742782152231, + "eval_overall_recall": 0.9056047197640118, + "eval_runtime": 0.3609, + "eval_samples_per_second": 518.107, + "eval_steps_per_second": 8.312, "step": 8056 }, { "epoch": 77.0, - "grad_norm": 0.00030839102691970766, + "grad_norm": 0.00041957717621698976, "learning_rate": 1.1500000000000002e-05, - "loss": 0.0004, + "loss": 0.0, "step": 8162 }, { @@ -2985,425 +2985,425 @@ "eval_LOCATION_number": 72, "eval_LOCATION_precision": 0.7974683544303798, "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.8024691358024691, + "eval_ORGANIZATION_f1": 0.8271604938271604, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7471264367816092, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.8963210702341137, + "eval_ORGANIZATION_precision": 0.7701149425287356, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8589743589743589, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.76, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7037037037037037, - "eval_QUANTITY_recall": 0.8260869565217391, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.40759775042533875, - "eval_overall_accuracy": 0.9614711033274956, - "eval_overall_f1": 0.8475524475524475, - "eval_overall_precision": 0.8058510638297872, - "eval_overall_recall": 0.8938053097345132, - "eval_runtime": 0.9103, - "eval_samples_per_second": 205.428, - "eval_steps_per_second": 3.296, + "eval_loss": 0.3996489346027374, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8491620111731844, + "eval_overall_precision": 0.8063660477453581, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3555, + "eval_samples_per_second": 526.025, + "eval_steps_per_second": 8.439, "step": 8162 }, { "epoch": 78.0, - "grad_norm": 0.00025188998552039266, + "grad_norm": 0.0018605925142765045, "learning_rate": 1.1000000000000001e-05, - "loss": 0.0005, + "loss": 0.0, "step": 8268 }, { "epoch": 78.0, - "eval_LOCATION_f1": 0.8400000000000001, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8076923076923077, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7950310559006211, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7441860465116279, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.8993288590604027, + "eval_ORGANIZATION_precision": 0.7674418604651163, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.864516129032258, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.8598726114649682, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8679245283018868, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8518518518518519, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.39274299144744873, - "eval_overall_accuracy": 0.9624718538904178, - "eval_overall_f1": 0.849507735583685, - "eval_overall_precision": 0.8118279569892473, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3973155617713928, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.8447552447552448, + "eval_overall_precision": 0.8031914893617021, "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9103, - "eval_samples_per_second": 205.436, - "eval_steps_per_second": 3.296, + "eval_runtime": 0.3588, + "eval_samples_per_second": 521.17, + "eval_steps_per_second": 8.361, "step": 8268 }, { "epoch": 79.0, - "grad_norm": 0.00023670213704463094, + "grad_norm": 0.0009037918644025922, "learning_rate": 1.05e-05, - "loss": 0.0003, + "loss": 0.0, "step": 8374 }, { "epoch": 79.0, - "eval_LOCATION_f1": 0.823529411764706, + "eval_LOCATION_f1": 0.8211920529801324, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7777777777777778, - "eval_LOCATION_recall": 0.875, - "eval_ORGANIZATION_f1": 0.7468354430379747, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.825, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7108433734939759, - "eval_ORGANIZATION_recall": 0.7866666666666666, - "eval_PERSON_f1": 0.9023569023569024, + "eval_ORGANIZATION_precision": 0.7764705882352941, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8701298701298701, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7450980392156864, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6785714285714286, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8148148148148148, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.7857142857142857, + "eval_TIME_precision": 0.8148148148148148, "eval_TIME_recall": 0.8461538461538461, - "eval_loss": 0.4000164866447449, - "eval_overall_accuracy": 0.960720540405304, - "eval_overall_f1": 0.8330995792426367, - "eval_overall_precision": 0.7941176470588235, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9149, - "eval_samples_per_second": 204.395, - "eval_steps_per_second": 3.279, + "eval_loss": 0.3981226086616516, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.84593837535014, + "eval_overall_precision": 0.8053333333333333, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3579, + "eval_samples_per_second": 522.564, + "eval_steps_per_second": 8.383, "step": 8374 }, { "epoch": 80.0, - "grad_norm": 0.000326380948536098, + "grad_norm": 0.0010246345773339272, "learning_rate": 1e-05, - "loss": 0.0001, + "loss": 0.0, "step": 8480 }, { "epoch": 80.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8344370860927153, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7483870967741936, + "eval_LOCATION_precision": 0.7974683544303798, + "eval_LOCATION_recall": 0.875, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.725, - "eval_ORGANIZATION_recall": 0.7733333333333333, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7674418604651163, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8679245283018868, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8518518518518519, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3888261020183563, - "eval_overall_accuracy": 0.9629722291718789, - "eval_overall_f1": 0.8416547788873039, - "eval_overall_precision": 0.8149171270718232, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9033, - "eval_samples_per_second": 207.02, - "eval_steps_per_second": 3.321, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.4011876583099365, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8475524475524475, + "eval_overall_precision": 0.8058510638297872, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.38, + "eval_samples_per_second": 492.061, + "eval_steps_per_second": 7.894, "step": 8480 }, { "epoch": 81.0, - "grad_norm": 0.0005148330819793046, + "grad_norm": 0.00019219562818761915, "learning_rate": 9.5e-06, - "loss": 0.0001, + "loss": 0.0006, "step": 8586 }, { "epoch": 81.0, - "eval_LOCATION_f1": 0.8211920529801324, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_precision": 0.7948717948717948, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7643312101910827, + "eval_ORGANIZATION_f1": 0.8271604938271604, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7317073170731707, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.8993288590604027, + "eval_ORGANIZATION_precision": 0.7701149425287356, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.864516129032258, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8598726114649682, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6538461538461539, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, + "eval_QUANTITY_precision": 0.5862068965517241, + "eval_QUANTITY_recall": 0.7391304347826086, "eval_TIME_f1": 0.8518518518518519, "eval_TIME_number": 26, "eval_TIME_precision": 0.8214285714285714, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3981935977935791, - "eval_overall_accuracy": 0.9614711033274956, - "eval_overall_f1": 0.836619718309859, - "eval_overall_precision": 0.8005390835579514, - "eval_overall_recall": 0.8761061946902655, - "eval_runtime": 0.9078, - "eval_samples_per_second": 205.981, - "eval_steps_per_second": 3.305, + "eval_loss": 0.4144229590892792, + "eval_overall_accuracy": 0.9597197898423818, + "eval_overall_f1": 0.8467966573816156, + "eval_overall_precision": 0.8021108179419525, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3556, + "eval_samples_per_second": 525.889, + "eval_steps_per_second": 8.437, "step": 8586 }, { "epoch": 82.0, - "grad_norm": 0.0001784728665370494, + "grad_norm": 0.000247513729846105, "learning_rate": 9e-06, - "loss": 0.0003, + "loss": 0.0, "step": 8692 }, { "epoch": 82.0, - "eval_LOCATION_f1": 0.8378378378378377, + "eval_LOCATION_f1": 0.8211920529801324, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8157894736842105, + "eval_LOCATION_precision": 0.7848101265822784, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7870967741935484, + "eval_ORGANIZATION_f1": 0.8227848101265823, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7625, - "eval_ORGANIZATION_recall": 0.8133333333333334, - "eval_PERSON_f1": 0.91156462585034, + "eval_ORGANIZATION_precision": 0.7831325301204819, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.9, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8874172185430463, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8598726114649682, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8679245283018868, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8518518518518519, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3737158179283142, - "eval_overall_accuracy": 0.9647235426569928, - "eval_overall_f1": 0.8538681948424068, - "eval_overall_precision": 0.83008356545961, - "eval_overall_recall": 0.8790560471976401, - "eval_runtime": 0.9108, - "eval_samples_per_second": 205.324, - "eval_steps_per_second": 3.294, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.40510478615760803, + "eval_overall_accuracy": 0.9594696022016512, + "eval_overall_f1": 0.8455056179775281, + "eval_overall_precision": 0.806970509383378, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3705, + "eval_samples_per_second": 504.754, + "eval_steps_per_second": 8.098, "step": 8692 }, { "epoch": 83.0, - "grad_norm": 0.00012157092714915052, + "grad_norm": 0.0001593719352968037, "learning_rate": 8.500000000000002e-06, - "loss": 0.0002, + "loss": 0.0006, "step": 8798 }, { "epoch": 83.0, - "eval_LOCATION_f1": 0.8322147651006712, + "eval_LOCATION_f1": 0.8104575163398693, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_precision": 0.7654320987654321, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7975460122699386, + "eval_ORGANIZATION_f1": 0.8048780487804879, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7386363636363636, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9047619047619047, + "eval_ORGANIZATION_precision": 0.7415730337078652, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.8970099667774087, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8807947019867549, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8544303797468354, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, "eval_TIME_f1": 0.8518518518518519, "eval_TIME_number": 26, "eval_TIME_precision": 0.8214285714285714, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3761243224143982, - "eval_overall_accuracy": 0.9632224168126094, - "eval_overall_f1": 0.8502824858757062, - "eval_overall_precision": 0.8157181571815718, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9121, - "eval_samples_per_second": 205.019, - "eval_steps_per_second": 3.289, + "eval_loss": 0.4076941907405853, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8381742738589212, + "eval_overall_precision": 0.7890625, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3564, + "eval_samples_per_second": 524.736, + "eval_steps_per_second": 8.418, "step": 8798 }, { "epoch": 84.0, - "grad_norm": 0.01326628215610981, + "grad_norm": 0.0038378555327653885, "learning_rate": 8.000000000000001e-06, - "loss": 0.0001, + "loss": 0.0004, "step": 8904 }, { "epoch": 84.0, - "eval_LOCATION_f1": 0.8322147651006712, + "eval_LOCATION_f1": 0.8187919463087249, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8051948051948052, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8025477707006369, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8280254777070064, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7682926829268293, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9047619047619047, + "eval_ORGANIZATION_precision": 0.7926829268292683, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.903010033444816, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8807947019867549, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8653846153846154, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8679245283018868, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8518518518518519, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.37578248977661133, - "eval_overall_accuracy": 0.9639729797348011, - "eval_overall_f1": 0.8530670470756063, - "eval_overall_precision": 0.8259668508287292, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.9306, - "eval_samples_per_second": 200.941, - "eval_steps_per_second": 3.224, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.39616209268569946, + "eval_overall_accuracy": 0.960720540405304, + "eval_overall_f1": 0.8474576271186441, + "eval_overall_precision": 0.8130081300813008, + "eval_overall_recall": 0.8849557522123894, + "eval_runtime": 0.359, + "eval_samples_per_second": 520.922, + "eval_steps_per_second": 8.357, "step": 8904 }, { "epoch": 85.0, - "grad_norm": 0.006039230152964592, + "grad_norm": 0.0006832346552982926, "learning_rate": 7.5e-06, - "loss": 0.0004, + "loss": 0.0005, "step": 9010 }, { "epoch": 85.0, - "eval_LOCATION_f1": 0.8322147651006712, + "eval_LOCATION_f1": 0.8243243243243243, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8051948051948052, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7975460122699386, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8235294117647058, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7386363636363636, - "eval_ORGANIZATION_recall": 0.8666666666666667, - "eval_PERSON_f1": 0.9016949152542374, + "eval_ORGANIZATION_precision": 0.8076923076923077, + "eval_ORGANIZATION_recall": 0.84, + "eval_PERSON_f1": 0.9158249158249158, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.875, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.8831168831168831, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8461538461538461, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3836413621902466, - "eval_overall_accuracy": 0.9629722291718789, - "eval_overall_f1": 0.8478873239436621, - "eval_overall_precision": 0.8113207547169812, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9049, - "eval_samples_per_second": 206.647, - "eval_steps_per_second": 3.315, + "eval_TIME_precision": 0.8461538461538461, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.38612908124923706, + "eval_overall_accuracy": 0.9617212909682261, + "eval_overall_f1": 0.8542857142857142, + "eval_overall_precision": 0.8282548476454293, + "eval_overall_recall": 0.8820058997050148, + "eval_runtime": 0.3569, + "eval_samples_per_second": 523.906, + "eval_steps_per_second": 8.405, "step": 9010 }, { "epoch": 86.0, - "grad_norm": 0.00042766937986016273, + "grad_norm": 0.0012918419670313597, "learning_rate": 7.000000000000001e-06, - "loss": 0.0005, + "loss": 0.0, "step": 9116 }, { "epoch": 86.0, - "eval_LOCATION_f1": 0.8322147651006712, + "eval_LOCATION_f1": 0.8187919463087249, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8051948051948052, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8076923076923077, + "eval_LOCATION_precision": 0.7922077922077922, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8176100628930819, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7777777777777778, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9016949152542374, + "eval_ORGANIZATION_precision": 0.7738095238095238, + "eval_ORGANIZATION_recall": 0.8666666666666667, + "eval_PERSON_f1": 0.9096989966555185, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.875, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.8717948717948718, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.693877551020408, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6538461538461539, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, - "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3791405260562897, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8506401137980086, - "eval_overall_precision": 0.8214285714285714, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.904, - "eval_samples_per_second": 206.855, - "eval_steps_per_second": 3.319, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.39533165097236633, + "eval_overall_accuracy": 0.9609707280460346, + "eval_overall_f1": 0.8490832157968972, + "eval_overall_precision": 0.8135135135135135, + "eval_overall_recall": 0.887905604719764, + "eval_runtime": 0.3669, + "eval_samples_per_second": 509.639, + "eval_steps_per_second": 8.176, "step": 9116 }, { "epoch": 87.0, - "grad_norm": 0.0006120882462710142, + "grad_norm": 0.00013653105997946113, "learning_rate": 6.5000000000000004e-06, - "loss": 0.0004, + "loss": 0.0003, "step": 9222 }, { "epoch": 87.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8211920529801324, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_precision": 0.7848101265822784, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8025477707006369, + "eval_ORGANIZATION_f1": 0.8220858895705522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7682926829268293, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9023569023569024, + "eval_ORGANIZATION_precision": 0.7613636363636364, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9042904290429041, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8701298701298701, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7346938775510203, + "eval_PERSON_precision": 0.85625, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6923076923076923, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.888888888888889, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.7924528301886792, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8571428571428571, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.38632336258888245, - "eval_overall_accuracy": 0.9639729797348011, - "eval_overall_f1": 0.8514851485148515, - "eval_overall_precision": 0.8179347826086957, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9113, - "eval_samples_per_second": 205.193, - "eval_steps_per_second": 3.292, + "eval_TIME_precision": 0.7777777777777778, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.41302046179771423, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8432732316227461, + "eval_overall_precision": 0.7958115183246073, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3613, + "eval_samples_per_second": 517.561, + "eval_steps_per_second": 8.303, "step": 9222 }, { "epoch": 88.0, - "grad_norm": 0.0006923701730556786, + "grad_norm": 0.0001936595799634233, "learning_rate": 6e-06, "loss": 0.0004, "step": 9328 @@ -3414,508 +3414,508 @@ "eval_LOCATION_number": 72, "eval_LOCATION_precision": 0.7848101265822784, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.7875000000000001, + "eval_ORGANIZATION_f1": 0.8220858895705522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7411764705882353, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.907849829351536, + "eval_ORGANIZATION_precision": 0.7613636363636364, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9006622516556291, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8866666666666667, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.7924528301886792, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.3742952346801758, - "eval_overall_accuracy": 0.9647235426569928, - "eval_overall_f1": 0.851063829787234, - "eval_overall_precision": 0.819672131147541, - "eval_overall_recall": 0.8849557522123894, - "eval_runtime": 0.9099, - "eval_samples_per_second": 205.512, - "eval_steps_per_second": 3.297, + "eval_TIME_precision": 0.7777777777777778, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.41551852226257324, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8416666666666667, + "eval_overall_precision": 0.7952755905511811, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3626, + "eval_samples_per_second": 515.716, + "eval_steps_per_second": 8.274, "step": 9328 }, { "epoch": 89.0, - "grad_norm": 0.0163017176091671, + "grad_norm": 0.000983737176284194, "learning_rate": 5.500000000000001e-06, - "loss": 0.0006, + "loss": 0.0, "step": 9434 }, { "epoch": 89.0, - "eval_LOCATION_f1": 0.816326530612245, + "eval_LOCATION_f1": 0.8211920529801324, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.8, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7894736842105264, + "eval_LOCATION_precision": 0.7848101265822784, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8220858895705522, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7792207792207793, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.907849829351536, + "eval_ORGANIZATION_precision": 0.7613636363636364, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9006622516556291, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8866666666666667, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8553459119496856, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.7924528301886792, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.3696248233318329, - "eval_overall_accuracy": 0.966224668501376, - "eval_overall_f1": 0.8513708513708513, - "eval_overall_precision": 0.8333333333333334, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9099, - "eval_samples_per_second": 205.507, - "eval_steps_per_second": 3.297, + "eval_TIME_precision": 0.7777777777777778, + "eval_TIME_recall": 0.8076923076923077, + "eval_loss": 0.4156357944011688, + "eval_overall_accuracy": 0.9602201651238429, + "eval_overall_f1": 0.8416666666666667, + "eval_overall_precision": 0.7952755905511811, + "eval_overall_recall": 0.8938053097345132, + "eval_runtime": 0.3581, + "eval_samples_per_second": 522.251, + "eval_steps_per_second": 8.378, "step": 9434 }, { "epoch": 90.0, - "grad_norm": 15.17189884185791, + "grad_norm": 0.001763033214956522, "learning_rate": 5e-06, "loss": 0.0002, "step": 9540 }, { "epoch": 90.0, - "eval_LOCATION_f1": 0.8, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7692307692307693, - "eval_LOCATION_recall": 0.8333333333333334, - "eval_ORGANIZATION_f1": 0.7894736842105264, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8198757763975156, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7792207792207793, - "eval_ORGANIZATION_recall": 0.8, - "eval_PERSON_f1": 0.907849829351536, + "eval_ORGANIZATION_precision": 0.7674418604651163, + "eval_ORGANIZATION_recall": 0.88, + "eval_PERSON_f1": 0.9090909090909092, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8866666666666667, - "eval_PERSON_recall": 0.9300699300699301, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8766233766233766, + "eval_PERSON_recall": 0.9440559440559441, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.3719424605369568, - "eval_overall_accuracy": 0.966224668501376, - "eval_overall_f1": 0.8477011494252874, - "eval_overall_precision": 0.8263305322128851, - "eval_overall_recall": 0.8702064896755162, - "eval_runtime": 0.9059, - "eval_samples_per_second": 206.432, - "eval_steps_per_second": 3.312, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.404066801071167, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.849507735583685, + "eval_overall_precision": 0.8118279569892473, + "eval_overall_recall": 0.8908554572271387, + "eval_runtime": 0.3736, + "eval_samples_per_second": 500.522, + "eval_steps_per_second": 8.03, "step": 9540 }, { "epoch": 91.0, - "grad_norm": 0.00020513041818048805, + "grad_norm": 0.0002766927646007389, "learning_rate": 4.5e-06, "loss": 0.0002, "step": 9646 }, { "epoch": 91.0, - "eval_LOCATION_f1": 0.8187919463087249, + "eval_LOCATION_f1": 0.8322147651006712, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7922077922077922, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8205128205128205, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7901234567901234, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9158249158249158, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8831168831168831, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.3863327205181122, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8587731811697574, - "eval_overall_precision": 0.8314917127071824, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9044, - "eval_samples_per_second": 206.761, - "eval_steps_per_second": 3.317, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3980797231197357, + "eval_overall_accuracy": 0.9617212909682261, + "eval_overall_f1": 0.8551336146272857, + "eval_overall_precision": 0.8172043010752689, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3573, + "eval_samples_per_second": 523.342, + "eval_steps_per_second": 8.396, "step": 9646 }, { "epoch": 92.0, - "grad_norm": 0.0004068514099344611, + "grad_norm": 0.0008579469285905361, "learning_rate": 4.000000000000001e-06, - "loss": 0.0004, + "loss": 0.0, "step": 9752 }, { "epoch": 92.0, - "eval_LOCATION_f1": 0.8187919463087249, + "eval_LOCATION_f1": 0.8322147651006712, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7922077922077922, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8129032258064516, + "eval_LOCATION_precision": 0.8051948051948052, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7875, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.91156462585034, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9158249158249158, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.8874172185430463, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8831168831168831, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.693877551020408, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6538461538461539, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.830188679245283, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.38287997245788574, - "eval_overall_accuracy": 0.9644733550162622, - "eval_overall_f1": 0.8583690987124464, - "eval_overall_precision": 0.8333333333333334, - "eval_overall_recall": 0.8849557522123894, - "eval_runtime": 0.9043, - "eval_samples_per_second": 206.779, - "eval_steps_per_second": 3.317, + "eval_TIME_precision": 0.8148148148148148, + "eval_TIME_recall": 0.8461538461538461, + "eval_loss": 0.3970873951911926, + "eval_overall_accuracy": 0.9617212909682261, + "eval_overall_f1": 0.8575458392101551, + "eval_overall_precision": 0.8216216216216217, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3615, + "eval_samples_per_second": 517.255, + "eval_steps_per_second": 8.298, "step": 9752 }, { "epoch": 93.0, - "grad_norm": 0.00010082097287522629, + "grad_norm": 0.00014306257071439177, "learning_rate": 3.5000000000000004e-06, - "loss": 0.0002, + "loss": 0.0, "step": 9858 }, { "epoch": 93.0, - "eval_LOCATION_f1": 0.8187919463087249, + "eval_LOCATION_f1": 0.8266666666666667, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7922077922077922, - "eval_LOCATION_recall": 0.8472222222222222, - "eval_ORGANIZATION_f1": 0.8104575163398693, + "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_recall": 0.8611111111111112, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7948717948717948, - "eval_ORGANIZATION_recall": 0.8266666666666667, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.37826383113861084, - "eval_overall_accuracy": 0.9652239179384539, - "eval_overall_f1": 0.8567335243553009, - "eval_overall_precision": 0.8328690807799443, - "eval_overall_recall": 0.8820058997050148, - "eval_runtime": 0.918, - "eval_samples_per_second": 203.7, - "eval_steps_per_second": 3.268, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.4024323523044586, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8559440559440559, + "eval_overall_precision": 0.8138297872340425, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.3607, + "eval_samples_per_second": 518.421, + "eval_steps_per_second": 8.317, "step": 9858 }, { "epoch": 94.0, - "grad_norm": 0.000542107445653528, + "grad_norm": 0.00042846077121794224, "learning_rate": 3e-06, - "loss": 0.0001, + "loss": 0.0, "step": 9964 }, { "epoch": 94.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8322147651006712, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_precision": 0.8051948051948052, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8076923076923077, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7777777777777778, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.9056603773584906, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8888888888888888, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.38576415181159973, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8575498575498576, - "eval_overall_precision": 0.8292011019283747, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9111, - "eval_samples_per_second": 205.24, - "eval_steps_per_second": 3.293, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.3985014855861664, + "eval_overall_accuracy": 0.9619714786089567, + "eval_overall_f1": 0.8583450210378682, + "eval_overall_precision": 0.8181818181818182, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.3651, + "eval_samples_per_second": 512.194, + "eval_steps_per_second": 8.217, "step": 9964 }, { "epoch": 95.0, - "grad_norm": 0.044874146580696106, + "grad_norm": 0.000827198673505336, "learning_rate": 2.5e-06, - "loss": 0.0001, + "loss": 0.0, "step": 10070 }, { "epoch": 95.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8322147651006712, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, + "eval_LOCATION_precision": 0.8051948051948052, "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8076923076923077, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7777777777777778, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.7499999999999999, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.6666666666666666, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.72, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.888888888888889, + "eval_QUANTITY_precision": 0.6071428571428571, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8571428571428571, - "eval_TIME_recall": 0.9230769230769231, - "eval_loss": 0.38630810379981995, - "eval_overall_accuracy": 0.9639729797348011, - "eval_overall_f1": 0.856330014224751, - "eval_overall_precision": 0.8269230769230769, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.8986, - "eval_samples_per_second": 208.113, - "eval_steps_per_second": 3.339, + "eval_TIME_precision": 0.7931034482758621, + "eval_TIME_recall": 0.8846153846153846, + "eval_loss": 0.39939987659454346, + "eval_overall_accuracy": 0.9617212909682261, + "eval_overall_f1": 0.8571428571428571, + "eval_overall_precision": 0.816, + "eval_overall_recall": 0.9026548672566371, + "eval_runtime": 0.358, + "eval_samples_per_second": 522.401, + "eval_steps_per_second": 8.381, "step": 10070 }, { "epoch": 96.0, - "grad_norm": 0.0001308424398303032, + "grad_norm": 0.0002911574556492269, "learning_rate": 2.0000000000000003e-06, - "loss": 0.0001, + "loss": 0.0003, "step": 10176 }, { "epoch": 96.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8243243243243243, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8076923076923077, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7777777777777778, - "eval_ORGANIZATION_recall": 0.84, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.912751677852349, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.76, + "eval_PERSON_precision": 0.8774193548387097, + "eval_PERSON_recall": 0.951048951048951, + "eval_QUANTITY_f1": 0.693877551020408, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7037037037037037, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6538461538461539, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3916187286376953, - "eval_overall_accuracy": 0.9637227920940705, - "eval_overall_f1": 0.8539007092198581, - "eval_overall_precision": 0.8224043715846995, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9154, - "eval_samples_per_second": 204.278, - "eval_steps_per_second": 3.277, + "eval_loss": 0.39506107568740845, + "eval_overall_accuracy": 0.961220915686765, + "eval_overall_f1": 0.8551336146272857, + "eval_overall_precision": 0.8172043010752689, + "eval_overall_recall": 0.8967551622418879, + "eval_runtime": 0.3613, + "eval_samples_per_second": 517.554, + "eval_steps_per_second": 8.303, "step": 10176 }, { "epoch": 97.0, - "grad_norm": 0.00018677054322324693, + "grad_norm": 0.0003046900383196771, "learning_rate": 1.5e-06, - "loss": 0.0001, + "loss": 0.0005, "step": 10282 }, { "epoch": 97.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8243243243243243, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8101265822784811, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7710843373493976, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9133333333333333, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.76, + "eval_PERSON_precision": 0.8726114649681529, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7037037037037037, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3897801339626312, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8543140028288544, - "eval_overall_precision": 0.8206521739130435, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9174, - "eval_samples_per_second": 203.839, - "eval_steps_per_second": 3.27, + "eval_loss": 0.4083447754383087, + "eval_overall_accuracy": 0.9609707280460346, + "eval_overall_f1": 0.854341736694678, + "eval_overall_precision": 0.8133333333333334, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3599, + "eval_samples_per_second": 519.582, + "eval_steps_per_second": 8.336, "step": 10282 }, { "epoch": 98.0, - "grad_norm": 0.004196417052298784, + "grad_norm": 0.00015339584206230938, "learning_rate": 1.0000000000000002e-06, - "loss": 0.0001, + "loss": 0.0, "step": 10388 }, { "epoch": 98.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8243243243243243, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8101265822784811, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7710843373493976, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.76, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.7037037037037037, - "eval_QUANTITY_recall": 0.8260869565217391, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.38807475566864014, - "eval_overall_accuracy": 0.9644733550162622, - "eval_overall_f1": 0.8543140028288544, - "eval_overall_precision": 0.8206521739130435, - "eval_overall_recall": 0.8908554572271387, - "eval_runtime": 0.9056, - "eval_samples_per_second": 206.492, - "eval_steps_per_second": 3.313, + "eval_loss": 0.407378226518631, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8567415730337079, + "eval_overall_precision": 0.8176943699731903, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3669, + "eval_samples_per_second": 509.703, + "eval_steps_per_second": 8.177, "step": 10388 }, { "epoch": 99.0, - "grad_norm": 0.0001258315023733303, + "grad_norm": 0.0002479134127497673, "learning_rate": 5.000000000000001e-07, - "loss": 0.0001, + "loss": 0.0, "step": 10494 }, { "epoch": 99.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8243243243243243, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8101265822784811, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7710843373493976, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3856855630874634, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8514851485148515, - "eval_overall_precision": 0.8179347826086957, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9072, - "eval_samples_per_second": 206.127, - "eval_steps_per_second": 3.307, + "eval_loss": 0.4076772630214691, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8567415730337079, + "eval_overall_precision": 0.8176943699731903, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3589, + "eval_samples_per_second": 521.056, + "eval_steps_per_second": 8.359, "step": 10494 }, { "epoch": 100.0, - "grad_norm": 0.00010668273171177134, + "grad_norm": 0.00025577095220796764, "learning_rate": 0.0, "loss": 0.0001, "step": 10600 }, { "epoch": 100.0, - "eval_LOCATION_f1": 0.8266666666666667, + "eval_LOCATION_f1": 0.8243243243243243, "eval_LOCATION_number": 72, - "eval_LOCATION_precision": 0.7948717948717948, - "eval_LOCATION_recall": 0.8611111111111112, - "eval_ORGANIZATION_f1": 0.8101265822784811, + "eval_LOCATION_precision": 0.8026315789473685, + "eval_LOCATION_recall": 0.8472222222222222, + "eval_ORGANIZATION_f1": 0.8322981366459627, "eval_ORGANIZATION_number": 75, - "eval_ORGANIZATION_precision": 0.7710843373493976, - "eval_ORGANIZATION_recall": 0.8533333333333334, - "eval_PERSON_f1": 0.9084745762711864, + "eval_ORGANIZATION_precision": 0.7790697674418605, + "eval_ORGANIZATION_recall": 0.8933333333333333, + "eval_PERSON_f1": 0.9194630872483223, "eval_PERSON_number": 143, - "eval_PERSON_precision": 0.881578947368421, - "eval_PERSON_recall": 0.9370629370629371, - "eval_QUANTITY_f1": 0.72, + "eval_PERSON_precision": 0.8838709677419355, + "eval_PERSON_recall": 0.958041958041958, + "eval_QUANTITY_f1": 0.68, "eval_QUANTITY_number": 23, - "eval_QUANTITY_precision": 0.6666666666666666, - "eval_QUANTITY_recall": 0.782608695652174, - "eval_TIME_f1": 0.8518518518518519, + "eval_QUANTITY_precision": 0.6296296296296297, + "eval_QUANTITY_recall": 0.7391304347826086, + "eval_TIME_f1": 0.8363636363636363, "eval_TIME_number": 26, - "eval_TIME_precision": 0.8214285714285714, + "eval_TIME_precision": 0.7931034482758621, "eval_TIME_recall": 0.8846153846153846, - "eval_loss": 0.3857269287109375, - "eval_overall_accuracy": 0.9642231673755316, - "eval_overall_f1": 0.8514851485148515, - "eval_overall_precision": 0.8179347826086957, - "eval_overall_recall": 0.887905604719764, - "eval_runtime": 0.9906, - "eval_samples_per_second": 188.783, - "eval_steps_per_second": 3.029, + "eval_loss": 0.40795865654945374, + "eval_overall_accuracy": 0.9614711033274956, + "eval_overall_f1": 0.8567415730337079, + "eval_overall_precision": 0.8176943699731903, + "eval_overall_recall": 0.8997050147492626, + "eval_runtime": 0.3623, + "eval_samples_per_second": 516.187, + "eval_steps_per_second": 8.281, "step": 10600 }, { "epoch": 100.0, "step": 10600, "total_flos": 4502314993613766.0, - "train_loss": 0.008494841067761815, - "train_runtime": 2623.6324, - "train_samples_per_second": 64.3, - "train_steps_per_second": 4.04 + "train_loss": 0.008934246277599155, + "train_runtime": 970.249, + "train_samples_per_second": 173.873, + "train_steps_per_second": 10.925 } ], "logging_steps": 500,