|
{ |
|
"best_metric": 0.968, |
|
"best_model_checkpoint": "dinov2-base-finetuned-eye/checkpoint-2500", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 25.091054916381836, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.2386, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.266851425170898, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.0581, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 107.60309600830078, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.2613, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.244115352630615, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.0925, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 55.03916931152344, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.2499, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 104.31927490234375, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.2216, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 43.71086120605469, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.1194, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 10.551958084106445, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.098, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 25.248905181884766, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.2751, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 12.814699172973633, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.2384, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 59.77313995361328, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.2072, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 74.56177520751953, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.3489, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 33.88791275024414, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.1099, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 33.14435958862305, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 0.1244, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 100.65065002441406, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.4129, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 86.43379974365234, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 0.2088, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.185288429260254, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 0.2778, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 55.72468185424805, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 0.2935, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 33.43989181518555, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 0.3813, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 62.71244430541992, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.2892, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 19.36113739013672, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.2822, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 56.82246017456055, |
|
"learning_rate": 1.76e-05, |
|
"loss": 0.3083, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 609.8956909179688, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 0.4352, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.06277847290039, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 0.4511, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 13.084687232971191, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3853, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.874, |
|
"eval_f1": 0.8728784694592093, |
|
"eval_loss": 0.4917781352996826, |
|
"eval_runtime": 8.6219, |
|
"eval_samples_per_second": 57.992, |
|
"eval_steps_per_second": 14.498, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 52.16224670410156, |
|
"learning_rate": 1.9911111111111112e-05, |
|
"loss": 0.2854, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 56.812461853027344, |
|
"learning_rate": 1.9822222222222226e-05, |
|
"loss": 0.3914, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 76.50477600097656, |
|
"learning_rate": 1.9733333333333336e-05, |
|
"loss": 0.433, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 54.315757751464844, |
|
"learning_rate": 1.9644444444444447e-05, |
|
"loss": 0.3325, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 26.856290817260742, |
|
"learning_rate": 1.9555555555555557e-05, |
|
"loss": 0.3577, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 7.286658763885498, |
|
"learning_rate": 1.9466666666666668e-05, |
|
"loss": 0.3507, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 88.7168960571289, |
|
"learning_rate": 1.9377777777777778e-05, |
|
"loss": 0.3419, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 10.988639831542969, |
|
"learning_rate": 1.928888888888889e-05, |
|
"loss": 0.3714, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 58.80833435058594, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 0.3088, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 44.44467544555664, |
|
"learning_rate": 1.9111111111111113e-05, |
|
"loss": 0.3212, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 25.056461334228516, |
|
"learning_rate": 1.9022222222222223e-05, |
|
"loss": 0.4266, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 76.53874969482422, |
|
"learning_rate": 1.8933333333333334e-05, |
|
"loss": 0.3125, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 57.03301239013672, |
|
"learning_rate": 1.8844444444444444e-05, |
|
"loss": 0.4589, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 49.278892517089844, |
|
"learning_rate": 1.8755555555555558e-05, |
|
"loss": 0.3832, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 56.22084045410156, |
|
"learning_rate": 1.866666666666667e-05, |
|
"loss": 0.2668, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 70.71886444091797, |
|
"learning_rate": 1.857777777777778e-05, |
|
"loss": 0.3858, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 55.63289260864258, |
|
"learning_rate": 1.848888888888889e-05, |
|
"loss": 0.3321, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 54.63759231567383, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 0.356, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 95.25802612304688, |
|
"learning_rate": 1.8311111111111114e-05, |
|
"loss": 0.2692, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 105.18350982666016, |
|
"learning_rate": 1.8222222222222224e-05, |
|
"loss": 0.3576, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 57.272056579589844, |
|
"learning_rate": 1.8133333333333335e-05, |
|
"loss": 0.5661, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 17.590261459350586, |
|
"learning_rate": 1.8044444444444445e-05, |
|
"loss": 0.2745, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 41.226783752441406, |
|
"learning_rate": 1.7955555555555556e-05, |
|
"loss": 0.3762, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 98.6812973022461, |
|
"learning_rate": 1.7866666666666666e-05, |
|
"loss": 0.2868, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 25.974720001220703, |
|
"learning_rate": 1.7777777777777777e-05, |
|
"loss": 0.5345, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.878, |
|
"eval_f1": 0.8770706559649587, |
|
"eval_loss": 0.4389978051185608, |
|
"eval_runtime": 8.7088, |
|
"eval_samples_per_second": 57.413, |
|
"eval_steps_per_second": 14.353, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 37.647239685058594, |
|
"learning_rate": 1.768888888888889e-05, |
|
"loss": 0.3808, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 12.569307327270508, |
|
"learning_rate": 1.76e-05, |
|
"loss": 0.2284, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 26.684873580932617, |
|
"learning_rate": 1.751111111111111e-05, |
|
"loss": 0.3773, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 50.10330581665039, |
|
"learning_rate": 1.7422222222222222e-05, |
|
"loss": 0.3123, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 58.362998962402344, |
|
"learning_rate": 1.7333333333333336e-05, |
|
"loss": 0.3619, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 38.542510986328125, |
|
"learning_rate": 1.7244444444444446e-05, |
|
"loss": 0.4542, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.2800000000000002, |
|
"grad_norm": 19.456159591674805, |
|
"learning_rate": 1.7155555555555557e-05, |
|
"loss": 0.3001, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 49.56319808959961, |
|
"learning_rate": 1.706666666666667e-05, |
|
"loss": 0.2794, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 38.250526428222656, |
|
"learning_rate": 1.697777777777778e-05, |
|
"loss": 0.3814, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 23.731311798095703, |
|
"learning_rate": 1.688888888888889e-05, |
|
"loss": 0.271, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 67.27165985107422, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 0.268, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 55.98159408569336, |
|
"learning_rate": 1.6711111111111112e-05, |
|
"loss": 0.3155, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 73.56021881103516, |
|
"learning_rate": 1.6622222222222223e-05, |
|
"loss": 0.335, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 34.84354782104492, |
|
"learning_rate": 1.6533333333333333e-05, |
|
"loss": 0.3491, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 53.700233459472656, |
|
"learning_rate": 1.6444444444444444e-05, |
|
"loss": 0.2745, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 91.70999908447266, |
|
"learning_rate": 1.6355555555555557e-05, |
|
"loss": 0.2777, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 43.926513671875, |
|
"learning_rate": 1.6266666666666668e-05, |
|
"loss": 0.3612, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 80.7772216796875, |
|
"learning_rate": 1.617777777777778e-05, |
|
"loss": 0.4634, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 41.586029052734375, |
|
"learning_rate": 1.608888888888889e-05, |
|
"loss": 0.5096, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 49.59820556640625, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.5235, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 30.699186325073242, |
|
"learning_rate": 1.5911111111111113e-05, |
|
"loss": 0.2734, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 68.27303314208984, |
|
"learning_rate": 1.5822222222222224e-05, |
|
"loss": 0.2262, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 27.952354431152344, |
|
"learning_rate": 1.5733333333333334e-05, |
|
"loss": 0.2699, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 66.80079650878906, |
|
"learning_rate": 1.5644444444444448e-05, |
|
"loss": 0.2604, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 61.24441146850586, |
|
"learning_rate": 1.555555555555556e-05, |
|
"loss": 0.4693, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.88, |
|
"eval_f1": 0.8795642760077516, |
|
"eval_loss": 0.3856970965862274, |
|
"eval_runtime": 8.5744, |
|
"eval_samples_per_second": 58.313, |
|
"eval_steps_per_second": 14.578, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 18.007030487060547, |
|
"learning_rate": 1.546666666666667e-05, |
|
"loss": 0.3052, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 27.437442779541016, |
|
"learning_rate": 1.537777777777778e-05, |
|
"loss": 0.379, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 21.621686935424805, |
|
"learning_rate": 1.528888888888889e-05, |
|
"loss": 0.1708, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 43.909889221191406, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 0.3133, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 18.5758056640625, |
|
"learning_rate": 1.5111111111111112e-05, |
|
"loss": 0.2785, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"grad_norm": 54.679080963134766, |
|
"learning_rate": 1.5022222222222223e-05, |
|
"loss": 0.3306, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.2800000000000002, |
|
"grad_norm": 8.038825988769531, |
|
"learning_rate": 1.4933333333333335e-05, |
|
"loss": 0.3865, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 38.3331413269043, |
|
"learning_rate": 1.4844444444444445e-05, |
|
"loss": 0.2825, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"grad_norm": 33.338321685791016, |
|
"learning_rate": 1.4755555555555556e-05, |
|
"loss": 0.3358, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 17.133817672729492, |
|
"learning_rate": 1.4666666666666666e-05, |
|
"loss": 0.2373, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 38.80360794067383, |
|
"learning_rate": 1.457777777777778e-05, |
|
"loss": 0.2955, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 70.89875030517578, |
|
"learning_rate": 1.448888888888889e-05, |
|
"loss": 0.2965, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"grad_norm": 21.7913875579834, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 0.2547, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 69.4725570678711, |
|
"learning_rate": 1.4311111111111111e-05, |
|
"loss": 0.3298, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 55.30929946899414, |
|
"learning_rate": 1.4222222222222224e-05, |
|
"loss": 0.3039, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 14.594221115112305, |
|
"learning_rate": 1.4133333333333334e-05, |
|
"loss": 0.502, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"grad_norm": 25.05082893371582, |
|
"learning_rate": 1.4044444444444445e-05, |
|
"loss": 0.2026, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.7199999999999998, |
|
"grad_norm": 22.42095947265625, |
|
"learning_rate": 1.3955555555555558e-05, |
|
"loss": 0.3169, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 87.3176498413086, |
|
"learning_rate": 1.3866666666666669e-05, |
|
"loss": 0.173, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 47.08412170410156, |
|
"learning_rate": 1.377777777777778e-05, |
|
"loss": 0.2126, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 26.897066116333008, |
|
"learning_rate": 1.368888888888889e-05, |
|
"loss": 0.3429, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"grad_norm": 45.889381408691406, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 0.3655, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 72.64950561523438, |
|
"learning_rate": 1.3511111111111112e-05, |
|
"loss": 0.2785, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"grad_norm": 74.62352752685547, |
|
"learning_rate": 1.3422222222222223e-05, |
|
"loss": 0.2916, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 90.89289855957031, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.1933, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.894, |
|
"eval_f1": 0.8947502154752695, |
|
"eval_loss": 0.3444240093231201, |
|
"eval_runtime": 8.6975, |
|
"eval_samples_per_second": 57.488, |
|
"eval_steps_per_second": 14.372, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 55.490447998046875, |
|
"learning_rate": 1.3244444444444447e-05, |
|
"loss": 0.1877, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 70.09012603759766, |
|
"learning_rate": 1.3155555555555558e-05, |
|
"loss": 0.348, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 74.7720947265625, |
|
"learning_rate": 1.3066666666666668e-05, |
|
"loss": 0.3635, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"grad_norm": 37.04428482055664, |
|
"learning_rate": 1.2977777777777779e-05, |
|
"loss": 0.3447, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 77.90045928955078, |
|
"learning_rate": 1.288888888888889e-05, |
|
"loss": 0.3281, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 92.32732391357422, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 0.2735, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"grad_norm": 44.78059387207031, |
|
"learning_rate": 1.2711111111111112e-05, |
|
"loss": 0.3756, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"grad_norm": 28.424644470214844, |
|
"learning_rate": 1.2622222222222222e-05, |
|
"loss": 0.1935, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 50.174949645996094, |
|
"learning_rate": 1.2533333333333336e-05, |
|
"loss": 0.3282, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 13.964503288269043, |
|
"learning_rate": 1.2444444444444446e-05, |
|
"loss": 0.1625, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"grad_norm": 90.46607971191406, |
|
"learning_rate": 1.2355555555555557e-05, |
|
"loss": 0.2342, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"grad_norm": 2.025235891342163, |
|
"learning_rate": 1.2266666666666667e-05, |
|
"loss": 0.2906, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"grad_norm": 36.99541091918945, |
|
"learning_rate": 1.217777777777778e-05, |
|
"loss": 0.2817, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.5600000000000005, |
|
"grad_norm": 84.70581817626953, |
|
"learning_rate": 1.208888888888889e-05, |
|
"loss": 0.3246, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"grad_norm": 27.94813346862793, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.3587, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"grad_norm": 8.473387718200684, |
|
"learning_rate": 1.191111111111111e-05, |
|
"loss": 0.2862, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 42.427734375, |
|
"learning_rate": 1.1822222222222225e-05, |
|
"loss": 0.2924, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"grad_norm": 65.75909423828125, |
|
"learning_rate": 1.1733333333333335e-05, |
|
"loss": 0.2524, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"grad_norm": 72.92707061767578, |
|
"learning_rate": 1.1644444444444446e-05, |
|
"loss": 0.2454, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 30.95335578918457, |
|
"learning_rate": 1.1555555555555556e-05, |
|
"loss": 0.303, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 26.16335678100586, |
|
"learning_rate": 1.1466666666666668e-05, |
|
"loss": 0.3051, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"grad_norm": 34.4001350402832, |
|
"learning_rate": 1.1377777777777779e-05, |
|
"loss": 0.1801, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"grad_norm": 43.86663818359375, |
|
"learning_rate": 1.1288888888888889e-05, |
|
"loss": 0.168, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"grad_norm": 0.3658877909183502, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 0.2493, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 17.853328704833984, |
|
"learning_rate": 1.1111111111111113e-05, |
|
"loss": 0.3146, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.936, |
|
"eval_f1": 0.9361975859190919, |
|
"eval_loss": 0.2456333488225937, |
|
"eval_runtime": 8.6547, |
|
"eval_samples_per_second": 57.772, |
|
"eval_steps_per_second": 14.443, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"grad_norm": 28.078502655029297, |
|
"learning_rate": 1.1022222222222224e-05, |
|
"loss": 0.1975, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"grad_norm": 30.93500518798828, |
|
"learning_rate": 1.0933333333333334e-05, |
|
"loss": 0.1725, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"grad_norm": 36.137969970703125, |
|
"learning_rate": 1.0844444444444446e-05, |
|
"loss": 0.3072, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"grad_norm": 38.812007904052734, |
|
"learning_rate": 1.0755555555555557e-05, |
|
"loss": 0.2047, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 62.65950393676758, |
|
"learning_rate": 1.0666666666666667e-05, |
|
"loss": 0.2897, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"grad_norm": 69.6206283569336, |
|
"learning_rate": 1.0577777777777778e-05, |
|
"loss": 0.2959, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"grad_norm": 53.226375579833984, |
|
"learning_rate": 1.048888888888889e-05, |
|
"loss": 0.3842, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"grad_norm": 120.20613098144531, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.2835, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"grad_norm": 19.712247848510742, |
|
"learning_rate": 1.0311111111111113e-05, |
|
"loss": 0.2208, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"grad_norm": 39.82862854003906, |
|
"learning_rate": 1.0222222222222223e-05, |
|
"loss": 0.3166, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"grad_norm": 25.11990737915039, |
|
"learning_rate": 1.0133333333333335e-05, |
|
"loss": 0.266, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"grad_norm": 29.185226440429688, |
|
"learning_rate": 1.0044444444444446e-05, |
|
"loss": 0.1907, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"grad_norm": 51.418678283691406, |
|
"learning_rate": 9.955555555555556e-06, |
|
"loss": 0.2762, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 5.5600000000000005, |
|
"grad_norm": 8.843996047973633, |
|
"learning_rate": 9.866666666666668e-06, |
|
"loss": 0.1223, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 91.5838851928711, |
|
"learning_rate": 9.777777777777779e-06, |
|
"loss": 0.2242, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"grad_norm": 58.07487869262695, |
|
"learning_rate": 9.688888888888889e-06, |
|
"loss": 0.2818, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"grad_norm": 63.59052658081055, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.2808, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"grad_norm": 35.42782974243164, |
|
"learning_rate": 9.511111111111112e-06, |
|
"loss": 0.2617, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"grad_norm": 41.844383239746094, |
|
"learning_rate": 9.422222222222222e-06, |
|
"loss": 0.1203, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"grad_norm": 51.4361457824707, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.306, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 29.05099105834961, |
|
"learning_rate": 9.244444444444445e-06, |
|
"loss": 0.4744, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"grad_norm": 51.445220947265625, |
|
"learning_rate": 9.155555555555557e-06, |
|
"loss": 0.3157, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"grad_norm": 34.62455368041992, |
|
"learning_rate": 9.066666666666667e-06, |
|
"loss": 0.22, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 12.388022422790527, |
|
"learning_rate": 8.977777777777778e-06, |
|
"loss": 0.2072, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 9.763510704040527, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.1832, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.924, |
|
"eval_f1": 0.9229000637110923, |
|
"eval_loss": 0.3368554711341858, |
|
"eval_runtime": 8.557, |
|
"eval_samples_per_second": 58.432, |
|
"eval_steps_per_second": 14.608, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"grad_norm": 9.751482963562012, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.2234, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"grad_norm": 3.580552101135254, |
|
"learning_rate": 8.711111111111111e-06, |
|
"loss": 0.1233, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 30.625099182128906, |
|
"learning_rate": 8.622222222222223e-06, |
|
"loss": 0.1805, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"grad_norm": 23.020915985107422, |
|
"learning_rate": 8.533333333333335e-06, |
|
"loss": 0.2868, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"grad_norm": 18.9671630859375, |
|
"learning_rate": 8.444444444444446e-06, |
|
"loss": 0.2041, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"grad_norm": 33.78700256347656, |
|
"learning_rate": 8.355555555555556e-06, |
|
"loss": 0.1599, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"grad_norm": 75.42900848388672, |
|
"learning_rate": 8.266666666666667e-06, |
|
"loss": 0.1084, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"grad_norm": 2.74202299118042, |
|
"learning_rate": 8.177777777777779e-06, |
|
"loss": 0.2967, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"grad_norm": 51.8353157043457, |
|
"learning_rate": 8.08888888888889e-06, |
|
"loss": 0.3407, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 10.7664213180542, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.3942, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"grad_norm": 25.22667694091797, |
|
"learning_rate": 7.911111111111112e-06, |
|
"loss": 0.1484, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 7.64856481552124, |
|
"learning_rate": 7.822222222222224e-06, |
|
"loss": 0.2024, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"grad_norm": 12.741067886352539, |
|
"learning_rate": 7.733333333333334e-06, |
|
"loss": 0.2127, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 6.5600000000000005, |
|
"grad_norm": 33.97446060180664, |
|
"learning_rate": 7.644444444444445e-06, |
|
"loss": 0.1763, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"grad_norm": 22.31916618347168, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.1481, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"grad_norm": 31.509164810180664, |
|
"learning_rate": 7.4666666666666675e-06, |
|
"loss": 0.2496, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"grad_norm": 49.01666259765625, |
|
"learning_rate": 7.377777777777778e-06, |
|
"loss": 0.1519, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"grad_norm": 19.280881881713867, |
|
"learning_rate": 7.28888888888889e-06, |
|
"loss": 0.2143, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"grad_norm": 21.911842346191406, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.1652, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 62.79658126831055, |
|
"learning_rate": 7.111111111111112e-06, |
|
"loss": 0.1994, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"grad_norm": 4.01326322555542, |
|
"learning_rate": 7.022222222222222e-06, |
|
"loss": 0.1806, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"grad_norm": 61.08246994018555, |
|
"learning_rate": 6.9333333333333344e-06, |
|
"loss": 0.1763, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"grad_norm": 64.75476837158203, |
|
"learning_rate": 6.844444444444445e-06, |
|
"loss": 0.1214, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"grad_norm": 63.44350051879883, |
|
"learning_rate": 6.755555555555556e-06, |
|
"loss": 0.2009, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 7.164996147155762, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1407, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.946, |
|
"eval_f1": 0.9454404643456045, |
|
"eval_loss": 0.3425253629684448, |
|
"eval_runtime": 8.5243, |
|
"eval_samples_per_second": 58.656, |
|
"eval_steps_per_second": 14.664, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"grad_norm": 63.62957763671875, |
|
"learning_rate": 6.577777777777779e-06, |
|
"loss": 0.2602, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"grad_norm": 44.226463317871094, |
|
"learning_rate": 6.488888888888889e-06, |
|
"loss": 0.1125, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 62.62407684326172, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.3144, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"grad_norm": 41.71411895751953, |
|
"learning_rate": 6.311111111111111e-06, |
|
"loss": 0.1585, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 33.34163284301758, |
|
"learning_rate": 6.222222222222223e-06, |
|
"loss": 0.2164, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"grad_norm": 51.477291107177734, |
|
"learning_rate": 6.133333333333334e-06, |
|
"loss": 0.1467, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"grad_norm": 4.217121601104736, |
|
"learning_rate": 6.044444444444445e-06, |
|
"loss": 0.072, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"grad_norm": 14.78985595703125, |
|
"learning_rate": 5.955555555555555e-06, |
|
"loss": 0.1472, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"grad_norm": 28.724449157714844, |
|
"learning_rate": 5.8666666666666675e-06, |
|
"loss": 0.1792, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"grad_norm": 90.70431518554688, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.263, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"grad_norm": 46.434898376464844, |
|
"learning_rate": 5.688888888888889e-06, |
|
"loss": 0.1768, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"grad_norm": 13.007624626159668, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.0883, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"grad_norm": 18.006866455078125, |
|
"learning_rate": 5.511111111111112e-06, |
|
"loss": 0.1935, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 7.5600000000000005, |
|
"grad_norm": 17.47731590270996, |
|
"learning_rate": 5.422222222222223e-06, |
|
"loss": 0.218, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 55.587310791015625, |
|
"learning_rate": 5.333333333333334e-06, |
|
"loss": 0.2225, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"grad_norm": 32.001670837402344, |
|
"learning_rate": 5.244444444444445e-06, |
|
"loss": 0.1861, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"grad_norm": 55.86604309082031, |
|
"learning_rate": 5.155555555555556e-06, |
|
"loss": 0.1404, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"grad_norm": 89.22872924804688, |
|
"learning_rate": 5.0666666666666676e-06, |
|
"loss": 0.162, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 0.25738686323165894, |
|
"learning_rate": 4.977777777777778e-06, |
|
"loss": 0.2159, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"grad_norm": 48.370277404785156, |
|
"learning_rate": 4.888888888888889e-06, |
|
"loss": 0.171, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 5.000326633453369, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.2945, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"grad_norm": 7.4195685386657715, |
|
"learning_rate": 4.711111111111111e-06, |
|
"loss": 0.0845, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"grad_norm": 71.77230072021484, |
|
"learning_rate": 4.622222222222222e-06, |
|
"loss": 0.1664, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"grad_norm": 11.938881874084473, |
|
"learning_rate": 4.533333333333334e-06, |
|
"loss": 0.1009, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 9.97079086303711, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.1462, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.948, |
|
"eval_f1": 0.9475735032806624, |
|
"eval_loss": 0.2863907516002655, |
|
"eval_runtime": 8.7213, |
|
"eval_samples_per_second": 57.331, |
|
"eval_steps_per_second": 14.333, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 46.91502380371094, |
|
"learning_rate": 4.3555555555555555e-06, |
|
"loss": 0.1368, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"grad_norm": 12.112004280090332, |
|
"learning_rate": 4.266666666666668e-06, |
|
"loss": 0.0635, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"grad_norm": 11.893278121948242, |
|
"learning_rate": 4.177777777777778e-06, |
|
"loss": 0.2191, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 56.45474624633789, |
|
"learning_rate": 4.088888888888889e-06, |
|
"loss": 0.1351, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"grad_norm": 22.199668884277344, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1389, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"grad_norm": 13.854080200195312, |
|
"learning_rate": 3.911111111111112e-06, |
|
"loss": 0.1661, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"grad_norm": 44.242244720458984, |
|
"learning_rate": 3.8222222222222224e-06, |
|
"loss": 0.16, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"grad_norm": 12.572860717773438, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"loss": 0.1411, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"grad_norm": 21.231435775756836, |
|
"learning_rate": 3.644444444444445e-06, |
|
"loss": 0.2252, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 7.332576274871826, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.1545, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"grad_norm": 21.509479522705078, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"loss": 0.1005, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"grad_norm": 30.77674674987793, |
|
"learning_rate": 3.377777777777778e-06, |
|
"loss": 0.1603, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"grad_norm": 41.59734344482422, |
|
"learning_rate": 3.2888888888888894e-06, |
|
"loss": 0.2069, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"grad_norm": 29.843107223510742, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.2436, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"grad_norm": 38.74693298339844, |
|
"learning_rate": 3.1111111111111116e-06, |
|
"loss": 0.1175, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"grad_norm": 15.177172660827637, |
|
"learning_rate": 3.0222222222222225e-06, |
|
"loss": 0.1125, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"grad_norm": 46.20457077026367, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"loss": 0.1392, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"grad_norm": 0.42567014694213867, |
|
"learning_rate": 2.8444444444444446e-06, |
|
"loss": 0.1395, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"grad_norm": 60.429443359375, |
|
"learning_rate": 2.755555555555556e-06, |
|
"loss": 0.2866, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"grad_norm": 34.261043548583984, |
|
"learning_rate": 2.666666666666667e-06, |
|
"loss": 0.2126, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"grad_norm": 24.502214431762695, |
|
"learning_rate": 2.577777777777778e-06, |
|
"loss": 0.131, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"grad_norm": 1.0593976974487305, |
|
"learning_rate": 2.488888888888889e-06, |
|
"loss": 0.1685, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"grad_norm": 26.03633689880371, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.1013, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"grad_norm": 0.4252139925956726, |
|
"learning_rate": 2.311111111111111e-06, |
|
"loss": 0.1407, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.702337741851807, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0905, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.956, |
|
"eval_f1": 0.9559537578342157, |
|
"eval_loss": 0.21772001683712006, |
|
"eval_runtime": 8.4726, |
|
"eval_samples_per_second": 59.014, |
|
"eval_steps_per_second": 14.753, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.3719678223133087, |
|
"learning_rate": 2.133333333333334e-06, |
|
"loss": 0.1046, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"grad_norm": 84.62899017333984, |
|
"learning_rate": 2.0444444444444447e-06, |
|
"loss": 0.0603, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"grad_norm": 52.96356964111328, |
|
"learning_rate": 1.955555555555556e-06, |
|
"loss": 0.1158, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"grad_norm": 15.068770408630371, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"loss": 0.1248, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"grad_norm": 59.39225769042969, |
|
"learning_rate": 1.777777777777778e-06, |
|
"loss": 0.1852, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"grad_norm": 23.1497859954834, |
|
"learning_rate": 1.688888888888889e-06, |
|
"loss": 0.143, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"grad_norm": 87.2287368774414, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.0571, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"grad_norm": 0.1568875014781952, |
|
"learning_rate": 1.5111111111111112e-06, |
|
"loss": 0.0795, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"grad_norm": 33.47188949584961, |
|
"learning_rate": 1.4222222222222223e-06, |
|
"loss": 0.2421, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"grad_norm": 0.5016130805015564, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.2073, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"grad_norm": 0.5481801629066467, |
|
"learning_rate": 1.2444444444444445e-06, |
|
"loss": 0.114, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"grad_norm": 8.88733196258545, |
|
"learning_rate": 1.1555555555555556e-06, |
|
"loss": 0.1705, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"grad_norm": 0.017446085810661316, |
|
"learning_rate": 1.066666666666667e-06, |
|
"loss": 0.103, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"grad_norm": 2.8653106689453125, |
|
"learning_rate": 9.77777777777778e-07, |
|
"loss": 0.1108, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 2.0959718227386475, |
|
"learning_rate": 8.88888888888889e-07, |
|
"loss": 0.0365, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"grad_norm": 9.384411811828613, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.0692, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 78.89244079589844, |
|
"learning_rate": 7.111111111111112e-07, |
|
"loss": 0.1545, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"grad_norm": 20.242263793945312, |
|
"learning_rate": 6.222222222222223e-07, |
|
"loss": 0.1993, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"grad_norm": 1.8956968784332275, |
|
"learning_rate": 5.333333333333335e-07, |
|
"loss": 0.0961, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"grad_norm": 13.430234909057617, |
|
"learning_rate": 4.444444444444445e-07, |
|
"loss": 0.1252, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"grad_norm": 13.087069511413574, |
|
"learning_rate": 3.555555555555556e-07, |
|
"loss": 0.1547, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"grad_norm": 11.872249603271484, |
|
"learning_rate": 2.666666666666667e-07, |
|
"loss": 0.0899, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"grad_norm": 2.150407314300537, |
|
"learning_rate": 1.777777777777778e-07, |
|
"loss": 0.116, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"grad_norm": 5.467511177062988, |
|
"learning_rate": 8.88888888888889e-08, |
|
"loss": 0.1912, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.3495206832885742, |
|
"learning_rate": 0.0, |
|
"loss": 0.0859, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.968, |
|
"eval_f1": 0.9678344915175675, |
|
"eval_loss": 0.22622914612293243, |
|
"eval_runtime": 8.4705, |
|
"eval_samples_per_second": 59.028, |
|
"eval_steps_per_second": 14.757, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2500, |
|
"total_flos": 4.0857422266368e+18, |
|
"train_loss": 0.24487185467481612, |
|
"train_runtime": 1824.5027, |
|
"train_samples_per_second": 21.924, |
|
"train_steps_per_second": 1.37 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.0857422266368e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|