{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.6260504201680672,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 4.6151,
      "step": 10
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.000000000000001e-06,
      "loss": 4.6009,
      "step": 20
    },
    {
      "epoch": 0.03,
      "learning_rate": 6e-06,
      "loss": 4.5892,
      "step": 30
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.000000000000001e-06,
      "loss": 4.4933,
      "step": 40
    },
    {
      "epoch": 0.05,
      "learning_rate": 1e-05,
      "loss": 4.4838,
      "step": 50
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.2e-05,
      "loss": 4.0367,
      "step": 60
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.4e-05,
      "loss": 3.6247,
      "step": 70
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 3.329,
      "step": 80
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.8e-05,
      "loss": 3.1007,
      "step": 90
    },
    {
      "epoch": 0.11,
      "learning_rate": 2e-05,
      "loss": 2.8089,
      "step": 100
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.992743105950653e-05,
      "loss": 2.5354,
      "step": 110
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9854862119013064e-05,
      "loss": 2.2403,
      "step": 120
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9782293178519596e-05,
      "loss": 2.7885,
      "step": 130
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9709724238026125e-05,
      "loss": 2.4836,
      "step": 140
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9637155297532658e-05,
      "loss": 2.3569,
      "step": 150
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9564586357039187e-05,
      "loss": 2.4299,
      "step": 160
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.949201741654572e-05,
      "loss": 2.0131,
      "step": 170
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9419448476052253e-05,
      "loss": 1.8533,
      "step": 180
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9346879535558782e-05,
      "loss": 2.0193,
      "step": 190
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9274310595065315e-05,
      "loss": 2.0059,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9201741654571844e-05,
      "loss": 1.6821,
      "step": 210
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.9129172714078376e-05,
      "loss": 1.8958,
      "step": 220
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.905660377358491e-05,
      "loss": 1.9811,
      "step": 230
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.898403483309144e-05,
      "loss": 2.0177,
      "step": 240
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.8911465892597968e-05,
      "loss": 1.9885,
      "step": 250
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.88388969521045e-05,
      "loss": 1.8754,
      "step": 260
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8766328011611033e-05,
      "loss": 1.5953,
      "step": 270
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.8693759071117562e-05,
      "loss": 1.9287,
      "step": 280
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.8621190130624095e-05,
      "loss": 1.7173,
      "step": 290
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.8548621190130624e-05,
      "loss": 1.8167,
      "step": 300
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.8476052249637157e-05,
      "loss": 1.3269,
      "step": 310
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.840348330914369e-05,
      "loss": 1.5738,
      "step": 320
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.833091436865022e-05,
      "loss": 1.5026,
      "step": 330
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.825834542815675e-05,
      "loss": 1.6312,
      "step": 340
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.8185776487663284e-05,
      "loss": 1.8379,
      "step": 350
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8113207547169813e-05,
      "loss": 1.242,
      "step": 360
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.8040638606676342e-05,
      "loss": 1.5145,
      "step": 370
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.7968069666182875e-05,
      "loss": 1.326,
      "step": 380
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.7895500725689404e-05,
      "loss": 1.2579,
      "step": 390
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.7822931785195937e-05,
      "loss": 1.4427,
      "step": 400
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.775036284470247e-05,
      "loss": 1.2246,
      "step": 410
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.7677793904209e-05,
      "loss": 1.3649,
      "step": 420
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.760522496371553e-05,
      "loss": 1.5865,
      "step": 430
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.7532656023222064e-05,
      "loss": 1.0269,
      "step": 440
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.7460087082728593e-05,
      "loss": 1.2287,
      "step": 450
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.7387518142235126e-05,
      "loss": 1.231,
      "step": 460
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.7314949201741655e-05,
      "loss": 1.4882,
      "step": 470
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.7242380261248185e-05,
      "loss": 1.3311,
      "step": 480
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.716981132075472e-05,
      "loss": 0.8074,
      "step": 490
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.709724238026125e-05,
      "loss": 1.3885,
      "step": 500
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.702467343976778e-05,
      "loss": 0.7269,
      "step": 510
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.6952104499274312e-05,
      "loss": 1.0517,
      "step": 520
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.6879535558780844e-05,
      "loss": 1.0979,
      "step": 530
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.6806966618287374e-05,
      "loss": 1.2165,
      "step": 540
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.6734397677793906e-05,
      "loss": 0.9271,
      "step": 550
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6661828737300436e-05,
      "loss": 1.1317,
      "step": 560
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.6589259796806968e-05,
      "loss": 1.1018,
      "step": 570
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.65166908563135e-05,
      "loss": 1.054,
      "step": 580
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.644412191582003e-05,
      "loss": 1.0754,
      "step": 590
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.6371552975326563e-05,
      "loss": 0.8159,
      "step": 600
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.6298984034833092e-05,
      "loss": 0.929,
      "step": 610
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6226415094339625e-05,
      "loss": 1.1673,
      "step": 620
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.6153846153846154e-05,
      "loss": 0.8993,
      "step": 630
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.6081277213352687e-05,
      "loss": 0.8426,
      "step": 640
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.6008708272859216e-05,
      "loss": 0.9685,
      "step": 650
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.593613933236575e-05,
      "loss": 0.7475,
      "step": 660
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.586357039187228e-05,
      "loss": 0.4813,
      "step": 670
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.579100145137881e-05,
      "loss": 0.8771,
      "step": 680
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5718432510885343e-05,
      "loss": 0.8842,
      "step": 690
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5645863570391872e-05,
      "loss": 1.0122,
      "step": 700
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.5573294629898405e-05,
      "loss": 0.6247,
      "step": 710
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.5500725689404938e-05,
      "loss": 0.9389,
      "step": 720
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.5428156748911467e-05,
      "loss": 0.3921,
      "step": 730
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.5355587808417996e-05,
      "loss": 0.695,
      "step": 740
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.5283018867924532e-05,
      "loss": 0.4541,
      "step": 750
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.5210449927431061e-05,
      "loss": 0.6469,
      "step": 760
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.5137880986937592e-05,
      "loss": 0.6391,
      "step": 770
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.5065312046444123e-05,
      "loss": 0.6501,
      "step": 780
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4992743105950653e-05,
      "loss": 0.9943,
      "step": 790
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4920174165457187e-05,
      "loss": 0.5541,
      "step": 800
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.4847605224963716e-05,
      "loss": 0.7708,
      "step": 810
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.4775036284470247e-05,
      "loss": 0.9068,
      "step": 820
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.4702467343976778e-05,
      "loss": 0.8202,
      "step": 830
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.462989840348331e-05,
      "loss": 0.6628,
      "step": 840
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.4557329462989842e-05,
      "loss": 0.768,
      "step": 850
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.4484760522496373e-05,
      "loss": 0.829,
      "step": 860
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.4412191582002904e-05,
      "loss": 0.9561,
      "step": 870
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.4339622641509435e-05,
      "loss": 0.6783,
      "step": 880
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.4267053701015967e-05,
      "loss": 0.7017,
      "step": 890
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.4194484760522498e-05,
      "loss": 0.6462,
      "step": 900
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.4121915820029029e-05,
      "loss": 0.648,
      "step": 910
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.4049346879535558e-05,
      "loss": 0.4731,
      "step": 920
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.3976777939042093e-05,
      "loss": 0.6257,
      "step": 930
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.3904208998548622e-05,
      "loss": 0.4245,
      "step": 940
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3831640058055153e-05,
      "loss": 0.687,
      "step": 950
    },
    {
      "epoch": 1.0,
      "eval_Accuracy": 0.9114854517611026,
      "eval_F1": 0.8950475194731157,
      "eval_loss": 0.5604398846626282,
      "eval_runtime": 98.5377,
      "eval_samples_per_second": 33.135,
      "eval_steps_per_second": 2.08,
      "step": 952
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3759071117561684e-05,
      "loss": 0.3493,
      "step": 960
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3686502177068215e-05,
      "loss": 0.9263,
      "step": 970
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.3613933236574748e-05,
      "loss": 0.4981,
      "step": 980
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.3541364296081278e-05,
      "loss": 0.4574,
      "step": 990
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.346879535558781e-05,
      "loss": 0.6115,
      "step": 1000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.339622641509434e-05,
      "loss": 0.5973,
      "step": 1010
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.3323657474600873e-05,
      "loss": 0.4881,
      "step": 1020
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.3251088534107404e-05,
      "loss": 0.4824,
      "step": 1030
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.3178519593613935e-05,
      "loss": 0.7372,
      "step": 1040
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.3105950653120464e-05,
      "loss": 0.7515,
      "step": 1050
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.3033381712626995e-05,
      "loss": 0.446,
      "step": 1060
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2960812772133528e-05,
      "loss": 0.6326,
      "step": 1070
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.2888243831640059e-05,
      "loss": 0.7661,
      "step": 1080
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.281567489114659e-05,
      "loss": 0.4974,
      "step": 1090
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.274310595065312e-05,
      "loss": 0.7276,
      "step": 1100
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.2670537010159653e-05,
      "loss": 0.3897,
      "step": 1110
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.2597968069666184e-05,
      "loss": 0.7384,
      "step": 1120
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.2525399129172715e-05,
      "loss": 0.7007,
      "step": 1130
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.2452830188679246e-05,
      "loss": 0.6739,
      "step": 1140
    },
    {
      "epoch": 1.21,
      "learning_rate": 1.2380261248185777e-05,
      "loss": 0.5027,
      "step": 1150
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.230769230769231e-05,
      "loss": 0.5931,
      "step": 1160
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.223512336719884e-05,
      "loss": 0.3967,
      "step": 1170
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.216255442670537e-05,
      "loss": 0.5215,
      "step": 1180
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.2089985486211901e-05,
      "loss": 0.5034,
      "step": 1190
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.2017416545718435e-05,
      "loss": 0.4521,
      "step": 1200
    },
    {
      "epoch": 1.27,
      "learning_rate": 1.1944847605224965e-05,
      "loss": 0.255,
      "step": 1210
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.1872278664731495e-05,
      "loss": 0.2349,
      "step": 1220
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.1799709724238026e-05,
      "loss": 0.6059,
      "step": 1230
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.1727140783744557e-05,
      "loss": 0.3443,
      "step": 1240
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.165457184325109e-05,
      "loss": 0.3367,
      "step": 1250
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.1582002902757621e-05,
      "loss": 0.4613,
      "step": 1260
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.1509433962264152e-05,
      "loss": 0.3399,
      "step": 1270
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.1436865021770683e-05,
      "loss": 0.5677,
      "step": 1280
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.1364296081277216e-05,
      "loss": 0.8075,
      "step": 1290
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.1291727140783746e-05,
      "loss": 0.4384,
      "step": 1300
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.1219158200290277e-05,
      "loss": 0.3861,
      "step": 1310
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.1146589259796807e-05,
      "loss": 0.1245,
      "step": 1320
    },
    {
      "epoch": 1.4,
      "learning_rate": 1.1074020319303338e-05,
      "loss": 0.6668,
      "step": 1330
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.100145137880987e-05,
      "loss": 0.7036,
      "step": 1340
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.0928882438316401e-05,
      "loss": 0.205,
      "step": 1350
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.0856313497822932e-05,
      "loss": 0.588,
      "step": 1360
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.0783744557329463e-05,
      "loss": 0.7293,
      "step": 1370
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.0711175616835996e-05,
      "loss": 0.602,
      "step": 1380
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0638606676342527e-05,
      "loss": 0.3027,
      "step": 1390
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0566037735849058e-05,
      "loss": 0.4086,
      "step": 1400
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0493468795355589e-05,
      "loss": 0.4706,
      "step": 1410
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.0420899854862121e-05,
      "loss": 0.2685,
      "step": 1420
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0348330914368652e-05,
      "loss": 0.4058,
      "step": 1430
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0275761973875183e-05,
      "loss": 0.4807,
      "step": 1440
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.0203193033381712e-05,
      "loss": 0.3557,
      "step": 1450
    },
    {
      "epoch": 1.53,
      "learning_rate": 1.0130624092888243e-05,
      "loss": 0.3329,
      "step": 1460
    },
    {
      "epoch": 1.54,
      "learning_rate": 1.0058055152394776e-05,
      "loss": 0.2153,
      "step": 1470
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.985486211901307e-06,
      "loss": 0.7515,
      "step": 1480
    },
    {
      "epoch": 1.57,
      "learning_rate": 9.912917271407838e-06,
      "loss": 0.3258,
      "step": 1490
    },
    {
      "epoch": 1.58,
      "learning_rate": 9.84034833091437e-06,
      "loss": 0.1792,
      "step": 1500
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.7677793904209e-06,
      "loss": 0.5118,
      "step": 1510
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.69521044992743e-06,
      "loss": 0.3737,
      "step": 1520
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.622641509433963e-06,
      "loss": 0.3137,
      "step": 1530
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.550072568940494e-06,
      "loss": 0.473,
      "step": 1540
    },
    {
      "epoch": 1.63,
      "learning_rate": 9.477503628447025e-06,
      "loss": 0.5268,
      "step": 1550
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.404934687953556e-06,
      "loss": 0.4378,
      "step": 1560
    },
    {
      "epoch": 1.65,
      "learning_rate": 9.332365747460089e-06,
      "loss": 0.4031,
      "step": 1570
    },
    {
      "epoch": 1.66,
      "learning_rate": 9.259796806966618e-06,
      "loss": 0.7125,
      "step": 1580
    },
    {
      "epoch": 1.67,
      "learning_rate": 9.187227866473151e-06,
      "loss": 0.3404,
      "step": 1590
    },
    {
      "epoch": 1.68,
      "learning_rate": 9.114658925979682e-06,
      "loss": 0.4025,
      "step": 1600
    },
    {
      "epoch": 1.69,
      "learning_rate": 9.042089985486213e-06,
      "loss": 0.6217,
      "step": 1610
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.969521044992744e-06,
      "loss": 0.4097,
      "step": 1620
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.896952104499275e-06,
      "loss": 0.4337,
      "step": 1630
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.824383164005806e-06,
      "loss": 0.2964,
      "step": 1640
    },
    {
      "epoch": 1.73,
      "learning_rate": 8.751814223512337e-06,
      "loss": 0.5013,
      "step": 1650
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.67924528301887e-06,
      "loss": 0.6303,
      "step": 1660
    },
    {
      "epoch": 1.75,
      "learning_rate": 8.6066763425254e-06,
      "loss": 0.3938,
      "step": 1670
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.534107402031931e-06,
      "loss": 0.1885,
      "step": 1680
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.461538461538462e-06,
      "loss": 0.3159,
      "step": 1690
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.388969521044995e-06,
      "loss": 0.5674,
      "step": 1700
    },
    {
      "epoch": 1.8,
      "learning_rate": 8.316400580551524e-06,
      "loss": 0.1875,
      "step": 1710
    },
    {
      "epoch": 1.81,
      "learning_rate": 8.243831640058055e-06,
      "loss": 0.4107,
      "step": 1720
    },
    {
      "epoch": 1.82,
      "learning_rate": 8.171262699564588e-06,
      "loss": 0.693,
      "step": 1730
    },
    {
      "epoch": 1.83,
      "learning_rate": 8.098693759071119e-06,
      "loss": 0.5109,
      "step": 1740
    },
    {
      "epoch": 1.84,
      "learning_rate": 8.02612481857765e-06,
      "loss": 0.3853,
      "step": 1750
    },
    {
      "epoch": 1.85,
      "learning_rate": 7.95355587808418e-06,
      "loss": 0.4673,
      "step": 1760
    },
    {
      "epoch": 1.86,
      "learning_rate": 7.880986937590711e-06,
      "loss": 0.2653,
      "step": 1770
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.808417997097242e-06,
      "loss": 0.3816,
      "step": 1780
    },
    {
      "epoch": 1.88,
      "learning_rate": 7.735849056603775e-06,
      "loss": 0.2759,
      "step": 1790
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.663280116110306e-06,
      "loss": 0.2267,
      "step": 1800
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.590711175616836e-06,
      "loss": 0.3894,
      "step": 1810
    },
    {
      "epoch": 1.91,
      "learning_rate": 7.518142235123368e-06,
      "loss": 0.1902,
      "step": 1820
    },
    {
      "epoch": 1.92,
      "learning_rate": 7.445573294629899e-06,
      "loss": 0.3042,
      "step": 1830
    },
    {
      "epoch": 1.93,
      "learning_rate": 7.373004354136431e-06,
      "loss": 0.2981,
      "step": 1840
    },
    {
      "epoch": 1.94,
      "learning_rate": 7.300435413642961e-06,
      "loss": 0.2297,
      "step": 1850
    },
    {
      "epoch": 1.95,
      "learning_rate": 7.227866473149493e-06,
      "loss": 0.2528,
      "step": 1860
    },
    {
      "epoch": 1.96,
      "learning_rate": 7.1552975326560235e-06,
      "loss": 0.2361,
      "step": 1870
    },
    {
      "epoch": 1.97,
      "learning_rate": 7.082728592162555e-06,
      "loss": 0.327,
      "step": 1880
    },
    {
      "epoch": 1.99,
      "learning_rate": 7.010159651669086e-06,
      "loss": 0.2838,
      "step": 1890
    },
    {
      "epoch": 2.0,
      "learning_rate": 6.937590711175617e-06,
      "loss": 0.3027,
      "step": 1900
    },
    {
      "epoch": 2.0,
      "eval_Accuracy": 0.935375191424196,
      "eval_F1": 0.9231380335717158,
      "eval_loss": 0.42070162296295166,
      "eval_runtime": 98.699,
      "eval_samples_per_second": 33.08,
      "eval_steps_per_second": 2.077,
      "step": 1904
    },
    {
      "epoch": 2.01,
      "learning_rate": 6.865021770682149e-06,
      "loss": 0.3852,
      "step": 1910
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.792452830188679e-06,
      "loss": 0.4178,
      "step": 1920
    },
    {
      "epoch": 2.03,
      "learning_rate": 6.719883889695211e-06,
      "loss": 0.4089,
      "step": 1930
    },
    {
      "epoch": 2.04,
      "learning_rate": 6.647314949201742e-06,
      "loss": 0.3711,
      "step": 1940
    },
    {
      "epoch": 2.05,
      "learning_rate": 6.574746008708274e-06,
      "loss": 0.4505,
      "step": 1950
    },
    {
      "epoch": 2.06,
      "learning_rate": 6.502177068214805e-06,
      "loss": 0.2162,
      "step": 1960
    },
    {
      "epoch": 2.07,
      "learning_rate": 6.429608127721336e-06,
      "loss": 0.2023,
      "step": 1970
    },
    {
      "epoch": 2.08,
      "learning_rate": 6.357039187227867e-06,
      "loss": 0.2912,
      "step": 1980
    },
    {
      "epoch": 2.09,
      "learning_rate": 6.2844702467343975e-06,
      "loss": 0.4556,
      "step": 1990
    },
    {
      "epoch": 2.1,
      "learning_rate": 6.211901306240929e-06,
      "loss": 0.3712,
      "step": 2000
    },
    {
      "epoch": 2.11,
      "learning_rate": 6.13933236574746e-06,
      "loss": 0.407,
      "step": 2010
    },
    {
      "epoch": 2.12,
      "learning_rate": 6.066763425253992e-06,
      "loss": 0.3238,
      "step": 2020
    },
    {
      "epoch": 2.13,
      "learning_rate": 5.994194484760523e-06,
      "loss": 0.118,
      "step": 2030
    },
    {
      "epoch": 2.14,
      "learning_rate": 5.921625544267055e-06,
      "loss": 0.1698,
      "step": 2040
    },
    {
      "epoch": 2.15,
      "learning_rate": 5.849056603773585e-06,
      "loss": 0.3557,
      "step": 2050
    },
    {
      "epoch": 2.16,
      "learning_rate": 5.776487663280117e-06,
      "loss": 0.2857,
      "step": 2060
    },
    {
      "epoch": 2.17,
      "learning_rate": 5.703918722786648e-06,
      "loss": 0.6379,
      "step": 2070
    },
    {
      "epoch": 2.18,
      "learning_rate": 5.6313497822931794e-06,
      "loss": 0.7017,
      "step": 2080
    },
    {
      "epoch": 2.2,
      "learning_rate": 5.55878084179971e-06,
      "loss": 0.2319,
      "step": 2090
    },
    {
      "epoch": 2.21,
      "learning_rate": 5.486211901306241e-06,
      "loss": 0.2963,
      "step": 2100
    },
    {
      "epoch": 2.22,
      "learning_rate": 5.413642960812773e-06,
      "loss": 0.284,
      "step": 2110
    },
    {
      "epoch": 2.23,
      "learning_rate": 5.341074020319303e-06,
      "loss": 0.2335,
      "step": 2120
    },
    {
      "epoch": 2.24,
      "learning_rate": 5.268505079825835e-06,
      "loss": 0.3575,
      "step": 2130
    },
    {
      "epoch": 2.25,
      "learning_rate": 5.195936139332366e-06,
      "loss": 0.3061,
      "step": 2140
    },
    {
      "epoch": 2.26,
      "learning_rate": 5.123367198838898e-06,
      "loss": 0.2509,
      "step": 2150
    },
    {
      "epoch": 2.27,
      "learning_rate": 5.050798258345429e-06,
      "loss": 0.669,
      "step": 2160
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.97822931785196e-06,
      "loss": 0.3276,
      "step": 2170
    },
    {
      "epoch": 2.29,
      "learning_rate": 4.905660377358491e-06,
      "loss": 0.2792,
      "step": 2180
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.8330914368650224e-06,
      "loss": 0.3574,
      "step": 2190
    },
    {
      "epoch": 2.31,
      "learning_rate": 4.760522496371553e-06,
      "loss": 0.1847,
      "step": 2200
    },
    {
      "epoch": 2.32,
      "learning_rate": 4.687953555878084e-06,
      "loss": 0.3664,
      "step": 2210
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.615384615384616e-06,
      "loss": 0.313,
      "step": 2220
    },
    {
      "epoch": 2.34,
      "learning_rate": 4.542815674891147e-06,
      "loss": 0.4165,
      "step": 2230
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.470246734397678e-06,
      "loss": 0.2627,
      "step": 2240
    },
    {
      "epoch": 2.36,
      "learning_rate": 4.397677793904209e-06,
      "loss": 0.4678,
      "step": 2250
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.325108853410741e-06,
      "loss": 0.4254,
      "step": 2260
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.252539912917272e-06,
      "loss": 0.2166,
      "step": 2270
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.179970972423803e-06,
      "loss": 0.2313,
      "step": 2280
    },
    {
      "epoch": 2.41,
      "learning_rate": 4.1074020319303345e-06,
      "loss": 0.2503,
      "step": 2290
    },
    {
      "epoch": 2.42,
      "learning_rate": 4.0348330914368655e-06,
      "loss": 0.2127,
      "step": 2300
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.962264150943396e-06,
      "loss": 0.1339,
      "step": 2310
    },
    {
      "epoch": 2.44,
      "learning_rate": 3.889695210449927e-06,
      "loss": 0.1467,
      "step": 2320
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.817126269956458e-06,
      "loss": 0.3517,
      "step": 2330
    },
    {
      "epoch": 2.46,
      "learning_rate": 3.74455732946299e-06,
      "loss": 0.1513,
      "step": 2340
    },
    {
      "epoch": 2.47,
      "learning_rate": 3.671988388969521e-06,
      "loss": 0.1561,
      "step": 2350
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.5994194484760525e-06,
      "loss": 0.2388,
      "step": 2360
    },
    {
      "epoch": 2.49,
      "learning_rate": 3.526850507982584e-06,
      "loss": 0.5018,
      "step": 2370
    },
    {
      "epoch": 2.5,
      "learning_rate": 3.454281567489115e-06,
      "loss": 0.3328,
      "step": 2380
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.381712626995646e-06,
      "loss": 0.4347,
      "step": 2390
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.3091436865021775e-06,
      "loss": 0.3993,
      "step": 2400
    },
    {
      "epoch": 2.53,
      "learning_rate": 3.236574746008709e-06,
      "loss": 0.4217,
      "step": 2410
    },
    {
      "epoch": 2.54,
      "learning_rate": 3.1640058055152394e-06,
      "loss": 0.651,
      "step": 2420
    },
    {
      "epoch": 2.55,
      "learning_rate": 3.091436865021771e-06,
      "loss": 0.3912,
      "step": 2430
    },
    {
      "epoch": 2.56,
      "learning_rate": 3.018867924528302e-06,
      "loss": 0.2198,
      "step": 2440
    },
    {
      "epoch": 2.57,
      "learning_rate": 2.946298984034833e-06,
      "loss": 0.1841,
      "step": 2450
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.8737300435413645e-06,
      "loss": 0.2908,
      "step": 2460
    },
    {
      "epoch": 2.59,
      "learning_rate": 2.801161103047896e-06,
      "loss": 0.2179,
      "step": 2470
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.728592162554427e-06,
      "loss": 0.1909,
      "step": 2480
    },
    {
      "epoch": 2.62,
      "learning_rate": 2.6560232220609582e-06,
      "loss": 0.4162,
      "step": 2490
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.5834542815674896e-06,
      "loss": 0.3347,
      "step": 2500
    }
  ],
  "max_steps": 2856,
  "num_train_epochs": 3,
  "total_flos": 5266095390720000.0,
  "trial_name": null,
  "trial_params": null
}