{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.4480959833072509,
  "eval_steps": 500,
  "global_step": 1718,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.604166666666667e-07,
      "loss": 0.7721,
      "step": 1
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.604166666666667e-06,
      "loss": 0.7229,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.208333333333334e-06,
      "loss": 0.6469,
      "step": 20
    },
    {
      "epoch": 0.02,
      "learning_rate": 7.8125e-06,
      "loss": 0.5906,
      "step": 30
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 0.5379,
      "step": 40
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.3020833333333334e-05,
      "loss": 0.4474,
      "step": 50
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.5625e-05,
      "loss": 0.4287,
      "step": 60
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.8229166666666668e-05,
      "loss": 0.371,
      "step": 70
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.3793,
      "step": 80
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.34375e-05,
      "loss": 0.3635,
      "step": 90
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.604166666666667e-05,
      "loss": 0.3436,
      "step": 100
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.8645833333333333e-05,
      "loss": 0.3837,
      "step": 110
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-05,
      "loss": 0.354,
      "step": 120
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.385416666666667e-05,
      "loss": 0.3472,
      "step": 130
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.6458333333333336e-05,
      "loss": 0.347,
      "step": 140
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.90625e-05,
      "loss": 0.3444,
      "step": 150
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.342,
      "step": 160
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.4270833333333337e-05,
      "loss": 0.3392,
      "step": 170
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.6875e-05,
      "loss": 0.3424,
      "step": 180
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.947916666666667e-05,
      "loss": 0.3616,
      "step": 190
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.99994047380455e-05,
      "loss": 0.3431,
      "step": 200
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.999698653493815e-05,
      "loss": 0.3418,
      "step": 210
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.999270836660003e-05,
      "loss": 0.3645,
      "step": 220
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.998657055135927e-05,
      "loss": 0.3418,
      "step": 230
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.9978573545915854e-05,
      "loss": 0.335,
      "step": 240
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.996871794530757e-05,
      "loss": 0.3441,
      "step": 250
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.9957004482865796e-05,
      "loss": 0.3463,
      "step": 260
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.994343403016093e-05,
      "loss": 0.3284,
      "step": 270
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.992800759693746e-05,
      "loss": 0.3232,
      "step": 280
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.9910726331038935e-05,
      "loss": 0.3251,
      "step": 290
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.989159151832251e-05,
      "loss": 0.3346,
      "step": 300
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.987060458256324e-05,
      "loss": 0.3397,
      "step": 310
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.985013406298429e-05,
      "loss": 0.3273,
      "step": 320
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.9825632509424134e-05,
      "loss": 0.3227,
      "step": 330
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.979928374067127e-05,
      "loss": 0.329,
      "step": 340
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.977108971727373e-05,
      "loss": 0.3177,
      "step": 350
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.9741052537080565e-05,
      "loss": 0.3428,
      "step": 360
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.970917443508558e-05,
      "loss": 0.3077,
      "step": 370
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.967545778326114e-05,
      "loss": 0.3248,
      "step": 380
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.963990509038167e-05,
      "loss": 0.3073,
      "step": 390
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.9602519001836933e-05,
      "loss": 0.3386,
      "step": 400
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.9563302299435246e-05,
      "loss": 0.3272,
      "step": 410
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.952225790119644e-05,
      "loss": 0.3355,
      "step": 420
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.947938886113482e-05,
      "loss": 0.3166,
      "step": 430
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.943469836903181e-05,
      "loss": 0.3289,
      "step": 440
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.938818975019871e-05,
      "loss": 0.3369,
      "step": 450
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.933986646522921e-05,
      "loss": 0.3161,
      "step": 460
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.928973210974192e-05,
      "loss": 0.3296,
      "step": 470
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.9237790414112806e-05,
      "loss": 0.2899,
      "step": 480
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.918404524319766e-05,
      "loss": 0.3216,
      "step": 490
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.912850059604448e-05,
      "loss": 0.3128,
      "step": 500
    },
    {
      "epoch": 0.26,
      "eval_loss": 0.5092809200286865,
      "eval_runtime": 107.6589,
      "eval_samples_per_second": 7.914,
      "eval_steps_per_second": 1.978,
      "step": 500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.907116060559596e-05,
      "loss": 0.3152,
      "step": 510
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.901202953838191e-05,
      "loss": 0.3091,
      "step": 520
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.8951111794201845e-05,
      "loss": 0.3238,
      "step": 530
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.8888411905797574e-05,
      "loss": 0.3462,
      "step": 540
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.8823934538515946e-05,
      "loss": 0.3253,
      "step": 550
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.875768448996173e-05,
      "loss": 0.3321,
      "step": 560
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.868966668964057e-05,
      "loss": 0.3204,
      "step": 570
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.8619886198592275e-05,
      "loss": 0.3109,
      "step": 580
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.854834820901419e-05,
      "loss": 0.3277,
      "step": 590
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.8475058043874875e-05,
      "loss": 0.3081,
      "step": 600
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.840002115651802e-05,
      "loss": 0.3314,
      "step": 610
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.832324313025669e-05,
      "loss": 0.3164,
      "step": 620
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.824472967795788e-05,
      "loss": 0.294,
      "step": 630
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.816448664161743e-05,
      "loss": 0.3211,
      "step": 640
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.808251999192536e-05,
      "loss": 0.3309,
      "step": 650
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.7998835827821536e-05,
      "loss": 0.3374,
      "step": 660
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.7913440376041975e-05,
      "loss": 0.3269,
      "step": 670
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.782633999065541e-05,
      "loss": 0.3159,
      "step": 680
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.77375411525906e-05,
      "loss": 0.3138,
      "step": 690
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.764705046915402e-05,
      "loss": 0.3362,
      "step": 700
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.755487467353829e-05,
      "loss": 0.3315,
      "step": 710
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.7461020624321104e-05,
      "loss": 0.3344,
      "step": 720
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.7365495304955e-05,
      "loss": 0.3148,
      "step": 730
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.7268305823247635e-05,
      "loss": 0.3054,
      "step": 740
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.7169459410832986e-05,
      "loss": 0.3213,
      "step": 750
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.706896342263319e-05,
      "loss": 0.3165,
      "step": 760
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.6966825336311376e-05,
      "loss": 0.3516,
      "step": 770
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.6863052751715175e-05,
      "loss": 0.3083,
      "step": 780
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.67576533903113e-05,
      "loss": 0.3044,
      "step": 790
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.665063509461097e-05,
      "loss": 0.3207,
      "step": 800
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.65420058275864e-05,
      "loss": 0.3172,
      "step": 810
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.643177367207827e-05,
      "loss": 0.3177,
      "step": 820
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.6319946830194314e-05,
      "loss": 0.3072,
      "step": 830
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.620653362269902e-05,
      "loss": 0.3115,
      "step": 840
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.609154248839449e-05,
      "loss": 0.2946,
      "step": 850
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.597498198349254e-05,
      "loss": 0.2979,
      "step": 860
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.585686078097806e-05,
      "loss": 0.2866,
      "step": 870
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.5737187669963675e-05,
      "loss": 0.2918,
      "step": 880
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.561597155503574e-05,
      "loss": 0.2801,
      "step": 890
    },
    {
      "epoch": 1.02,
      "learning_rate": 4.549322145559181e-05,
      "loss": 0.2839,
      "step": 900
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.536894650516952e-05,
      "loss": 0.2815,
      "step": 910
    },
    {
      "epoch": 1.03,
      "learning_rate": 4.524315595076695e-05,
      "loss": 0.2825,
      "step": 920
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.511585915215462e-05,
      "loss": 0.2788,
      "step": 930
    },
    {
      "epoch": 1.04,
      "learning_rate": 4.498706558117903e-05,
      "loss": 0.3051,
      "step": 940
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.485678482105789e-05,
      "loss": 0.2595,
      "step": 950
    },
    {
      "epoch": 1.05,
      "learning_rate": 4.4725026565667055e-05,
      "loss": 0.2741,
      "step": 960
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.45918006188192e-05,
      "loss": 0.2862,
      "step": 970
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.44571168935344e-05,
      "loss": 0.2712,
      "step": 980
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.432098541130247e-05,
      "loss": 0.2935,
      "step": 990
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.418341630133733e-05,
      "loss": 0.2751,
      "step": 1000
    },
    {
      "epoch": 1.07,
      "eval_loss": 0.4884406328201294,
      "eval_runtime": 107.9712,
      "eval_samples_per_second": 7.891,
      "eval_steps_per_second": 1.973,
      "step": 1000
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.404441979982329e-05,
      "loss": 0.2706,
      "step": 1010
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.3904006249153395e-05,
      "loss": 0.2509,
      "step": 1020
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.376218609715992e-05,
      "loss": 0.2774,
      "step": 1030
    },
    {
      "epoch": 1.09,
      "learning_rate": 4.36189698963369e-05,
      "loss": 0.2675,
      "step": 1040
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.3474368303055e-05,
      "loss": 0.2946,
      "step": 1050
    },
    {
      "epoch": 1.1,
      "learning_rate": 4.3328392076768597e-05,
      "loss": 0.275,
      "step": 1060
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.318105207921515e-05,
      "loss": 0.2635,
      "step": 1070
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.303235927360706e-05,
      "loss": 0.2917,
      "step": 1080
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.288232472381589e-05,
      "loss": 0.275,
      "step": 1090
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.273095959354914e-05,
      "loss": 0.2551,
      "step": 1100
    },
    {
      "epoch": 1.13,
      "learning_rate": 4.257827514551957e-05,
      "loss": 0.2716,
      "step": 1110
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.2424282740607205e-05,
      "loss": 0.2693,
      "step": 1120
    },
    {
      "epoch": 1.14,
      "learning_rate": 4.2268993837013935e-05,
      "loss": 0.2862,
      "step": 1130
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.2112419989411026e-05,
      "loss": 0.2869,
      "step": 1140
    },
    {
      "epoch": 1.15,
      "learning_rate": 4.195457284807927e-05,
      "loss": 0.2698,
      "step": 1150
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.179546415804221e-05,
      "loss": 0.275,
      "step": 1160
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.163510575819212e-05,
      "loss": 0.2569,
      "step": 1170
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.1473509580409174e-05,
      "loss": 0.249,
      "step": 1180
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.131068764867363e-05,
      "loss": 0.283,
      "step": 1190
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.11466520781711e-05,
      "loss": 0.2735,
      "step": 1200
    },
    {
      "epoch": 1.18,
      "learning_rate": 4.0981415074391124e-05,
      "loss": 0.2677,
      "step": 1210
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.081498893221899e-05,
      "loss": 0.2928,
      "step": 1220
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.0647386035020884e-05,
      "loss": 0.2819,
      "step": 1230
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.047861885372249e-05,
      "loss": 0.2644,
      "step": 1240
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.0308699945881055e-05,
      "loss": 0.2757,
      "step": 1250
    },
    {
      "epoch": 1.21,
      "learning_rate": 4.013764195475101e-05,
      "loss": 0.2782,
      "step": 1260
    },
    {
      "epoch": 1.21,
      "learning_rate": 3.9965457608343194e-05,
      "loss": 0.279,
      "step": 1270
    },
    {
      "epoch": 1.22,
      "learning_rate": 3.979215971847783e-05,
      "loss": 0.2726,
      "step": 1280
    },
    {
      "epoch": 1.22,
      "learning_rate": 3.961776117983123e-05,
      "loss": 0.2542,
      "step": 1290
    },
    {
      "epoch": 1.23,
      "learning_rate": 3.944227496897629e-05,
      "loss": 0.263,
      "step": 1300
    },
    {
      "epoch": 1.24,
      "learning_rate": 3.9265714143416967e-05,
      "loss": 0.2762,
      "step": 1310
    },
    {
      "epoch": 1.24,
      "learning_rate": 3.9088091840616705e-05,
      "loss": 0.2825,
      "step": 1320
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.890942127702089e-05,
      "loss": 0.2862,
      "step": 1330
    },
    {
      "epoch": 1.25,
      "learning_rate": 3.872971574707345e-05,
      "loss": 0.2598,
      "step": 1340
    },
    {
      "epoch": 1.26,
      "learning_rate": 3.8548988622227666e-05,
      "loss": 0.2636,
      "step": 1350
    },
    {
      "epoch": 1.26,
      "learning_rate": 3.836725334995123e-05,
      "loss": 0.258,
      "step": 1360
    },
    {
      "epoch": 1.27,
      "learning_rate": 3.81845234527256e-05,
      "loss": 0.2562,
      "step": 1370
    },
    {
      "epoch": 1.27,
      "learning_rate": 3.800081252703993e-05,
      "loss": 0.2711,
      "step": 1380
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.781613424237926e-05,
      "loss": 0.249,
      "step": 1390
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.763050234020752e-05,
      "loss": 0.2386,
      "step": 1400
    },
    {
      "epoch": 1.29,
      "learning_rate": 3.744393063294499e-05,
      "loss": 0.2517,
      "step": 1410
    },
    {
      "epoch": 1.29,
      "learning_rate": 3.725643300294056e-05,
      "loss": 0.2762,
      "step": 1420
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.706802340143881e-05,
      "loss": 0.2344,
      "step": 1430
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.68787158475419e-05,
      "loss": 0.2555,
      "step": 1440
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.668852442716645e-05,
      "loss": 0.2632,
      "step": 1450
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.649746329199545e-05,
      "loss": 0.2422,
      "step": 1460
    },
    {
      "epoch": 1.32,
      "learning_rate": 3.630554665842525e-05,
      "loss": 0.2545,
      "step": 1470
    },
    {
      "epoch": 1.32,
      "learning_rate": 3.611278880650779e-05,
      "loss": 0.2518,
      "step": 1480
    },
    {
      "epoch": 1.33,
      "learning_rate": 3.5919204078888004e-05,
      "loss": 0.2467,
      "step": 1490
    },
    {
      "epoch": 1.33,
      "learning_rate": 3.572480687973665e-05,
      "loss": 0.2585,
      "step": 1500
    },
    {
      "epoch": 1.33,
      "eval_loss": 0.49433010816574097,
      "eval_runtime": 107.5959,
      "eval_samples_per_second": 7.919,
      "eval_steps_per_second": 1.98,
      "step": 1500
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.5529611673678543e-05,
      "loss": 0.2635,
      "step": 1510
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.5333632984716226e-05,
      "loss": 0.2351,
      "step": 1520
    },
    {
      "epoch": 1.35,
      "learning_rate": 3.5136885395149345e-05,
      "loss": 0.2663,
      "step": 1530
    },
    {
      "epoch": 1.36,
      "learning_rate": 3.493938354448954e-05,
      "loss": 0.2527,
      "step": 1540
    },
    {
      "epoch": 1.36,
      "learning_rate": 3.474114212837123e-05,
      "loss": 0.2453,
      "step": 1550
    },
    {
      "epoch": 1.37,
      "learning_rate": 3.454217589745809e-05,
      "loss": 0.2462,
      "step": 1560
    },
    {
      "epoch": 1.37,
      "learning_rate": 3.43424996563455e-05,
      "loss": 0.2612,
      "step": 1570
    },
    {
      "epoch": 1.38,
      "learning_rate": 3.414212826245898e-05,
      "loss": 0.2333,
      "step": 1580
    },
    {
      "epoch": 1.38,
      "learning_rate": 3.394107662494872e-05,
      "loss": 0.257,
      "step": 1590
    },
    {
      "epoch": 1.39,
      "learning_rate": 3.3739359703580144e-05,
      "loss": 0.249,
      "step": 1600
    },
    {
      "epoch": 1.39,
      "learning_rate": 3.3536992507620854e-05,
      "loss": 0.2542,
      "step": 1610
    },
    {
      "epoch": 1.4,
      "learning_rate": 3.3333990094723826e-05,
      "loss": 0.2789,
      "step": 1620
    },
    {
      "epoch": 1.4,
      "learning_rate": 3.3130367569806965e-05,
      "loss": 0.2743,
      "step": 1630
    },
    {
      "epoch": 1.41,
      "learning_rate": 3.292614008392923e-05,
      "loss": 0.2592,
      "step": 1640
    },
    {
      "epoch": 1.41,
      "learning_rate": 3.272132283316324e-05,
      "loss": 0.2371,
      "step": 1650
    },
    {
      "epoch": 1.42,
      "learning_rate": 3.25159310574646e-05,
      "loss": 0.2574,
      "step": 1660
    },
    {
      "epoch": 1.42,
      "learning_rate": 3.230998003953793e-05,
      "loss": 0.2278,
      "step": 1670
    },
    {
      "epoch": 1.43,
      "learning_rate": 3.210348510369972e-05,
      "loss": 0.2496,
      "step": 1680
    },
    {
      "epoch": 1.43,
      "learning_rate": 3.1896461614738026e-05,
      "loss": 0.2349,
      "step": 1690
    },
    {
      "epoch": 1.44,
      "learning_rate": 3.1688924976769324e-05,
      "loss": 0.2339,
      "step": 1700
    },
    {
      "epoch": 1.44,
      "learning_rate": 3.1480890632092236e-05,
      "loss": 0.249,
      "step": 1710
    },
    {
      "epoch": 1.45,
      "step": 1718,
      "total_flos": 3.091619088653353e+17,
      "train_loss": 0.30544669220139453,
      "train_runtime": 6733.7325,
      "train_samples_per_second": 2.277,
      "train_steps_per_second": 0.569
    }
  ],
  "logging_steps": 10,
  "max_steps": 3834,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 3.091619088653353e+17,
  "trial_name": null,
  "trial_params": null
}