{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.4444444444444447e-05, "loss": 1.5005, "step": 10 }, { "epoch": 0.16, "learning_rate": 8.888888888888889e-05, "loss": 1.2648, "step": 20 }, { "epoch": 0.24, "learning_rate": 0.00013333333333333334, "loss": 0.8406, "step": 30 }, { "epoch": 0.32, "learning_rate": 0.00017777777777777779, "loss": 0.496, "step": 40 }, { "epoch": 0.4, "learning_rate": 0.00019931271477663232, "loss": 0.2602, "step": 50 }, { "epoch": 0.48, "learning_rate": 0.00019793814432989693, "loss": 0.1365, "step": 60 }, { "epoch": 0.56, "learning_rate": 0.0001965635738831615, "loss": 0.131, "step": 70 }, { "epoch": 0.64, "learning_rate": 0.00019518900343642613, "loss": 0.1219, "step": 80 }, { "epoch": 0.72, "learning_rate": 0.00019381443298969073, "loss": 0.1053, "step": 90 }, { "epoch": 0.8, "learning_rate": 0.00019243986254295533, "loss": 0.1071, "step": 100 }, { "epoch": 0.88, "learning_rate": 0.00019106529209621996, "loss": 0.0878, "step": 110 }, { "epoch": 0.96, "learning_rate": 0.00018969072164948454, "loss": 0.0863, "step": 120 }, { "epoch": 1.04, "learning_rate": 0.00018831615120274914, "loss": 0.0918, "step": 130 }, { "epoch": 1.12, "learning_rate": 0.00018694158075601377, "loss": 0.0898, "step": 140 }, { "epoch": 1.2, "learning_rate": 0.00018556701030927837, "loss": 0.0829, "step": 150 }, { "epoch": 1.28, "learning_rate": 0.00018419243986254294, "loss": 0.0721, "step": 160 }, { "epoch": 1.36, "learning_rate": 0.00018281786941580757, "loss": 0.0752, "step": 170 }, { "epoch": 1.44, "learning_rate": 0.00018144329896907217, "loss": 0.0821, "step": 180 }, { "epoch": 1.52, "learning_rate": 0.00018006872852233677, "loss": 0.0741, "step": 190 }, { "epoch": 1.6, "learning_rate": 0.0001786941580756014, "loss": 0.0665, "step": 200 }, { "epoch": 1.68, "learning_rate": 0.00017731958762886598, "loss": 0.0833, "step": 210 }, { "epoch": 1.76, "learning_rate": 0.00017594501718213058, "loss": 0.0726, "step": 220 }, { "epoch": 1.84, "learning_rate": 0.0001745704467353952, "loss": 0.0624, "step": 230 }, { "epoch": 1.92, "learning_rate": 0.0001731958762886598, "loss": 0.0716, "step": 240 }, { "epoch": 2.0, "learning_rate": 0.00017182130584192438, "loss": 0.0827, "step": 250 }, { "epoch": 2.08, "learning_rate": 0.000170446735395189, "loss": 0.0506, "step": 260 }, { "epoch": 2.16, "learning_rate": 0.00016907216494845361, "loss": 0.0647, "step": 270 }, { "epoch": 2.24, "learning_rate": 0.00016769759450171822, "loss": 0.0614, "step": 280 }, { "epoch": 2.32, "learning_rate": 0.00016632302405498285, "loss": 0.0528, "step": 290 }, { "epoch": 2.4, "learning_rate": 0.00016494845360824742, "loss": 0.0514, "step": 300 }, { "epoch": 2.48, "learning_rate": 0.00016357388316151202, "loss": 0.0635, "step": 310 }, { "epoch": 2.56, "learning_rate": 0.00016219931271477665, "loss": 0.0552, "step": 320 }, { "epoch": 2.64, "learning_rate": 0.00016082474226804125, "loss": 0.06, "step": 330 }, { "epoch": 2.72, "learning_rate": 0.00015945017182130585, "loss": 0.0611, "step": 340 }, { "epoch": 2.8, "learning_rate": 0.00015807560137457046, "loss": 0.0667, "step": 350 }, { "epoch": 2.88, "learning_rate": 0.00015670103092783506, "loss": 0.0669, "step": 360 }, { "epoch": 2.96, "learning_rate": 0.00015532646048109966, "loss": 0.0589, "step": 370 }, { "epoch": 3.04, "learning_rate": 0.0001539518900343643, "loss": 0.0516, "step": 380 }, { "epoch": 3.12, "learning_rate": 0.00015257731958762886, "loss": 0.0415, "step": 390 }, { "epoch": 3.2, "learning_rate": 0.00015120274914089346, "loss": 0.043, "step": 400 }, { "epoch": 3.28, "learning_rate": 0.0001498281786941581, "loss": 0.0449, "step": 410 }, { "epoch": 3.36, "learning_rate": 0.0001484536082474227, "loss": 0.0467, "step": 420 }, { "epoch": 3.44, "learning_rate": 0.0001470790378006873, "loss": 0.0442, "step": 430 }, { "epoch": 3.52, "learning_rate": 0.0001457044673539519, "loss": 0.0501, "step": 440 }, { "epoch": 3.6, "learning_rate": 0.0001443298969072165, "loss": 0.0468, "step": 450 }, { "epoch": 3.68, "learning_rate": 0.0001429553264604811, "loss": 0.0503, "step": 460 }, { "epoch": 3.76, "learning_rate": 0.00014158075601374573, "loss": 0.0503, "step": 470 }, { "epoch": 3.84, "learning_rate": 0.0001402061855670103, "loss": 0.0477, "step": 480 }, { "epoch": 3.92, "learning_rate": 0.0001388316151202749, "loss": 0.0467, "step": 490 }, { "epoch": 4.0, "learning_rate": 0.00013745704467353953, "loss": 0.0525, "step": 500 }, { "epoch": 4.08, "learning_rate": 0.00013608247422680414, "loss": 0.0434, "step": 510 }, { "epoch": 4.16, "learning_rate": 0.00013470790378006874, "loss": 0.0371, "step": 520 }, { "epoch": 4.24, "learning_rate": 0.00013333333333333334, "loss": 0.0393, "step": 530 }, { "epoch": 4.32, "learning_rate": 0.00013195876288659794, "loss": 0.0378, "step": 540 }, { "epoch": 4.4, "learning_rate": 0.00013058419243986254, "loss": 0.0358, "step": 550 }, { "epoch": 4.48, "learning_rate": 0.00012920962199312717, "loss": 0.04, "step": 560 }, { "epoch": 4.56, "learning_rate": 0.00012783505154639175, "loss": 0.0355, "step": 570 }, { "epoch": 4.64, "learning_rate": 0.00012646048109965635, "loss": 0.0424, "step": 580 }, { "epoch": 4.72, "learning_rate": 0.00012508591065292098, "loss": 0.0411, "step": 590 }, { "epoch": 4.8, "learning_rate": 0.00012371134020618558, "loss": 0.0374, "step": 600 }, { "epoch": 4.88, "learning_rate": 0.00012233676975945018, "loss": 0.0402, "step": 610 }, { "epoch": 4.96, "learning_rate": 0.00012096219931271477, "loss": 0.0417, "step": 620 }, { "epoch": 5.04, "learning_rate": 0.00011958762886597938, "loss": 0.0353, "step": 630 }, { "epoch": 5.12, "learning_rate": 0.000118213058419244, "loss": 0.0328, "step": 640 }, { "epoch": 5.2, "learning_rate": 0.0001168384879725086, "loss": 0.0356, "step": 650 }, { "epoch": 5.28, "learning_rate": 0.00011546391752577319, "loss": 0.0354, "step": 660 }, { "epoch": 5.36, "learning_rate": 0.0001140893470790378, "loss": 0.0326, "step": 670 }, { "epoch": 5.44, "learning_rate": 0.0001127147766323024, "loss": 0.0358, "step": 680 }, { "epoch": 5.52, "learning_rate": 0.00011134020618556702, "loss": 0.0355, "step": 690 }, { "epoch": 5.6, "learning_rate": 0.00010996563573883164, "loss": 0.0342, "step": 700 }, { "epoch": 5.68, "learning_rate": 0.00010859106529209621, "loss": 0.0335, "step": 710 }, { "epoch": 5.76, "learning_rate": 0.00010721649484536083, "loss": 0.0362, "step": 720 }, { "epoch": 5.84, "learning_rate": 0.00010584192439862544, "loss": 0.0343, "step": 730 }, { "epoch": 5.92, "learning_rate": 0.00010446735395189004, "loss": 0.0329, "step": 740 }, { "epoch": 6.0, "learning_rate": 0.00010309278350515463, "loss": 0.0313, "step": 750 }, { "epoch": 6.08, "learning_rate": 0.00010171821305841925, "loss": 0.028, "step": 760 }, { "epoch": 6.16, "learning_rate": 0.00010034364261168385, "loss": 0.0317, "step": 770 }, { "epoch": 6.24, "learning_rate": 9.896907216494846e-05, "loss": 0.029, "step": 780 }, { "epoch": 6.32, "learning_rate": 9.759450171821306e-05, "loss": 0.0288, "step": 790 }, { "epoch": 6.4, "learning_rate": 9.621993127147767e-05, "loss": 0.0281, "step": 800 }, { "epoch": 6.48, "learning_rate": 9.484536082474227e-05, "loss": 0.0309, "step": 810 }, { "epoch": 6.56, "learning_rate": 9.347079037800688e-05, "loss": 0.0334, "step": 820 }, { "epoch": 6.64, "learning_rate": 9.209621993127147e-05, "loss": 0.0292, "step": 830 }, { "epoch": 6.72, "learning_rate": 9.072164948453609e-05, "loss": 0.0331, "step": 840 }, { "epoch": 6.8, "learning_rate": 8.93470790378007e-05, "loss": 0.0304, "step": 850 }, { "epoch": 6.88, "learning_rate": 8.797250859106529e-05, "loss": 0.0308, "step": 860 }, { "epoch": 6.96, "learning_rate": 8.65979381443299e-05, "loss": 0.0318, "step": 870 }, { "epoch": 7.04, "learning_rate": 8.52233676975945e-05, "loss": 0.0257, "step": 880 }, { "epoch": 7.12, "learning_rate": 8.384879725085911e-05, "loss": 0.0266, "step": 890 }, { "epoch": 7.2, "learning_rate": 8.247422680412371e-05, "loss": 0.0251, "step": 900 }, { "epoch": 7.28, "learning_rate": 8.109965635738833e-05, "loss": 0.0269, "step": 910 }, { "epoch": 7.36, "learning_rate": 7.972508591065293e-05, "loss": 0.0296, "step": 920 }, { "epoch": 7.44, "learning_rate": 7.835051546391753e-05, "loss": 0.0285, "step": 930 }, { "epoch": 7.52, "learning_rate": 7.697594501718214e-05, "loss": 0.0286, "step": 940 }, { "epoch": 7.6, "learning_rate": 7.560137457044673e-05, "loss": 0.024, "step": 950 }, { "epoch": 7.68, "learning_rate": 7.422680412371135e-05, "loss": 0.0267, "step": 960 }, { "epoch": 7.76, "learning_rate": 7.285223367697595e-05, "loss": 0.0296, "step": 970 }, { "epoch": 7.84, "learning_rate": 7.147766323024055e-05, "loss": 0.0265, "step": 980 }, { "epoch": 7.92, "learning_rate": 7.010309278350515e-05, "loss": 0.0282, "step": 990 }, { "epoch": 8.0, "learning_rate": 6.872852233676977e-05, "loss": 0.028, "step": 1000 } ], "max_steps": 1500, "num_train_epochs": 12, "total_flos": 4.91514246463488e+17, "trial_name": null, "trial_params": null }