{ "best_metric": 6719.021484375, "best_model_checkpoint": "./coco_outputs/checkpoint-640", "epoch": 5.0, "global_step": 640, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 1.96875e-05, "loss": 6346.0008, "step": 10 }, { "epoch": 0.16, "learning_rate": 1.9375e-05, "loss": 6400.0195, "step": 20 }, { "epoch": 0.23, "learning_rate": 1.9062500000000003e-05, "loss": 6295.0742, "step": 30 }, { "epoch": 0.31, "learning_rate": 1.8750000000000002e-05, "loss": 6280.2168, "step": 40 }, { "epoch": 0.39, "learning_rate": 1.84375e-05, "loss": 6138.5656, "step": 50 }, { "epoch": 0.47, "learning_rate": 1.8125e-05, "loss": 6088.3699, "step": 60 }, { "epoch": 0.55, "learning_rate": 1.7812500000000003e-05, "loss": 6224.8137, "step": 70 }, { "epoch": 0.62, "learning_rate": 1.7500000000000002e-05, "loss": 6147.6773, "step": 80 }, { "epoch": 0.7, "learning_rate": 1.71875e-05, "loss": 6426.423, "step": 90 }, { "epoch": 0.78, "learning_rate": 1.6875e-05, "loss": 6195.2883, "step": 100 }, { "epoch": 0.86, "learning_rate": 1.6562500000000003e-05, "loss": 6409.0055, "step": 110 }, { "epoch": 0.94, "learning_rate": 1.6250000000000002e-05, "loss": 6457.3832, "step": 120 }, { "epoch": 1.0, "eval_loss": 6720.3701171875, "eval_runtime": 16.7008, "eval_samples_per_second": 15.329, "eval_steps_per_second": 1.916, "step": 128 }, { "epoch": 1.02, "learning_rate": 1.59375e-05, "loss": 6196.2672, "step": 130 }, { "epoch": 1.09, "learning_rate": 1.5625e-05, "loss": 6187.7188, "step": 140 }, { "epoch": 1.17, "learning_rate": 1.5312500000000003e-05, "loss": 6357.3324, "step": 150 }, { "epoch": 1.25, "learning_rate": 1.5000000000000002e-05, "loss": 6108.043, "step": 160 }, { "epoch": 1.33, "learning_rate": 1.4687500000000001e-05, "loss": 6499.2922, "step": 170 }, { "epoch": 1.41, "learning_rate": 1.4375e-05, "loss": 6227.1672, "step": 180 }, { "epoch": 1.48, "learning_rate": 1.4062500000000001e-05, "loss": 6416.4754, "step": 190 }, { "epoch": 1.56, "learning_rate": 1.375e-05, "loss": 6024.552, "step": 200 }, { "epoch": 1.64, "learning_rate": 1.3437500000000001e-05, "loss": 5964.7684, "step": 210 }, { "epoch": 1.72, "learning_rate": 1.3125e-05, "loss": 5876.4055, "step": 220 }, { "epoch": 1.8, "learning_rate": 1.2812500000000001e-05, "loss": 6149.1523, "step": 230 }, { "epoch": 1.88, "learning_rate": 1.25e-05, "loss": 6330.3543, "step": 240 }, { "epoch": 1.95, "learning_rate": 1.2187500000000001e-05, "loss": 6234.0246, "step": 250 }, { "epoch": 2.0, "eval_loss": 6719.166015625, "eval_runtime": 16.7578, "eval_samples_per_second": 15.276, "eval_steps_per_second": 1.91, "step": 256 }, { "epoch": 2.03, "learning_rate": 1.1875e-05, "loss": 6279.7289, "step": 260 }, { "epoch": 2.11, "learning_rate": 1.1562500000000002e-05, "loss": 6253.7887, "step": 270 }, { "epoch": 2.19, "learning_rate": 1.125e-05, "loss": 6329.3629, "step": 280 }, { "epoch": 2.27, "learning_rate": 1.0937500000000002e-05, "loss": 6185.052, "step": 290 }, { "epoch": 2.34, "learning_rate": 1.0625e-05, "loss": 5918.475, "step": 300 }, { "epoch": 2.42, "learning_rate": 1.0312500000000002e-05, "loss": 6114.2844, "step": 310 }, { "epoch": 2.5, "learning_rate": 1e-05, "loss": 6149.1941, "step": 320 }, { "epoch": 2.58, "learning_rate": 9.6875e-06, "loss": 6154.8313, "step": 330 }, { "epoch": 2.66, "learning_rate": 9.375000000000001e-06, "loss": 6146.2449, "step": 340 }, { "epoch": 2.73, "learning_rate": 9.0625e-06, "loss": 6099.1227, "step": 350 }, { "epoch": 2.81, "learning_rate": 8.750000000000001e-06, "loss": 6083.9359, "step": 360 }, { "epoch": 2.89, "learning_rate": 8.4375e-06, "loss": 6283.482, "step": 370 }, { "epoch": 2.97, "learning_rate": 8.125000000000001e-06, "loss": 6335.5293, "step": 380 }, { "epoch": 3.0, "eval_loss": 6719.03076171875, "eval_runtime": 16.7226, "eval_samples_per_second": 15.309, "eval_steps_per_second": 1.914, "step": 384 }, { "epoch": 3.05, "learning_rate": 7.8125e-06, "loss": 6466.4867, "step": 390 }, { "epoch": 3.12, "learning_rate": 7.500000000000001e-06, "loss": 6327.5625, "step": 400 }, { "epoch": 3.2, "learning_rate": 7.1875e-06, "loss": 6213.9516, "step": 410 }, { "epoch": 3.28, "learning_rate": 6.875e-06, "loss": 6337.8066, "step": 420 }, { "epoch": 3.36, "learning_rate": 6.5625e-06, "loss": 6188.782, "step": 430 }, { "epoch": 3.44, "learning_rate": 6.25e-06, "loss": 6432.0715, "step": 440 }, { "epoch": 3.52, "learning_rate": 5.9375e-06, "loss": 6178.6059, "step": 450 }, { "epoch": 3.59, "learning_rate": 5.625e-06, "loss": 6356.5617, "step": 460 }, { "epoch": 3.67, "learning_rate": 5.3125e-06, "loss": 6572.6281, "step": 470 }, { "epoch": 3.75, "learning_rate": 5e-06, "loss": 6376.5836, "step": 480 }, { "epoch": 3.83, "learning_rate": 4.6875000000000004e-06, "loss": 6186.7969, "step": 490 }, { "epoch": 3.91, "learning_rate": 4.3750000000000005e-06, "loss": 6402.0125, "step": 500 }, { "epoch": 3.98, "learning_rate": 4.0625000000000005e-06, "loss": 6520.8047, "step": 510 }, { "epoch": 4.0, "eval_loss": 6719.060546875, "eval_runtime": 16.8687, "eval_samples_per_second": 15.176, "eval_steps_per_second": 1.897, "step": 512 }, { "epoch": 4.06, "learning_rate": 3.7500000000000005e-06, "loss": 6016.2836, "step": 520 }, { "epoch": 4.14, "learning_rate": 3.4375e-06, "loss": 5934.718, "step": 530 }, { "epoch": 4.22, "learning_rate": 3.125e-06, "loss": 6232.1816, "step": 540 }, { "epoch": 4.3, "learning_rate": 2.8125e-06, "loss": 6584.9633, "step": 550 }, { "epoch": 4.38, "learning_rate": 2.5e-06, "loss": 6533.6297, "step": 560 }, { "epoch": 4.45, "learning_rate": 2.1875000000000002e-06, "loss": 6380.1676, "step": 570 }, { "epoch": 4.53, "learning_rate": 1.8750000000000003e-06, "loss": 6350.5738, "step": 580 }, { "epoch": 4.61, "learning_rate": 1.5625e-06, "loss": 5988.5664, "step": 590 }, { "epoch": 4.69, "learning_rate": 1.25e-06, "loss": 6169.0441, "step": 600 }, { "epoch": 4.77, "learning_rate": 9.375000000000001e-07, "loss": 6024.5156, "step": 610 }, { "epoch": 4.84, "learning_rate": 6.25e-07, "loss": 6515.2469, "step": 620 }, { "epoch": 4.92, "learning_rate": 3.125e-07, "loss": 6284.4926, "step": 630 }, { "epoch": 5.0, "learning_rate": 0.0, "loss": 5738.273, "step": 640 }, { "epoch": 5.0, "eval_loss": 6719.021484375, "eval_runtime": 17.1732, "eval_samples_per_second": 14.907, "eval_steps_per_second": 1.863, "step": 640 }, { "epoch": 5.0, "step": 640, "total_flos": 2.4480067682304e+18, "train_loss": 6244.167712402344, "train_runtime": 818.3257, "train_samples_per_second": 6.257, "train_steps_per_second": 0.782 } ], "max_steps": 640, "num_train_epochs": 5, "total_flos": 2.4480067682304e+18, "trial_name": null, "trial_params": null }