{ "best_metric": 0.7744118571281433, "best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e4l58-l/checkpoint-21000", "epoch": 3.868828297715549, "eval_steps": 500, "global_step": 21000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09209799226376865, "grad_norm": 0.0014697719598188996, "learning_rate": 4.884877509670289e-08, "loss": 0.4588, "step": 500 }, { "epoch": 0.09209799226376865, "eval_loss": 1.4194883108139038, "eval_runtime": 78.7703, "eval_samples_per_second": 15.31, "eval_steps_per_second": 1.917, "step": 500 }, { "epoch": 0.1841959845275373, "grad_norm": 301.76043701171875, "learning_rate": 4.769755019340578e-08, "loss": 0.4255, "step": 1000 }, { "epoch": 0.1841959845275373, "eval_loss": 1.3416553735733032, "eval_runtime": 77.0051, "eval_samples_per_second": 15.661, "eval_steps_per_second": 1.961, "step": 1000 }, { "epoch": 0.27629397679130596, "grad_norm": 35.28853225708008, "learning_rate": 4.654632529010867e-08, "loss": 0.3724, "step": 1500 }, { "epoch": 0.27629397679130596, "eval_loss": 1.2872973680496216, "eval_runtime": 77.5439, "eval_samples_per_second": 15.552, "eval_steps_per_second": 1.947, "step": 1500 }, { "epoch": 0.3683919690550746, "grad_norm": 338.9349060058594, "learning_rate": 4.539510038681156e-08, "loss": 0.3251, "step": 2000 }, { "epoch": 0.3683919690550746, "eval_loss": 1.234910249710083, "eval_runtime": 78.7715, "eval_samples_per_second": 15.31, "eval_steps_per_second": 1.917, "step": 2000 }, { "epoch": 0.46048996131884323, "grad_norm": 325.85101318359375, "learning_rate": 4.4243875483514457e-08, "loss": 0.3308, "step": 2500 }, { "epoch": 0.46048996131884323, "eval_loss": 1.1944907903671265, "eval_runtime": 79.0397, "eval_samples_per_second": 15.258, "eval_steps_per_second": 1.91, "step": 2500 }, { "epoch": 0.5525879535826119, "grad_norm": 100.85855102539062, "learning_rate": 4.309265058021735e-08, "loss": 0.3017, "step": 3000 }, { "epoch": 0.5525879535826119, "eval_loss": 1.1593009233474731, "eval_runtime": 77.6354, "eval_samples_per_second": 15.534, "eval_steps_per_second": 1.945, "step": 3000 }, { "epoch": 0.6446859458463805, "grad_norm": 8.865598678588867, "learning_rate": 4.194142567692024e-08, "loss": 0.2962, "step": 3500 }, { "epoch": 0.6446859458463805, "eval_loss": 1.1259427070617676, "eval_runtime": 78.2715, "eval_samples_per_second": 15.408, "eval_steps_per_second": 1.929, "step": 3500 }, { "epoch": 0.7367839381101492, "grad_norm": 4.393447399139404, "learning_rate": 4.079020077362314e-08, "loss": 0.2919, "step": 4000 }, { "epoch": 0.7367839381101492, "eval_loss": 1.0954023599624634, "eval_runtime": 78.5366, "eval_samples_per_second": 15.356, "eval_steps_per_second": 1.923, "step": 4000 }, { "epoch": 0.8288819303739179, "grad_norm": 1.6036031246185303, "learning_rate": 3.9638975870326026e-08, "loss": 0.307, "step": 4500 }, { "epoch": 0.8288819303739179, "eval_loss": 1.0728861093521118, "eval_runtime": 78.4032, "eval_samples_per_second": 15.382, "eval_steps_per_second": 1.926, "step": 4500 }, { "epoch": 0.9209799226376865, "grad_norm": 11.990758895874023, "learning_rate": 3.8487750967028915e-08, "loss": 0.2764, "step": 5000 }, { "epoch": 0.9209799226376865, "eval_loss": 1.0523593425750732, "eval_runtime": 78.3726, "eval_samples_per_second": 15.388, "eval_steps_per_second": 1.927, "step": 5000 }, { "epoch": 1.013077914901455, "grad_norm": 43.865562438964844, "learning_rate": 3.733652606373181e-08, "loss": 0.2456, "step": 5500 }, { "epoch": 1.013077914901455, "eval_loss": 1.0375442504882812, "eval_runtime": 78.2473, "eval_samples_per_second": 15.413, "eval_steps_per_second": 1.93, "step": 5500 }, { "epoch": 1.1051759071652238, "grad_norm": 105.91036987304688, "learning_rate": 3.61853011604347e-08, "loss": 0.2642, "step": 6000 }, { "epoch": 1.1051759071652238, "eval_loss": 1.0233356952667236, "eval_runtime": 78.3135, "eval_samples_per_second": 15.4, "eval_steps_per_second": 1.928, "step": 6000 }, { "epoch": 1.1972738994289924, "grad_norm": 221.4896240234375, "learning_rate": 3.503407625713759e-08, "loss": 0.2066, "step": 6500 }, { "epoch": 1.1972738994289924, "eval_loss": 1.0104238986968994, "eval_runtime": 78.6485, "eval_samples_per_second": 15.334, "eval_steps_per_second": 1.92, "step": 6500 }, { "epoch": 1.289371891692761, "grad_norm": 270.53253173828125, "learning_rate": 3.3882851353840485e-08, "loss": 0.2376, "step": 7000 }, { "epoch": 1.289371891692761, "eval_loss": 0.998353898525238, "eval_runtime": 78.5502, "eval_samples_per_second": 15.353, "eval_steps_per_second": 1.922, "step": 7000 }, { "epoch": 1.3814698839565298, "grad_norm": 402.9339904785156, "learning_rate": 3.2731626450543374e-08, "loss": 0.1931, "step": 7500 }, { "epoch": 1.3814698839565298, "eval_loss": 0.9887453317642212, "eval_runtime": 78.4833, "eval_samples_per_second": 15.366, "eval_steps_per_second": 1.924, "step": 7500 }, { "epoch": 1.4735678762202984, "grad_norm": 0.006355441175401211, "learning_rate": 3.158040154724626e-08, "loss": 0.2163, "step": 8000 }, { "epoch": 1.4735678762202984, "eval_loss": 0.9767189621925354, "eval_runtime": 78.5499, "eval_samples_per_second": 15.353, "eval_steps_per_second": 1.922, "step": 8000 }, { "epoch": 1.565665868484067, "grad_norm": 0.12982100248336792, "learning_rate": 3.042917664394916e-08, "loss": 0.1903, "step": 8500 }, { "epoch": 1.565665868484067, "eval_loss": 0.9664921760559082, "eval_runtime": 78.6832, "eval_samples_per_second": 15.327, "eval_steps_per_second": 1.919, "step": 8500 }, { "epoch": 1.6577638607478358, "grad_norm": 173.31788635253906, "learning_rate": 2.927795174065205e-08, "loss": 0.2069, "step": 9000 }, { "epoch": 1.6577638607478358, "eval_loss": 0.9571623206138611, "eval_runtime": 78.544, "eval_samples_per_second": 15.354, "eval_steps_per_second": 1.922, "step": 9000 }, { "epoch": 1.7498618530116044, "grad_norm": 358.7403259277344, "learning_rate": 2.8126726837354947e-08, "loss": 0.2093, "step": 9500 }, { "epoch": 1.7498618530116044, "eval_loss": 0.9496769905090332, "eval_runtime": 78.3124, "eval_samples_per_second": 15.4, "eval_steps_per_second": 1.928, "step": 9500 }, { "epoch": 1.841959845275373, "grad_norm": 435.9449768066406, "learning_rate": 2.6975501934057836e-08, "loss": 0.2523, "step": 10000 }, { "epoch": 1.841959845275373, "eval_loss": 0.9420022368431091, "eval_runtime": 78.3702, "eval_samples_per_second": 15.388, "eval_steps_per_second": 1.927, "step": 10000 }, { "epoch": 1.9340578375391417, "grad_norm": 23.49118423461914, "learning_rate": 2.582427703076073e-08, "loss": 0.2127, "step": 10500 }, { "epoch": 1.9340578375391417, "eval_loss": 0.9328628182411194, "eval_runtime": 76.6904, "eval_samples_per_second": 15.726, "eval_steps_per_second": 1.969, "step": 10500 }, { "epoch": 2.02615582980291, "grad_norm": 0.4029064476490021, "learning_rate": 2.467305212746362e-08, "loss": 0.1968, "step": 11000 }, { "epoch": 2.02615582980291, "eval_loss": 0.926975429058075, "eval_runtime": 76.7161, "eval_samples_per_second": 15.72, "eval_steps_per_second": 1.968, "step": 11000 }, { "epoch": 2.118253822066679, "grad_norm": 274.0943603515625, "learning_rate": 2.3521827224166513e-08, "loss": 0.1879, "step": 11500 }, { "epoch": 2.118253822066679, "eval_loss": 0.9231382012367249, "eval_runtime": 78.6224, "eval_samples_per_second": 15.339, "eval_steps_per_second": 1.921, "step": 11500 }, { "epoch": 2.2103518143304477, "grad_norm": 0.000765918695833534, "learning_rate": 2.2370602320869402e-08, "loss": 0.1981, "step": 12000 }, { "epoch": 2.2103518143304477, "eval_loss": 0.9183884263038635, "eval_runtime": 76.7707, "eval_samples_per_second": 15.709, "eval_steps_per_second": 1.967, "step": 12000 }, { "epoch": 2.302449806594216, "grad_norm": 0.6250036358833313, "learning_rate": 2.1219377417572295e-08, "loss": 0.1964, "step": 12500 }, { "epoch": 2.302449806594216, "eval_loss": 0.9134928584098816, "eval_runtime": 78.5128, "eval_samples_per_second": 15.361, "eval_steps_per_second": 1.923, "step": 12500 }, { "epoch": 2.394547798857985, "grad_norm": 158.6990203857422, "learning_rate": 2.0068152514275187e-08, "loss": 0.1697, "step": 13000 }, { "epoch": 2.394547798857985, "eval_loss": 0.910048246383667, "eval_runtime": 76.7476, "eval_samples_per_second": 15.714, "eval_steps_per_second": 1.967, "step": 13000 }, { "epoch": 2.4866457911217537, "grad_norm": 1.288072943687439, "learning_rate": 1.891692761097808e-08, "loss": 0.2015, "step": 13500 }, { "epoch": 2.4866457911217537, "eval_loss": 0.9051916599273682, "eval_runtime": 78.3909, "eval_samples_per_second": 15.384, "eval_steps_per_second": 1.926, "step": 13500 }, { "epoch": 2.578743783385522, "grad_norm": 5.835799311171286e-05, "learning_rate": 1.7765702707680972e-08, "loss": 0.1827, "step": 14000 }, { "epoch": 2.578743783385522, "eval_loss": 0.9026296734809875, "eval_runtime": 76.6591, "eval_samples_per_second": 15.732, "eval_steps_per_second": 1.97, "step": 14000 }, { "epoch": 2.670841775649291, "grad_norm": 30.217784881591797, "learning_rate": 1.6614477804383865e-08, "loss": 0.1435, "step": 14500 }, { "epoch": 2.670841775649291, "eval_loss": 0.899847686290741, "eval_runtime": 76.742, "eval_samples_per_second": 15.715, "eval_steps_per_second": 1.968, "step": 14500 }, { "epoch": 2.7629397679130596, "grad_norm": 202.30496215820312, "learning_rate": 1.5463252901086757e-08, "loss": 0.1541, "step": 15000 }, { "epoch": 2.7629397679130596, "eval_loss": 0.896297812461853, "eval_runtime": 78.5539, "eval_samples_per_second": 15.353, "eval_steps_per_second": 1.922, "step": 15000 }, { "epoch": 2.8550377601768284, "grad_norm": 241.57069396972656, "learning_rate": 1.4312027997789646e-08, "loss": 0.1716, "step": 15500 }, { "epoch": 2.8550377601768284, "eval_loss": 0.8935310244560242, "eval_runtime": 76.9195, "eval_samples_per_second": 15.679, "eval_steps_per_second": 1.963, "step": 15500 }, { "epoch": 2.947135752440597, "grad_norm": 0.25739413499832153, "learning_rate": 1.316080309449254e-08, "loss": 0.2056, "step": 16000 }, { "epoch": 2.947135752440597, "eval_loss": 0.890542209148407, "eval_runtime": 76.7268, "eval_samples_per_second": 15.718, "eval_steps_per_second": 1.968, "step": 16000 }, { "epoch": 3.0392337447043656, "grad_norm": 86.5535659790039, "learning_rate": 1.2009578191195431e-08, "loss": 0.1843, "step": 16500 }, { "epoch": 3.0392337447043656, "eval_loss": 0.8875145316123962, "eval_runtime": 76.4556, "eval_samples_per_second": 15.774, "eval_steps_per_second": 1.975, "step": 16500 }, { "epoch": 3.131331736968134, "grad_norm": 0.05445564165711403, "learning_rate": 1.0858353287898323e-08, "loss": 0.1611, "step": 17000 }, { "epoch": 3.131331736968134, "eval_loss": 0.8858152627944946, "eval_runtime": 77.4814, "eval_samples_per_second": 15.565, "eval_steps_per_second": 1.949, "step": 17000 }, { "epoch": 3.2240235814296243, "grad_norm": 44.673702239990234, "learning_rate": 9.699705232129698e-09, "loss": 0.1568, "step": 17500 }, { "epoch": 3.2240235814296243, "eval_loss": 0.7821062207221985, "eval_runtime": 75.6502, "eval_samples_per_second": 15.955, "eval_steps_per_second": 1.996, "step": 17500 }, { "epoch": 3.316138540899042, "grad_norm": 0.5617901086807251, "learning_rate": 8.548268238761974e-09, "loss": 0.1395, "step": 18000 }, { "epoch": 3.316138540899042, "eval_loss": 0.7793659567832947, "eval_runtime": 76.0283, "eval_samples_per_second": 15.876, "eval_steps_per_second": 1.986, "step": 18000 }, { "epoch": 3.4082535003684598, "grad_norm": 6.4552321434021, "learning_rate": 7.396831245394252e-09, "loss": 0.1804, "step": 18500 }, { "epoch": 3.4082535003684598, "eval_loss": 0.7778191566467285, "eval_runtime": 76.0536, "eval_samples_per_second": 15.87, "eval_steps_per_second": 1.985, "step": 18500 }, { "epoch": 3.500368459837878, "grad_norm": 0.31470850110054016, "learning_rate": 6.2453942520265294e-09, "loss": 0.1728, "step": 19000 }, { "epoch": 3.500368459837878, "eval_loss": 0.7768829464912415, "eval_runtime": 76.116, "eval_samples_per_second": 15.857, "eval_steps_per_second": 1.984, "step": 19000 }, { "epoch": 3.5924834193072956, "grad_norm": 0.029128307476639748, "learning_rate": 5.093957258658806e-09, "loss": 0.179, "step": 19500 }, { "epoch": 3.5924834193072956, "eval_loss": 0.7758385539054871, "eval_runtime": 76.1889, "eval_samples_per_second": 15.842, "eval_steps_per_second": 1.982, "step": 19500 }, { "epoch": 3.6845983787767134, "grad_norm": 0.0002703067730180919, "learning_rate": 3.942520265291083e-09, "loss": 0.179, "step": 20000 }, { "epoch": 3.6845983787767134, "eval_loss": 0.7751660943031311, "eval_runtime": 76.1566, "eval_samples_per_second": 15.849, "eval_steps_per_second": 1.983, "step": 20000 }, { "epoch": 3.776713338246131, "grad_norm": 259.0263366699219, "learning_rate": 2.79108327192336e-09, "loss": 0.1454, "step": 20500 }, { "epoch": 3.776713338246131, "eval_loss": 0.7746543884277344, "eval_runtime": 76.3059, "eval_samples_per_second": 15.818, "eval_steps_per_second": 1.979, "step": 20500 }, { "epoch": 3.868828297715549, "grad_norm": 0.0011778937187045813, "learning_rate": 1.6396462785556374e-09, "loss": 0.1568, "step": 21000 }, { "epoch": 3.868828297715549, "eval_loss": 0.7744118571281433, "eval_runtime": 76.0825, "eval_samples_per_second": 15.864, "eval_steps_per_second": 1.985, "step": 21000 } ], "logging_steps": 500, "max_steps": 21712, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7555105255635630.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }