{ "best_metric": 0.09665286540985107, "best_model_checkpoint": "saves/CADICA_qwenvl_direction_scale4/lora/sft/checkpoint-150", "epoch": 0.07725985063095545, "eval_steps": 50, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0025753283543651817, "grad_norm": 14.230540018977528, "learning_rate": 2.9411764705882355e-06, "loss": 1.0954, "num_input_tokens_seen": 49920, "step": 5 }, { "epoch": 0.0051506567087303634, "grad_norm": 12.725175128208063, "learning_rate": 5.882352941176471e-06, "loss": 0.9793, "num_input_tokens_seen": 99840, "step": 10 }, { "epoch": 0.007725985063095545, "grad_norm": 11.908760913870685, "learning_rate": 8.823529411764707e-06, "loss": 1.0964, "num_input_tokens_seen": 149760, "step": 15 }, { "epoch": 0.010301313417460727, "grad_norm": 7.5714179545602835, "learning_rate": 1.1764705882352942e-05, "loss": 0.7079, "num_input_tokens_seen": 199680, "step": 20 }, { "epoch": 0.012876641771825908, "grad_norm": 2.273110551179123, "learning_rate": 1.4705882352941177e-05, "loss": 0.4213, "num_input_tokens_seen": 249600, "step": 25 }, { "epoch": 0.01545197012619109, "grad_norm": 1.4511084067844011, "learning_rate": 1.7647058823529414e-05, "loss": 0.3359, "num_input_tokens_seen": 299520, "step": 30 }, { "epoch": 0.018027298480556272, "grad_norm": 1.7462649262033438, "learning_rate": 2.058823529411765e-05, "loss": 0.3463, "num_input_tokens_seen": 349440, "step": 35 }, { "epoch": 0.020602626834921454, "grad_norm": 1.2143595820366577, "learning_rate": 2.3529411764705884e-05, "loss": 0.28, "num_input_tokens_seen": 399360, "step": 40 }, { "epoch": 0.023177955189286635, "grad_norm": 0.8468719125045373, "learning_rate": 2.647058823529412e-05, "loss": 0.3442, "num_input_tokens_seen": 449280, "step": 45 }, { "epoch": 0.025753283543651816, "grad_norm": 0.9726867932660042, "learning_rate": 2.9411764705882354e-05, "loss": 0.3441, "num_input_tokens_seen": 499200, "step": 50 }, { "epoch": 0.025753283543651816, "eval_loss": 0.33834776282310486, "eval_runtime": 47.4467, "eval_samples_per_second": 1.265, "eval_steps_per_second": 0.316, "num_input_tokens_seen": 499200, "step": 50 }, { "epoch": 0.028328611898016998, "grad_norm": 1.4016556961184263, "learning_rate": 3.235294117647059e-05, "loss": 0.3182, "num_input_tokens_seen": 549120, "step": 55 }, { "epoch": 0.03090394025238218, "grad_norm": 0.6437613769459606, "learning_rate": 3.529411764705883e-05, "loss": 0.3294, "num_input_tokens_seen": 599040, "step": 60 }, { "epoch": 0.03347926860674736, "grad_norm": 0.7389008951321312, "learning_rate": 3.8235294117647055e-05, "loss": 0.3097, "num_input_tokens_seen": 648960, "step": 65 }, { "epoch": 0.036054596961112545, "grad_norm": 0.771553860801019, "learning_rate": 4.11764705882353e-05, "loss": 0.3008, "num_input_tokens_seen": 698880, "step": 70 }, { "epoch": 0.03862992531547772, "grad_norm": 0.6965369148334918, "learning_rate": 4.411764705882353e-05, "loss": 0.3278, "num_input_tokens_seen": 748800, "step": 75 }, { "epoch": 0.04120525366984291, "grad_norm": 0.912943461315541, "learning_rate": 4.705882352941177e-05, "loss": 0.3074, "num_input_tokens_seen": 798720, "step": 80 }, { "epoch": 0.043780582024208085, "grad_norm": 0.8407481737577445, "learning_rate": 5e-05, "loss": 0.3423, "num_input_tokens_seen": 848640, "step": 85 }, { "epoch": 0.04635591037857327, "grad_norm": 0.9112879058417015, "learning_rate": 5.294117647058824e-05, "loss": 0.3008, "num_input_tokens_seen": 898560, "step": 90 }, { "epoch": 
0.04893123873293845, "grad_norm": 2.391489040464162, "learning_rate": 5.588235294117647e-05, "loss": 0.2815, "num_input_tokens_seen": 948480, "step": 95 }, { "epoch": 0.05150656708730363, "grad_norm": 2.155211791607199, "learning_rate": 5.882352941176471e-05, "loss": 0.2274, "num_input_tokens_seen": 998400, "step": 100 }, { "epoch": 0.05150656708730363, "eval_loss": 0.18663176894187927, "eval_runtime": 18.9199, "eval_samples_per_second": 3.171, "eval_steps_per_second": 0.793, "num_input_tokens_seen": 998400, "step": 100 }, { "epoch": 0.05408189544166881, "grad_norm": 2.2181531422996716, "learning_rate": 6.176470588235295e-05, "loss": 0.168, "num_input_tokens_seen": 1048320, "step": 105 }, { "epoch": 0.056657223796033995, "grad_norm": 3.1829920225573236, "learning_rate": 6.470588235294118e-05, "loss": 0.0709, "num_input_tokens_seen": 1098240, "step": 110 }, { "epoch": 0.05923255215039917, "grad_norm": 4.337350477588576, "learning_rate": 6.764705882352942e-05, "loss": 0.1609, "num_input_tokens_seen": 1148160, "step": 115 }, { "epoch": 0.06180788050476436, "grad_norm": 2.1010046045637365, "learning_rate": 7.058823529411765e-05, "loss": 0.0354, "num_input_tokens_seen": 1198080, "step": 120 }, { "epoch": 0.06438320885912954, "grad_norm": 2.232308844812103, "learning_rate": 7.352941176470589e-05, "loss": 0.1133, "num_input_tokens_seen": 1248000, "step": 125 }, { "epoch": 0.06695853721349472, "grad_norm": 5.641631090993415, "learning_rate": 7.647058823529411e-05, "loss": 0.0867, "num_input_tokens_seen": 1297920, "step": 130 }, { "epoch": 0.0695338655678599, "grad_norm": 1.5031437609685787, "learning_rate": 7.941176470588235e-05, "loss": 0.1352, "num_input_tokens_seen": 1347840, "step": 135 }, { "epoch": 0.07210919392222509, "grad_norm": 3.2992644431188465, "learning_rate": 8.23529411764706e-05, "loss": 0.101, "num_input_tokens_seen": 1397760, "step": 140 }, { "epoch": 0.07468452227659027, "grad_norm": 3.494236832758233, "learning_rate": 8.529411764705883e-05, "loss": 0.0334, "num_input_tokens_seen": 1447680, "step": 145 }, { "epoch": 0.07725985063095545, "grad_norm": 0.0602113869322109, "learning_rate": 8.823529411764706e-05, "loss": 0.0667, "num_input_tokens_seen": 1497600, "step": 150 }, { "epoch": 0.07725985063095545, "eval_loss": 0.09665286540985107, "eval_runtime": 19.2745, "eval_samples_per_second": 3.113, "eval_steps_per_second": 0.778, "num_input_tokens_seen": 1497600, "step": 150 } ], "logging_steps": 5, "max_steps": 3400, "num_input_tokens_seen": 1497600, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 98764767100928.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }