{ "best_metric": 0.889969527721405, "best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale4/lora/sft/checkpoint-50", "epoch": 0.04434589800443459, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022172949002217295, "grad_norm": 10.675163725573233, "learning_rate": 8.333333333333334e-06, "loss": 1.9514, "num_input_tokens_seen": 52584, "step": 5 }, { "epoch": 0.004434589800443459, "grad_norm": 40.324863066721726, "learning_rate": 1.6666666666666667e-05, "loss": 1.8884, "num_input_tokens_seen": 105192, "step": 10 }, { "epoch": 0.0066518847006651885, "grad_norm": 11.862755525069886, "learning_rate": 2.5e-05, "loss": 1.8391, "num_input_tokens_seen": 157768, "step": 15 }, { "epoch": 0.008869179600886918, "grad_norm": 6.612134100058155, "learning_rate": 3.3333333333333335e-05, "loss": 1.6078, "num_input_tokens_seen": 209816, "step": 20 }, { "epoch": 0.011086474501108648, "grad_norm": 5.778999894409448, "learning_rate": 4.166666666666667e-05, "loss": 1.3229, "num_input_tokens_seen": 262088, "step": 25 }, { "epoch": 0.013303769401330377, "grad_norm": 7.203619868080236, "learning_rate": 5e-05, "loss": 1.0884, "num_input_tokens_seen": 314464, "step": 30 }, { "epoch": 0.015521064301552107, "grad_norm": 1.9403341193840407, "learning_rate": 5.833333333333334e-05, "loss": 0.9507, "num_input_tokens_seen": 365768, "step": 35 }, { "epoch": 0.017738359201773836, "grad_norm": 1.227381877798724, "learning_rate": 6.666666666666667e-05, "loss": 0.8783, "num_input_tokens_seen": 418744, "step": 40 }, { "epoch": 0.019955654101995565, "grad_norm": 1.192341449931711, "learning_rate": 7.500000000000001e-05, "loss": 0.8569, "num_input_tokens_seen": 470176, "step": 45 }, { "epoch": 0.022172949002217297, "grad_norm": 0.9027870808759253, "learning_rate": 8.333333333333334e-05, "loss": 0.8773, "num_input_tokens_seen": 521992, "step": 50 }, { "epoch": 0.022172949002217297, "eval_loss": 0.889969527721405, "eval_runtime": 47.7178, "eval_samples_per_second": 1.257, "eval_steps_per_second": 0.314, "num_input_tokens_seen": 521992, "step": 50 }, { "epoch": 0.024390243902439025, "grad_norm": 0.9805700614384589, "learning_rate": 9.166666666666667e-05, "loss": 0.9117, "num_input_tokens_seen": 574288, "step": 55 }, { "epoch": 0.026607538802660754, "grad_norm": 0.9233792774186961, "learning_rate": 0.0001, "loss": 0.8536, "num_input_tokens_seen": 626960, "step": 60 }, { "epoch": 0.028824833702882482, "grad_norm": 0.6897618492525014, "learning_rate": 9.999525361252996e-05, "loss": 0.8525, "num_input_tokens_seen": 678248, "step": 65 }, { "epoch": 0.031042128603104215, "grad_norm": 0.4751822186720059, "learning_rate": 9.998101535124758e-05, "loss": 0.8411, "num_input_tokens_seen": 730376, "step": 70 }, { "epoch": 0.03325942350332594, "grad_norm": 0.8184941608670437, "learning_rate": 9.995728791936504e-05, "loss": 0.8105, "num_input_tokens_seen": 781648, "step": 75 }, { "epoch": 0.03547671840354767, "grad_norm": 0.930967493911239, "learning_rate": 9.992407582166581e-05, "loss": 0.8383, "num_input_tokens_seen": 833096, "step": 80 }, { "epoch": 0.037694013303769404, "grad_norm": 0.9048736490092079, "learning_rate": 9.988138536364922e-05, "loss": 0.8133, "num_input_tokens_seen": 885648, "step": 85 }, { "epoch": 0.03991130820399113, "grad_norm": 1.4017690802266505, "learning_rate": 9.98292246503335e-05, "loss": 0.8217, "num_input_tokens_seen": 938208, "step": 90 }, { "epoch": 0.04212860310421286, 
"grad_norm": 1.2230184844510747, "learning_rate": 9.976760358471686e-05, "loss": 0.7601, "num_input_tokens_seen": 989992, "step": 95 }, { "epoch": 0.04434589800443459, "grad_norm": 1.108668630058659, "learning_rate": 9.969653386589748e-05, "loss": 0.7938, "num_input_tokens_seen": 1042120, "step": 100 }, { "epoch": 0.04434589800443459, "eval_loss": 0.8915936946868896, "eval_runtime": 19.4403, "eval_samples_per_second": 3.086, "eval_steps_per_second": 0.772, "num_input_tokens_seen": 1042120, "step": 100 } ], "logging_steps": 5, "max_steps": 1200, "num_input_tokens_seen": 1042120, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 68727405543424.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }