|
{ |
|
"best_metric": 0.889969527721405, |
|
"best_model_checkpoint": "saves/CADICA_qwenvl_direction_then_DetectAndClassify_scale4/lora/sft/checkpoint-50", |
|
"epoch": 0.04434589800443459, |
|
"eval_steps": 50, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0022172949002217295, |
|
"grad_norm": 10.675163725573233, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.9514, |
|
"num_input_tokens_seen": 52584, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004434589800443459, |
|
"grad_norm": 40.324863066721726, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.8884, |
|
"num_input_tokens_seen": 105192, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0066518847006651885, |
|
"grad_norm": 11.862755525069886, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.8391, |
|
"num_input_tokens_seen": 157768, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.008869179600886918, |
|
"grad_norm": 6.612134100058155, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.6078, |
|
"num_input_tokens_seen": 209816, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.011086474501108648, |
|
"grad_norm": 5.778999894409448, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.3229, |
|
"num_input_tokens_seen": 262088, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.013303769401330377, |
|
"grad_norm": 7.203619868080236, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0884, |
|
"num_input_tokens_seen": 314464, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.015521064301552107, |
|
"grad_norm": 1.9403341193840407, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.9507, |
|
"num_input_tokens_seen": 365768, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.017738359201773836, |
|
"grad_norm": 1.227381877798724, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.8783, |
|
"num_input_tokens_seen": 418744, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.019955654101995565, |
|
"grad_norm": 1.192341449931711, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.8569, |
|
"num_input_tokens_seen": 470176, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.022172949002217297, |
|
"grad_norm": 0.9027870808759253, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.8773, |
|
"num_input_tokens_seen": 521992, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.022172949002217297, |
|
"eval_loss": 0.889969527721405, |
|
"eval_runtime": 47.7178, |
|
"eval_samples_per_second": 1.257, |
|
"eval_steps_per_second": 0.314, |
|
"num_input_tokens_seen": 521992, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.024390243902439025, |
|
"grad_norm": 0.9805700614384589, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 0.9117, |
|
"num_input_tokens_seen": 574288, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.026607538802660754, |
|
"grad_norm": 0.9233792774186961, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8536, |
|
"num_input_tokens_seen": 626960, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.028824833702882482, |
|
"grad_norm": 0.6897618492525014, |
|
"learning_rate": 9.999525361252996e-05, |
|
"loss": 0.8525, |
|
"num_input_tokens_seen": 678248, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.031042128603104215, |
|
"grad_norm": 0.4751822186720059, |
|
"learning_rate": 9.998101535124758e-05, |
|
"loss": 0.8411, |
|
"num_input_tokens_seen": 730376, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03325942350332594, |
|
"grad_norm": 0.8184941608670437, |
|
"learning_rate": 9.995728791936504e-05, |
|
"loss": 0.8105, |
|
"num_input_tokens_seen": 781648, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03547671840354767, |
|
"grad_norm": 0.930967493911239, |
|
"learning_rate": 9.992407582166581e-05, |
|
"loss": 0.8383, |
|
"num_input_tokens_seen": 833096, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.037694013303769404, |
|
"grad_norm": 0.9048736490092079, |
|
"learning_rate": 9.988138536364922e-05, |
|
"loss": 0.8133, |
|
"num_input_tokens_seen": 885648, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.03991130820399113, |
|
"grad_norm": 1.4017690802266505, |
|
"learning_rate": 9.98292246503335e-05, |
|
"loss": 0.8217, |
|
"num_input_tokens_seen": 938208, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04212860310421286, |
|
"grad_norm": 1.2230184844510747, |
|
"learning_rate": 9.976760358471686e-05, |
|
"loss": 0.7601, |
|
"num_input_tokens_seen": 989992, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.04434589800443459, |
|
"grad_norm": 1.108668630058659, |
|
"learning_rate": 9.969653386589748e-05, |
|
"loss": 0.7938, |
|
"num_input_tokens_seen": 1042120, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04434589800443459, |
|
"eval_loss": 0.8915936946868896, |
|
"eval_runtime": 19.4403, |
|
"eval_samples_per_second": 3.086, |
|
"eval_steps_per_second": 0.772, |
|
"num_input_tokens_seen": 1042120, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1200, |
|
"num_input_tokens_seen": 1042120, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 68727405543424.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|