Qwen2.5-7B-gen-dpo-2k-hhrlhf / trainer_state.json
AmberYifan's picture
Model save
ea5b2e4 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.992,
"eval_steps": 200,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 96.80832392576436,
"learning_rate": 7.142857142857142e-08,
"logits/generated": -1.177710771560669,
"logits/real": -0.5424066185951233,
"logps/generated": -206.68331909179688,
"logps/real": -268.4350280761719,
"loss": 0.956,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.16,
"grad_norm": 186.85535273079182,
"learning_rate": 4.727272727272727e-07,
"logits/generated": -0.9581692218780518,
"logits/real": -0.7258952260017395,
"logps/generated": -263.182373046875,
"logps/real": -268.8551025390625,
"loss": 0.9057,
"rewards/accuracies": 0.5972222089767456,
"rewards/generated": 0.11249147355556488,
"rewards/margins": 0.11431904882192612,
"rewards/real": 0.22681055963039398,
"step": 10
},
{
"epoch": 0.32,
"grad_norm": 118.11338598390404,
"learning_rate": 3.818181818181818e-07,
"logits/generated": -0.891255259513855,
"logits/real": -0.6647359132766724,
"logps/generated": -264.0565185546875,
"logps/real": -256.42901611328125,
"loss": 0.785,
"rewards/accuracies": 0.7250000238418579,
"rewards/generated": 0.7130780220031738,
"rewards/margins": 0.5250438451766968,
"rewards/real": 1.2381219863891602,
"step": 20
},
{
"epoch": 0.48,
"grad_norm": 89.31545609503772,
"learning_rate": 2.909090909090909e-07,
"logits/generated": -0.8781732320785522,
"logits/real": -0.5925976634025574,
"logps/generated": -256.00799560546875,
"logps/real": -247.38980102539062,
"loss": 0.7838,
"rewards/accuracies": 0.75,
"rewards/generated": 1.1103136539459229,
"rewards/margins": 0.712864875793457,
"rewards/real": 1.8231786489486694,
"step": 30
},
{
"epoch": 0.64,
"grad_norm": 88.34237989806086,
"learning_rate": 2e-07,
"logits/generated": -0.8793425559997559,
"logits/real": -0.6354281306266785,
"logps/generated": -263.5761413574219,
"logps/real": -255.0084686279297,
"loss": 0.7397,
"rewards/accuracies": 0.7749999761581421,
"rewards/generated": 1.0507025718688965,
"rewards/margins": 0.881763756275177,
"rewards/real": 1.9324661493301392,
"step": 40
},
{
"epoch": 0.8,
"grad_norm": 91.22613773952635,
"learning_rate": 1.0909090909090908e-07,
"logits/generated": -0.8102799654006958,
"logits/real": -0.5707312822341919,
"logps/generated": -248.6565704345703,
"logps/real": -260.7277526855469,
"loss": 0.7153,
"rewards/accuracies": 0.800000011920929,
"rewards/generated": 1.1541557312011719,
"rewards/margins": 0.9233818054199219,
"rewards/real": 2.0775375366210938,
"step": 50
},
{
"epoch": 0.96,
"grad_norm": 86.27514871900985,
"learning_rate": 1.818181818181818e-08,
"logits/generated": -0.9142637252807617,
"logits/real": -0.6623004078865051,
"logps/generated": -254.97494506835938,
"logps/real": -244.65640258789062,
"loss": 0.7192,
"rewards/accuracies": 0.8125,
"rewards/generated": 1.215023159980774,
"rewards/margins": 0.8442209959030151,
"rewards/real": 2.059244155883789,
"step": 60
},
{
"epoch": 0.992,
"step": 62,
"total_flos": 0.0,
"train_loss": 0.770801761457997,
"train_runtime": 772.1202,
"train_samples_per_second": 2.585,
"train_steps_per_second": 0.08
}
],
"logging_steps": 10,
"max_steps": 62,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}