|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.992, |
|
"eval_steps": 200, |
|
"global_step": 62, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 96.80832392576436, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/generated": -1.177710771560669, |
|
"logits/real": -0.5424066185951233, |
|
"logps/generated": -206.68331909179688, |
|
"logps/real": -268.4350280761719, |
|
"loss": 0.956, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 186.85535273079182, |
|
"learning_rate": 4.727272727272727e-07, |
|
"logits/generated": -0.9581692218780518, |
|
"logits/real": -0.7258952260017395, |
|
"logps/generated": -263.182373046875, |
|
"logps/real": -268.8551025390625, |
|
"loss": 0.9057, |
|
"rewards/accuracies": 0.5972222089767456, |
|
"rewards/generated": 0.11249147355556488, |
|
"rewards/margins": 0.11431904882192612, |
|
"rewards/real": 0.22681055963039398, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 118.11338598390404, |
|
"learning_rate": 3.818181818181818e-07, |
|
"logits/generated": -0.891255259513855, |
|
"logits/real": -0.6647359132766724, |
|
"logps/generated": -264.0565185546875, |
|
"logps/real": -256.42901611328125, |
|
"loss": 0.785, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/generated": 0.7130780220031738, |
|
"rewards/margins": 0.5250438451766968, |
|
"rewards/real": 1.2381219863891602, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 89.31545609503772, |
|
"learning_rate": 2.909090909090909e-07, |
|
"logits/generated": -0.8781732320785522, |
|
"logits/real": -0.5925976634025574, |
|
"logps/generated": -256.00799560546875, |
|
"logps/real": -247.38980102539062, |
|
"loss": 0.7838, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": 1.1103136539459229, |
|
"rewards/margins": 0.712864875793457, |
|
"rewards/real": 1.8231786489486694, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 88.34237989806086, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -0.8793425559997559, |
|
"logits/real": -0.6354281306266785, |
|
"logps/generated": -263.5761413574219, |
|
"logps/real": -255.0084686279297, |
|
"loss": 0.7397, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/generated": 1.0507025718688965, |
|
"rewards/margins": 0.881763756275177, |
|
"rewards/real": 1.9324661493301392, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 91.22613773952635, |
|
"learning_rate": 1.0909090909090908e-07, |
|
"logits/generated": -0.8102799654006958, |
|
"logits/real": -0.5707312822341919, |
|
"logps/generated": -248.6565704345703, |
|
"logps/real": -260.7277526855469, |
|
"loss": 0.7153, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/generated": 1.1541557312011719, |
|
"rewards/margins": 0.9233818054199219, |
|
"rewards/real": 2.0775375366210938, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 86.27514871900985, |
|
"learning_rate": 1.818181818181818e-08, |
|
"logits/generated": -0.9142637252807617, |
|
"logits/real": -0.6623004078865051, |
|
"logps/generated": -254.97494506835938, |
|
"logps/real": -244.65640258789062, |
|
"loss": 0.7192, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/generated": 1.215023159980774, |
|
"rewards/margins": 0.8442209959030151, |
|
"rewards/real": 2.059244155883789, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"step": 62, |
|
"total_flos": 0.0, |
|
"train_loss": 0.770801761457997, |
|
"train_runtime": 772.1202, |
|
"train_samples_per_second": 2.585, |
|
"train_steps_per_second": 0.08 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 62, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|