{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 500, "global_step": 156, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "grad_norm": 4.15625, "learning_rate": 4.423334927457198e-05, "logits/chosen": 0.6726851463317871, "logits/rejected": 0.5370064973831177, "logps/chosen": -353.01397705078125, "logps/rejected": -321.87738037109375, "loss": 0.6045, "rewards/accuracies": 0.6434375047683716, "rewards/chosen": 0.3095881938934326, "rewards/margins": 0.49872708320617676, "rewards/rejected": -0.18913888931274414, "step": 50 }, { "epoch": 0.64, "grad_norm": 2.09375, "learning_rate": 1.815842524819793e-05, "logits/chosen": 0.6333445310592651, "logits/rejected": 0.49084267020225525, "logps/chosen": -334.806396484375, "logps/rejected": -316.8036804199219, "loss": 0.4899, "rewards/accuracies": 0.7724999785423279, "rewards/chosen": 0.6486607193946838, "rewards/margins": 1.2206343412399292, "rewards/rejected": -0.5719736814498901, "step": 100 }, { "epoch": 0.96, "grad_norm": 3.546875, "learning_rate": 2.397392281198729e-07, "logits/chosen": 0.6054231524467468, "logits/rejected": 0.45057016611099243, "logps/chosen": -348.30419921875, "logps/rejected": -318.5997314453125, "loss": 0.47, "rewards/accuracies": 0.7762500047683716, "rewards/chosen": 0.8248878717422485, "rewards/margins": 1.4257100820541382, "rewards/rejected": -0.6008222103118896, "step": 150 }, { "epoch": 0.9984, "step": 156, "total_flos": 4.792500052780122e+18, "train_loss": 0.5214443573584924, "train_runtime": 10667.9882, "train_samples_per_second": 0.937, "train_steps_per_second": 0.015 } ], "logging_steps": 50, "max_steps": 156, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3906, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.792500052780122e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }