{ "epoch": 0.9996020692399522, "eval_logits/chosen": -2.402043581008911, "eval_logits/rejected": -2.4006083011627197, "eval_logps/chosen": -211.9842987060547, "eval_logps/rejected": -214.5289306640625, "eval_loss": 0.673168957233429, "eval_rewards/accuracies": 0.5578358173370361, "eval_rewards/chosen": -0.5129190683364868, "eval_rewards/margins": 0.052563026547431946, "eval_rewards/rejected": -0.5654820799827576, "eval_runtime": 167.0868, "eval_samples": 8552, "eval_samples_per_second": 51.183, "eval_steps_per_second": 0.802, "total_flos": 0.0, "train_loss": 0.678918091354856, "train_runtime": 7383.0158, "train_samples": 160800, "train_samples_per_second": 21.78, "train_steps_per_second": 0.043 }