{ "epoch": 0.9998604326587579, "eval_logits/chosen": -1.0269631147384644, "eval_logits/rejected": -1.061506986618042, "eval_logps/chosen": -112.56452178955078, "eval_logps/rejected": -272.78466796875, "eval_loss": 0.000979769160039723, "eval_rewards/accuracies": 0.9996050596237183, "eval_rewards/chosen": -3.615877628326416, "eval_rewards/margins": 16.76466178894043, "eval_rewards/rejected": -20.380538940429688, "eval_runtime": 151.8806, "eval_samples_per_second": 33.302, "eval_steps_per_second": 4.168 }