taicheng's picture
End of training
e92c4a3 verified
{
"epoch": 1.0,
"eval_logits/chosen": -2.5399038791656494,
"eval_logits/rejected": -2.523594379425049,
"eval_logps/chosen": -73.60810089111328,
"eval_logps/rejected": -80.95262908935547,
"eval_loss": 0.6417149901390076,
"eval_rewards/accuracies": 0.335317462682724,
"eval_rewards/chosen": 0.2396649718284607,
"eval_rewards/margins": 0.19197754561901093,
"eval_rewards/rejected": 0.04768744856119156,
"eval_runtime": 114.5526,
"eval_samples": 2000,
"eval_samples_per_second": 17.459,
"eval_steps_per_second": 0.55
}