{ "epoch": 1.0, "eval_logits/chosen": -2.0462207794189453, "eval_logits/rejected": -1.7758138179779053, "eval_logps/chosen": -90.40424346923828, "eval_logps/rejected": -73.17691802978516, "eval_loss": 0.5703843832015991, "eval_rewards/accuracies": 0.9472222328186035, "eval_rewards/chosen": 0.1581471860408783, "eval_rewards/margins": 0.2657936215400696, "eval_rewards/rejected": -0.10764642059803009, "eval_runtime": 118.208, "eval_samples": 2862, "eval_samples_per_second": 24.212, "eval_steps_per_second": 0.761, "train_loss": 0.6280729855576607, "train_runtime": 9689.6427, "train_samples": 140201, "train_samples_per_second": 14.469, "train_steps_per_second": 0.151 }