{ "epoch": 1.971563981042654, "eval_logits/chosen": -16.5866641998291, "eval_logits/rejected": -15.948689460754395, "eval_logps/chosen": -363.826171875, "eval_logps/rejected": -363.9148864746094, "eval_loss": 0.5950456857681274, "eval_rewards/accuracies": 0.71875, "eval_rewards/chosen": 0.22188298404216766, "eval_rewards/margins": 0.26559606194496155, "eval_rewards/rejected": -0.043713077902793884, "eval_runtime": 6.8121, "eval_samples": 750, "eval_samples_per_second": 110.098, "eval_steps_per_second": 3.523, "total_flos": 0.0, "train_loss": 0.6262502945386447, "train_runtime": 431.1142, "train_samples": 6750, "train_samples_per_second": 31.314, "train_steps_per_second": 0.241 }