{ "epoch": 2.994495412844037, "eval_logits/chosen": -0.21853114664554596, "eval_logits/rejected": -1.4117213487625122, "eval_logps/chosen": -274.8548889160156, "eval_logps/rejected": -203.45323181152344, "eval_loss": 0.06784019619226456, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": 0.9461619257926941, "eval_rewards/margins": 4.006012916564941, "eval_rewards/rejected": -3.05985164642334, "eval_runtime": 8.9688, "eval_samples_per_second": 5.017, "eval_steps_per_second": 5.017, "total_flos": 7.837376281021809e+17, "train_loss": 0.220101895632551, "train_runtime": 8071.0106, "train_samples_per_second": 1.619, "train_steps_per_second": 0.051 }