|
{ |
|
"epoch": 0.9989071038251366, |
|
"eval_logits/chosen": -1.200439691543579, |
|
"eval_logits/rejected": -1.150753140449524, |
|
"eval_logps/chosen": -2.5561068058013916, |
|
"eval_logps/rejected": -3.309354782104492, |
|
"eval_loss": 1.7538212537765503, |
|
"eval_rewards/accuracies": 0.8433734774589539, |
|
"eval_rewards/chosen": -25.561067581176758, |
|
"eval_rewards/margins": 7.532484531402588, |
|
"eval_rewards/rejected": -33.09355163574219, |
|
"eval_runtime": 33.6933, |
|
"eval_samples": 1318, |
|
"eval_samples_per_second": 39.118, |
|
"eval_semantic_entropy": 0.36752766370773315, |
|
"eval_steps_per_second": 2.463, |
|
"total_flos": 0.0, |
|
"train_loss": 2.585477126766347, |
|
"train_runtime": 5989.402, |
|
"train_samples": 58558, |
|
"train_samples_per_second": 9.777, |
|
"train_steps_per_second": 0.076 |
|
} |