|
{ |
|
"epoch": 1.0, |
|
"eval_dpo_losses": 0.6846575736999512, |
|
"eval_logits/chosen": -2.76676082611084, |
|
"eval_logits/rejected": -2.728935718536377, |
|
"eval_logps/chosen": -282.11871337890625, |
|
"eval_logps/rejected": -257.9208679199219, |
|
"eval_loss": 0.7750731110572815, |
|
"eval_positive_losses": 0.8595818281173706, |
|
"eval_rewards/accuracies": 0.5899999737739563, |
|
"eval_rewards/chosen": 0.024747073650360107, |
|
"eval_rewards/margins": 0.018166616559028625, |
|
"eval_rewards/margins_max": 0.12387742102146149, |
|
"eval_rewards/margins_min": -0.0705643892288208, |
|
"eval_rewards/margins_std": 0.06444399803876877, |
|
"eval_rewards/rejected": 0.006580457091331482, |
|
"eval_runtime": 427.7975, |
|
"eval_samples": 2000, |
|
"eval_samples_per_second": 4.675, |
|
"eval_steps_per_second": 0.292, |
|
"train_loss": 0.6494339942932129, |
|
"train_runtime": 4013.1211, |
|
"train_samples": 5678, |
|
"train_samples_per_second": 1.415, |
|
"train_steps_per_second": 0.088 |
|
} |