selective-pairrm-33079692-mt2 / all_results.json
wxzhang's picture
Model save
4b105c0 verified
{
"epoch": 1.0,
"eval_logits/chosen": -3.02286958694458,
"eval_logits/rejected": -3.0082170963287354,
"eval_logps/chosen": -781.7958984375,
"eval_logps/rejected": -817.7778930664062,
"eval_loss": 0.7211850881576538,
"eval_rewards/accuracies": 0.57421875,
"eval_rewards/chosen": -2.3879425525665283,
"eval_rewards/margins": 0.11641353368759155,
"eval_rewards/rejected": -2.5043559074401855,
"eval_runtime": 133.2747,
"eval_samples": 1000,
"eval_samples_per_second": 7.503,
"eval_steps_per_second": 0.24,
"train_loss": 0.5059080181213526,
"train_runtime": 5445.7158,
"train_samples": 19996,
"train_samples_per_second": 3.672,
"train_steps_per_second": 0.057
}