PEFT
Safetensors
mixtral
alignment-handbook
trl
dpo
Generated from Trainer
4-bit precision
bitsandbytes
File size: 574 Bytes
c7f70d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{
    "epoch": 1.0,
    "eval_logits/chosen": -0.7773212790489197,
    "eval_logits/rejected": -0.7749085426330566,
    "eval_logps/chosen": -275.3572082519531,
    "eval_logps/rejected": -324.0383605957031,
    "eval_loss": 0.12900209426879883,
    "eval_rewards/accuracies": 0.8596742749214172,
    "eval_rewards/chosen": -0.17991267144680023,
    "eval_rewards/margins": 5.889674186706543,
    "eval_rewards/rejected": -6.069586277008057,
    "eval_runtime": 76159.9506,
    "eval_samples": 197968,
    "eval_samples_per_second": 2.599,
    "eval_steps_per_second": 1.3
}