|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 282, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.17277637691037, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"log_odds_chosen": -0.08250121772289276, |
|
"log_odds_ratio": -0.908496081829071, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.0625, |
|
"logps/chosen": -2.078125, |
|
"logps/rejected": -2.0, |
|
"loss": 1.7207, |
|
"nll_loss": 1.6328125, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.20703125, |
|
"rewards/margins": -0.0074462890625, |
|
"rewards/rejected": -0.2001953125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 5.501828832938749, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"log_odds_chosen": -0.01568603515625, |
|
"log_odds_ratio": -0.8124023675918579, |
|
"logits/chosen": -2.109375, |
|
"logits/rejected": -2.03125, |
|
"logps/chosen": -1.8984375, |
|
"logps/rejected": -1.875, |
|
"loss": 1.662, |
|
"nll_loss": 1.59375, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.189453125, |
|
"rewards/margins": -0.00177764892578125, |
|
"rewards/rejected": -0.1875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 6.819250485621814, |
|
"learning_rate": 4.99980726386944e-07, |
|
"log_odds_chosen": 0.08204345405101776, |
|
"log_odds_ratio": -0.8072265386581421, |
|
"logits/chosen": -2.078125, |
|
"logits/rejected": -2.046875, |
|
"logps/chosen": -1.9140625, |
|
"logps/rejected": -1.984375, |
|
"loss": 1.6703, |
|
"nll_loss": 1.6171875, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.19140625, |
|
"rewards/margins": 0.006591796875, |
|
"rewards/rejected": -0.1982421875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.727303404359562, |
|
"learning_rate": 4.976714865090826e-07, |
|
"log_odds_chosen": 0.21958008408546448, |
|
"log_odds_ratio": -0.795214831829071, |
|
"logits/chosen": -2.203125, |
|
"logits/rejected": -2.125, |
|
"logps/chosen": -1.84375, |
|
"logps/rejected": -2.046875, |
|
"loss": 1.6432, |
|
"nll_loss": 1.5625, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.18359375, |
|
"rewards/margins": 0.021240234375, |
|
"rewards/rejected": -0.205078125, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.560160837530604, |
|
"learning_rate": 4.915482824798726e-07, |
|
"log_odds_chosen": 0.09776916354894638, |
|
"log_odds_ratio": -0.7564452886581421, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.078125, |
|
"logps/chosen": -1.75, |
|
"logps/rejected": -1.8359375, |
|
"loss": 1.6065, |
|
"nll_loss": 1.53125, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1748046875, |
|
"rewards/margins": 0.0087890625, |
|
"rewards/rejected": -0.1845703125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 4.645157759584829, |
|
"learning_rate": 4.817054072717832e-07, |
|
"log_odds_chosen": 0.10500488430261612, |
|
"log_odds_ratio": -0.715039074420929, |
|
"logits/chosen": -2.125, |
|
"logits/rejected": -2.140625, |
|
"logps/chosen": -1.5859375, |
|
"logps/rejected": -1.6875, |
|
"loss": 1.5635, |
|
"nll_loss": 1.4921875, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1591796875, |
|
"rewards/margins": 0.00970458984375, |
|
"rewards/rejected": -0.1689453125, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.0025739143196315, |
|
"learning_rate": 4.68294434139043e-07, |
|
"log_odds_chosen": 0.07453002780675888, |
|
"log_odds_ratio": -0.7457031011581421, |
|
"logits/chosen": -2.25, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.6875, |
|
"logps/rejected": -1.7421875, |
|
"loss": 1.5798, |
|
"nll_loss": 1.53125, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.16796875, |
|
"rewards/margins": 0.006072998046875, |
|
"rewards/rejected": -0.1748046875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.63466624849484, |
|
"learning_rate": 4.515218824976894e-07, |
|
"log_odds_chosen": 0.102294921875, |
|
"log_odds_ratio": -0.71875, |
|
"logits/chosen": -2.265625, |
|
"logits/rejected": -2.203125, |
|
"logps/chosen": -1.625, |
|
"logps/rejected": -1.7109375, |
|
"loss": 1.5814, |
|
"nll_loss": 1.546875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1630859375, |
|
"rewards/margins": 0.0078125, |
|
"rewards/rejected": -0.1708984375, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.5161909712120885, |
|
"learning_rate": 4.3164603767393594e-07, |
|
"log_odds_chosen": 0.08747558295726776, |
|
"log_odds_ratio": -0.7129882574081421, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.3125, |
|
"logps/chosen": -1.5, |
|
"logps/rejected": -1.578125, |
|
"loss": 1.5456, |
|
"nll_loss": 1.4609375, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.150390625, |
|
"rewards/margins": 0.0079345703125, |
|
"rewards/rejected": -0.158203125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.876797623785475, |
|
"learning_rate": 4.0897297349446334e-07, |
|
"log_odds_chosen": 0.2746826112270355, |
|
"log_odds_ratio": -0.6348632574081421, |
|
"logits/chosen": -2.359375, |
|
"logits/rejected": -2.390625, |
|
"logps/chosen": -1.4140625, |
|
"logps/rejected": -1.6328125, |
|
"loss": 1.4572, |
|
"nll_loss": 1.375, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1416015625, |
|
"rewards/margins": 0.021728515625, |
|
"rewards/rejected": -0.1630859375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_log_odds_chosen": 0.15282440185546875, |
|
"eval_log_odds_ratio": -0.6873779296875, |
|
"eval_logits/chosen": -2.375, |
|
"eval_logits/rejected": -2.359375, |
|
"eval_logps/chosen": -1.40625, |
|
"eval_logps/rejected": -1.5078125, |
|
"eval_loss": 1.437416672706604, |
|
"eval_nll_loss": 1.390625, |
|
"eval_rewards/accuracies": 0.546875, |
|
"eval_rewards/chosen": -0.140625, |
|
"eval_rewards/margins": 0.01092529296875, |
|
"eval_rewards/rejected": -0.1513671875, |
|
"eval_runtime": 28.8431, |
|
"eval_samples_per_second": 26.003, |
|
"eval_steps_per_second": 1.109, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.104351936223712, |
|
"learning_rate": 3.8385183896790644e-07, |
|
"log_odds_chosen": 0.14180298149585724, |
|
"log_odds_ratio": -0.6834961175918579, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.40625, |
|
"logps/rejected": -1.5, |
|
"loss": 1.3979, |
|
"nll_loss": 1.3515625, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.140625, |
|
"rewards/margins": 0.00982666015625, |
|
"rewards/rejected": -0.150390625, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.824975774552339, |
|
"learning_rate": 3.566694816392771e-07, |
|
"log_odds_chosen": 0.07709960639476776, |
|
"log_odds_ratio": -0.727343738079071, |
|
"logits/chosen": -2.34375, |
|
"logits/rejected": -2.328125, |
|
"logps/chosen": -1.3515625, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.4478, |
|
"nll_loss": 1.3125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1357421875, |
|
"rewards/margins": 0.005584716796875, |
|
"rewards/rejected": -0.140625, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.1045900908725934, |
|
"learning_rate": 3.278444904138297e-07, |
|
"log_odds_chosen": 0.14324340224266052, |
|
"log_odds_ratio": -0.7000976800918579, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -1.3359375, |
|
"logps/rejected": -1.4609375, |
|
"loss": 1.4356, |
|
"nll_loss": 1.375, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.1337890625, |
|
"rewards/margins": 0.0123291015625, |
|
"rewards/rejected": -0.146484375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.0100692405685825, |
|
"learning_rate": 2.9782074958662915e-07, |
|
"log_odds_chosen": 0.08026123046875, |
|
"log_odds_ratio": -0.715527355670929, |
|
"logits/chosen": -2.4375, |
|
"logits/rejected": -2.390625, |
|
"logps/chosen": -1.40625, |
|
"logps/rejected": -1.46875, |
|
"loss": 1.4861, |
|
"nll_loss": 1.4140625, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.140625, |
|
"rewards/margins": 0.0059814453125, |
|
"rewards/rejected": -0.146484375, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 3.20502629473615, |
|
"learning_rate": 2.6706060334116775e-07, |
|
"log_odds_chosen": 0.22966308891773224, |
|
"log_odds_ratio": -0.6705077886581421, |
|
"logits/chosen": -2.46875, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.2890625, |
|
"logps/rejected": -1.453125, |
|
"loss": 1.437, |
|
"nll_loss": 1.3359375, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.12890625, |
|
"rewards/margins": 0.016357421875, |
|
"rewards/rejected": -0.1455078125, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 2.9568341009806005, |
|
"learning_rate": 2.3603773597887236e-07, |
|
"log_odds_chosen": 0.05845336988568306, |
|
"log_odds_ratio": -0.7265625, |
|
"logits/chosen": -2.515625, |
|
"logits/rejected": -2.5, |
|
"logps/chosen": -1.3515625, |
|
"logps/rejected": -1.3984375, |
|
"loss": 1.4049, |
|
"nll_loss": 1.3515625, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.1357421875, |
|
"rewards/margins": 0.004425048828125, |
|
"rewards/rejected": -0.1396484375, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 2.6182523616400624, |
|
"learning_rate": 2.0522987751888875e-07, |
|
"log_odds_chosen": 0.0771484375, |
|
"log_odds_ratio": -0.726757824420929, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.453125, |
|
"logps/chosen": -1.359375, |
|
"logps/rejected": -1.4296875, |
|
"loss": 1.4526, |
|
"nll_loss": 1.390625, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.1357421875, |
|
"rewards/margins": 0.006805419921875, |
|
"rewards/rejected": -0.142578125, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 3.6328150676008355, |
|
"learning_rate": 1.7511144699669963e-07, |
|
"log_odds_chosen": 0.10901489108800888, |
|
"log_odds_ratio": -0.703417956829071, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -1.3046875, |
|
"logps/rejected": -1.390625, |
|
"loss": 1.4426, |
|
"nll_loss": 1.3671875, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.130859375, |
|
"rewards/margins": 0.0087890625, |
|
"rewards/rejected": -0.1396484375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 3.052674455238246, |
|
"learning_rate": 1.461462467495284e-07, |
|
"log_odds_chosen": 0.23468628525733948, |
|
"log_odds_ratio": -0.652539074420929, |
|
"logits/chosen": -2.46875, |
|
"logits/rejected": -2.484375, |
|
"logps/chosen": -1.2421875, |
|
"logps/rejected": -1.421875, |
|
"loss": 1.4109, |
|
"nll_loss": 1.3125, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.12451171875, |
|
"rewards/margins": 0.0179443359375, |
|
"rewards/rejected": -0.142578125, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 3.212384657142265, |
|
"learning_rate": 1.1878032019132014e-07, |
|
"log_odds_chosen": 0.23659667372703552, |
|
"log_odds_ratio": -0.665332019329071, |
|
"logits/chosen": -2.40625, |
|
"logits/rejected": -2.453125, |
|
"logps/chosen": -1.234375, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.3846, |
|
"nll_loss": 1.2890625, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.12353515625, |
|
"rewards/margins": 0.01708984375, |
|
"rewards/rejected": -0.140625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_log_odds_chosen": 0.1745758056640625, |
|
"eval_log_odds_ratio": -0.6793212890625, |
|
"eval_logits/chosen": -2.484375, |
|
"eval_logits/rejected": -2.46875, |
|
"eval_logps/chosen": -1.28125, |
|
"eval_logps/rejected": -1.3984375, |
|
"eval_loss": 1.3828542232513428, |
|
"eval_nll_loss": 1.3359375, |
|
"eval_rewards/accuracies": 0.5625, |
|
"eval_rewards/chosen": -0.1279296875, |
|
"eval_rewards/margins": 0.011474609375, |
|
"eval_rewards/rejected": -0.1396484375, |
|
"eval_runtime": 28.5986, |
|
"eval_samples_per_second": 26.225, |
|
"eval_steps_per_second": 1.119, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 2.9802043706553674, |
|
"learning_rate": 9.34350830624677e-08, |
|
"log_odds_chosen": 0.18032225966453552, |
|
"log_odds_ratio": -0.6673828363418579, |
|
"logits/chosen": -2.46875, |
|
"logits/rejected": -2.53125, |
|
"logps/chosen": -1.3203125, |
|
"logps/rejected": -1.453125, |
|
"loss": 1.3959, |
|
"nll_loss": 1.328125, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1318359375, |
|
"rewards/margins": 0.01361083984375, |
|
"rewards/rejected": -0.1455078125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.71778589765558, |
|
"learning_rate": 7.050083392813649e-08, |
|
"log_odds_chosen": 0.16171875596046448, |
|
"log_odds_ratio": -0.6756836175918579, |
|
"logits/chosen": -2.484375, |
|
"logits/rejected": -2.46875, |
|
"logps/chosen": -1.28125, |
|
"logps/rejected": -1.3984375, |
|
"loss": 1.3794, |
|
"nll_loss": 1.3203125, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.1279296875, |
|
"rewards/margins": 0.01153564453125, |
|
"rewards/rejected": -0.1396484375, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 2.662070784648976, |
|
"learning_rate": 5.033074385888189e-08, |
|
"log_odds_chosen": 0.08112792670726776, |
|
"log_odds_ratio": -0.7230468988418579, |
|
"logits/chosen": -2.515625, |
|
"logits/rejected": -2.453125, |
|
"logps/chosen": -1.3046875, |
|
"logps/rejected": -1.375, |
|
"loss": 1.393, |
|
"nll_loss": 1.328125, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.130859375, |
|
"rewards/margins": 0.00653076171875, |
|
"rewards/rejected": -0.13671875, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.5341509494230845, |
|
"learning_rate": 3.323541784818898e-08, |
|
"log_odds_chosen": 0.10616455227136612, |
|
"log_odds_ratio": -0.710742175579071, |
|
"logits/chosen": -2.53125, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -1.3046875, |
|
"logps/rejected": -1.390625, |
|
"loss": 1.434, |
|
"nll_loss": 1.3359375, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.130859375, |
|
"rewards/margins": 0.0086669921875, |
|
"rewards/rejected": -0.1396484375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 2.4611715244981904, |
|
"learning_rate": 1.9478111717223967e-08, |
|
"log_odds_chosen": 0.18498535454273224, |
|
"log_odds_ratio": -0.676074206829071, |
|
"logits/chosen": -2.59375, |
|
"logits/rejected": -2.609375, |
|
"logps/chosen": -1.2890625, |
|
"logps/rejected": -1.40625, |
|
"loss": 1.3887, |
|
"nll_loss": 1.3828125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.12890625, |
|
"rewards/margins": 0.01171875, |
|
"rewards/rejected": -0.140625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 2.5379707430688163, |
|
"learning_rate": 9.270678163050217e-09, |
|
"log_odds_chosen": 0.14968261122703552, |
|
"log_odds_ratio": -0.6859375238418579, |
|
"logits/chosen": -2.546875, |
|
"logits/rejected": -2.5625, |
|
"logps/chosen": -1.3203125, |
|
"logps/rejected": -1.4296875, |
|
"loss": 1.4595, |
|
"nll_loss": 1.3671875, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.1318359375, |
|
"rewards/margins": 0.01068115234375, |
|
"rewards/rejected": -0.142578125, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 2.818634582820743, |
|
"learning_rate": 2.7703043782735524e-09, |
|
"log_odds_chosen": 0.12449340522289276, |
|
"log_odds_ratio": -0.705078125, |
|
"logits/chosen": -2.5, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -1.2734375, |
|
"logps/rejected": -1.359375, |
|
"loss": 1.3615, |
|
"nll_loss": 1.328125, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.126953125, |
|
"rewards/margins": 0.00897216796875, |
|
"rewards/rejected": -0.1357421875, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 3.087114524720046, |
|
"learning_rate": 7.709148044679481e-11, |
|
"log_odds_chosen": 0.244537353515625, |
|
"log_odds_ratio": -0.6534179449081421, |
|
"logits/chosen": -2.5, |
|
"logits/rejected": -2.5, |
|
"logps/chosen": -1.28125, |
|
"logps/rejected": -1.484375, |
|
"loss": 1.4121, |
|
"nll_loss": 1.359375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12890625, |
|
"rewards/margins": 0.01953125, |
|
"rewards/rejected": -0.1484375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 282, |
|
"total_flos": 0.0, |
|
"train_loss": 1.48347466719066, |
|
"train_runtime": 2049.7189, |
|
"train_samples_per_second": 6.586, |
|
"train_steps_per_second": 0.138 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 282, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|