{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 100, "global_step": 282, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "grad_norm": 8.17277637691037, "learning_rate": 1.7241379310344828e-07, "log_odds_chosen": -0.08250121772289276, "log_odds_ratio": -0.908496081829071, "logits/chosen": -2.125, "logits/rejected": -2.0625, "logps/chosen": -2.078125, "logps/rejected": -2.0, "loss": 1.7207, "nll_loss": 1.6328125, "rewards/accuracies": 0.53125, "rewards/chosen": -0.20703125, "rewards/margins": -0.0074462890625, "rewards/rejected": -0.2001953125, "step": 10 }, { "epoch": 0.14, "grad_norm": 5.501828832938749, "learning_rate": 3.4482758620689656e-07, "log_odds_chosen": -0.01568603515625, "log_odds_ratio": -0.8124023675918579, "logits/chosen": -2.109375, "logits/rejected": -2.03125, "logps/chosen": -1.8984375, "logps/rejected": -1.875, "loss": 1.662, "nll_loss": 1.59375, "rewards/accuracies": 0.53125, "rewards/chosen": -0.189453125, "rewards/margins": -0.00177764892578125, "rewards/rejected": -0.1875, "step": 20 }, { "epoch": 0.21, "grad_norm": 6.819250485621814, "learning_rate": 4.99980726386944e-07, "log_odds_chosen": 0.08204345405101776, "log_odds_ratio": -0.8072265386581421, "logits/chosen": -2.078125, "logits/rejected": -2.046875, "logps/chosen": -1.9140625, "logps/rejected": -1.984375, "loss": 1.6703, "nll_loss": 1.6171875, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.19140625, "rewards/margins": 0.006591796875, "rewards/rejected": -0.1982421875, "step": 30 }, { "epoch": 0.28, "grad_norm": 4.727303404359562, "learning_rate": 4.976714865090826e-07, "log_odds_chosen": 0.21958008408546448, "log_odds_ratio": -0.795214831829071, "logits/chosen": -2.203125, "logits/rejected": -2.125, "logps/chosen": -1.84375, "logps/rejected": -2.046875, "loss": 1.6432, "nll_loss": 1.5625, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.18359375, "rewards/margins": 0.021240234375, "rewards/rejected": -0.205078125, "step": 40 }, { "epoch": 0.35, "grad_norm": 5.560160837530604, "learning_rate": 4.915482824798726e-07, "log_odds_chosen": 0.09776916354894638, "log_odds_ratio": -0.7564452886581421, "logits/chosen": -2.171875, "logits/rejected": -2.078125, "logps/chosen": -1.75, "logps/rejected": -1.8359375, "loss": 1.6065, "nll_loss": 1.53125, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1748046875, "rewards/margins": 0.0087890625, "rewards/rejected": -0.1845703125, "step": 50 }, { "epoch": 0.43, "grad_norm": 4.645157759584829, "learning_rate": 4.817054072717832e-07, "log_odds_chosen": 0.10500488430261612, "log_odds_ratio": -0.715039074420929, "logits/chosen": -2.125, "logits/rejected": -2.140625, "logps/chosen": -1.5859375, "logps/rejected": -1.6875, "loss": 1.5635, "nll_loss": 1.4921875, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.1591796875, "rewards/margins": 0.00970458984375, "rewards/rejected": -0.1689453125, "step": 60 }, { "epoch": 0.5, "grad_norm": 5.0025739143196315, "learning_rate": 4.68294434139043e-07, "log_odds_chosen": 0.07453002780675888, "log_odds_ratio": -0.7457031011581421, "logits/chosen": -2.25, "logits/rejected": -2.203125, "logps/chosen": -1.6875, "logps/rejected": -1.7421875, "loss": 1.5798, "nll_loss": 1.53125, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.16796875, "rewards/margins": 0.006072998046875, "rewards/rejected": -0.1748046875, "step": 70 }, { "epoch": 0.57, "grad_norm": 4.63466624849484, "learning_rate": 4.515218824976894e-07, "log_odds_chosen": 0.102294921875, "log_odds_ratio": -0.71875, "logits/chosen": -2.265625, "logits/rejected": -2.203125, "logps/chosen": -1.625, "logps/rejected": -1.7109375, "loss": 1.5814, "nll_loss": 1.546875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1630859375, "rewards/margins": 0.0078125, "rewards/rejected": -0.1708984375, "step": 80 }, { "epoch": 0.64, "grad_norm": 3.5161909712120885, "learning_rate": 4.3164603767393594e-07, "log_odds_chosen": 0.08747558295726776, "log_odds_ratio": -0.7129882574081421, "logits/chosen": -2.34375, "logits/rejected": -2.3125, "logps/chosen": -1.5, "logps/rejected": -1.578125, "loss": 1.5456, "nll_loss": 1.4609375, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.150390625, "rewards/margins": 0.0079345703125, "rewards/rejected": -0.158203125, "step": 90 }, { "epoch": 0.71, "grad_norm": 3.876797623785475, "learning_rate": 4.0897297349446334e-07, "log_odds_chosen": 0.2746826112270355, "log_odds_ratio": -0.6348632574081421, "logits/chosen": -2.359375, "logits/rejected": -2.390625, "logps/chosen": -1.4140625, "logps/rejected": -1.6328125, "loss": 1.4572, "nll_loss": 1.375, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1416015625, "rewards/margins": 0.021728515625, "rewards/rejected": -0.1630859375, "step": 100 }, { "epoch": 0.71, "eval_log_odds_chosen": 0.15282440185546875, "eval_log_odds_ratio": -0.6873779296875, "eval_logits/chosen": -2.375, "eval_logits/rejected": -2.359375, "eval_logps/chosen": -1.40625, "eval_logps/rejected": -1.5078125, "eval_loss": 1.437416672706604, "eval_nll_loss": 1.390625, "eval_rewards/accuracies": 0.546875, "eval_rewards/chosen": -0.140625, "eval_rewards/margins": 0.01092529296875, "eval_rewards/rejected": -0.1513671875, "eval_runtime": 28.8431, "eval_samples_per_second": 26.003, "eval_steps_per_second": 1.109, "step": 100 }, { "epoch": 0.78, "grad_norm": 3.104351936223712, "learning_rate": 3.8385183896790644e-07, "log_odds_chosen": 0.14180298149585724, "log_odds_ratio": -0.6834961175918579, "logits/chosen": -2.4375, "logits/rejected": -2.46875, "logps/chosen": -1.40625, "logps/rejected": -1.5, "loss": 1.3979, "nll_loss": 1.3515625, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.140625, "rewards/margins": 0.00982666015625, "rewards/rejected": -0.150390625, "step": 110 }, { "epoch": 0.85, "grad_norm": 4.824975774552339, "learning_rate": 3.566694816392771e-07, "log_odds_chosen": 0.07709960639476776, "log_odds_ratio": -0.727343738079071, "logits/chosen": -2.34375, "logits/rejected": -2.328125, "logps/chosen": -1.3515625, "logps/rejected": -1.40625, "loss": 1.4478, "nll_loss": 1.3125, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.1357421875, "rewards/margins": 0.005584716796875, "rewards/rejected": -0.140625, "step": 120 }, { "epoch": 0.92, "grad_norm": 3.1045900908725934, "learning_rate": 3.278444904138297e-07, "log_odds_chosen": 0.14324340224266052, "log_odds_ratio": -0.7000976800918579, "logits/chosen": -2.4375, "logits/rejected": -2.515625, "logps/chosen": -1.3359375, "logps/rejected": -1.4609375, "loss": 1.4356, "nll_loss": 1.375, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.1337890625, "rewards/margins": 0.0123291015625, "rewards/rejected": -0.146484375, "step": 130 }, { "epoch": 0.99, "grad_norm": 3.0100692405685825, "learning_rate": 2.9782074958662915e-07, "log_odds_chosen": 0.08026123046875, "log_odds_ratio": -0.715527355670929, "logits/chosen": -2.4375, "logits/rejected": -2.390625, "logps/chosen": -1.40625, "logps/rejected": -1.46875, "loss": 1.4861, "nll_loss": 1.4140625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.140625, "rewards/margins": 0.0059814453125, "rewards/rejected": -0.146484375, "step": 140 }, { "epoch": 1.06, "grad_norm": 3.20502629473615, "learning_rate": 2.6706060334116775e-07, "log_odds_chosen": 0.22966308891773224, "log_odds_ratio": -0.6705077886581421, "logits/chosen": -2.46875, "logits/rejected": -2.46875, "logps/chosen": -1.2890625, "logps/rejected": -1.453125, "loss": 1.437, "nll_loss": 1.3359375, "rewards/accuracies": 0.59375, "rewards/chosen": -0.12890625, "rewards/margins": 0.016357421875, "rewards/rejected": -0.1455078125, "step": 150 }, { "epoch": 1.13, "grad_norm": 2.9568341009806005, "learning_rate": 2.3603773597887236e-07, "log_odds_chosen": 0.05845336988568306, "log_odds_ratio": -0.7265625, "logits/chosen": -2.515625, "logits/rejected": -2.5, "logps/chosen": -1.3515625, "logps/rejected": -1.3984375, "loss": 1.4049, "nll_loss": 1.3515625, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.1357421875, "rewards/margins": 0.004425048828125, "rewards/rejected": -0.1396484375, "step": 160 }, { "epoch": 1.21, "grad_norm": 2.6182523616400624, "learning_rate": 2.0522987751888875e-07, "log_odds_chosen": 0.0771484375, "log_odds_ratio": -0.726757824420929, "logits/chosen": -2.421875, "logits/rejected": -2.453125, "logps/chosen": -1.359375, "logps/rejected": -1.4296875, "loss": 1.4526, "nll_loss": 1.390625, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1357421875, "rewards/margins": 0.006805419921875, "rewards/rejected": -0.142578125, "step": 170 }, { "epoch": 1.28, "grad_norm": 3.6328150676008355, "learning_rate": 1.7511144699669963e-07, "log_odds_chosen": 0.10901489108800888, "log_odds_ratio": -0.703417956829071, "logits/chosen": -2.484375, "logits/rejected": -2.515625, "logps/chosen": -1.3046875, "logps/rejected": -1.390625, "loss": 1.4426, "nll_loss": 1.3671875, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.130859375, "rewards/margins": 0.0087890625, "rewards/rejected": -0.1396484375, "step": 180 }, { "epoch": 1.35, "grad_norm": 3.052674455238246, "learning_rate": 1.461462467495284e-07, "log_odds_chosen": 0.23468628525733948, "log_odds_ratio": -0.652539074420929, "logits/chosen": -2.46875, "logits/rejected": -2.484375, "logps/chosen": -1.2421875, "logps/rejected": -1.421875, "loss": 1.4109, "nll_loss": 1.3125, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.12451171875, "rewards/margins": 0.0179443359375, "rewards/rejected": -0.142578125, "step": 190 }, { "epoch": 1.42, "grad_norm": 3.212384657142265, "learning_rate": 1.1878032019132014e-07, "log_odds_chosen": 0.23659667372703552, "log_odds_ratio": -0.665332019329071, "logits/chosen": -2.40625, "logits/rejected": -2.453125, "logps/chosen": -1.234375, "logps/rejected": -1.40625, "loss": 1.3846, "nll_loss": 1.2890625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12353515625, "rewards/margins": 0.01708984375, "rewards/rejected": -0.140625, "step": 200 }, { "epoch": 1.42, "eval_log_odds_chosen": 0.1745758056640625, "eval_log_odds_ratio": -0.6793212890625, "eval_logits/chosen": -2.484375, "eval_logits/rejected": -2.46875, "eval_logps/chosen": -1.28125, "eval_logps/rejected": -1.3984375, "eval_loss": 1.3828542232513428, "eval_nll_loss": 1.3359375, "eval_rewards/accuracies": 0.5625, "eval_rewards/chosen": -0.1279296875, "eval_rewards/margins": 0.011474609375, "eval_rewards/rejected": -0.1396484375, "eval_runtime": 28.5986, "eval_samples_per_second": 26.225, "eval_steps_per_second": 1.119, "step": 200 }, { "epoch": 1.49, "grad_norm": 2.9802043706553674, "learning_rate": 9.34350830624677e-08, "log_odds_chosen": 0.18032225966453552, "log_odds_ratio": -0.6673828363418579, "logits/chosen": -2.46875, "logits/rejected": -2.53125, "logps/chosen": -1.3203125, "logps/rejected": -1.453125, "loss": 1.3959, "nll_loss": 1.328125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1318359375, "rewards/margins": 0.01361083984375, "rewards/rejected": -0.1455078125, "step": 210 }, { "epoch": 1.56, "grad_norm": 2.71778589765558, "learning_rate": 7.050083392813649e-08, "log_odds_chosen": 0.16171875596046448, "log_odds_ratio": -0.6756836175918579, "logits/chosen": -2.484375, "logits/rejected": -2.46875, "logps/chosen": -1.28125, "logps/rejected": -1.3984375, "loss": 1.3794, "nll_loss": 1.3203125, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1279296875, "rewards/margins": 0.01153564453125, "rewards/rejected": -0.1396484375, "step": 220 }, { "epoch": 1.63, "grad_norm": 2.662070784648976, "learning_rate": 5.033074385888189e-08, "log_odds_chosen": 0.08112792670726776, "log_odds_ratio": -0.7230468988418579, "logits/chosen": -2.515625, "logits/rejected": -2.453125, "logps/chosen": -1.3046875, "logps/rejected": -1.375, "loss": 1.393, "nll_loss": 1.328125, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.130859375, "rewards/margins": 0.00653076171875, "rewards/rejected": -0.13671875, "step": 230 }, { "epoch": 1.7, "grad_norm": 2.5341509494230845, "learning_rate": 3.323541784818898e-08, "log_odds_chosen": 0.10616455227136612, "log_odds_ratio": -0.710742175579071, "logits/chosen": -2.53125, "logits/rejected": -2.515625, "logps/chosen": -1.3046875, "logps/rejected": -1.390625, "loss": 1.434, "nll_loss": 1.3359375, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.130859375, "rewards/margins": 0.0086669921875, "rewards/rejected": -0.1396484375, "step": 240 }, { "epoch": 1.77, "grad_norm": 2.4611715244981904, "learning_rate": 1.9478111717223967e-08, "log_odds_chosen": 0.18498535454273224, "log_odds_ratio": -0.676074206829071, "logits/chosen": -2.59375, "logits/rejected": -2.609375, "logps/chosen": -1.2890625, "logps/rejected": -1.40625, "loss": 1.3887, "nll_loss": 1.3828125, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.12890625, "rewards/margins": 0.01171875, "rewards/rejected": -0.140625, "step": 250 }, { "epoch": 1.84, "grad_norm": 2.5379707430688163, "learning_rate": 9.270678163050217e-09, "log_odds_chosen": 0.14968261122703552, "log_odds_ratio": -0.6859375238418579, "logits/chosen": -2.546875, "logits/rejected": -2.5625, "logps/chosen": -1.3203125, "logps/rejected": -1.4296875, "loss": 1.4595, "nll_loss": 1.3671875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1318359375, "rewards/margins": 0.01068115234375, "rewards/rejected": -0.142578125, "step": 260 }, { "epoch": 1.91, "grad_norm": 2.818634582820743, "learning_rate": 2.7703043782735524e-09, "log_odds_chosen": 0.12449340522289276, "log_odds_ratio": -0.705078125, "logits/chosen": -2.5, "logits/rejected": -2.515625, "logps/chosen": -1.2734375, "logps/rejected": -1.359375, "loss": 1.3615, "nll_loss": 1.328125, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.126953125, "rewards/margins": 0.00897216796875, "rewards/rejected": -0.1357421875, "step": 270 }, { "epoch": 1.99, "grad_norm": 3.087114524720046, "learning_rate": 7.709148044679481e-11, "log_odds_chosen": 0.244537353515625, "log_odds_ratio": -0.6534179449081421, "logits/chosen": -2.5, "logits/rejected": -2.5, "logps/chosen": -1.28125, "logps/rejected": -1.484375, "loss": 1.4121, "nll_loss": 1.359375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.12890625, "rewards/margins": 0.01953125, "rewards/rejected": -0.1484375, "step": 280 }, { "epoch": 2.0, "step": 282, "total_flos": 0.0, "train_loss": 1.48347466719066, "train_runtime": 2049.7189, "train_samples_per_second": 6.586, "train_steps_per_second": 0.138 } ], "logging_steps": 10, "max_steps": 282, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }