{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 396, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 0.0012223966186866164, "learning_rate": 1.8750000000000002e-05, "logits/chosen": -22.664844512939453, "logits/rejected": -22.80691909790039, "logps/chosen": -81.01699829101562, "logps/rejected": -101.25294494628906, "loss": 0.2072, "rewards/accuracies": 0.8846153616905212, "rewards/chosen": 2.5555355548858643, "rewards/margins": 4.414959907531738, "rewards/rejected": -1.859424352645874, "step": 26 }, { "epoch": 0.39, "grad_norm": 1.8405307855573483e-05, "learning_rate": 2.9073033707865168e-05, "logits/chosen": -23.12621307373047, "logits/rejected": -23.24854278564453, "logps/chosen": -43.78964614868164, "logps/rejected": -156.3304901123047, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.3214497566223145, "rewards/margins": 13.642704010009766, "rewards/rejected": -7.321253776550293, "step": 52 }, { "epoch": 0.59, "grad_norm": 1.5625999367330223e-05, "learning_rate": 2.6882022471910113e-05, "logits/chosen": -23.210811614990234, "logits/rejected": -23.32987403869629, "logps/chosen": -41.96815490722656, "logps/rejected": -167.406982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.477440357208252, "rewards/margins": 14.927824020385742, "rewards/rejected": -8.450382232666016, "step": 78 }, { "epoch": 0.79, "grad_norm": 1.5885076209087856e-05, "learning_rate": 2.4691011235955056e-05, "logits/chosen": -23.275333404541016, "logits/rejected": -23.39052391052246, "logps/chosen": -41.763607025146484, "logps/rejected": -169.25025939941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.4945831298828125, "rewards/margins": 15.126973152160645, "rewards/rejected": -8.632390975952148, "step": 104 }, { "epoch": 0.98, "grad_norm": 0.00013870897237211466, "learning_rate": 2.25e-05, "logits/chosen": -23.342487335205078, "logits/rejected": -23.45945167541504, "logps/chosen": -41.83483123779297, "logps/rejected": -169.22845458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.479532718658447, "rewards/margins": 15.119216918945312, "rewards/rejected": -8.63968276977539, "step": 130 }, { "epoch": 1.18, "grad_norm": 1.5738529327791184e-05, "learning_rate": 2.0308988764044947e-05, "logits/chosen": -23.253267288208008, "logits/rejected": -23.370222091674805, "logps/chosen": -41.68398666381836, "logps/rejected": -169.04989624023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.4891557693481445, "rewards/margins": 15.119135856628418, "rewards/rejected": -8.629980087280273, "step": 156 }, { "epoch": 1.38, "grad_norm": 1.4681028005725238e-05, "learning_rate": 1.8117977528089886e-05, "logits/chosen": -23.281639099121094, "logits/rejected": -23.397907257080078, "logps/chosen": -41.55263137817383, "logps/rejected": -170.5806427001953, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.5376057624816895, "rewards/margins": 15.297250747680664, "rewards/rejected": -8.759647369384766, "step": 182 }, { "epoch": 1.58, "grad_norm": 1.823231104935985e-05, "learning_rate": 1.5926966292134832e-05, "logits/chosen": -23.312273025512695, "logits/rejected": -23.43006706237793, "logps/chosen": -42.09364318847656, "logps/rejected": -168.72459411621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.430633544921875, "rewards/margins": 15.028059959411621, "rewards/rejected": -8.597426414489746, "step": 208 }, { "epoch": 1.77, "grad_norm": 1.3677333299710881e-05, "learning_rate": 1.3735955056179776e-05, "logits/chosen": -23.281251907348633, "logits/rejected": -23.39859390258789, "logps/chosen": -41.60188293457031, "logps/rejected": -169.98341369628906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.554170608520508, "rewards/margins": 15.241110801696777, "rewards/rejected": -8.686941146850586, "step": 234 }, { "epoch": 1.97, "grad_norm": 1.329195401922334e-05, "learning_rate": 1.154494382022472e-05, "logits/chosen": -23.316593170166016, "logits/rejected": -23.435791015625, "logps/chosen": -41.78284454345703, "logps/rejected": -169.241943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.4690632820129395, "rewards/margins": 15.107036590576172, "rewards/rejected": -8.63797378540039, "step": 260 }, { "epoch": 2.17, "grad_norm": 1.4401819498743862e-05, "learning_rate": 9.353932584269662e-06, "logits/chosen": -23.297456741333008, "logits/rejected": -23.411481857299805, "logps/chosen": -41.401039123535156, "logps/rejected": -169.64952087402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.529332160949707, "rewards/margins": 15.224197387695312, "rewards/rejected": -8.694866180419922, "step": 286 }, { "epoch": 2.36, "grad_norm": 1.4643008398707025e-05, "learning_rate": 7.162921348314607e-06, "logits/chosen": -23.277320861816406, "logits/rejected": -23.393136978149414, "logps/chosen": -41.65689468383789, "logps/rejected": -171.18663024902344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.522489070892334, "rewards/margins": 15.33745002746582, "rewards/rejected": -8.814961433410645, "step": 312 }, { "epoch": 2.56, "grad_norm": 0.00012409774353727698, "learning_rate": 4.97191011235955e-06, "logits/chosen": -23.303037643432617, "logits/rejected": -23.417268753051758, "logps/chosen": -41.39340591430664, "logps/rejected": -170.94407653808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.569211483001709, "rewards/margins": 15.359162330627441, "rewards/rejected": -8.789949417114258, "step": 338 }, { "epoch": 2.76, "grad_norm": 1.4201951671566349e-05, "learning_rate": 2.7808988764044947e-06, "logits/chosen": -23.281291961669922, "logits/rejected": -23.399757385253906, "logps/chosen": -41.88224792480469, "logps/rejected": -169.70294189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.4672112464904785, "rewards/margins": 15.137907028198242, "rewards/rejected": -8.670695304870605, "step": 364 }, { "epoch": 2.95, "grad_norm": 1.3474539628077764e-05, "learning_rate": 5.898876404494382e-07, "logits/chosen": -23.33159065246582, "logits/rejected": -23.45261573791504, "logps/chosen": -41.745670318603516, "logps/rejected": -169.70147705078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/chosen": 6.496582984924316, "rewards/margins": 15.183600425720215, "rewards/rejected": -8.687018394470215, "step": 390 } ], "logging_steps": 26, "max_steps": 396, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }