{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998324958123953, "eval_steps": 100, "global_step": 149, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.3333333333333335e-07, "logits/chosen": -2.491262435913086, "logits/rejected": -2.5593011379241943, "logps/chosen": -151.13595581054688, "logps/rejected": -176.25180053710938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -2.657362699508667, "logits/rejected": -2.6224162578582764, "logps/chosen": -177.82272338867188, "logps/rejected": -179.98106384277344, "loss": 0.6925, "rewards/accuracies": 0.4930555522441864, "rewards/chosen": -0.013637593947350979, "rewards/margins": 0.004961313679814339, "rewards/rejected": -0.018598908558487892, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.982842942906386e-06, "logits/chosen": -2.702573299407959, "logits/rejected": -2.6558778285980225, "logps/chosen": -193.16390991210938, "logps/rejected": -188.91493225097656, "loss": 0.694, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.10607340186834335, "rewards/margins": 0.0018767903093248606, "rewards/rejected": -0.10795019567012787, "step": 20 }, { "epoch": 0.2, "learning_rate": 4.846996204000967e-06, "logits/chosen": -2.6284031867980957, "logits/rejected": -2.585899829864502, "logps/chosen": -186.4259490966797, "logps/rejected": -187.31175231933594, "loss": 0.6914, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.0701591819524765, "rewards/margins": 0.008776131086051464, "rewards/rejected": -0.07893531024456024, "step": 30 }, { "epoch": 0.27, "learning_rate": 4.582735470385229e-06, "logits/chosen": -2.6113476753234863, "logits/rejected": -2.6379783153533936, "logps/chosen": -169.52566528320312, "logps/rejected": -175.36927795410156, "loss": 0.6898, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.0019457591697573662, "rewards/margins": 0.011439744383096695, "rewards/rejected": -0.009493985213339329, "step": 40 }, { "epoch": 0.34, "learning_rate": 4.204519553876095e-06, "logits/chosen": -2.6095616817474365, "logits/rejected": -2.5940325260162354, "logps/chosen": -160.98004150390625, "logps/rejected": -164.4163360595703, "loss": 0.6926, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.015476897358894348, "rewards/margins": 0.006927810609340668, "rewards/rejected": 0.008549087680876255, "step": 50 }, { "epoch": 0.4, "learning_rate": 3.7330422317447686e-06, "logits/chosen": -2.662424087524414, "logits/rejected": -2.6424801349639893, "logps/chosen": -180.03500366210938, "logps/rejected": -174.4924774169922, "loss": 0.6925, "rewards/accuracies": 0.515625, "rewards/chosen": 0.0384332574903965, "rewards/margins": 0.005103477276861668, "rewards/rejected": 0.03332977741956711, "step": 60 }, { "epoch": 0.47, "learning_rate": 3.1941000034687516e-06, "logits/chosen": -2.6400465965270996, "logits/rejected": -2.6263554096221924, "logps/chosen": -167.74423217773438, "logps/rejected": -171.72691345214844, "loss": 0.6919, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.05802411586046219, "rewards/margins": 0.008442241698503494, "rewards/rejected": -0.06646636128425598, "step": 70 }, { "epoch": 0.54, "learning_rate": 2.6171806561748503e-06, "logits/chosen": -2.6121268272399902, "logits/rejected": -2.5906381607055664, "logps/chosen": -180.24801635742188, "logps/rejected": -179.08216857910156, "loss": 0.6883, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -0.025833597406744957, "rewards/margins": 0.0072801136411726475, "rewards/rejected": -0.03311371058225632, "step": 80 }, { "epoch": 0.6, "learning_rate": 2.0338498642707977e-06, "logits/chosen": -2.663304090499878, "logits/rejected": -2.61620831489563, "logps/chosen": -175.11361694335938, "logps/rejected": -171.87161254882812, "loss": 0.6862, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03003253974020481, "rewards/margins": 0.019972536712884903, "rewards/rejected": -0.05000507831573486, "step": 90 }, { "epoch": 0.67, "learning_rate": 1.4760240991587338e-06, "logits/chosen": -2.612849712371826, "logits/rejected": -2.590980052947998, "logps/chosen": -158.46450805664062, "logps/rejected": -165.57606506347656, "loss": 0.686, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.00229537021368742, "rewards/margins": 0.028590286150574684, "rewards/rejected": -0.03088565543293953, "step": 100 }, { "epoch": 0.67, "eval_logits/chosen": -2.5771172046661377, "eval_logits/rejected": -2.4848170280456543, "eval_logps/chosen": -307.7010192871094, "eval_logps/rejected": -301.19329833984375, "eval_loss": 0.683651864528656, "eval_rewards/accuracies": 0.5720000267028809, "eval_rewards/chosen": -0.09298302233219147, "eval_rewards/margins": 0.020600860938429832, "eval_rewards/rejected": -0.11358388513326645, "eval_runtime": 382.0082, "eval_samples_per_second": 5.235, "eval_steps_per_second": 0.654, "step": 100 }, { "epoch": 0.74, "learning_rate": 9.742243453755202e-07, "logits/chosen": -2.6324477195739746, "logits/rejected": -2.6193971633911133, "logps/chosen": -169.78958129882812, "logps/rejected": -178.68710327148438, "loss": 0.69, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -0.028186390176415443, "rewards/margins": 0.006812813691794872, "rewards/rejected": -0.03499920293688774, "step": 110 }, { "epoch": 0.8, "learning_rate": 5.559061696656199e-07, "logits/chosen": -2.6105129718780518, "logits/rejected": -2.5832369327545166, "logps/chosen": -177.0843505859375, "logps/rejected": -179.75782775878906, "loss": 0.6852, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.021660596132278442, "rewards/margins": 0.016608651727437973, "rewards/rejected": -0.038269251585006714, "step": 120 }, { "epoch": 0.87, "learning_rate": 2.4395751190352924e-07, "logits/chosen": -2.590940475463867, "logits/rejected": -2.5727646350860596, "logps/chosen": -174.79104614257812, "logps/rejected": -179.25521850585938, "loss": 0.687, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.03851151093840599, "rewards/margins": 0.011401178315281868, "rewards/rejected": -0.04991268739104271, "step": 130 }, { "epoch": 0.94, "learning_rate": 5.544639001763719e-08, "logits/chosen": -2.6073455810546875, "logits/rejected": -2.6085193157196045, "logps/chosen": -189.36952209472656, "logps/rejected": -190.8089141845703, "loss": 0.688, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.04472886398434639, "rewards/margins": 0.012851757928729057, "rewards/rejected": -0.0575806125998497, "step": 140 }, { "epoch": 1.0, "step": 149, "total_flos": 0.0, "train_loss": 0.6894321513656002, "train_runtime": 6902.8391, "train_samples_per_second": 2.767, "train_steps_per_second": 0.022 } ], "logging_steps": 10, "max_steps": 149, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }