{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9989289539450197, "eval_steps": 500, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 4.0625, "learning_rate": 4.997501873438867e-06, "log_odds_chosen": 0.025170251727104187, "log_odds_ratio": -0.7360378503799438, "logits/chosen": -3.0069003105163574, "logits/rejected": -2.9992847442626953, "logps/chosen": -0.7825920581817627, "logps/rejected": -0.7986791729927063, "loss": 1.035, "nll_loss": 1.0144612789154053, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.0391295962035656, "rewards/margins": 0.0008043603738769889, "rewards/rejected": -0.039933960884809494, "step": 10 }, { "epoch": 0.01, "grad_norm": 3.765625, "learning_rate": 4.995007487521836e-06, "log_odds_chosen": 0.10049402713775635, "log_odds_ratio": -0.7046917676925659, "logits/chosen": -3.0911076068878174, "logits/rejected": -3.093308210372925, "logps/chosen": -0.6713354587554932, "logps/rejected": -0.7269105315208435, "loss": 0.4986, "nll_loss": 0.4546101689338684, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03356677293777466, "rewards/margins": 0.002778755035251379, "rewards/rejected": -0.03634553402662277, "step": 20 }, { "epoch": 0.02, "grad_norm": 5.03125, "learning_rate": 4.992516832922945e-06, "log_odds_chosen": 0.02262038365006447, "log_odds_ratio": -0.7437382936477661, "logits/chosen": -3.0949816703796387, "logits/rejected": -3.078895092010498, "logps/chosen": -0.7184325456619263, "logps/rejected": -0.741249680519104, "loss": 0.4936, "nll_loss": 0.4557243883609772, "rewards/accuracies": 0.5, "rewards/chosen": -0.03592162951827049, "rewards/margins": 0.001140856184065342, "rewards/rejected": -0.03706248849630356, "step": 30 }, { "epoch": 0.03, "grad_norm": 3.625, "learning_rate": 4.990029900348746e-06, "log_odds_chosen": 0.07682378590106964, "log_odds_ratio": -0.7152809500694275, "logits/chosen": -3.058004856109619, "logits/rejected": -3.050332546234131, "logps/chosen": -0.7058600783348083, "logps/rejected": -0.7654516696929932, "loss": 0.4828, "nll_loss": 0.4693741798400879, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03529300540685654, "rewards/margins": 0.0029795782174915075, "rewards/rejected": -0.03827258199453354, "step": 40 }, { "epoch": 0.04, "grad_norm": 3.734375, "learning_rate": 4.987546680538165e-06, "log_odds_chosen": 0.0912284404039383, "log_odds_ratio": -0.7011866569519043, "logits/chosen": -3.018758773803711, "logits/rejected": -3.0226595401763916, "logps/chosen": -0.7013871669769287, "logps/rejected": -0.7628859281539917, "loss": 0.4928, "nll_loss": 0.4483000636100769, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.035069357603788376, "rewards/margins": 0.0030749361030757427, "rewards/rejected": -0.038144297897815704, "step": 50 }, { "epoch": 0.04, "grad_norm": 4.375, "learning_rate": 4.985067164262359e-06, "log_odds_chosen": 0.0760330930352211, "log_odds_ratio": -0.7070280909538269, "logits/chosen": -3.072317123413086, "logits/rejected": -3.063037633895874, "logps/chosen": -0.6806871294975281, "logps/rejected": -0.7209664583206177, "loss": 0.4825, "nll_loss": 0.44828397035598755, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0340343602001667, "rewards/margins": 0.0020139652770012617, "rewards/rejected": -0.03604833036661148, "step": 60 }, { "epoch": 0.05, "grad_norm": 3.4375, "learning_rate": 4.98259134232457e-06, "log_odds_chosen": 0.09440124034881592, "log_odds_ratio": -0.6967779397964478, "logits/chosen": -3.0573623180389404, "logits/rejected": -3.0700790882110596, "logps/chosen": -0.6866449117660522, "logps/rejected": -0.734825611114502, "loss": 0.4992, "nll_loss": 0.45950204133987427, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.03433224931359291, "rewards/margins": 0.0024090311489999294, "rewards/rejected": -0.03674127906560898, "step": 70 }, { "epoch": 0.06, "grad_norm": 3.6875, "learning_rate": 4.980119205559974e-06, "log_odds_chosen": 0.08085120469331741, "log_odds_ratio": -0.7097643613815308, "logits/chosen": -3.059769630432129, "logits/rejected": -3.051427125930786, "logps/chosen": -0.709705650806427, "logps/rejected": -0.7418494820594788, "loss": 0.4921, "nll_loss": 0.4506589472293854, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03548528254032135, "rewards/margins": 0.001607193029485643, "rewards/rejected": -0.03709247335791588, "step": 80 }, { "epoch": 0.06, "grad_norm": 4.25, "learning_rate": 4.977650744835555e-06, "log_odds_chosen": 0.0729355439543724, "log_odds_ratio": -0.7067269086837769, "logits/chosen": -3.0575215816497803, "logits/rejected": -3.0501961708068848, "logps/chosen": -0.6798839569091797, "logps/rejected": -0.723492443561554, "loss": 0.4612, "nll_loss": 0.4302699565887451, "rewards/accuracies": 0.5, "rewards/chosen": -0.033994197845458984, "rewards/margins": 0.0021804238203912973, "rewards/rejected": -0.03617462143301964, "step": 90 }, { "epoch": 0.07, "grad_norm": 3.140625, "learning_rate": 4.975185951049947e-06, "log_odds_chosen": 0.060979098081588745, "log_odds_ratio": -0.7311732172966003, "logits/chosen": -3.0919365882873535, "logits/rejected": -3.0861101150512695, "logps/chosen": -0.7287726402282715, "logps/rejected": -0.7664063572883606, "loss": 0.4375, "nll_loss": 0.44655561447143555, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.036438629031181335, "rewards/margins": 0.0018816888332366943, "rewards/rejected": -0.03832032158970833, "step": 100 }, { "epoch": 0.08, "grad_norm": 4.375, "learning_rate": 4.972724815133302e-06, "log_odds_chosen": 0.09216396510601044, "log_odds_ratio": -0.7083900570869446, "logits/chosen": -3.0463244915008545, "logits/rejected": -3.0574841499328613, "logps/chosen": -0.6721808314323425, "logps/rejected": -0.7125221490859985, "loss": 0.4205, "nll_loss": 0.4226047992706299, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.033609043806791306, "rewards/margins": 0.002017062855884433, "rewards/rejected": -0.03562610596418381, "step": 110 }, { "epoch": 0.09, "grad_norm": 3.890625, "learning_rate": 4.970267328047151e-06, "log_odds_chosen": 0.11277220398187637, "log_odds_ratio": -0.7001327276229858, "logits/chosen": -3.0501303672790527, "logits/rejected": -3.0421934127807617, "logps/chosen": -0.6840373277664185, "logps/rejected": -0.7523829936981201, "loss": 0.4378, "nll_loss": 0.39421382546424866, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03420186787843704, "rewards/margins": 0.003417283995077014, "rewards/rejected": -0.037619151175022125, "step": 120 }, { "epoch": 0.09, "grad_norm": 4.0625, "learning_rate": 4.9678134807842575e-06, "log_odds_chosen": 0.07883547991514206, "log_odds_ratio": -0.7197215557098389, "logits/chosen": -3.0699033737182617, "logits/rejected": -3.079275608062744, "logps/chosen": -0.7041226625442505, "logps/rejected": -0.7548140287399292, "loss": 0.4516, "nll_loss": 0.40113186836242676, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.035206131637096405, "rewards/margins": 0.0025345697067677975, "rewards/rejected": -0.03774070367217064, "step": 130 }, { "epoch": 0.1, "grad_norm": 4.25, "learning_rate": 4.965363264368484e-06, "log_odds_chosen": 0.09876411408185959, "log_odds_ratio": -0.6897016167640686, "logits/chosen": -3.053811550140381, "logits/rejected": -3.044391632080078, "logps/chosen": -0.6403803825378418, "logps/rejected": -0.69780433177948, "loss": 0.4603, "nll_loss": 0.4174041748046875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03201901912689209, "rewards/margins": 0.0028712009079754353, "rewards/rejected": -0.03489021584391594, "step": 140 }, { "epoch": 0.11, "grad_norm": 4.4375, "learning_rate": 4.962916669854652e-06, "log_odds_chosen": 0.09207304567098618, "log_odds_ratio": -0.7143452167510986, "logits/chosen": -3.0426297187805176, "logits/rejected": -3.047588348388672, "logps/chosen": -0.6891868114471436, "logps/rejected": -0.7397756576538086, "loss": 0.4813, "nll_loss": 0.42559295892715454, "rewards/accuracies": 0.5625, "rewards/chosen": -0.03445933759212494, "rewards/margins": 0.0025294471997767687, "rewards/rejected": -0.03698878362774849, "step": 150 }, { "epoch": 0.11, "grad_norm": 3.765625, "learning_rate": 4.960473688328407e-06, "log_odds_chosen": -0.003090596292167902, "log_odds_ratio": -0.7442878484725952, "logits/chosen": -3.0109572410583496, "logits/rejected": -3.0156314373016357, "logps/chosen": -0.6730798482894897, "logps/rejected": -0.6759648323059082, "loss": 0.451, "nll_loss": 0.4037221074104309, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.03365398943424225, "rewards/margins": 0.00014425367407966405, "rewards/rejected": -0.03379824012517929, "step": 160 }, { "epoch": 0.12, "grad_norm": 3.53125, "learning_rate": 4.95803431090608e-06, "log_odds_chosen": 0.14792785048484802, "log_odds_ratio": -0.6736660003662109, "logits/chosen": -3.082308530807495, "logits/rejected": -3.0970399379730225, "logps/chosen": -0.6571928262710571, "logps/rejected": -0.7308694124221802, "loss": 0.4832, "nll_loss": 0.446014404296875, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03285963833332062, "rewards/margins": 0.0036838273517787457, "rewards/rejected": -0.03654346987605095, "step": 170 }, { "epoch": 0.13, "grad_norm": 3.90625, "learning_rate": 4.955598528734554e-06, "log_odds_chosen": 0.14215320348739624, "log_odds_ratio": -0.6766169667243958, "logits/chosen": -3.07114577293396, "logits/rejected": -3.0830941200256348, "logps/chosen": -0.6559098362922668, "logps/rejected": -0.7295840382575989, "loss": 0.4597, "nll_loss": 0.4427860677242279, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.0327954925596714, "rewards/margins": 0.003683712799102068, "rewards/rejected": -0.036479201167821884, "step": 180 }, { "epoch": 0.14, "grad_norm": 3.78125, "learning_rate": 4.953166332991125e-06, "log_odds_chosen": 0.10648401081562042, "log_odds_ratio": -0.6966953873634338, "logits/chosen": -3.0742716789245605, "logits/rejected": -3.0770392417907715, "logps/chosen": -0.6506658792495728, "logps/rejected": -0.7110509872436523, "loss": 0.4531, "nll_loss": 0.4198247790336609, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03253329545259476, "rewards/margins": 0.0030192541889846325, "rewards/rejected": -0.03555255010724068, "step": 190 }, { "epoch": 0.14, "grad_norm": 3.9375, "learning_rate": 4.950737714883372e-06, "log_odds_chosen": 0.15272760391235352, "log_odds_ratio": -0.6655218005180359, "logits/chosen": -3.05012845993042, "logits/rejected": -3.059617280960083, "logps/chosen": -0.642218291759491, "logps/rejected": -0.732742428779602, "loss": 0.4325, "nll_loss": 0.38566985726356506, "rewards/accuracies": 0.59375, "rewards/chosen": -0.03211091831326485, "rewards/margins": 0.004526205360889435, "rewards/rejected": -0.03663711994886398, "step": 200 }, { "epoch": 0.15, "grad_norm": 4.1875, "learning_rate": 4.948312665649022e-06, "log_odds_chosen": -0.027218470349907875, "log_odds_ratio": -0.7473064661026001, "logits/chosen": -3.046825408935547, "logits/rejected": -3.0432162284851074, "logps/chosen": -0.7018707990646362, "logps/rejected": -0.6928382515907288, "loss": 0.4554, "nll_loss": 0.40666255354881287, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.03509353846311569, "rewards/margins": -0.00045162736205384135, "rewards/rejected": -0.03464191406965256, "step": 210 }, { "epoch": 0.16, "grad_norm": 3.1875, "learning_rate": 4.945891176555817e-06, "log_odds_chosen": 0.07610191404819489, "log_odds_ratio": -0.7075673937797546, "logits/chosen": -3.0963082313537598, "logits/rejected": -3.0836503505706787, "logps/chosen": -0.6464287638664246, "logps/rejected": -0.6858932971954346, "loss": 0.45, "nll_loss": 0.44592347741127014, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.03232143819332123, "rewards/margins": 0.001973227132111788, "rewards/rejected": -0.034294672310352325, "step": 220 }, { "epoch": 0.16, "grad_norm": 4.75, "learning_rate": 4.943473238901383e-06, "log_odds_chosen": 0.13835494220256805, "log_odds_ratio": -0.6842303276062012, "logits/chosen": -3.0742976665496826, "logits/rejected": -3.0593740940093994, "logps/chosen": -0.6491039991378784, "logps/rejected": -0.7158457636833191, "loss": 0.4757, "nll_loss": 0.4227268695831299, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.0324552021920681, "rewards/margins": 0.003337086644023657, "rewards/rejected": -0.035792287439107895, "step": 230 }, { "epoch": 0.17, "grad_norm": 4.4375, "learning_rate": 4.941058844013094e-06, "log_odds_chosen": 0.13980118930339813, "log_odds_ratio": -0.6755300164222717, "logits/chosen": -3.0632362365722656, "logits/rejected": -3.072343587875366, "logps/chosen": -0.6666185855865479, "logps/rejected": -0.7350472211837769, "loss": 0.5006, "nll_loss": 0.4720947742462158, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03333092853426933, "rewards/margins": 0.003421428380534053, "rewards/rejected": -0.0367523618042469, "step": 240 }, { "epoch": 0.18, "grad_norm": 4.96875, "learning_rate": 4.938647983247949e-06, "log_odds_chosen": 0.09538677334785461, "log_odds_ratio": -0.7015306353569031, "logits/chosen": -3.0158989429473877, "logits/rejected": -3.0254528522491455, "logps/chosen": -0.6664192080497742, "logps/rejected": -0.7085865139961243, "loss": 0.4741, "nll_loss": 0.43773213028907776, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03332095965743065, "rewards/margins": 0.0021083655301481485, "rewards/rejected": -0.035429324954748154, "step": 250 }, { "epoch": 0.19, "grad_norm": 3.875, "learning_rate": 4.936240647992436e-06, "log_odds_chosen": 0.11822772026062012, "log_odds_ratio": -0.6918594837188721, "logits/chosen": -3.091538906097412, "logits/rejected": -3.0833094120025635, "logps/chosen": -0.6330246925354004, "logps/rejected": -0.7016401290893555, "loss": 0.4651, "nll_loss": 0.43838900327682495, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03165123611688614, "rewards/margins": 0.0034307721070945263, "rewards/rejected": -0.035082004964351654, "step": 260 }, { "epoch": 0.19, "grad_norm": 4.34375, "learning_rate": 4.933836829662409e-06, "log_odds_chosen": 0.11577316373586655, "log_odds_ratio": -0.6955476999282837, "logits/chosen": -3.0408403873443604, "logits/rejected": -3.0357627868652344, "logps/chosen": -0.6436070203781128, "logps/rejected": -0.694503903388977, "loss": 0.4654, "nll_loss": 0.413259357213974, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03218035027384758, "rewards/margins": 0.0025448468513786793, "rewards/rejected": -0.03472520038485527, "step": 270 }, { "epoch": 0.2, "grad_norm": 3.984375, "learning_rate": 4.9314365197029475e-06, "log_odds_chosen": 0.1174582988023758, "log_odds_ratio": -0.686394453048706, "logits/chosen": -3.0753908157348633, "logits/rejected": -3.0675904750823975, "logps/chosen": -0.6645399332046509, "logps/rejected": -0.7241753339767456, "loss": 0.4623, "nll_loss": 0.43675488233566284, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.033226996660232544, "rewards/margins": 0.002981774974614382, "rewards/rejected": -0.03620877116918564, "step": 280 }, { "epoch": 0.21, "grad_norm": 4.03125, "learning_rate": 4.9290397095882446e-06, "log_odds_chosen": 0.0028309740591794252, "log_odds_ratio": -0.7518080472946167, "logits/chosen": -3.0335021018981934, "logits/rejected": -3.0376157760620117, "logps/chosen": -0.6744915843009949, "logps/rejected": -0.6811094284057617, "loss": 0.462, "nll_loss": 0.4603661000728607, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.0337245799601078, "rewards/margins": 0.0003308878222014755, "rewards/rejected": -0.03405546769499779, "step": 290 }, { "epoch": 0.21, "grad_norm": 4.09375, "learning_rate": 4.9266463908214664e-06, "log_odds_chosen": 0.15336385369300842, "log_odds_ratio": -0.6764160394668579, "logits/chosen": -3.054356575012207, "logits/rejected": -3.04469895362854, "logps/chosen": -0.6236392259597778, "logps/rejected": -0.7085258960723877, "loss": 0.4276, "nll_loss": 0.382522314786911, "rewards/accuracies": 0.5625, "rewards/chosen": -0.03118196502327919, "rewards/margins": 0.0042443363927304745, "rewards/rejected": -0.035426296293735504, "step": 300 }, { "epoch": 0.22, "grad_norm": 3.0625, "learning_rate": 4.924256554934632e-06, "log_odds_chosen": 0.09482895582914352, "log_odds_ratio": -0.6974529027938843, "logits/chosen": -3.0809385776519775, "logits/rejected": -3.0694050788879395, "logps/chosen": -0.6786897778511047, "logps/rejected": -0.7294089198112488, "loss": 0.4291, "nll_loss": 0.3742545247077942, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03393448889255524, "rewards/margins": 0.0025359555147588253, "rewards/rejected": -0.0364704467356205, "step": 310 }, { "epoch": 0.23, "grad_norm": 3.96875, "learning_rate": 4.9218701934884865e-06, "log_odds_chosen": 0.11670760810375214, "log_odds_ratio": -0.6872875094413757, "logits/chosen": -3.0846877098083496, "logits/rejected": -3.0740857124328613, "logps/chosen": -0.6775652170181274, "logps/rejected": -0.7535160779953003, "loss": 0.457, "nll_loss": 0.4142323136329651, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03387826308608055, "rewards/margins": 0.003797541605308652, "rewards/rejected": -0.037675801664590836, "step": 320 }, { "epoch": 0.24, "grad_norm": 4.875, "learning_rate": 4.919487298072377e-06, "log_odds_chosen": 0.14943461120128632, "log_odds_ratio": -0.6863323450088501, "logits/chosen": -3.072746515274048, "logits/rejected": -3.079110622406006, "logps/chosen": -0.6483927965164185, "logps/rejected": -0.731627345085144, "loss": 0.4325, "nll_loss": 0.4230882525444031, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.032419633120298386, "rewards/margins": 0.00416173180565238, "rewards/rejected": -0.0365813709795475, "step": 330 }, { "epoch": 0.24, "grad_norm": 4.71875, "learning_rate": 4.917107860304125e-06, "log_odds_chosen": 0.12119798362255096, "log_odds_ratio": -0.6793508529663086, "logits/chosen": -3.0940206050872803, "logits/rejected": -3.085106372833252, "logps/chosen": -0.6430903673171997, "logps/rejected": -0.7014886736869812, "loss": 0.4435, "nll_loss": 0.39326339960098267, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.032154522836208344, "rewards/margins": 0.002919913502410054, "rewards/rejected": -0.03507443517446518, "step": 340 }, { "epoch": 0.25, "grad_norm": 4.4375, "learning_rate": 4.914731871829905e-06, "log_odds_chosen": 0.07167927175760269, "log_odds_ratio": -0.7104091644287109, "logits/chosen": -3.1065337657928467, "logits/rejected": -3.1125102043151855, "logps/chosen": -0.6676374673843384, "logps/rejected": -0.7051427960395813, "loss": 0.4474, "nll_loss": 0.3768962323665619, "rewards/accuracies": 0.53125, "rewards/chosen": -0.0333818756043911, "rewards/margins": 0.0018752632895484567, "rewards/rejected": -0.035257138311862946, "step": 350 }, { "epoch": 0.26, "grad_norm": 4.15625, "learning_rate": 4.912359324324121e-06, "log_odds_chosen": 0.1598653495311737, "log_odds_ratio": -0.6569629311561584, "logits/chosen": -3.0918948650360107, "logits/rejected": -3.095064163208008, "logps/chosen": -0.6417919397354126, "logps/rejected": -0.716173529624939, "loss": 0.4582, "nll_loss": 0.43566614389419556, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03208959847688675, "rewards/margins": 0.0037190779112279415, "rewards/rejected": -0.03580867126584053, "step": 360 }, { "epoch": 0.26, "grad_norm": 4.28125, "learning_rate": 4.909990209489284e-06, "log_odds_chosen": 0.07765550911426544, "log_odds_ratio": -0.7101280093193054, "logits/chosen": -3.0903449058532715, "logits/rejected": -3.0981061458587646, "logps/chosen": -0.6579892635345459, "logps/rejected": -0.706508994102478, "loss": 0.4318, "nll_loss": 0.3671954274177551, "rewards/accuracies": 0.59375, "rewards/chosen": -0.032899461686611176, "rewards/margins": 0.0024259877391159534, "rewards/rejected": -0.03532545268535614, "step": 370 }, { "epoch": 0.27, "grad_norm": 3.09375, "learning_rate": 4.907624519055888e-06, "log_odds_chosen": 0.04517578333616257, "log_odds_ratio": -0.7237863540649414, "logits/chosen": -3.0686240196228027, "logits/rejected": -3.0702924728393555, "logps/chosen": -0.7173329591751099, "logps/rejected": -0.7416359186172485, "loss": 0.456, "nll_loss": 0.4173048138618469, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.03586665168404579, "rewards/margins": 0.0012151498813182116, "rewards/rejected": -0.037081796675920486, "step": 380 }, { "epoch": 0.28, "grad_norm": 4.15625, "learning_rate": 4.905262244782294e-06, "log_odds_chosen": 0.08456889539957047, "log_odds_ratio": -0.6937842965126038, "logits/chosen": -3.0970964431762695, "logits/rejected": -3.080461025238037, "logps/chosen": -0.6729081869125366, "logps/rejected": -0.7218735218048096, "loss": 0.4449, "nll_loss": 0.3938611149787903, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.03364541009068489, "rewards/margins": 0.0024482656735926867, "rewards/rejected": -0.03609367460012436, "step": 390 }, { "epoch": 0.29, "grad_norm": 3.484375, "learning_rate": 4.902903378454601e-06, "log_odds_chosen": 0.037387169897556305, "log_odds_ratio": -0.7369678616523743, "logits/chosen": -3.039968967437744, "logits/rejected": -3.0381274223327637, "logps/chosen": -0.652083694934845, "logps/rejected": -0.6617721319198608, "loss": 0.4187, "nll_loss": 0.37590348720550537, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.03260418400168419, "rewards/margins": 0.0004844216746278107, "rewards/rejected": -0.03308860585093498, "step": 400 }, { "epoch": 0.29, "grad_norm": 3.234375, "learning_rate": 4.900547911886537e-06, "log_odds_chosen": 0.1538991630077362, "log_odds_ratio": -0.6789106726646423, "logits/chosen": -3.056162118911743, "logits/rejected": -3.066361904144287, "logps/chosen": -0.6691062450408936, "logps/rejected": -0.7363560795783997, "loss": 0.4431, "nll_loss": 0.41332411766052246, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03345531225204468, "rewards/margins": 0.003362491726875305, "rewards/rejected": -0.03681780770421028, "step": 410 }, { "epoch": 0.3, "grad_norm": 3.015625, "learning_rate": 4.898195836919327e-06, "log_odds_chosen": 0.012691575102508068, "log_odds_ratio": -0.736660897731781, "logits/chosen": -3.0776524543762207, "logits/rejected": -3.0749094486236572, "logps/chosen": -0.6716221570968628, "logps/rejected": -0.6983057856559753, "loss": 0.4395, "nll_loss": 0.4083561301231384, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.03358110040426254, "rewards/margins": 0.0013341851299628615, "rewards/rejected": -0.034915290772914886, "step": 420 }, { "epoch": 0.31, "grad_norm": 3.640625, "learning_rate": 4.895847145421587e-06, "log_odds_chosen": 0.17691221833229065, "log_odds_ratio": -0.6581443548202515, "logits/chosen": -3.0717618465423584, "logits/rejected": -3.062566041946411, "logps/chosen": -0.6120357513427734, "logps/rejected": -0.6999993324279785, "loss": 0.409, "nll_loss": 0.37641653418540955, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03060179017484188, "rewards/margins": 0.004398178309202194, "rewards/rejected": -0.034999970346689224, "step": 430 }, { "epoch": 0.31, "grad_norm": 3.765625, "learning_rate": 4.893501829289195e-06, "log_odds_chosen": 0.0902964323759079, "log_odds_ratio": -0.7079063653945923, "logits/chosen": -3.07722806930542, "logits/rejected": -3.061527967453003, "logps/chosen": -0.6769050359725952, "logps/rejected": -0.7332392930984497, "loss": 0.4493, "nll_loss": 0.4267081618309021, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03384525328874588, "rewards/margins": 0.0028167106211185455, "rewards/rejected": -0.036661963909864426, "step": 440 }, { "epoch": 0.32, "grad_norm": 3.6875, "learning_rate": 4.891159880445185e-06, "log_odds_chosen": -0.02371296100318432, "log_odds_ratio": -0.7615897059440613, "logits/chosen": -3.1056759357452393, "logits/rejected": -3.1098315715789795, "logps/chosen": -0.6948748826980591, "logps/rejected": -0.6747169494628906, "loss": 0.4496, "nll_loss": 0.39099669456481934, "rewards/accuracies": 0.4375, "rewards/chosen": -0.034743744879961014, "rewards/margins": -0.0010078941704705358, "rewards/rejected": -0.03373584896326065, "step": 450 }, { "epoch": 0.33, "grad_norm": 3.625, "learning_rate": 4.888821290839617e-06, "log_odds_chosen": 0.07755931466817856, "log_odds_ratio": -0.7155352830886841, "logits/chosen": -3.1021580696105957, "logits/rejected": -3.099865436553955, "logps/chosen": -0.6417344808578491, "logps/rejected": -0.689717173576355, "loss": 0.4337, "nll_loss": 0.3798291087150574, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.032086726278066635, "rewards/margins": 0.0023991346824914217, "rewards/rejected": -0.03448585793375969, "step": 460 }, { "epoch": 0.34, "grad_norm": 3.484375, "learning_rate": 4.886486052449469e-06, "log_odds_chosen": 0.021442702040076256, "log_odds_ratio": -0.7490790486335754, "logits/chosen": -3.102329969406128, "logits/rejected": -3.1005027294158936, "logps/chosen": -0.6745606660842896, "logps/rejected": -0.696607768535614, "loss": 0.4617, "nll_loss": 0.375292032957077, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.033728040754795074, "rewards/margins": 0.0011023514671251178, "rewards/rejected": -0.03483038768172264, "step": 470 }, { "epoch": 0.34, "grad_norm": 3.40625, "learning_rate": 4.8841541572785224e-06, "log_odds_chosen": 0.05353527143597603, "log_odds_ratio": -0.7253191471099854, "logits/chosen": -3.082537889480591, "logits/rejected": -3.07082462310791, "logps/chosen": -0.7033270597457886, "logps/rejected": -0.7368647456169128, "loss": 0.4892, "nll_loss": 0.4859234690666199, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03516635298728943, "rewards/margins": 0.0016768805216997862, "rewards/rejected": -0.036843232810497284, "step": 480 }, { "epoch": 0.35, "grad_norm": 4.03125, "learning_rate": 4.881825597357242e-06, "log_odds_chosen": 0.10252328217029572, "log_odds_ratio": -0.7058262825012207, "logits/chosen": -3.066099166870117, "logits/rejected": -3.070496082305908, "logps/chosen": -0.6735079884529114, "logps/rejected": -0.7267085313796997, "loss": 0.4362, "nll_loss": 0.4278966784477234, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03367539495229721, "rewards/margins": 0.002660029800608754, "rewards/rejected": -0.036335431039333344, "step": 490 }, { "epoch": 0.36, "grad_norm": 4.96875, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 0.04843845218420029, "log_odds_ratio": -0.739460825920105, "logits/chosen": -3.0594677925109863, "logits/rejected": -3.0537750720977783, "logps/chosen": -0.6733946800231934, "logps/rejected": -0.7030497789382935, "loss": 0.4764, "nll_loss": 0.43981099128723145, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.03366973623633385, "rewards/margins": 0.0014827511040493846, "rewards/rejected": -0.03515248745679855, "step": 500 }, { "epoch": 0.36, "grad_norm": 3.984375, "learning_rate": 4.877178451518289e-06, "log_odds_chosen": -0.0036692798603326082, "log_odds_ratio": -0.7525519132614136, "logits/chosen": -3.0770444869995117, "logits/rejected": -3.0733017921447754, "logps/chosen": -0.6757587194442749, "logps/rejected": -0.6810072660446167, "loss": 0.435, "nll_loss": 0.3937914967536926, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.033787935972213745, "rewards/margins": 0.0002624286280479282, "rewards/rejected": -0.034050360321998596, "step": 510 }, { "epoch": 0.37, "grad_norm": 4.78125, "learning_rate": 4.8748598497939494e-06, "log_odds_chosen": 0.05728853866457939, "log_odds_ratio": -0.7186366319656372, "logits/chosen": -3.052861213684082, "logits/rejected": -3.058351755142212, "logps/chosen": -0.6729940176010132, "logps/rejected": -0.7037376165390015, "loss": 0.4767, "nll_loss": 0.4060978889465332, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03364970162510872, "rewards/margins": 0.0015371815534308553, "rewards/rejected": -0.035186879336833954, "step": 520 }, { "epoch": 0.38, "grad_norm": 3.453125, "learning_rate": 4.872544551705718e-06, "log_odds_chosen": 0.11197223514318466, "log_odds_ratio": -0.6946540474891663, "logits/chosen": -3.060675621032715, "logits/rejected": -3.066896915435791, "logps/chosen": -0.6789919137954712, "logps/rejected": -0.7225015163421631, "loss": 0.4256, "nll_loss": 0.37904661893844604, "rewards/accuracies": 0.59375, "rewards/chosen": -0.0339495949447155, "rewards/margins": 0.0021754808258265257, "rewards/rejected": -0.03612507879734039, "step": 530 }, { "epoch": 0.39, "grad_norm": 3.796875, "learning_rate": 4.870232549415787e-06, "log_odds_chosen": 0.02510526217520237, "log_odds_ratio": -0.7521008849143982, "logits/chosen": -3.077195167541504, "logits/rejected": -3.076308012008667, "logps/chosen": -0.6907860040664673, "logps/rejected": -0.6919427514076233, "loss": 0.4412, "nll_loss": 0.4135584831237793, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.034539300948381424, "rewards/margins": 5.783573578810319e-05, "rewards/rejected": -0.034597136080265045, "step": 540 }, { "epoch": 0.39, "grad_norm": 3.8125, "learning_rate": 4.867923835112355e-06, "log_odds_chosen": 0.08614515513181686, "log_odds_ratio": -0.7050543427467346, "logits/chosen": -3.082448959350586, "logits/rejected": -3.078526496887207, "logps/chosen": -0.6387960910797119, "logps/rejected": -0.6914810538291931, "loss": 0.4572, "nll_loss": 0.45853734016418457, "rewards/accuracies": 0.5, "rewards/chosen": -0.031939808279275894, "rewards/margins": 0.0026342463679611683, "rewards/rejected": -0.034574054181575775, "step": 550 }, { "epoch": 0.4, "grad_norm": 4.15625, "learning_rate": 4.865618401009519e-06, "log_odds_chosen": 0.04126372188329697, "log_odds_ratio": -0.7287080883979797, "logits/chosen": -3.064847230911255, "logits/rejected": -3.0784311294555664, "logps/chosen": -0.6930364370346069, "logps/rejected": -0.7094193696975708, "loss": 0.4728, "nll_loss": 0.43362417817115784, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03465181961655617, "rewards/margins": 0.0008191479137167335, "rewards/rejected": -0.03547096997499466, "step": 560 }, { "epoch": 0.41, "grad_norm": 3.734375, "learning_rate": 4.863316239347163e-06, "log_odds_chosen": 0.11334598064422607, "log_odds_ratio": -0.6950859427452087, "logits/chosen": -3.0916595458984375, "logits/rejected": -3.0792717933654785, "logps/chosen": -0.6042887568473816, "logps/rejected": -0.6623607873916626, "loss": 0.4228, "nll_loss": 0.3507387638092041, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.03021443821489811, "rewards/margins": 0.0029036046471446753, "rewards/rejected": -0.03311804682016373, "step": 570 }, { "epoch": 0.41, "grad_norm": 3.34375, "learning_rate": 4.861017342390847e-06, "log_odds_chosen": 0.13918332755565643, "log_odds_ratio": -0.6786795854568481, "logits/chosen": -3.0410194396972656, "logits/rejected": -3.0295698642730713, "logps/chosen": -0.6332284212112427, "logps/rejected": -0.7104531526565552, "loss": 0.4472, "nll_loss": 0.407381147146225, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.031661417335271835, "rewards/margins": 0.0038612366188317537, "rewards/rejected": -0.03552265465259552, "step": 580 }, { "epoch": 0.42, "grad_norm": 4.1875, "learning_rate": 4.858721702431704e-06, "log_odds_chosen": 0.16907373070716858, "log_odds_ratio": -0.659919261932373, "logits/chosen": -3.0990092754364014, "logits/rejected": -3.100250244140625, "logps/chosen": -0.6247608661651611, "logps/rejected": -0.7146947383880615, "loss": 0.4387, "nll_loss": 0.4016880989074707, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.031238043680787086, "rewards/margins": 0.004496692679822445, "rewards/rejected": -0.03573473542928696, "step": 590 }, { "epoch": 0.43, "grad_norm": 3.28125, "learning_rate": 4.856429311786322e-06, "log_odds_chosen": 0.18647165596485138, "log_odds_ratio": -0.6580454111099243, "logits/chosen": -3.07918119430542, "logits/rejected": -3.0714163780212402, "logps/chosen": -0.6333354711532593, "logps/rejected": -0.7353613972663879, "loss": 0.4259, "nll_loss": 0.39573854207992554, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.031666774302721024, "rewards/margins": 0.005101297982037067, "rewards/rejected": -0.036768071353435516, "step": 600 }, { "epoch": 0.44, "grad_norm": 3.71875, "learning_rate": 4.8541401627966426e-06, "log_odds_chosen": 0.12605203688144684, "log_odds_ratio": -0.6847308278083801, "logits/chosen": -3.0200388431549072, "logits/rejected": -3.0232505798339844, "logps/chosen": -0.637053370475769, "logps/rejected": -0.7107265591621399, "loss": 0.4465, "nll_loss": 0.4514044225215912, "rewards/accuracies": 0.625, "rewards/chosen": -0.03185266628861427, "rewards/margins": 0.003683661576360464, "rewards/rejected": -0.035536326467990875, "step": 610 }, { "epoch": 0.44, "grad_norm": 3.078125, "learning_rate": 4.85185424782985e-06, "log_odds_chosen": 0.11318854242563248, "log_odds_ratio": -0.6869757771492004, "logits/chosen": -3.0487678050994873, "logits/rejected": -3.0563690662384033, "logps/chosen": -0.6196079254150391, "logps/rejected": -0.6818262338638306, "loss": 0.432, "nll_loss": 0.3603076636791229, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.030980398878455162, "rewards/margins": 0.003110917517915368, "rewards/rejected": -0.034091316163539886, "step": 620 }, { "epoch": 0.45, "grad_norm": 3.765625, "learning_rate": 4.8495715592782715e-06, "log_odds_chosen": 0.08510233461856842, "log_odds_ratio": -0.708472490310669, "logits/chosen": -3.079819440841675, "logits/rejected": -3.0707499980926514, "logps/chosen": -0.6599884629249573, "logps/rejected": -0.7195941209793091, "loss": 0.4679, "nll_loss": 0.42595186829566956, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.032999418675899506, "rewards/margins": 0.002980284858494997, "rewards/rejected": -0.035979703068733215, "step": 630 }, { "epoch": 0.46, "grad_norm": 4.28125, "learning_rate": 4.847292089559258e-06, "log_odds_chosen": 0.12237779796123505, "log_odds_ratio": -0.6955515146255493, "logits/chosen": -3.0802435874938965, "logits/rejected": -3.0712850093841553, "logps/chosen": -0.6394826769828796, "logps/rejected": -0.6949468851089478, "loss": 0.4013, "nll_loss": 0.38351970911026, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03197413310408592, "rewards/margins": 0.002773215062916279, "rewards/rejected": -0.03474734351038933, "step": 640 }, { "epoch": 0.46, "grad_norm": 3.984375, "learning_rate": 4.845015831115093e-06, "log_odds_chosen": 0.20742936432361603, "log_odds_ratio": -0.6493484377861023, "logits/chosen": -3.0841879844665527, "logits/rejected": -3.0699145793914795, "logps/chosen": -0.659258246421814, "logps/rejected": -0.7655901312828064, "loss": 0.4404, "nll_loss": 0.40948915481567383, "rewards/accuracies": 0.59375, "rewards/chosen": -0.032962918281555176, "rewards/margins": 0.005316597409546375, "rewards/rejected": -0.03827951103448868, "step": 650 }, { "epoch": 0.47, "grad_norm": 5.3125, "learning_rate": 4.842742776412874e-06, "log_odds_chosen": 0.13828198611736298, "log_odds_ratio": -0.6830775141716003, "logits/chosen": -3.0765130519866943, "logits/rejected": -3.070868968963623, "logps/chosen": -0.6441447138786316, "logps/rejected": -0.7223222851753235, "loss": 0.4015, "nll_loss": 0.38920858502388, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03220723941922188, "rewards/margins": 0.00390887726098299, "rewards/rejected": -0.036116115748882294, "step": 660 }, { "epoch": 0.48, "grad_norm": 3.21875, "learning_rate": 4.840472917944417e-06, "log_odds_chosen": -0.011784842237830162, "log_odds_ratio": -0.7628757357597351, "logits/chosen": -3.0604257583618164, "logits/rejected": -3.063080310821533, "logps/chosen": -0.7110339403152466, "logps/rejected": -0.6998971700668335, "loss": 0.4561, "nll_loss": 0.42479920387268066, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.03555169701576233, "rewards/margins": -0.0005568360211327672, "rewards/rejected": -0.03499486297369003, "step": 670 }, { "epoch": 0.49, "grad_norm": 3.15625, "learning_rate": 4.838206248226147e-06, "log_odds_chosen": 0.13070423901081085, "log_odds_ratio": -0.6919819116592407, "logits/chosen": -3.0732009410858154, "logits/rejected": -3.0725150108337402, "logps/chosen": -0.6451132893562317, "logps/rejected": -0.7241078019142151, "loss": 0.4765, "nll_loss": 0.4737609922885895, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.032255664467811584, "rewards/margins": 0.003949729725718498, "rewards/rejected": -0.036205392330884933, "step": 680 }, { "epoch": 0.49, "grad_norm": 2.96875, "learning_rate": 4.835942759799002e-06, "log_odds_chosen": 0.09634308516979218, "log_odds_ratio": -0.7033903002738953, "logits/chosen": -3.0458781719207764, "logits/rejected": -3.0505826473236084, "logps/chosen": -0.6358808279037476, "logps/rejected": -0.6862252950668335, "loss": 0.4477, "nll_loss": 0.4233109951019287, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03179404139518738, "rewards/margins": 0.002517223823815584, "rewards/rejected": -0.034311264753341675, "step": 690 }, { "epoch": 0.5, "grad_norm": 3.171875, "learning_rate": 4.833682445228318e-06, "log_odds_chosen": 0.1403178572654724, "log_odds_ratio": -0.6865570545196533, "logits/chosen": -3.054211139678955, "logits/rejected": -3.0681564807891846, "logps/chosen": -0.6709010601043701, "logps/rejected": -0.7344124913215637, "loss": 0.4113, "nll_loss": 0.3880983293056488, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.033545054495334625, "rewards/margins": 0.0031755701638758183, "rewards/rejected": -0.036720626056194305, "step": 700 }, { "epoch": 0.51, "grad_norm": 3.5625, "learning_rate": 4.831425297103738e-06, "log_odds_chosen": 0.046028655022382736, "log_odds_ratio": -0.7205643057823181, "logits/chosen": -3.0962376594543457, "logits/rejected": -3.093686580657959, "logps/chosen": -0.6647442579269409, "logps/rejected": -0.7020038962364197, "loss": 0.4411, "nll_loss": 0.40460824966430664, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.033237211406230927, "rewards/margins": 0.0018629844998940825, "rewards/rejected": -0.03510019928216934, "step": 710 }, { "epoch": 0.51, "grad_norm": 3.78125, "learning_rate": 4.829171308039099e-06, "log_odds_chosen": 0.07206249237060547, "log_odds_ratio": -0.7229174375534058, "logits/chosen": -3.0669052600860596, "logits/rejected": -3.064042568206787, "logps/chosen": -0.6743772625923157, "logps/rejected": -0.720771849155426, "loss": 0.4448, "nll_loss": 0.3993741273880005, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03371886536478996, "rewards/margins": 0.002319728024303913, "rewards/rejected": -0.0360385924577713, "step": 720 }, { "epoch": 0.52, "grad_norm": 3.484375, "learning_rate": 4.826920470672344e-06, "log_odds_chosen": 0.14494310319423676, "log_odds_ratio": -0.6867069005966187, "logits/chosen": -3.087284803390503, "logits/rejected": -3.0750741958618164, "logps/chosen": -0.6538446545600891, "logps/rejected": -0.7387341260910034, "loss": 0.4293, "nll_loss": 0.41786569356918335, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.032692231237888336, "rewards/margins": 0.004244474694132805, "rewards/rejected": -0.03693670779466629, "step": 730 }, { "epoch": 0.53, "grad_norm": 3.453125, "learning_rate": 4.824672777665406e-06, "log_odds_chosen": 0.15031760931015015, "log_odds_ratio": -0.6803394556045532, "logits/chosen": -3.0734050273895264, "logits/rejected": -3.075320243835449, "logps/chosen": -0.673099160194397, "logps/rejected": -0.7478404641151428, "loss": 0.4761, "nll_loss": 0.454674631357193, "rewards/accuracies": 0.5625, "rewards/chosen": -0.03365495055913925, "rewards/margins": 0.003737064078450203, "rewards/rejected": -0.0373920202255249, "step": 740 }, { "epoch": 0.54, "grad_norm": 4.25, "learning_rate": 4.822428221704122e-06, "log_odds_chosen": 0.11640326678752899, "log_odds_ratio": -0.6864844560623169, "logits/chosen": -3.079780101776123, "logits/rejected": -3.0830986499786377, "logps/chosen": -0.6119939088821411, "logps/rejected": -0.6690106987953186, "loss": 0.4331, "nll_loss": 0.3945719599723816, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.030599694699048996, "rewards/margins": 0.002850837307050824, "rewards/rejected": -0.03345053270459175, "step": 750 }, { "epoch": 0.54, "grad_norm": 3.765625, "learning_rate": 4.820186795498119e-06, "log_odds_chosen": 0.06874329596757889, "log_odds_ratio": -0.7021539807319641, "logits/chosen": -3.0491080284118652, "logits/rejected": -3.052577257156372, "logps/chosen": -0.6264130473136902, "logps/rejected": -0.6755816340446472, "loss": 0.4409, "nll_loss": 0.4055160582065582, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.03132065013051033, "rewards/margins": 0.002458432223647833, "rewards/rejected": -0.0337790846824646, "step": 760 }, { "epoch": 0.55, "grad_norm": 4.125, "learning_rate": 4.817948491780728e-06, "log_odds_chosen": 0.06512076407670975, "log_odds_ratio": -0.7139188647270203, "logits/chosen": -3.058718681335449, "logits/rejected": -3.052968978881836, "logps/chosen": -0.6462647914886475, "logps/rejected": -0.6857739686965942, "loss": 0.4438, "nll_loss": 0.40586766600608826, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03231323882937431, "rewards/margins": 0.0019754543900489807, "rewards/rejected": -0.03428869694471359, "step": 770 }, { "epoch": 0.56, "grad_norm": 3.875, "learning_rate": 4.815713303308872e-06, "log_odds_chosen": 0.060712821781635284, "log_odds_ratio": -0.7178046107292175, "logits/chosen": -3.0182127952575684, "logits/rejected": -3.0026001930236816, "logps/chosen": -0.6681068539619446, "logps/rejected": -0.6965761184692383, "loss": 0.4418, "nll_loss": 0.4022420346736908, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03340534120798111, "rewards/margins": 0.0014234638074412942, "rewards/rejected": -0.034828804433345795, "step": 780 }, { "epoch": 0.56, "grad_norm": 3.46875, "learning_rate": 4.813481222862981e-06, "log_odds_chosen": 0.11153805255889893, "log_odds_ratio": -0.695169985294342, "logits/chosen": -3.0789120197296143, "logits/rejected": -3.076815128326416, "logps/chosen": -0.6458239555358887, "logps/rejected": -0.7027527093887329, "loss": 0.4465, "nll_loss": 0.4205663204193115, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.032291196286678314, "rewards/margins": 0.0028464333154261112, "rewards/rejected": -0.03513763099908829, "step": 790 }, { "epoch": 0.57, "grad_norm": 3.703125, "learning_rate": 4.811252243246881e-06, "log_odds_chosen": 0.12044986337423325, "log_odds_ratio": -0.6857193112373352, "logits/chosen": -3.0292160511016846, "logits/rejected": -3.0415356159210205, "logps/chosen": -0.6784808039665222, "logps/rejected": -0.7429660558700562, "loss": 0.4334, "nll_loss": 0.4157021939754486, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03392403945326805, "rewards/margins": 0.0032242611050605774, "rewards/rejected": -0.03714829683303833, "step": 800 }, { "epoch": 0.58, "grad_norm": 3.125, "learning_rate": 4.809026357287709e-06, "log_odds_chosen": 0.09020865708589554, "log_odds_ratio": -0.7137196660041809, "logits/chosen": -3.0410451889038086, "logits/rejected": -3.0433433055877686, "logps/chosen": -0.6462265849113464, "logps/rejected": -0.7007557153701782, "loss": 0.4586, "nll_loss": 0.4317532479763031, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0323113352060318, "rewards/margins": 0.0027264528907835484, "rewards/rejected": -0.03503778204321861, "step": 810 }, { "epoch": 0.59, "grad_norm": 3.296875, "learning_rate": 4.806803557835802e-06, "log_odds_chosen": 0.01391223631799221, "log_odds_ratio": -0.7324727773666382, "logits/chosen": -3.0630288124084473, "logits/rejected": -3.0616655349731445, "logps/chosen": -0.6577921509742737, "logps/rejected": -0.6736438870429993, "loss": 0.4437, "nll_loss": 0.4458716809749603, "rewards/accuracies": 0.5, "rewards/chosen": -0.032889604568481445, "rewards/margins": 0.0007925864192657173, "rewards/rejected": -0.033682193607091904, "step": 820 }, { "epoch": 0.59, "grad_norm": 3.5, "learning_rate": 4.804583837764616e-06, "log_odds_chosen": 0.11316128820180893, "log_odds_ratio": -0.6908625364303589, "logits/chosen": -3.065793037414551, "logits/rejected": -3.0383734703063965, "logps/chosen": -0.6727164387702942, "logps/rejected": -0.742289662361145, "loss": 0.4375, "nll_loss": 0.41268715262413025, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03363582491874695, "rewards/margins": 0.0034786623436957598, "rewards/rejected": -0.03711448609828949, "step": 830 }, { "epoch": 0.6, "grad_norm": 4.5625, "learning_rate": 4.802367189970616e-06, "log_odds_chosen": 0.07681518793106079, "log_odds_ratio": -0.7137131690979004, "logits/chosen": -3.0363595485687256, "logits/rejected": -3.031625747680664, "logps/chosen": -0.6656880378723145, "logps/rejected": -0.7070342302322388, "loss": 0.4302, "nll_loss": 0.4184693396091461, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03328440338373184, "rewards/margins": 0.0020673065446317196, "rewards/rejected": -0.0353517085313797, "step": 840 }, { "epoch": 0.61, "grad_norm": 3.9375, "learning_rate": 4.8001536073731936e-06, "log_odds_chosen": 0.1605026125907898, "log_odds_ratio": -0.6604620218276978, "logits/chosen": -3.0630927085876465, "logits/rejected": -3.047419548034668, "logps/chosen": -0.6152976751327515, "logps/rejected": -0.6827403903007507, "loss": 0.4175, "nll_loss": 0.3885991871356964, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.030764881521463394, "rewards/margins": 0.003372135339304805, "rewards/rejected": -0.03413701802492142, "step": 850 }, { "epoch": 0.61, "grad_norm": 3.5625, "learning_rate": 4.797943082914558e-06, "log_odds_chosen": 0.122996486723423, "log_odds_ratio": -0.6884230375289917, "logits/chosen": -3.0407307147979736, "logits/rejected": -3.0454647541046143, "logps/chosen": -0.6646740436553955, "logps/rejected": -0.7323697805404663, "loss": 0.4232, "nll_loss": 0.40015679597854614, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.033233705908060074, "rewards/margins": 0.0033847857266664505, "rewards/rejected": -0.036618489772081375, "step": 860 }, { "epoch": 0.62, "grad_norm": 3.484375, "learning_rate": 4.795735609559657e-06, "log_odds_chosen": 0.06328854709863663, "log_odds_ratio": -0.7167618870735168, "logits/chosen": -3.024827480316162, "logits/rejected": -3.0302538871765137, "logps/chosen": -0.6877762675285339, "logps/rejected": -0.7096621990203857, "loss": 0.451, "nll_loss": 0.41245803236961365, "rewards/accuracies": 0.53125, "rewards/chosen": -0.034388814121484756, "rewards/margins": 0.001094298204407096, "rewards/rejected": -0.035483114421367645, "step": 870 }, { "epoch": 0.63, "grad_norm": 3.703125, "learning_rate": 4.793531180296065e-06, "log_odds_chosen": 0.1262568235397339, "log_odds_ratio": -0.6865631341934204, "logits/chosen": -3.0428998470306396, "logits/rejected": -3.052109956741333, "logps/chosen": -0.6537882685661316, "logps/rejected": -0.7123113870620728, "loss": 0.4601, "nll_loss": 0.40879708528518677, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0326894149184227, "rewards/margins": 0.002926155924797058, "rewards/rejected": -0.03561556711792946, "step": 880 }, { "epoch": 0.64, "grad_norm": 3.78125, "learning_rate": 4.7913297881339085e-06, "log_odds_chosen": 0.16561809182167053, "log_odds_ratio": -0.6622673273086548, "logits/chosen": -3.0884785652160645, "logits/rejected": -3.080212116241455, "logps/chosen": -0.6301072835922241, "logps/rejected": -0.7207618951797485, "loss": 0.4692, "nll_loss": 0.45619016885757446, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03150536119937897, "rewards/margins": 0.004532733000814915, "rewards/rejected": -0.036038096994161606, "step": 890 }, { "epoch": 0.64, "grad_norm": 4.0625, "learning_rate": 4.789131426105757e-06, "log_odds_chosen": 0.15549907088279724, "log_odds_ratio": -0.6794939637184143, "logits/chosen": -3.0835204124450684, "logits/rejected": -3.0894227027893066, "logps/chosen": -0.6019552946090698, "logps/rejected": -0.6908131837844849, "loss": 0.4578, "nll_loss": 0.39178740978240967, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03009776398539543, "rewards/margins": 0.004442893899977207, "rewards/rejected": -0.03454066067934036, "step": 900 }, { "epoch": 0.65, "grad_norm": 3.578125, "learning_rate": 4.786936087266542e-06, "log_odds_chosen": 0.16360627114772797, "log_odds_ratio": -0.6856591105461121, "logits/chosen": -3.0507102012634277, "logits/rejected": -3.054673433303833, "logps/chosen": -0.609703540802002, "logps/rejected": -0.6908944845199585, "loss": 0.4374, "nll_loss": 0.39051851630210876, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.030485177412629128, "rewards/margins": 0.004059544298797846, "rewards/rejected": -0.034544724971055984, "step": 910 }, { "epoch": 0.66, "grad_norm": 3.578125, "learning_rate": 4.784743764693455e-06, "log_odds_chosen": 0.09084954112768173, "log_odds_ratio": -0.7138122320175171, "logits/chosen": -3.072230577468872, "logits/rejected": -3.056391954421997, "logps/chosen": -0.6599885821342468, "logps/rejected": -0.7167444229125977, "loss": 0.4664, "nll_loss": 0.4432450830936432, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.032999418675899506, "rewards/margins": 0.002837798558175564, "rewards/rejected": -0.03583722561597824, "step": 920 }, { "epoch": 0.66, "grad_norm": 3.609375, "learning_rate": 4.7825544514858655e-06, "log_odds_chosen": 0.12197308242321014, "log_odds_ratio": -0.6981927156448364, "logits/chosen": -3.06807804107666, "logits/rejected": -3.067018508911133, "logps/chosen": -0.6602961421012878, "logps/rejected": -0.7247858047485352, "loss": 0.4545, "nll_loss": 0.4333201050758362, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03301481157541275, "rewards/margins": 0.0032244804315268993, "rewards/rejected": -0.03623929247260094, "step": 930 }, { "epoch": 0.67, "grad_norm": 4.34375, "learning_rate": 4.780368140765222e-06, "log_odds_chosen": 0.06546248495578766, "log_odds_ratio": -0.7227157354354858, "logits/chosen": -3.039170026779175, "logits/rejected": -3.0273942947387695, "logps/chosen": -0.6119587421417236, "logps/rejected": -0.6562652587890625, "loss": 0.4094, "nll_loss": 0.3886231482028961, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.030597934499382973, "rewards/margins": 0.002215325366705656, "rewards/rejected": -0.032813262194395065, "step": 940 }, { "epoch": 0.68, "grad_norm": 3.625, "learning_rate": 4.778184825674966e-06, "log_odds_chosen": 0.1726696789264679, "log_odds_ratio": -0.659112811088562, "logits/chosen": -3.0425727367401123, "logits/rejected": -3.030287265777588, "logps/chosen": -0.6211417317390442, "logps/rejected": -0.7097448706626892, "loss": 0.4393, "nll_loss": 0.384087473154068, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.031057089567184448, "rewards/margins": 0.004430153872817755, "rewards/rejected": -0.03548724204301834, "step": 950 }, { "epoch": 0.69, "grad_norm": 3.53125, "learning_rate": 4.776004499380439e-06, "log_odds_chosen": 0.011781233362853527, "log_odds_ratio": -0.7452099323272705, "logits/chosen": -3.018817663192749, "logits/rejected": -3.0265917778015137, "logps/chosen": -0.6494359970092773, "logps/rejected": -0.6629678606987, "loss": 0.4265, "nll_loss": 0.38487789034843445, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.032471802085638046, "rewards/margins": 0.0006765943253412843, "rewards/rejected": -0.033148396760225296, "step": 960 }, { "epoch": 0.69, "grad_norm": 4.0, "learning_rate": 4.773827155068793e-06, "log_odds_chosen": 0.06863684952259064, "log_odds_ratio": -0.7090519070625305, "logits/chosen": -3.020681619644165, "logits/rejected": -3.021437168121338, "logps/chosen": -0.6377496123313904, "logps/rejected": -0.6642230749130249, "loss": 0.4364, "nll_loss": 0.38367873430252075, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0318874828517437, "rewards/margins": 0.0013236708473414183, "rewards/rejected": -0.033211153000593185, "step": 970 }, { "epoch": 0.7, "grad_norm": 3.421875, "learning_rate": 4.771652785948902e-06, "log_odds_chosen": 0.10076969861984253, "log_odds_ratio": -0.7029331922531128, "logits/chosen": -3.047610282897949, "logits/rejected": -3.04512357711792, "logps/chosen": -0.6631830930709839, "logps/rejected": -0.7198564410209656, "loss": 0.424, "nll_loss": 0.37212181091308594, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.033159155398607254, "rewards/margins": 0.00283367233350873, "rewards/rejected": -0.0359928235411644, "step": 980 }, { "epoch": 0.71, "grad_norm": 3.5625, "learning_rate": 4.769481385251275e-06, "log_odds_chosen": 0.059241972863674164, "log_odds_ratio": -0.7323654294013977, "logits/chosen": -3.064662218093872, "logits/rejected": -3.065473794937134, "logps/chosen": -0.6276484131813049, "logps/rejected": -0.6770464181900024, "loss": 0.4371, "nll_loss": 0.4228192865848541, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.031382422894239426, "rewards/margins": 0.002469896338880062, "rewards/rejected": -0.03385232016444206, "step": 990 }, { "epoch": 0.71, "grad_norm": 4.46875, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.12544479966163635, "log_odds_ratio": -0.684360682964325, "logits/chosen": -3.0453364849090576, "logits/rejected": -3.038565158843994, "logps/chosen": -0.6325899362564087, "logps/rejected": -0.7051112055778503, "loss": 0.4117, "nll_loss": 0.3851761221885681, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.03162949159741402, "rewards/margins": 0.0036260623019188643, "rewards/rejected": -0.0352555587887764, "step": 1000 }, { "epoch": 0.72, "grad_norm": 3.765625, "learning_rate": 4.765147462152471e-06, "log_odds_chosen": -0.049488991498947144, "log_odds_ratio": -0.767659604549408, "logits/chosen": -3.0783731937408447, "logits/rejected": -3.0841751098632812, "logps/chosen": -0.677563488483429, "logps/rejected": -0.6517989039421082, "loss": 0.443, "nll_loss": 0.44325417280197144, "rewards/accuracies": 0.5, "rewards/chosen": -0.03387816995382309, "rewards/margins": -0.001288228202611208, "rewards/rejected": -0.03258994594216347, "step": 1010 }, { "epoch": 0.73, "grad_norm": 3.46875, "learning_rate": 4.762984926319677e-06, "log_odds_chosen": 0.06131023168563843, "log_odds_ratio": -0.7227293848991394, "logits/chosen": -3.0039877891540527, "logits/rejected": -3.0252082347869873, "logps/chosen": -0.6900344491004944, "logps/rejected": -0.7198086977005005, "loss": 0.4176, "nll_loss": 0.3607541024684906, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.03450172394514084, "rewards/margins": 0.0014887071447446942, "rewards/rejected": -0.035990431904792786, "step": 1020 }, { "epoch": 0.74, "grad_norm": 3.390625, "learning_rate": 4.760825332045738e-06, "log_odds_chosen": 0.13325969874858856, "log_odds_ratio": -0.6899516582489014, "logits/chosen": -3.0545506477355957, "logits/rejected": -3.056483745574951, "logps/chosen": -0.683167040348053, "logps/rejected": -0.7419314980506897, "loss": 0.4492, "nll_loss": 0.3988763689994812, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.03415834903717041, "rewards/margins": 0.0029382240027189255, "rewards/rejected": -0.037096574902534485, "step": 1030 }, { "epoch": 0.74, "grad_norm": 3.171875, "learning_rate": 4.758668672668006e-06, "log_odds_chosen": -0.023545902222394943, "log_odds_ratio": -0.7494568824768066, "logits/chosen": -3.05180287361145, "logits/rejected": -3.037919282913208, "logps/chosen": -0.7081907987594604, "logps/rejected": -0.7100042104721069, "loss": 0.4308, "nll_loss": 0.4296863079071045, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.03540953993797302, "rewards/margins": 9.066825441550463e-05, "rewards/rejected": -0.03550020977854729, "step": 1040 }, { "epoch": 0.75, "grad_norm": 3.609375, "learning_rate": 4.756514941544941e-06, "log_odds_chosen": 0.2092513144016266, "log_odds_ratio": -0.6568443775177002, "logits/chosen": -3.038727283477783, "logits/rejected": -3.027693510055542, "logps/chosen": -0.6478935480117798, "logps/rejected": -0.7545783519744873, "loss": 0.4238, "nll_loss": 0.3853406310081482, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.03239467367529869, "rewards/margins": 0.005334246437996626, "rewards/rejected": -0.0377289243042469, "step": 1050 }, { "epoch": 0.76, "grad_norm": 3.4375, "learning_rate": 4.754364132056025e-06, "log_odds_chosen": 0.20006871223449707, "log_odds_ratio": -0.6599471569061279, "logits/chosen": -3.0662925243377686, "logits/rejected": -3.0749499797821045, "logps/chosen": -0.6217712759971619, "logps/rejected": -0.7256768941879272, "loss": 0.4448, "nll_loss": 0.42285776138305664, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.031088566407561302, "rewards/margins": 0.00519528379663825, "rewards/rejected": -0.03628384694457054, "step": 1060 }, { "epoch": 0.76, "grad_norm": 3.0625, "learning_rate": 4.752216237601676e-06, "log_odds_chosen": 0.12204728275537491, "log_odds_ratio": -0.6856037378311157, "logits/chosen": -3.069291591644287, "logits/rejected": -3.0735158920288086, "logps/chosen": -0.6233230829238892, "logps/rejected": -0.676108717918396, "loss": 0.4169, "nll_loss": 0.4054155945777893, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.031166154891252518, "rewards/margins": 0.002639283426105976, "rewards/rejected": -0.03380543738603592, "step": 1070 }, { "epoch": 0.77, "grad_norm": 4.25, "learning_rate": 4.750071251603165e-06, "log_odds_chosen": 0.06005765125155449, "log_odds_ratio": -0.7120882272720337, "logits/chosen": -3.0675432682037354, "logits/rejected": -3.0618348121643066, "logps/chosen": -0.6603859066963196, "logps/rejected": -0.7038144469261169, "loss": 0.4334, "nll_loss": 0.411041259765625, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0330192968249321, "rewards/margins": 0.002171426545828581, "rewards/rejected": -0.03519072383642197, "step": 1080 }, { "epoch": 0.78, "grad_norm": 3.859375, "learning_rate": 4.7479291675025314e-06, "log_odds_chosen": 0.11815843731164932, "log_odds_ratio": -0.6803543567657471, "logits/chosen": -3.0646190643310547, "logits/rejected": -3.0728182792663574, "logps/chosen": -0.6378597617149353, "logps/rejected": -0.6973134279251099, "loss": 0.4627, "nll_loss": 0.4038930833339691, "rewards/accuracies": 0.5625, "rewards/chosen": -0.031892985105514526, "rewards/margins": 0.002972686430439353, "rewards/rejected": -0.034865669906139374, "step": 1090 }, { "epoch": 0.79, "grad_norm": 3.328125, "learning_rate": 4.745789978762496e-06, "log_odds_chosen": 0.05584443733096123, "log_odds_ratio": -0.722913384437561, "logits/chosen": -3.0451390743255615, "logits/rejected": -3.0612053871154785, "logps/chosen": -0.6970195174217224, "logps/rejected": -0.7409615516662598, "loss": 0.4267, "nll_loss": 0.4191984236240387, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.03485097363591194, "rewards/margins": 0.0021971003152430058, "rewards/rejected": -0.03704807907342911, "step": 1100 }, { "epoch": 0.79, "grad_norm": 3.109375, "learning_rate": 4.7436536788663765e-06, "log_odds_chosen": 0.031207162886857986, "log_odds_ratio": -0.73769611120224, "logits/chosen": -3.0800411701202393, "logits/rejected": -3.079658031463623, "logps/chosen": -0.6475512981414795, "logps/rejected": -0.6596473455429077, "loss": 0.4172, "nll_loss": 0.3865991234779358, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.032377563416957855, "rewards/margins": 0.0006048032082617283, "rewards/rejected": -0.032982368022203445, "step": 1110 }, { "epoch": 0.8, "grad_norm": 3.703125, "learning_rate": 4.74152026131801e-06, "log_odds_chosen": 0.03759531304240227, "log_odds_ratio": -0.7334020733833313, "logits/chosen": -3.074477434158325, "logits/rejected": -3.0724005699157715, "logps/chosen": -0.6538852453231812, "logps/rejected": -0.6732013821601868, "loss": 0.4401, "nll_loss": 0.40496626496315, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.0326942577958107, "rewards/margins": 0.0009658055496402085, "rewards/rejected": -0.03366006538271904, "step": 1120 }, { "epoch": 0.81, "grad_norm": 3.71875, "learning_rate": 4.739389719641665e-06, "log_odds_chosen": 0.019005518406629562, "log_odds_ratio": -0.7340518236160278, "logits/chosen": -3.085136890411377, "logits/rejected": -3.084186553955078, "logps/chosen": -0.6766825914382935, "logps/rejected": -0.6808497309684753, "loss": 0.4498, "nll_loss": 0.4107235372066498, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.03383412957191467, "rewards/margins": 0.00020835567556787282, "rewards/rejected": -0.03404248505830765, "step": 1130 }, { "epoch": 0.81, "grad_norm": 3.96875, "learning_rate": 4.7372620473819615e-06, "log_odds_chosen": 0.10682245343923569, "log_odds_ratio": -0.6928954720497131, "logits/chosen": -3.0896780490875244, "logits/rejected": -3.0857176780700684, "logps/chosen": -0.632037341594696, "logps/rejected": -0.6993937492370605, "loss": 0.4268, "nll_loss": 0.3929973244667053, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.03160186856985092, "rewards/margins": 0.0033678212203085423, "rewards/rejected": -0.034969694912433624, "step": 1140 }, { "epoch": 0.82, "grad_norm": 3.78125, "learning_rate": 4.735137238103785e-06, "log_odds_chosen": -0.09269218146800995, "log_odds_ratio": -0.8012792468070984, "logits/chosen": -3.061591625213623, "logits/rejected": -3.076413631439209, "logps/chosen": -0.6523951888084412, "logps/rejected": -0.6016907691955566, "loss": 0.4324, "nll_loss": 0.405519962310791, "rewards/accuracies": 0.4375, "rewards/chosen": -0.03261975571513176, "rewards/margins": -0.002535218372941017, "rewards/rejected": -0.03008453920483589, "step": 1150 }, { "epoch": 0.83, "grad_norm": 3.125, "learning_rate": 4.7330152853922064e-06, "log_odds_chosen": 0.08306973427534103, "log_odds_ratio": -0.7034454345703125, "logits/chosen": -3.0880343914031982, "logits/rejected": -3.091386318206787, "logps/chosen": -0.6433897614479065, "logps/rejected": -0.6894704103469849, "loss": 0.4094, "nll_loss": 0.3951466679573059, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.032169491052627563, "rewards/margins": 0.0023040329106152058, "rewards/rejected": -0.034473519772291183, "step": 1160 }, { "epoch": 0.84, "grad_norm": 3.421875, "learning_rate": 4.730896182852409e-06, "log_odds_chosen": 0.07055424898862839, "log_odds_ratio": -0.7197698354721069, "logits/chosen": -3.0347607135772705, "logits/rejected": -3.034852981567383, "logps/chosen": -0.648642361164093, "logps/rejected": -0.6928626894950867, "loss": 0.4251, "nll_loss": 0.39131125807762146, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.03243211656808853, "rewards/margins": 0.0022110205609351397, "rewards/rejected": -0.034643132239580154, "step": 1170 }, { "epoch": 0.84, "grad_norm": 3.015625, "learning_rate": 4.72877992410959e-06, "log_odds_chosen": 0.07622877508401871, "log_odds_ratio": -0.718718945980072, "logits/chosen": -3.0938053131103516, "logits/rejected": -3.069287061691284, "logps/chosen": -0.5954622030258179, "logps/rejected": -0.6459757685661316, "loss": 0.4228, "nll_loss": 0.36942896246910095, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.029773110523819923, "rewards/margins": 0.002525678602978587, "rewards/rejected": -0.03229879215359688, "step": 1180 }, { "epoch": 0.85, "grad_norm": 3.359375, "learning_rate": 4.7266665028088985e-06, "log_odds_chosen": 0.04807063192129135, "log_odds_ratio": -0.7262119650840759, "logits/chosen": -3.0554616451263428, "logits/rejected": -3.058136463165283, "logps/chosen": -0.6289108991622925, "logps/rejected": -0.653433620929718, "loss": 0.4182, "nll_loss": 0.3666822016239166, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03144555166363716, "rewards/margins": 0.0012261316878721118, "rewards/rejected": -0.03267168253660202, "step": 1190 }, { "epoch": 0.86, "grad_norm": 3.640625, "learning_rate": 4.72455591261534e-06, "log_odds_chosen": 0.07576905190944672, "log_odds_ratio": -0.7169931530952454, "logits/chosen": -3.0383033752441406, "logits/rejected": -3.047996997833252, "logps/chosen": -0.6378492712974548, "logps/rejected": -0.688778281211853, "loss": 0.4271, "nll_loss": 0.35137873888015747, "rewards/accuracies": 0.5, "rewards/chosen": -0.03189246729016304, "rewards/margins": 0.0025464531499892473, "rewards/rejected": -0.03443891555070877, "step": 1200 }, { "epoch": 0.86, "grad_norm": 3.75, "learning_rate": 4.722448147213712e-06, "log_odds_chosen": 0.15186946094036102, "log_odds_ratio": -0.6881891489028931, "logits/chosen": -3.0256996154785156, "logits/rejected": -3.032496929168701, "logps/chosen": -0.648317813873291, "logps/rejected": -0.715408205986023, "loss": 0.4191, "nll_loss": 0.4192207455635071, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.03241588920354843, "rewards/margins": 0.00335451727733016, "rewards/rejected": -0.03577040880918503, "step": 1210 }, { "epoch": 0.87, "grad_norm": 3.71875, "learning_rate": 4.720343200308507e-06, "log_odds_chosen": 0.03504772111773491, "log_odds_ratio": -0.7351424098014832, "logits/chosen": -3.0351805686950684, "logits/rejected": -3.0360119342803955, "logps/chosen": -0.6886129379272461, "logps/rejected": -0.7105122804641724, "loss": 0.4581, "nll_loss": 0.40671506524086, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.034430645406246185, "rewards/margins": 0.0010949661955237389, "rewards/rejected": -0.0355256088078022, "step": 1220 }, { "epoch": 0.88, "grad_norm": 3.578125, "learning_rate": 4.7182410656238484e-06, "log_odds_chosen": 0.06219751387834549, "log_odds_ratio": -0.7128943204879761, "logits/chosen": -2.9832043647766113, "logits/rejected": -2.9864988327026367, "logps/chosen": -0.6394472122192383, "logps/rejected": -0.6639925837516785, "loss": 0.4388, "nll_loss": 0.3537364602088928, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.031972359865903854, "rewards/margins": 0.0012272674357518554, "rewards/rejected": -0.03319963067770004, "step": 1230 }, { "epoch": 0.89, "grad_norm": 3.609375, "learning_rate": 4.716141736903407e-06, "log_odds_chosen": 0.17500340938568115, "log_odds_ratio": -0.6878092885017395, "logits/chosen": -3.00567626953125, "logits/rejected": -2.998495578765869, "logps/chosen": -0.6631456017494202, "logps/rejected": -0.7714527249336243, "loss": 0.4187, "nll_loss": 0.40692028403282166, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03315728157758713, "rewards/margins": 0.00541535671800375, "rewards/rejected": -0.038572631776332855, "step": 1240 }, { "epoch": 0.89, "grad_norm": 3.640625, "learning_rate": 4.714045207910318e-06, "log_odds_chosen": 0.08970724046230316, "log_odds_ratio": -0.6932206153869629, "logits/chosen": -2.9962329864501953, "logits/rejected": -2.9803242683410645, "logps/chosen": -0.6143691539764404, "logps/rejected": -0.6589905619621277, "loss": 0.4354, "nll_loss": 0.40181055665016174, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.030718455091118813, "rewards/margins": 0.0022310761269181967, "rewards/rejected": -0.032949529588222504, "step": 1250 }, { "epoch": 0.9, "grad_norm": 3.578125, "learning_rate": 4.71195147242711e-06, "log_odds_chosen": 0.009160471148788929, "log_odds_ratio": -0.7380466461181641, "logits/chosen": -2.964049816131592, "logits/rejected": -2.9813032150268555, "logps/chosen": -0.6386082172393799, "logps/rejected": -0.6451534032821655, "loss": 0.4423, "nll_loss": 0.43473586440086365, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.031930409371852875, "rewards/margins": 0.00032726413337513804, "rewards/rejected": -0.032257676124572754, "step": 1260 }, { "epoch": 0.91, "grad_norm": 4.8125, "learning_rate": 4.709860524255622e-06, "log_odds_chosen": 0.06628690659999847, "log_odds_ratio": -0.7211881875991821, "logits/chosen": -3.0270466804504395, "logits/rejected": -3.017808437347412, "logps/chosen": -0.6635085344314575, "logps/rejected": -0.7107102274894714, "loss": 0.4182, "nll_loss": 0.38054361939430237, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.033175431191921234, "rewards/margins": 0.00236008083447814, "rewards/rejected": -0.03553551062941551, "step": 1270 }, { "epoch": 0.91, "grad_norm": 3.828125, "learning_rate": 4.707772357216934e-06, "log_odds_chosen": 0.20300555229187012, "log_odds_ratio": -0.6594520807266235, "logits/chosen": -3.018721342086792, "logits/rejected": -3.0158908367156982, "logps/chosen": -0.5924292802810669, "logps/rejected": -0.686690628528595, "loss": 0.4256, "nll_loss": 0.3867045044898987, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.029621466994285583, "rewards/margins": 0.004713063593953848, "rewards/rejected": -0.03433452919125557, "step": 1280 }, { "epoch": 0.92, "grad_norm": 3.96875, "learning_rate": 4.705686965151282e-06, "log_odds_chosen": 0.07721348106861115, "log_odds_ratio": -0.7010698914527893, "logits/chosen": -2.993330240249634, "logits/rejected": -2.9904725551605225, "logps/chosen": -0.6403943300247192, "logps/rejected": -0.6734997034072876, "loss": 0.4682, "nll_loss": 0.43530288338661194, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.0320197157561779, "rewards/margins": 0.0016552701126784086, "rewards/rejected": -0.03367498517036438, "step": 1290 }, { "epoch": 0.93, "grad_norm": 3.65625, "learning_rate": 4.703604341917987e-06, "log_odds_chosen": 0.14772483706474304, "log_odds_ratio": -0.6791850328445435, "logits/chosen": -2.990086793899536, "logits/rejected": -2.9869673252105713, "logps/chosen": -0.5991423726081848, "logps/rejected": -0.6550507545471191, "loss": 0.4303, "nll_loss": 0.38325631618499756, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0299571193754673, "rewards/margins": 0.0027954198885709047, "rewards/rejected": -0.03275253623723984, "step": 1300 }, { "epoch": 0.94, "grad_norm": 3.390625, "learning_rate": 4.701524481395374e-06, "log_odds_chosen": 0.0440853051841259, "log_odds_ratio": -0.7231958508491516, "logits/chosen": -2.9886057376861572, "logits/rejected": -2.982839584350586, "logps/chosen": -0.6433655619621277, "logps/rejected": -0.6593060493469238, "loss": 0.4335, "nll_loss": 0.3637477159500122, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.032168276607990265, "rewards/margins": 0.0007970236474648118, "rewards/rejected": -0.03296530246734619, "step": 1310 }, { "epoch": 0.94, "grad_norm": 3.5625, "learning_rate": 4.699447377480703e-06, "log_odds_chosen": 0.1278941035270691, "log_odds_ratio": -0.6739621758460999, "logits/chosen": -3.006826877593994, "logits/rejected": -2.9995763301849365, "logps/chosen": -0.6524965763092041, "logps/rejected": -0.7152012586593628, "loss": 0.4119, "nll_loss": 0.3763750195503235, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.032624829560518265, "rewards/margins": 0.0031352366786450148, "rewards/rejected": -0.0357600674033165, "step": 1320 }, { "epoch": 0.95, "grad_norm": 3.625, "learning_rate": 4.6973730240900876e-06, "log_odds_chosen": 0.19130215048789978, "log_odds_ratio": -0.6467028856277466, "logits/chosen": -3.015193223953247, "logits/rejected": -3.001615047454834, "logps/chosen": -0.6201332807540894, "logps/rejected": -0.7134720087051392, "loss": 0.438, "nll_loss": 0.4063757359981537, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.031006669625639915, "rewards/margins": 0.004666934255510569, "rewards/rejected": -0.0356735996901989, "step": 1330 }, { "epoch": 0.96, "grad_norm": 3.671875, "learning_rate": 4.695301415158426e-06, "log_odds_chosen": 0.07511954009532928, "log_odds_ratio": -0.7167226672172546, "logits/chosen": -3.0027642250061035, "logits/rejected": -3.005274534225464, "logps/chosen": -0.6853114366531372, "logps/rejected": -0.7025988698005676, "loss": 0.4573, "nll_loss": 0.43577417731285095, "rewards/accuracies": 0.53125, "rewards/chosen": -0.03426557406783104, "rewards/margins": 0.0008643665350973606, "rewards/rejected": -0.035129938274621964, "step": 1340 }, { "epoch": 0.96, "grad_norm": 3.140625, "learning_rate": 4.693232544639321e-06, "log_odds_chosen": 0.08261342346668243, "log_odds_ratio": -0.6981923580169678, "logits/chosen": -3.0221314430236816, "logits/rejected": -3.0046355724334717, "logps/chosen": -0.6420024633407593, "logps/rejected": -0.6855612397193909, "loss": 0.4204, "nll_loss": 0.3711979389190674, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.032100118696689606, "rewards/margins": 0.0021779430098831654, "rewards/rejected": -0.034278061240911484, "step": 1350 }, { "epoch": 0.97, "grad_norm": 4.1875, "learning_rate": 4.691166406505011e-06, "log_odds_chosen": 0.19216451048851013, "log_odds_ratio": -0.6696075797080994, "logits/chosen": -2.974529266357422, "logits/rejected": -2.972568988800049, "logps/chosen": -0.6075069904327393, "logps/rejected": -0.7166960835456848, "loss": 0.448, "nll_loss": 0.41793543100357056, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.030375352129340172, "rewards/margins": 0.005459453910589218, "rewards/rejected": -0.03583480417728424, "step": 1360 }, { "epoch": 0.98, "grad_norm": 4.0625, "learning_rate": 4.689102994746289e-06, "log_odds_chosen": 0.0898146703839302, "log_odds_ratio": -0.6897388696670532, "logits/chosen": -2.9398107528686523, "logits/rejected": -2.9382925033569336, "logps/chosen": -0.6259759664535522, "logps/rejected": -0.6724158525466919, "loss": 0.4825, "nll_loss": 0.43117284774780273, "rewards/accuracies": 0.5625, "rewards/chosen": -0.03129879757761955, "rewards/margins": 0.002321994863450527, "rewards/rejected": -0.033620793372392654, "step": 1370 }, { "epoch": 0.99, "grad_norm": 3.734375, "learning_rate": 4.687042303372439e-06, "log_odds_chosen": 0.08219285309314728, "log_odds_ratio": -0.7135015726089478, "logits/chosen": -2.9899113178253174, "logits/rejected": -2.9920835494995117, "logps/chosen": -0.6560535430908203, "logps/rejected": -0.6911671757698059, "loss": 0.4251, "nll_loss": 0.3988112509250641, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.032802678644657135, "rewards/margins": 0.00175568088889122, "rewards/rejected": -0.03455835580825806, "step": 1380 }, { "epoch": 0.99, "grad_norm": 2.734375, "learning_rate": 4.684984326411154e-06, "log_odds_chosen": 0.08102826029062271, "log_odds_ratio": -0.7022677659988403, "logits/chosen": -2.9960074424743652, "logits/rejected": -2.9929840564727783, "logps/chosen": -0.632602870464325, "logps/rejected": -0.6744332313537598, "loss": 0.4155, "nll_loss": 0.38121408224105835, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03163014352321625, "rewards/margins": 0.002091518370434642, "rewards/rejected": -0.03372166305780411, "step": 1390 }, { "epoch": 1.0, "grad_norm": 3.46875, "learning_rate": 4.68292905790847e-06, "log_odds_chosen": 0.24791984260082245, "log_odds_ratio": -0.6269276142120361, "logits/chosen": -3.0270581245422363, "logits/rejected": -3.022244930267334, "logps/chosen": -0.5875059366226196, "logps/rejected": -0.6980295181274414, "loss": 0.4538, "nll_loss": 0.40735164284706116, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.029375296086072922, "rewards/margins": 0.005526180844753981, "rewards/rejected": -0.03490147739648819, "step": 1400 }, { "epoch": 1.01, "grad_norm": 3.9375, "learning_rate": 4.6808764919286885e-06, "log_odds_chosen": 0.7641569375991821, "log_odds_ratio": -0.4179181158542633, "logits/chosen": -2.998579740524292, "logits/rejected": -2.9980528354644775, "logps/chosen": -0.45704132318496704, "logps/rejected": -0.7995768785476685, "loss": 0.3483, "nll_loss": 0.31959548592567444, "rewards/accuracies": 0.90625, "rewards/chosen": -0.022852066904306412, "rewards/margins": 0.01712678000330925, "rewards/rejected": -0.03997884318232536, "step": 1410 }, { "epoch": 1.01, "grad_norm": 3.265625, "learning_rate": 4.678826622554307e-06, "log_odds_chosen": 0.7109326124191284, "log_odds_ratio": -0.46446171402931213, "logits/chosen": -2.9916954040527344, "logits/rejected": -2.9853482246398926, "logps/chosen": -0.43861061334609985, "logps/rejected": -0.7429765462875366, "loss": 0.3446, "nll_loss": 0.29604512453079224, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.021930530667304993, "rewards/margins": 0.015218299813568592, "rewards/rejected": -0.03714882582426071, "step": 1420 }, { "epoch": 1.02, "grad_norm": 3.703125, "learning_rate": 4.676779443885949e-06, "log_odds_chosen": 0.8248852491378784, "log_odds_ratio": -0.4319123327732086, "logits/chosen": -3.016470432281494, "logits/rejected": -3.0123398303985596, "logps/chosen": -0.46995511651039124, "logps/rejected": -0.8320964574813843, "loss": 0.3222, "nll_loss": 0.30043524503707886, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.02349775657057762, "rewards/margins": 0.01810706965625286, "rewards/rejected": -0.04160482436418533, "step": 1430 }, { "epoch": 1.03, "grad_norm": 3.828125, "learning_rate": 4.674734950042287e-06, "log_odds_chosen": 0.8707025647163391, "log_odds_ratio": -0.4093431830406189, "logits/chosen": -3.0059139728546143, "logits/rejected": -3.007094144821167, "logps/chosen": -0.4496062397956848, "logps/rejected": -0.8141134977340698, "loss": 0.3012, "nll_loss": 0.2952999770641327, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.0224803127348423, "rewards/margins": 0.01822536624968052, "rewards/rejected": -0.04070568084716797, "step": 1440 }, { "epoch": 1.04, "grad_norm": 2.984375, "learning_rate": 4.672693135159978e-06, "log_odds_chosen": 0.75970858335495, "log_odds_ratio": -0.4426960051059723, "logits/chosen": -2.996731996536255, "logits/rejected": -2.9878756999969482, "logps/chosen": -0.47424378991127014, "logps/rejected": -0.8213974237442017, "loss": 0.3287, "nll_loss": 0.316914826631546, "rewards/accuracies": 0.8125, "rewards/chosen": -0.023712188005447388, "rewards/margins": 0.017357680946588516, "rewards/rejected": -0.0410698726773262, "step": 1450 }, { "epoch": 1.04, "grad_norm": 4.78125, "learning_rate": 4.67065399339359e-06, "log_odds_chosen": 0.8751314282417297, "log_odds_ratio": -0.4245772957801819, "logits/chosen": -3.012390613555908, "logits/rejected": -3.0302467346191406, "logps/chosen": -0.43874502182006836, "logps/rejected": -0.8058280944824219, "loss": 0.324, "nll_loss": 0.2915206253528595, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.021937251091003418, "rewards/margins": 0.018354153260588646, "rewards/rejected": -0.04029140621423721, "step": 1460 }, { "epoch": 1.05, "grad_norm": 4.25, "learning_rate": 4.668617518915533e-06, "log_odds_chosen": 0.7831146717071533, "log_odds_ratio": -0.4319920539855957, "logits/chosen": -3.016066074371338, "logits/rejected": -3.020158052444458, "logps/chosen": -0.4539521634578705, "logps/rejected": -0.7879105806350708, "loss": 0.3127, "nll_loss": 0.28853967785835266, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.022697608917951584, "rewards/margins": 0.016697922721505165, "rewards/rejected": -0.0393955297768116, "step": 1470 }, { "epoch": 1.06, "grad_norm": 4.71875, "learning_rate": 4.666583705915985e-06, "log_odds_chosen": 0.8952881097793579, "log_odds_ratio": -0.3987427353858948, "logits/chosen": -2.9969162940979004, "logits/rejected": -2.9946084022521973, "logps/chosen": -0.44170403480529785, "logps/rejected": -0.8406509160995483, "loss": 0.3346, "nll_loss": 0.2882642149925232, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02208520472049713, "rewards/margins": 0.019947348162531853, "rewards/rejected": -0.042032547295093536, "step": 1480 }, { "epoch": 1.06, "grad_norm": 4.15625, "learning_rate": 4.664552548602825e-06, "log_odds_chosen": 0.9522247314453125, "log_odds_ratio": -0.40322345495224, "logits/chosen": -2.999772548675537, "logits/rejected": -3.0032694339752197, "logps/chosen": -0.4039291441440582, "logps/rejected": -0.8009316325187683, "loss": 0.3084, "nll_loss": 0.2881791293621063, "rewards/accuracies": 0.875, "rewards/chosen": -0.02019645646214485, "rewards/margins": 0.019850121811032295, "rewards/rejected": -0.040046580135822296, "step": 1490 }, { "epoch": 1.07, "grad_norm": 4.5, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.8567419052124023, "log_odds_ratio": -0.4125964641571045, "logits/chosen": -3.0338215827941895, "logits/rejected": -3.0163111686706543, "logps/chosen": -0.47505998611450195, "logps/rejected": -0.8815017938613892, "loss": 0.3503, "nll_loss": 0.3534831404685974, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.023753000423312187, "rewards/margins": 0.020322084426879883, "rewards/rejected": -0.04407508298754692, "step": 1500 }, { "epoch": 1.08, "grad_norm": 5.25, "learning_rate": 4.660498177955291e-06, "log_odds_chosen": 0.8828569650650024, "log_odds_ratio": -0.3914152979850769, "logits/chosen": -3.002957820892334, "logits/rejected": -2.983429431915283, "logps/chosen": -0.4480772912502289, "logps/rejected": -0.846545398235321, "loss": 0.3399, "nll_loss": 0.2991197109222412, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.022403866052627563, "rewards/margins": 0.01992340385913849, "rewards/rejected": -0.042327266186475754, "step": 1510 }, { "epoch": 1.09, "grad_norm": 3.546875, "learning_rate": 4.658474953124562e-06, "log_odds_chosen": 0.8172259330749512, "log_odds_ratio": -0.42330318689346313, "logits/chosen": -3.0249977111816406, "logits/rejected": -3.041341543197632, "logps/chosen": -0.44706234335899353, "logps/rejected": -0.8112581372261047, "loss": 0.3294, "nll_loss": 0.3381724953651428, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.022353118285536766, "rewards/margins": 0.01820979081094265, "rewards/rejected": -0.040562909096479416, "step": 1520 }, { "epoch": 1.09, "grad_norm": 4.5625, "learning_rate": 4.656454360987378e-06, "log_odds_chosen": 0.890351414680481, "log_odds_ratio": -0.3964175581932068, "logits/chosen": -3.020611047744751, "logits/rejected": -3.014650821685791, "logps/chosen": -0.4163076877593994, "logps/rejected": -0.7933549284934998, "loss": 0.3247, "nll_loss": 0.30330485105514526, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.02081538364291191, "rewards/margins": 0.018852364271879196, "rewards/rejected": -0.03966774791479111, "step": 1530 }, { "epoch": 1.1, "grad_norm": 4.0, "learning_rate": 4.654436395839094e-06, "log_odds_chosen": 0.9347902536392212, "log_odds_ratio": -0.3876606822013855, "logits/chosen": -2.9748523235321045, "logits/rejected": -2.967477321624756, "logps/chosen": -0.4595278799533844, "logps/rejected": -0.8981553316116333, "loss": 0.325, "nll_loss": 0.318674772977829, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.02297639288008213, "rewards/margins": 0.021931374445557594, "rewards/rejected": -0.044907767325639725, "step": 1540 }, { "epoch": 1.11, "grad_norm": 3.609375, "learning_rate": 4.652421051992354e-06, "log_odds_chosen": 1.0142195224761963, "log_odds_ratio": -0.36705464124679565, "logits/chosen": -3.0127391815185547, "logits/rejected": -3.0099148750305176, "logps/chosen": -0.39149796962738037, "logps/rejected": -0.8059983253479004, "loss": 0.304, "nll_loss": 0.27671000361442566, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.019574899226427078, "rewards/margins": 0.020725015550851822, "rewards/rejected": -0.0402999147772789, "step": 1550 }, { "epoch": 1.11, "grad_norm": 4.15625, "learning_rate": 4.650408323777029e-06, "log_odds_chosen": 0.85090172290802, "log_odds_ratio": -0.4213482737541199, "logits/chosen": -2.993288516998291, "logits/rejected": -2.9915058612823486, "logps/chosen": -0.46174225211143494, "logps/rejected": -0.8299263119697571, "loss": 0.3221, "nll_loss": 0.3079577386379242, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.023087114095687866, "rewards/margins": 0.018409203737974167, "rewards/rejected": -0.04149631783366203, "step": 1560 }, { "epoch": 1.12, "grad_norm": 3.015625, "learning_rate": 4.6483982055401415e-06, "log_odds_chosen": 0.9606901407241821, "log_odds_ratio": -0.40272608399391174, "logits/chosen": -2.9789042472839355, "logits/rejected": -2.9753804206848145, "logps/chosen": -0.43407297134399414, "logps/rejected": -0.8554197549819946, "loss": 0.308, "nll_loss": 0.27577269077301025, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.021703649312257767, "rewards/margins": 0.021067334339022636, "rewards/rejected": -0.04277098551392555, "step": 1570 }, { "epoch": 1.13, "grad_norm": 4.40625, "learning_rate": 4.646390691645805e-06, "log_odds_chosen": 0.9029256701469421, "log_odds_ratio": -0.41600775718688965, "logits/chosen": -2.98231840133667, "logits/rejected": -2.9891891479492188, "logps/chosen": -0.4505884051322937, "logps/rejected": -0.8385009765625, "loss": 0.2977, "nll_loss": 0.2726622223854065, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.022529423236846924, "rewards/margins": 0.019395625218749046, "rewards/rejected": -0.04192505031824112, "step": 1580 }, { "epoch": 1.14, "grad_norm": 4.0625, "learning_rate": 4.644385776475159e-06, "log_odds_chosen": 0.8042148351669312, "log_odds_ratio": -0.43356090784072876, "logits/chosen": -2.993663787841797, "logits/rejected": -2.9955239295959473, "logps/chosen": -0.46376895904541016, "logps/rejected": -0.8224666714668274, "loss": 0.3391, "nll_loss": 0.30400586128234863, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.023188449442386627, "rewards/margins": 0.017934877425432205, "rewards/rejected": -0.04112333059310913, "step": 1590 }, { "epoch": 1.14, "grad_norm": 4.71875, "learning_rate": 4.642383454426297e-06, "log_odds_chosen": 0.9061915278434753, "log_odds_ratio": -0.4048829972743988, "logits/chosen": -3.0117270946502686, "logits/rejected": -3.0108304023742676, "logps/chosen": -0.4536532759666443, "logps/rejected": -0.8338489532470703, "loss": 0.3294, "nll_loss": 0.31496718525886536, "rewards/accuracies": 0.84375, "rewards/chosen": -0.022682661190629005, "rewards/margins": 0.0190097875893116, "rewards/rejected": -0.041692450642585754, "step": 1600 }, { "epoch": 1.15, "grad_norm": 4.03125, "learning_rate": 4.640383719914205e-06, "log_odds_chosen": 0.9622236490249634, "log_odds_ratio": -0.3791292905807495, "logits/chosen": -2.9718332290649414, "logits/rejected": -2.982408046722412, "logps/chosen": -0.45162805914878845, "logps/rejected": -0.884894073009491, "loss": 0.3514, "nll_loss": 0.33110564947128296, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.022581404075026512, "rewards/margins": 0.021663302555680275, "rewards/rejected": -0.04424469918012619, "step": 1610 }, { "epoch": 1.16, "grad_norm": 4.15625, "learning_rate": 4.638386567370694e-06, "log_odds_chosen": 0.9826571345329285, "log_odds_ratio": -0.38472285866737366, "logits/chosen": -3.019824504852295, "logits/rejected": -3.0200533866882324, "logps/chosen": -0.44388723373413086, "logps/rejected": -0.8713322877883911, "loss": 0.3082, "nll_loss": 0.2907964885234833, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.022194361314177513, "rewards/margins": 0.021372247487306595, "rewards/rejected": -0.04356660693883896, "step": 1620 }, { "epoch": 1.16, "grad_norm": 3.90625, "learning_rate": 4.636391991244338e-06, "log_odds_chosen": 0.9497779607772827, "log_odds_ratio": -0.39113032817840576, "logits/chosen": -3.033468723297119, "logits/rejected": -3.0411276817321777, "logps/chosen": -0.42741212248802185, "logps/rejected": -0.8322615623474121, "loss": 0.3013, "nll_loss": 0.27280479669570923, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.021370604634284973, "rewards/margins": 0.02024247497320175, "rewards/rejected": -0.041613079607486725, "step": 1630 }, { "epoch": 1.17, "grad_norm": 4.03125, "learning_rate": 4.634399986000405e-06, "log_odds_chosen": 0.9831953048706055, "log_odds_ratio": -0.3696615993976593, "logits/chosen": -3.04827880859375, "logits/rejected": -3.0377655029296875, "logps/chosen": -0.42226511240005493, "logps/rejected": -0.8507382273674011, "loss": 0.3175, "nll_loss": 0.32773536443710327, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.021113255992531776, "rewards/margins": 0.02142365835607052, "rewards/rejected": -0.042536910623311996, "step": 1640 }, { "epoch": 1.18, "grad_norm": 4.15625, "learning_rate": 4.632410546120794e-06, "log_odds_chosen": 0.7976962327957153, "log_odds_ratio": -0.4500705599784851, "logits/chosen": -2.9811854362487793, "logits/rejected": -2.9926586151123047, "logps/chosen": -0.4913281798362732, "logps/rejected": -0.8470379710197449, "loss": 0.3568, "nll_loss": 0.34398216009140015, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.0245664082467556, "rewards/margins": 0.017785491421818733, "rewards/rejected": -0.04235190153121948, "step": 1650 }, { "epoch": 1.19, "grad_norm": 4.9375, "learning_rate": 4.6304236661039765e-06, "log_odds_chosen": 0.9772316813468933, "log_odds_ratio": -0.38792848587036133, "logits/chosen": -3.0127968788146973, "logits/rejected": -3.0002083778381348, "logps/chosen": -0.4268715977668762, "logps/rejected": -0.8594573736190796, "loss": 0.3105, "nll_loss": 0.2772853374481201, "rewards/accuracies": 0.875, "rewards/chosen": -0.02134357951581478, "rewards/margins": 0.021629294380545616, "rewards/rejected": -0.0429728738963604, "step": 1660 }, { "epoch": 1.19, "grad_norm": 4.15625, "learning_rate": 4.628439340464919e-06, "log_odds_chosen": 0.8175104260444641, "log_odds_ratio": -0.43452611565589905, "logits/chosen": -2.9928994178771973, "logits/rejected": -2.9971837997436523, "logps/chosen": -0.4584590792655945, "logps/rejected": -0.8173287510871887, "loss": 0.3243, "nll_loss": 0.3162625730037689, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.022922953590750694, "rewards/margins": 0.017943482846021652, "rewards/rejected": -0.040866438299417496, "step": 1670 }, { "epoch": 1.2, "grad_norm": 4.25, "learning_rate": 4.626457563735034e-06, "log_odds_chosen": 0.9215971827507019, "log_odds_ratio": -0.3820372223854065, "logits/chosen": -2.988609790802002, "logits/rejected": -3.001538038253784, "logps/chosen": -0.44239306449890137, "logps/rejected": -0.8532567024230957, "loss": 0.3114, "nll_loss": 0.31571969389915466, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.02211965247988701, "rewards/margins": 0.020543182268738747, "rewards/rejected": -0.042662836611270905, "step": 1680 }, { "epoch": 1.21, "grad_norm": 4.3125, "learning_rate": 4.624478330462108e-06, "log_odds_chosen": 0.8879310488700867, "log_odds_ratio": -0.407911479473114, "logits/chosen": -2.994410991668701, "logits/rejected": -2.9959537982940674, "logps/chosen": -0.4827430248260498, "logps/rejected": -0.8839041590690613, "loss": 0.3346, "nll_loss": 0.3236854672431946, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.02413715049624443, "rewards/margins": 0.020058058202266693, "rewards/rejected": -0.044195204973220825, "step": 1690 }, { "epoch": 1.21, "grad_norm": 3.859375, "learning_rate": 4.622501635210244e-06, "log_odds_chosen": 0.8714966773986816, "log_odds_ratio": -0.40486255288124084, "logits/chosen": -2.9864673614501953, "logits/rejected": -2.977064847946167, "logps/chosen": -0.45541587471961975, "logps/rejected": -0.8393024206161499, "loss": 0.3436, "nll_loss": 0.3302624523639679, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.022770795971155167, "rewards/margins": 0.019194331020116806, "rewards/rejected": -0.041965123265981674, "step": 1700 }, { "epoch": 1.22, "grad_norm": 4.625, "learning_rate": 4.62052747255979e-06, "log_odds_chosen": 0.9106301069259644, "log_odds_ratio": -0.41219109296798706, "logits/chosen": -3.0064098834991455, "logits/rejected": -3.0043795108795166, "logps/chosen": -0.44815391302108765, "logps/rejected": -0.8492811918258667, "loss": 0.3459, "nll_loss": 0.31014305353164673, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.022407695651054382, "rewards/margins": 0.020056361332535744, "rewards/rejected": -0.04246405512094498, "step": 1710 }, { "epoch": 1.23, "grad_norm": 4.09375, "learning_rate": 4.61855583710729e-06, "log_odds_chosen": 0.8164472579956055, "log_odds_ratio": -0.43010982871055603, "logits/chosen": -2.9966959953308105, "logits/rejected": -3.0127968788146973, "logps/chosen": -0.4636779725551605, "logps/rejected": -0.812666118144989, "loss": 0.3345, "nll_loss": 0.3050321936607361, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.023183895274996758, "rewards/margins": 0.017449412494897842, "rewards/rejected": -0.04063330963253975, "step": 1720 }, { "epoch": 1.24, "grad_norm": 3.859375, "learning_rate": 4.616586723465408e-06, "log_odds_chosen": 0.9442536234855652, "log_odds_ratio": -0.3859529495239258, "logits/chosen": -2.9846928119659424, "logits/rejected": -2.9883875846862793, "logps/chosen": -0.4129181504249573, "logps/rejected": -0.811172366142273, "loss": 0.3061, "nll_loss": 0.27430829405784607, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.020645907148718834, "rewards/margins": 0.019912714138627052, "rewards/rejected": -0.04055861756205559, "step": 1730 }, { "epoch": 1.24, "grad_norm": 3.515625, "learning_rate": 4.614620126262875e-06, "log_odds_chosen": 0.9240094423294067, "log_odds_ratio": -0.39624181389808655, "logits/chosen": -3.013193130493164, "logits/rejected": -3.034301996231079, "logps/chosen": -0.4600726068019867, "logps/rejected": -0.8644978404045105, "loss": 0.3183, "nll_loss": 0.3232848048210144, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.023003632202744484, "rewards/margins": 0.02022126503288746, "rewards/rejected": -0.043224893510341644, "step": 1740 }, { "epoch": 1.25, "grad_norm": 3.65625, "learning_rate": 4.6126560401444256e-06, "log_odds_chosen": 0.9176782369613647, "log_odds_ratio": -0.3987888693809509, "logits/chosen": -3.0033655166625977, "logits/rejected": -3.0092477798461914, "logps/chosen": -0.40962448716163635, "logps/rejected": -0.8342093229293823, "loss": 0.3201, "nll_loss": 0.30792126059532166, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.020481223240494728, "rewards/margins": 0.02122923731803894, "rewards/rejected": -0.04171045869588852, "step": 1750 }, { "epoch": 1.26, "grad_norm": 3.578125, "learning_rate": 4.610694459770736e-06, "log_odds_chosen": 0.8412426114082336, "log_odds_ratio": -0.43404024839401245, "logits/chosen": -3.0195138454437256, "logits/rejected": -3.019099235534668, "logps/chosen": -0.48159152269363403, "logps/rejected": -0.8417989611625671, "loss": 0.35, "nll_loss": 0.3374321758747101, "rewards/accuracies": 0.84375, "rewards/chosen": -0.0240795761346817, "rewards/margins": 0.018010370433330536, "rewards/rejected": -0.04208994656801224, "step": 1760 }, { "epoch": 1.26, "grad_norm": 4.625, "learning_rate": 4.6087353798183585e-06, "log_odds_chosen": 0.892247200012207, "log_odds_ratio": -0.41821059584617615, "logits/chosen": -3.0529439449310303, "logits/rejected": -3.054018497467041, "logps/chosen": -0.44770580530166626, "logps/rejected": -0.8256362080574036, "loss": 0.3151, "nll_loss": 0.31823089718818665, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.022385289892554283, "rewards/margins": 0.018896516412496567, "rewards/rejected": -0.0412818118929863, "step": 1770 }, { "epoch": 1.27, "grad_norm": 4.3125, "learning_rate": 4.606778794979673e-06, "log_odds_chosen": 0.9465528726577759, "log_odds_ratio": -0.397424578666687, "logits/chosen": -2.958522319793701, "logits/rejected": -2.9766972064971924, "logps/chosen": -0.43618613481521606, "logps/rejected": -0.8612610697746277, "loss": 0.3436, "nll_loss": 0.3344579041004181, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.021809305995702744, "rewards/margins": 0.02125374786555767, "rewards/rejected": -0.043063051998615265, "step": 1780 }, { "epoch": 1.28, "grad_norm": 3.515625, "learning_rate": 4.604824699962814e-06, "log_odds_chosen": 0.9071775674819946, "log_odds_ratio": -0.4148840308189392, "logits/chosen": -2.9953811168670654, "logits/rejected": -2.993844509124756, "logps/chosen": -0.44896164536476135, "logps/rejected": -0.8559948205947876, "loss": 0.3306, "nll_loss": 0.29880887269973755, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.022448081523180008, "rewards/margins": 0.020351659506559372, "rewards/rejected": -0.04279974102973938, "step": 1790 }, { "epoch": 1.29, "grad_norm": 4.09375, "learning_rate": 4.602873089491618e-06, "log_odds_chosen": 0.9761806726455688, "log_odds_ratio": -0.3809584081172943, "logits/chosen": -3.014904499053955, "logits/rejected": -3.0227017402648926, "logps/chosen": -0.41416603326797485, "logps/rejected": -0.8325015902519226, "loss": 0.2966, "nll_loss": 0.2885209918022156, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.020708302035927773, "rewards/margins": 0.020916782319545746, "rewards/rejected": -0.04162507876753807, "step": 1800 }, { "epoch": 1.29, "grad_norm": 3.46875, "learning_rate": 4.600923958305558e-06, "log_odds_chosen": 0.8553352355957031, "log_odds_ratio": -0.41224122047424316, "logits/chosen": -3.01104474067688, "logits/rejected": -3.0122313499450684, "logps/chosen": -0.4588088393211365, "logps/rejected": -0.8460550308227539, "loss": 0.324, "nll_loss": 0.3278103470802307, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.022940443828701973, "rewards/margins": 0.0193623099476099, "rewards/rejected": -0.042302750051021576, "step": 1810 }, { "epoch": 1.3, "grad_norm": 4.125, "learning_rate": 4.59897730115969e-06, "log_odds_chosen": 1.001803994178772, "log_odds_ratio": -0.38690775632858276, "logits/chosen": -3.019709825515747, "logits/rejected": -3.0385499000549316, "logps/chosen": -0.44529399275779724, "logps/rejected": -0.9027696847915649, "loss": 0.3248, "nll_loss": 0.3024645447731018, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.022264700382947922, "rewards/margins": 0.022873779758810997, "rewards/rejected": -0.04513847827911377, "step": 1820 }, { "epoch": 1.31, "grad_norm": 3.765625, "learning_rate": 4.597033112824591e-06, "log_odds_chosen": 0.8532528877258301, "log_odds_ratio": -0.42210277915000916, "logits/chosen": -2.9994237422943115, "logits/rejected": -3.004047155380249, "logps/chosen": -0.4748069643974304, "logps/rejected": -0.8539941906929016, "loss": 0.3307, "nll_loss": 0.3168404698371887, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.02374034747481346, "rewards/margins": 0.01895935833454132, "rewards/rejected": -0.04269970953464508, "step": 1830 }, { "epoch": 1.31, "grad_norm": 4.28125, "learning_rate": 4.595091388086298e-06, "log_odds_chosen": 0.813404381275177, "log_odds_ratio": -0.4155808389186859, "logits/chosen": -3.0344955921173096, "logits/rejected": -3.0332906246185303, "logps/chosen": -0.46515345573425293, "logps/rejected": -0.8212959170341492, "loss": 0.3219, "nll_loss": 0.2850883901119232, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.023257672786712646, "rewards/margins": 0.017807122319936752, "rewards/rejected": -0.0410647951066494, "step": 1840 }, { "epoch": 1.32, "grad_norm": 4.65625, "learning_rate": 4.593152121746254e-06, "log_odds_chosen": 0.82573002576828, "log_odds_ratio": -0.42741161584854126, "logits/chosen": -3.0195837020874023, "logits/rejected": -3.0251100063323975, "logps/chosen": -0.4514719843864441, "logps/rejected": -0.8051018714904785, "loss": 0.342, "nll_loss": 0.29345589876174927, "rewards/accuracies": 0.84375, "rewards/chosen": -0.022573599591851234, "rewards/margins": 0.01768149621784687, "rewards/rejected": -0.040255095809698105, "step": 1850 }, { "epoch": 1.33, "grad_norm": 4.03125, "learning_rate": 4.591215308621242e-06, "log_odds_chosen": 0.8715616464614868, "log_odds_ratio": -0.40363240242004395, "logits/chosen": -3.00486421585083, "logits/rejected": -2.9942901134490967, "logps/chosen": -0.44383150339126587, "logps/rejected": -0.8399602770805359, "loss": 0.3232, "nll_loss": 0.2906932234764099, "rewards/accuracies": 0.90625, "rewards/chosen": -0.022191572934389114, "rewards/margins": 0.01980643905699253, "rewards/rejected": -0.041998013854026794, "step": 1860 }, { "epoch": 1.34, "grad_norm": 3.90625, "learning_rate": 4.5892809435433355e-06, "log_odds_chosen": 0.9153439402580261, "log_odds_ratio": -0.4107298254966736, "logits/chosen": -3.0188369750976562, "logits/rejected": -3.018990993499756, "logps/chosen": -0.4508902430534363, "logps/rejected": -0.8545784950256348, "loss": 0.3261, "nll_loss": 0.327529639005661, "rewards/accuracies": 0.875, "rewards/chosen": -0.022544514387845993, "rewards/margins": 0.020184412598609924, "rewards/rejected": -0.042728934437036514, "step": 1870 }, { "epoch": 1.34, "grad_norm": 3.515625, "learning_rate": 4.587349021359836e-06, "log_odds_chosen": 0.8184522390365601, "log_odds_ratio": -0.42650118470191956, "logits/chosen": -3.0405290126800537, "logits/rejected": -3.044990062713623, "logps/chosen": -0.4635538160800934, "logps/rejected": -0.8331626653671265, "loss": 0.3345, "nll_loss": 0.3048376739025116, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.023177694529294968, "rewards/margins": 0.018480444326996803, "rewards/rejected": -0.04165813699364662, "step": 1880 }, { "epoch": 1.35, "grad_norm": 3.53125, "learning_rate": 4.585419536933215e-06, "log_odds_chosen": 0.9167385101318359, "log_odds_ratio": -0.3995562791824341, "logits/chosen": -3.0831446647644043, "logits/rejected": -3.082552194595337, "logps/chosen": -0.425593763589859, "logps/rejected": -0.8321810960769653, "loss": 0.3415, "nll_loss": 0.3395434021949768, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.02127968892455101, "rewards/margins": 0.020329371094703674, "rewards/rejected": -0.041609056293964386, "step": 1890 }, { "epoch": 1.36, "grad_norm": 3.296875, "learning_rate": 4.583492485141056e-06, "log_odds_chosen": 0.9593385457992554, "log_odds_ratio": -0.397580087184906, "logits/chosen": -3.014707326889038, "logits/rejected": -3.0367960929870605, "logps/chosen": -0.43250972032546997, "logps/rejected": -0.8235841989517212, "loss": 0.2924, "nll_loss": 0.27714481949806213, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02162548527121544, "rewards/margins": 0.01955372467637062, "rewards/rejected": -0.04117920622229576, "step": 1900 }, { "epoch": 1.36, "grad_norm": 4.1875, "learning_rate": 4.581567860876004e-06, "log_odds_chosen": 0.8826379776000977, "log_odds_ratio": -0.40401822328567505, "logits/chosen": -3.0603883266448975, "logits/rejected": -3.0550103187561035, "logps/chosen": -0.43654412031173706, "logps/rejected": -0.8326064348220825, "loss": 0.3426, "nll_loss": 0.3314969539642334, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.02182720974087715, "rewards/margins": 0.019803116098046303, "rewards/rejected": -0.041630327701568604, "step": 1910 }, { "epoch": 1.37, "grad_norm": 4.34375, "learning_rate": 4.579645659045699e-06, "log_odds_chosen": 0.9182140231132507, "log_odds_ratio": -0.3977579176425934, "logits/chosen": -3.0444254875183105, "logits/rejected": -3.044851541519165, "logps/chosen": -0.4135459363460541, "logps/rejected": -0.8132508397102356, "loss": 0.3319, "nll_loss": 0.3249804675579071, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.020677298307418823, "rewards/margins": 0.019985243678092957, "rewards/rejected": -0.04066254198551178, "step": 1920 }, { "epoch": 1.38, "grad_norm": 2.875, "learning_rate": 4.577725874572724e-06, "log_odds_chosen": 0.8466962575912476, "log_odds_ratio": -0.42698168754577637, "logits/chosen": -2.9972777366638184, "logits/rejected": -3.010082244873047, "logps/chosen": -0.4568180441856384, "logps/rejected": -0.8199788928031921, "loss": 0.3272, "nll_loss": 0.2934577167034149, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.02284090220928192, "rewards/margins": 0.018158044666051865, "rewards/rejected": -0.040998946875333786, "step": 1930 }, { "epoch": 1.39, "grad_norm": 3.96875, "learning_rate": 4.575808502394551e-06, "log_odds_chosen": 0.8241879343986511, "log_odds_ratio": -0.4375346601009369, "logits/chosen": -3.028289318084717, "logits/rejected": -3.0276591777801514, "logps/chosen": -0.45763087272644043, "logps/rejected": -0.831399142742157, "loss": 0.3216, "nll_loss": 0.3027299642562866, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02288154885172844, "rewards/margins": 0.018688412383198738, "rewards/rejected": -0.04156995564699173, "step": 1940 }, { "epoch": 1.39, "grad_norm": 3.84375, "learning_rate": 4.573893537463482e-06, "log_odds_chosen": 0.7692528963088989, "log_odds_ratio": -0.4386647641658783, "logits/chosen": -3.02350115776062, "logits/rejected": -3.0304954051971436, "logps/chosen": -0.4589407444000244, "logps/rejected": -0.7872682809829712, "loss": 0.3263, "nll_loss": 0.30421844124794006, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.0229470394551754, "rewards/margins": 0.01641637459397316, "rewards/rejected": -0.03936341404914856, "step": 1950 }, { "epoch": 1.4, "grad_norm": 4.65625, "learning_rate": 4.5719809747465946e-06, "log_odds_chosen": 0.8744810223579407, "log_odds_ratio": -0.39987024664878845, "logits/chosen": -3.0022664070129395, "logits/rejected": -3.0017523765563965, "logps/chosen": -0.43699949979782104, "logps/rejected": -0.8227006793022156, "loss": 0.3244, "nll_loss": 0.29690784215927124, "rewards/accuracies": 0.875, "rewards/chosen": -0.0218499768525362, "rewards/margins": 0.019285056740045547, "rewards/rejected": -0.0411350317299366, "step": 1960 }, { "epoch": 1.41, "grad_norm": 4.53125, "learning_rate": 4.570070809225682e-06, "log_odds_chosen": 0.8736169934272766, "log_odds_ratio": -0.4059488773345947, "logits/chosen": -3.041970729827881, "logits/rejected": -3.0416646003723145, "logps/chosen": -0.4508230686187744, "logps/rejected": -0.8296302556991577, "loss": 0.3286, "nll_loss": 0.29727891087532043, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.02254115603864193, "rewards/margins": 0.018940357491374016, "rewards/rejected": -0.041481517255306244, "step": 1970 }, { "epoch": 1.41, "grad_norm": 4.8125, "learning_rate": 4.568163035897205e-06, "log_odds_chosen": 0.8864185214042664, "log_odds_ratio": -0.3975692093372345, "logits/chosen": -3.04966139793396, "logits/rejected": -3.070582866668701, "logps/chosen": -0.4513399600982666, "logps/rejected": -0.8385303616523743, "loss": 0.3383, "nll_loss": 0.3385816812515259, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.02256700024008751, "rewards/margins": 0.019359519705176353, "rewards/rejected": -0.04192651808261871, "step": 1980 }, { "epoch": 1.42, "grad_norm": 4.84375, "learning_rate": 4.566257649772231e-06, "log_odds_chosen": 0.8751873970031738, "log_odds_ratio": -0.4186960756778717, "logits/chosen": -3.035048484802246, "logits/rejected": -3.049337387084961, "logps/chosen": -0.43836671113967896, "logps/rejected": -0.8029166460037231, "loss": 0.3377, "nll_loss": 0.3198835253715515, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.021918337792158127, "rewards/margins": 0.01822749339044094, "rewards/rejected": -0.04014582931995392, "step": 1990 }, { "epoch": 1.43, "grad_norm": 3.359375, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.9120534062385559, "log_odds_ratio": -0.3925711512565613, "logits/chosen": -2.9674854278564453, "logits/rejected": -2.995685338973999, "logps/chosen": -0.4336552023887634, "logps/rejected": -0.8380581140518188, "loss": 0.3144, "nll_loss": 0.2831321954727173, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.02168275974690914, "rewards/margins": 0.02022015117108822, "rewards/rejected": -0.04190291464328766, "step": 2000 }, { "epoch": 1.44, "grad_norm": 4.5, "learning_rate": 4.562454019249786e-06, "log_odds_chosen": 0.7647709846496582, "log_odds_ratio": -0.441622793674469, "logits/chosen": -3.032284736633301, "logits/rejected": -3.030167579650879, "logps/chosen": -0.49349290132522583, "logps/rejected": -0.8284667730331421, "loss": 0.3677, "nll_loss": 0.30784302949905396, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.02467464469373226, "rewards/margins": 0.0167486984282732, "rewards/rejected": -0.04142334312200546, "step": 2010 }, { "epoch": 1.44, "grad_norm": 4.21875, "learning_rate": 4.560555764947004e-06, "log_odds_chosen": 0.8444417715072632, "log_odds_ratio": -0.42444032430648804, "logits/chosen": -3.0294785499572754, "logits/rejected": -3.0390660762786865, "logps/chosen": -0.458143413066864, "logps/rejected": -0.8389005661010742, "loss": 0.3322, "nll_loss": 0.34530940651893616, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02290717326104641, "rewards/margins": 0.01903786137700081, "rewards/rejected": -0.04194503277540207, "step": 2020 }, { "epoch": 1.45, "grad_norm": 3.796875, "learning_rate": 4.5586598780369954e-06, "log_odds_chosen": 0.8334053754806519, "log_odds_ratio": -0.42451953887939453, "logits/chosen": -3.0317139625549316, "logits/rejected": -3.0364439487457275, "logps/chosen": -0.45194607973098755, "logps/rejected": -0.8194429278373718, "loss": 0.3264, "nll_loss": 0.3324364423751831, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.022597305476665497, "rewards/margins": 0.018374841660261154, "rewards/rejected": -0.04097214341163635, "step": 2030 }, { "epoch": 1.46, "grad_norm": 4.03125, "learning_rate": 4.556766353603058e-06, "log_odds_chosen": 0.8343974947929382, "log_odds_ratio": -0.41841697692871094, "logits/chosen": -2.980856418609619, "logits/rejected": -2.9946112632751465, "logps/chosen": -0.44832831621170044, "logps/rejected": -0.8034370541572571, "loss": 0.3422, "nll_loss": 0.29252445697784424, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.022416414692997932, "rewards/margins": 0.01775544136762619, "rewards/rejected": -0.04017185419797897, "step": 2040 }, { "epoch": 1.46, "grad_norm": 4.5625, "learning_rate": 4.55487518674277e-06, "log_odds_chosen": 0.9057924151420593, "log_odds_ratio": -0.39900219440460205, "logits/chosen": -3.0324387550354004, "logits/rejected": -3.041830539703369, "logps/chosen": -0.4368770122528076, "logps/rejected": -0.8307033777236938, "loss": 0.3486, "nll_loss": 0.30956578254699707, "rewards/accuracies": 0.90625, "rewards/chosen": -0.02184385061264038, "rewards/margins": 0.01969131827354431, "rewards/rejected": -0.04153517261147499, "step": 2050 }, { "epoch": 1.47, "grad_norm": 3.578125, "learning_rate": 4.552986372567943e-06, "log_odds_chosen": 0.8615995645523071, "log_odds_ratio": -0.4165472388267517, "logits/chosen": -3.02634859085083, "logits/rejected": -3.0268537998199463, "logps/chosen": -0.47078457474708557, "logps/rejected": -0.8573616743087769, "loss": 0.3119, "nll_loss": 0.31275492906570435, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.0235392265021801, "rewards/margins": 0.019328858703374863, "rewards/rejected": -0.04286808893084526, "step": 2060 }, { "epoch": 1.48, "grad_norm": 3.734375, "learning_rate": 4.5510999062045625e-06, "log_odds_chosen": 0.9326503872871399, "log_odds_ratio": -0.4075629711151123, "logits/chosen": -2.9575231075286865, "logits/rejected": -2.958918333053589, "logps/chosen": -0.459758996963501, "logps/rejected": -0.8816938400268555, "loss": 0.321, "nll_loss": 0.31160447001457214, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.02298795059323311, "rewards/margins": 0.021096741780638695, "rewards/rejected": -0.04408469423651695, "step": 2070 }, { "epoch": 1.49, "grad_norm": 3.828125, "learning_rate": 4.5492157827927435e-06, "log_odds_chosen": 0.8180831670761108, "log_odds_ratio": -0.4271041452884674, "logits/chosen": -3.0648279190063477, "logits/rejected": -3.0439138412475586, "logps/chosen": -0.46030980348587036, "logps/rejected": -0.8149603605270386, "loss": 0.3088, "nll_loss": 0.299540251493454, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.023015489801764488, "rewards/margins": 0.0177325326949358, "rewards/rejected": -0.04074802249670029, "step": 2080 }, { "epoch": 1.49, "grad_norm": 4.0625, "learning_rate": 4.54733399748667e-06, "log_odds_chosen": 0.9109243154525757, "log_odds_ratio": -0.416052907705307, "logits/chosen": -3.0241446495056152, "logits/rejected": -3.0273067951202393, "logps/chosen": -0.4576999247074127, "logps/rejected": -0.8654853105545044, "loss": 0.3183, "nll_loss": 0.2810487747192383, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.022884996607899666, "rewards/margins": 0.020389268174767494, "rewards/rejected": -0.04327426105737686, "step": 2090 }, { "epoch": 1.5, "grad_norm": 4.46875, "learning_rate": 4.5454545454545455e-06, "log_odds_chosen": 0.8633116483688354, "log_odds_ratio": -0.42237916588783264, "logits/chosen": -3.0222301483154297, "logits/rejected": -3.0321128368377686, "logps/chosen": -0.4666077494621277, "logps/rejected": -0.8641067743301392, "loss": 0.3218, "nll_loss": 0.302145779132843, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.023330386728048325, "rewards/margins": 0.019874950870871544, "rewards/rejected": -0.043205343186855316, "step": 2100 }, { "epoch": 1.51, "grad_norm": 4.3125, "learning_rate": 4.543577421878542e-06, "log_odds_chosen": 0.9252461194992065, "log_odds_ratio": -0.3920586407184601, "logits/chosen": -3.072540760040283, "logits/rejected": -3.0683538913726807, "logps/chosen": -0.43902939558029175, "logps/rejected": -0.8597552180290222, "loss": 0.3323, "nll_loss": 0.30206722021102905, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.021951472386717796, "rewards/margins": 0.021036287769675255, "rewards/rejected": -0.04298776015639305, "step": 2110 }, { "epoch": 1.51, "grad_norm": 4.03125, "learning_rate": 4.541702621954749e-06, "log_odds_chosen": 0.7774366736412048, "log_odds_ratio": -0.4470534920692444, "logits/chosen": -3.0575690269470215, "logits/rejected": -3.0704360008239746, "logps/chosen": -0.4687027037143707, "logps/rejected": -0.811191201210022, "loss": 0.3324, "nll_loss": 0.3171001970767975, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.023435134440660477, "rewards/margins": 0.017124421894550323, "rewards/rejected": -0.0405595563352108, "step": 2120 }, { "epoch": 1.52, "grad_norm": 4.09375, "learning_rate": 4.539830140893113e-06, "log_odds_chosen": 0.9549965858459473, "log_odds_ratio": -0.40796393156051636, "logits/chosen": -3.0004522800445557, "logits/rejected": -3.018793821334839, "logps/chosen": -0.4332790970802307, "logps/rejected": -0.8418404459953308, "loss": 0.3312, "nll_loss": 0.31611257791519165, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.021663954481482506, "rewards/margins": 0.020428068935871124, "rewards/rejected": -0.04209202527999878, "step": 2130 }, { "epoch": 1.53, "grad_norm": 3.890625, "learning_rate": 4.537959973917404e-06, "log_odds_chosen": 0.8113375902175903, "log_odds_ratio": -0.4234372079372406, "logits/chosen": -3.0147581100463867, "logits/rejected": -3.016428232192993, "logps/chosen": -0.4412393569946289, "logps/rejected": -0.7943958044052124, "loss": 0.3259, "nll_loss": 0.30531343817710876, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.022061970084905624, "rewards/margins": 0.017657820135354996, "rewards/rejected": -0.03971978649497032, "step": 2140 }, { "epoch": 1.54, "grad_norm": 4.375, "learning_rate": 4.536092116265145e-06, "log_odds_chosen": 0.8169578313827515, "log_odds_ratio": -0.43316078186035156, "logits/chosen": -3.032147169113159, "logits/rejected": -3.0362045764923096, "logps/chosen": -0.4521457552909851, "logps/rejected": -0.8128098249435425, "loss": 0.3235, "nll_loss": 0.3006267547607422, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.022607287392020226, "rewards/margins": 0.018033206462860107, "rewards/rejected": -0.04064049571752548, "step": 2150 }, { "epoch": 1.54, "grad_norm": 4.34375, "learning_rate": 4.534226563187573e-06, "log_odds_chosen": 0.8375827074050903, "log_odds_ratio": -0.42143577337265015, "logits/chosen": -3.0169320106506348, "logits/rejected": -3.025740623474121, "logps/chosen": -0.47437024116516113, "logps/rejected": -0.8409256935119629, "loss": 0.331, "nll_loss": 0.34703296422958374, "rewards/accuracies": 0.84375, "rewards/chosen": -0.023718515411019325, "rewards/margins": 0.018327776342630386, "rewards/rejected": -0.04204628989100456, "step": 2160 }, { "epoch": 1.55, "grad_norm": 4.75, "learning_rate": 4.532363309949585e-06, "log_odds_chosen": 0.7523244619369507, "log_odds_ratio": -0.4391745626926422, "logits/chosen": -2.9868111610412598, "logits/rejected": -2.997650146484375, "logps/chosen": -0.4674256443977356, "logps/rejected": -0.7955536246299744, "loss": 0.3356, "nll_loss": 0.31363824009895325, "rewards/accuracies": 0.84375, "rewards/chosen": -0.02337128296494484, "rewards/margins": 0.01640639826655388, "rewards/rejected": -0.03977768495678902, "step": 2170 }, { "epoch": 1.56, "grad_norm": 4.03125, "learning_rate": 4.530502351829687e-06, "log_odds_chosen": 0.8416998982429504, "log_odds_ratio": -0.4306592047214508, "logits/chosen": -2.9833836555480957, "logits/rejected": -2.9960689544677734, "logps/chosen": -0.44344082474708557, "logps/rejected": -0.8194982409477234, "loss": 0.3246, "nll_loss": 0.27749571204185486, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.022172043099999428, "rewards/margins": 0.018802868202328682, "rewards/rejected": -0.04097491130232811, "step": 2180 }, { "epoch": 1.56, "grad_norm": 4.03125, "learning_rate": 4.528643684119943e-06, "log_odds_chosen": 0.8591219186782837, "log_odds_ratio": -0.39550989866256714, "logits/chosen": -2.9780657291412354, "logits/rejected": -2.995789051055908, "logps/chosen": -0.4373961389064789, "logps/rejected": -0.8135690689086914, "loss": 0.3261, "nll_loss": 0.307101309299469, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.021869808435440063, "rewards/margins": 0.018808647990226746, "rewards/rejected": -0.04067845270037651, "step": 2190 }, { "epoch": 1.57, "grad_norm": 4.46875, "learning_rate": 4.526787302125927e-06, "log_odds_chosen": 0.8592750430107117, "log_odds_ratio": -0.4122786521911621, "logits/chosen": -2.9955615997314453, "logits/rejected": -3.0038466453552246, "logps/chosen": -0.4712749123573303, "logps/rejected": -0.8675462007522583, "loss": 0.3409, "nll_loss": 0.3323027491569519, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.023563742637634277, "rewards/margins": 0.01981356367468834, "rewards/rejected": -0.043377310037612915, "step": 2200 }, { "epoch": 1.58, "grad_norm": 4.4375, "learning_rate": 4.524933201166673e-06, "log_odds_chosen": 0.7079171538352966, "log_odds_ratio": -0.4696483612060547, "logits/chosen": -2.9790871143341064, "logits/rejected": -2.9989562034606934, "logps/chosen": -0.46632423996925354, "logps/rejected": -0.7727037668228149, "loss": 0.3546, "nll_loss": 0.3396429121494293, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.023316213861107826, "rewards/margins": 0.01531897485256195, "rewards/rejected": -0.03863518685102463, "step": 2210 }, { "epoch": 1.59, "grad_norm": 4.84375, "learning_rate": 4.523081376574626e-06, "log_odds_chosen": 0.8301213979721069, "log_odds_ratio": -0.4185447692871094, "logits/chosen": -2.946075916290283, "logits/rejected": -2.948723316192627, "logps/chosen": -0.4505835175514221, "logps/rejected": -0.8109132647514343, "loss": 0.3566, "nll_loss": 0.34889137744903564, "rewards/accuracies": 0.875, "rewards/chosen": -0.022529179230332375, "rewards/margins": 0.018016483634710312, "rewards/rejected": -0.040545664727687836, "step": 2220 }, { "epoch": 1.59, "grad_norm": 3.734375, "learning_rate": 4.521231823695586e-06, "log_odds_chosen": 0.8056713342666626, "log_odds_ratio": -0.4291091859340668, "logits/chosen": -3.047243595123291, "logits/rejected": -3.051359176635742, "logps/chosen": -0.45491790771484375, "logps/rejected": -0.8111447095870972, "loss": 0.3263, "nll_loss": 0.3066962659358978, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.02274589240550995, "rewards/margins": 0.017811346799135208, "rewards/rejected": -0.04055723547935486, "step": 2230 }, { "epoch": 1.6, "grad_norm": 4.21875, "learning_rate": 4.519384537888671e-06, "log_odds_chosen": 0.7709326148033142, "log_odds_ratio": -0.45784300565719604, "logits/chosen": -2.985696315765381, "logits/rejected": -2.9782907962799072, "logps/chosen": -0.4642552435398102, "logps/rejected": -0.8008000254631042, "loss": 0.34, "nll_loss": 0.3125024437904358, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.023212764412164688, "rewards/margins": 0.016827242448925972, "rewards/rejected": -0.04004000499844551, "step": 2240 }, { "epoch": 1.61, "grad_norm": 3.84375, "learning_rate": 4.517539514526257e-06, "log_odds_chosen": 0.867225170135498, "log_odds_ratio": -0.4193996489048004, "logits/chosen": -3.0175483226776123, "logits/rejected": -3.0031814575195312, "logps/chosen": -0.47603535652160645, "logps/rejected": -0.8671073913574219, "loss": 0.35, "nll_loss": 0.32443031668663025, "rewards/accuracies": 0.84375, "rewards/chosen": -0.0238017700612545, "rewards/margins": 0.019553598016500473, "rewards/rejected": -0.043355368077754974, "step": 2250 }, { "epoch": 1.61, "grad_norm": 3.65625, "learning_rate": 4.515696748993935e-06, "log_odds_chosen": 0.8432467579841614, "log_odds_ratio": -0.4082673192024231, "logits/chosen": -3.0002920627593994, "logits/rejected": -3.0049033164978027, "logps/chosen": -0.4464034140110016, "logps/rejected": -0.7996863126754761, "loss": 0.3459, "nll_loss": 0.33959877490997314, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.02232016995549202, "rewards/margins": 0.017664145678281784, "rewards/rejected": -0.039984315633773804, "step": 2260 }, { "epoch": 1.62, "grad_norm": 3.875, "learning_rate": 4.513856236690462e-06, "log_odds_chosen": 0.795701265335083, "log_odds_ratio": -0.438093364238739, "logits/chosen": -3.0044660568237305, "logits/rejected": -3.0060501098632812, "logps/chosen": -0.4668704867362976, "logps/rejected": -0.8155612945556641, "loss": 0.3401, "nll_loss": 0.2977873384952545, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.02334352396428585, "rewards/margins": 0.017434537410736084, "rewards/rejected": -0.040778063237667084, "step": 2270 }, { "epoch": 1.63, "grad_norm": 4.34375, "learning_rate": 4.51201797302771e-06, "log_odds_chosen": 0.8113659024238586, "log_odds_ratio": -0.4456149935722351, "logits/chosen": -3.020380735397339, "logits/rejected": -3.0095772743225098, "logps/chosen": -0.48883056640625, "logps/rejected": -0.8754503130912781, "loss": 0.3415, "nll_loss": 0.30122286081314087, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.02444152720272541, "rewards/margins": 0.019330989569425583, "rewards/rejected": -0.043772514909505844, "step": 2280 }, { "epoch": 1.64, "grad_norm": 4.90625, "learning_rate": 4.510181953430622e-06, "log_odds_chosen": 0.7731764912605286, "log_odds_ratio": -0.4465632438659668, "logits/chosen": -2.9889421463012695, "logits/rejected": -3.0027027130126953, "logps/chosen": -0.46255379915237427, "logps/rejected": -0.7920294404029846, "loss": 0.3401, "nll_loss": 0.31316250562667847, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.023127688094973564, "rewards/margins": 0.016473785042762756, "rewards/rejected": -0.03960147127509117, "step": 2290 }, { "epoch": 1.64, "grad_norm": 3.890625, "learning_rate": 4.508348173337162e-06, "log_odds_chosen": 0.8094249963760376, "log_odds_ratio": -0.4257411062717438, "logits/chosen": -3.0202667713165283, "logits/rejected": -3.021433115005493, "logps/chosen": -0.4780099391937256, "logps/rejected": -0.8506327867507935, "loss": 0.3431, "nll_loss": 0.3186896741390228, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.02390049770474434, "rewards/margins": 0.018631141632795334, "rewards/rejected": -0.04253163933753967, "step": 2300 }, { "epoch": 1.65, "grad_norm": 4.40625, "learning_rate": 4.5065166281982656e-06, "log_odds_chosen": 0.8483291864395142, "log_odds_ratio": -0.4191041886806488, "logits/chosen": -3.01615309715271, "logits/rejected": -3.018214702606201, "logps/chosen": -0.4690191149711609, "logps/rejected": -0.8457120656967163, "loss": 0.3149, "nll_loss": 0.31129592657089233, "rewards/accuracies": 0.8125, "rewards/chosen": -0.023450955748558044, "rewards/margins": 0.01883464865386486, "rewards/rejected": -0.042285606265068054, "step": 2310 }, { "epoch": 1.66, "grad_norm": 4.03125, "learning_rate": 4.5046873134777955e-06, "log_odds_chosen": 0.7973805665969849, "log_odds_ratio": -0.43768900632858276, "logits/chosen": -3.0356364250183105, "logits/rejected": -3.0410075187683105, "logps/chosen": -0.4451252818107605, "logps/rejected": -0.7737647891044617, "loss": 0.3302, "nll_loss": 0.3009418547153473, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.022256266325712204, "rewards/margins": 0.01643197424709797, "rewards/rejected": -0.038688234984874725, "step": 2320 }, { "epoch": 1.66, "grad_norm": 4.1875, "learning_rate": 4.5028602246524934e-06, "log_odds_chosen": 0.7888227105140686, "log_odds_ratio": -0.43405622243881226, "logits/chosen": -2.980956792831421, "logits/rejected": -2.978771209716797, "logps/chosen": -0.4761333465576172, "logps/rejected": -0.8276389837265015, "loss": 0.325, "nll_loss": 0.3202365040779114, "rewards/accuracies": 0.84375, "rewards/chosen": -0.02380666695535183, "rewards/margins": 0.017575280740857124, "rewards/rejected": -0.041381947696208954, "step": 2330 }, { "epoch": 1.67, "grad_norm": 4.46875, "learning_rate": 4.5010353572119344e-06, "log_odds_chosen": 0.8019825220108032, "log_odds_ratio": -0.4324628710746765, "logits/chosen": -3.0106167793273926, "logits/rejected": -3.0131564140319824, "logps/chosen": -0.47986140847206116, "logps/rejected": -0.8427394032478333, "loss": 0.3332, "nll_loss": 0.30359187722206116, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02399306558072567, "rewards/margins": 0.018143903464078903, "rewards/rejected": -0.04213697090744972, "step": 2340 }, { "epoch": 1.68, "grad_norm": 4.4375, "learning_rate": 4.499212706658476e-06, "log_odds_chosen": 0.8726344108581543, "log_odds_ratio": -0.42116135358810425, "logits/chosen": -2.953211784362793, "logits/rejected": -2.9644522666931152, "logps/chosen": -0.47161465883255005, "logps/rejected": -0.8811071515083313, "loss": 0.3193, "nll_loss": 0.2635403275489807, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.023580733686685562, "rewards/margins": 0.020474622026085854, "rewards/rejected": -0.044055357575416565, "step": 2350 }, { "epoch": 1.69, "grad_norm": 4.3125, "learning_rate": 4.497392268507216e-06, "log_odds_chosen": 0.9059100151062012, "log_odds_ratio": -0.40985527634620667, "logits/chosen": -2.987654447555542, "logits/rejected": -2.994426727294922, "logps/chosen": -0.44170433282852173, "logps/rejected": -0.8370461463928223, "loss": 0.3233, "nll_loss": 0.3086358904838562, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.022085215896368027, "rewards/margins": 0.01976708695292473, "rewards/rejected": -0.041852306574583054, "step": 2360 }, { "epoch": 1.69, "grad_norm": 4.125, "learning_rate": 4.495574038285945e-06, "log_odds_chosen": 0.8176547288894653, "log_odds_ratio": -0.42810124158859253, "logits/chosen": -2.987663507461548, "logits/rejected": -2.993683338165283, "logps/chosen": -0.4684585630893707, "logps/rejected": -0.8597729802131653, "loss": 0.3246, "nll_loss": 0.3058486580848694, "rewards/accuracies": 0.84375, "rewards/chosen": -0.023422928526997566, "rewards/margins": 0.01956571824848652, "rewards/rejected": -0.042988650500774384, "step": 2370 }, { "epoch": 1.7, "grad_norm": 3.625, "learning_rate": 4.493758011535097e-06, "log_odds_chosen": 0.8193448781967163, "log_odds_ratio": -0.424041211605072, "logits/chosen": -2.9878551959991455, "logits/rejected": -2.989800214767456, "logps/chosen": -0.4475332200527191, "logps/rejected": -0.7955509424209595, "loss": 0.3411, "nll_loss": 0.30333036184310913, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.022376660257577896, "rewards/margins": 0.017400886863470078, "rewards/rejected": -0.039777547121047974, "step": 2380 }, { "epoch": 1.71, "grad_norm": 4.5, "learning_rate": 4.491944183807709e-06, "log_odds_chosen": 0.7266278862953186, "log_odds_ratio": -0.4684472680091858, "logits/chosen": -3.016446590423584, "logits/rejected": -3.01550555229187, "logps/chosen": -0.4889986515045166, "logps/rejected": -0.815593421459198, "loss": 0.286, "nll_loss": 0.2626423239707947, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.02444993332028389, "rewards/margins": 0.01632973738014698, "rewards/rejected": -0.04077967256307602, "step": 2390 }, { "epoch": 1.71, "grad_norm": 4.53125, "learning_rate": 4.490132550669373e-06, "log_odds_chosen": 0.7895680665969849, "log_odds_ratio": -0.43217116594314575, "logits/chosen": -3.0383188724517822, "logits/rejected": -3.032744884490967, "logps/chosen": -0.50458824634552, "logps/rejected": -0.8632110357284546, "loss": 0.3556, "nll_loss": 0.3427043855190277, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.025229409337043762, "rewards/margins": 0.017931144684553146, "rewards/rejected": -0.04316055774688721, "step": 2400 }, { "epoch": 1.72, "grad_norm": 4.53125, "learning_rate": 4.488323107698186e-06, "log_odds_chosen": 0.9081867933273315, "log_odds_ratio": -0.3992438018321991, "logits/chosen": -3.041257619857788, "logits/rejected": -3.0402400493621826, "logps/chosen": -0.4225694239139557, "logps/rejected": -0.7932690978050232, "loss": 0.3496, "nll_loss": 0.33278927206993103, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.021128471940755844, "rewards/margins": 0.018534980714321136, "rewards/rejected": -0.03966345265507698, "step": 2410 }, { "epoch": 1.73, "grad_norm": 3.734375, "learning_rate": 4.486515850484713e-06, "log_odds_chosen": 0.7412691712379456, "log_odds_ratio": -0.4616422653198242, "logits/chosen": -3.029561758041382, "logits/rejected": -3.034426212310791, "logps/chosen": -0.4756532609462738, "logps/rejected": -0.800695538520813, "loss": 0.3509, "nll_loss": 0.3143712878227234, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02378266304731369, "rewards/margins": 0.016252117231488228, "rewards/rejected": -0.04003477841615677, "step": 2420 }, { "epoch": 1.74, "grad_norm": 4.15625, "learning_rate": 4.484710774631934e-06, "log_odds_chosen": 0.8182842135429382, "log_odds_ratio": -0.42540591955184937, "logits/chosen": -3.002006769180298, "logits/rejected": -3.0059545040130615, "logps/chosen": -0.474771112203598, "logps/rejected": -0.829581081867218, "loss": 0.3269, "nll_loss": 0.32158905267715454, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.02373855747282505, "rewards/margins": 0.01774049736559391, "rewards/rejected": -0.04147905483841896, "step": 2430 }, { "epoch": 1.74, "grad_norm": 4.53125, "learning_rate": 4.482907875755205e-06, "log_odds_chosen": 0.9482046961784363, "log_odds_ratio": -0.3902207016944885, "logits/chosen": -2.9898977279663086, "logits/rejected": -2.9990992546081543, "logps/chosen": -0.4407358169555664, "logps/rejected": -0.8314850926399231, "loss": 0.349, "nll_loss": 0.33501288294792175, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.02203679084777832, "rewards/margins": 0.019537465646862984, "rewards/rejected": -0.04157425835728645, "step": 2440 }, { "epoch": 1.75, "grad_norm": 5.125, "learning_rate": 4.481107149482208e-06, "log_odds_chosen": 0.9116214513778687, "log_odds_ratio": -0.39691099524497986, "logits/chosen": -3.0267539024353027, "logits/rejected": -3.015885829925537, "logps/chosen": -0.44874605536460876, "logps/rejected": -0.8508473634719849, "loss": 0.3236, "nll_loss": 0.32752102613449097, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.02243730239570141, "rewards/margins": 0.020105065777897835, "rewards/rejected": -0.042542364448308945, "step": 2450 }, { "epoch": 1.76, "grad_norm": 3.78125, "learning_rate": 4.4793085914529136e-06, "log_odds_chosen": 0.7441302537918091, "log_odds_ratio": -0.4505217969417572, "logits/chosen": -3.014552593231201, "logits/rejected": -3.0138773918151855, "logps/chosen": -0.4606143534183502, "logps/rejected": -0.7877427935600281, "loss": 0.3263, "nll_loss": 0.31659382581710815, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.02303071692585945, "rewards/margins": 0.016356423497200012, "rewards/rejected": -0.039387140423059464, "step": 2460 }, { "epoch": 1.76, "grad_norm": 4.25, "learning_rate": 4.477512197319528e-06, "log_odds_chosen": 0.8623677492141724, "log_odds_ratio": -0.4320920407772064, "logits/chosen": -2.9938783645629883, "logits/rejected": -2.9951791763305664, "logps/chosen": -0.4614431858062744, "logps/rejected": -0.8407228589057922, "loss": 0.3052, "nll_loss": 0.28734108805656433, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.02307215891778469, "rewards/margins": 0.018963981419801712, "rewards/rejected": -0.04203614592552185, "step": 2470 }, { "epoch": 1.77, "grad_norm": 3.796875, "learning_rate": 4.475717962746456e-06, "log_odds_chosen": 0.7775101065635681, "log_odds_ratio": -0.4383625388145447, "logits/chosen": -3.0018043518066406, "logits/rejected": -3.005997657775879, "logps/chosen": -0.48191872239112854, "logps/rejected": -0.8342337608337402, "loss": 0.3025, "nll_loss": 0.28509336709976196, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.024095937609672546, "rewards/margins": 0.017615750432014465, "rewards/rejected": -0.04171168431639671, "step": 2480 }, { "epoch": 1.78, "grad_norm": 3.5, "learning_rate": 4.4739258834102515e-06, "log_odds_chosen": 0.6905413866043091, "log_odds_ratio": -0.47094640135765076, "logits/chosen": -3.0034241676330566, "logits/rejected": -3.0084078311920166, "logps/chosen": -0.4959983229637146, "logps/rejected": -0.8073428869247437, "loss": 0.3128, "nll_loss": 0.3074991703033447, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.02479991875588894, "rewards/margins": 0.015567230060696602, "rewards/rejected": -0.04036714881658554, "step": 2490 }, { "epoch": 1.79, "grad_norm": 4.59375, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 0.9156475067138672, "log_odds_ratio": -0.40092092752456665, "logits/chosen": -3.0588412284851074, "logits/rejected": -3.054736614227295, "logps/chosen": -0.4974744915962219, "logps/rejected": -0.9120408296585083, "loss": 0.3276, "nll_loss": 0.32681915163993835, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.024873726069927216, "rewards/margins": 0.020728319883346558, "rewards/rejected": -0.045602042227983475, "step": 2500 }, { "epoch": 1.79, "grad_norm": 3.796875, "learning_rate": 4.470348173215168e-06, "log_odds_chosen": 0.8080534934997559, "log_odds_ratio": -0.4320489764213562, "logits/chosen": -3.0508053302764893, "logits/rejected": -3.055711269378662, "logps/chosen": -0.44746845960617065, "logps/rejected": -0.8131664991378784, "loss": 0.3188, "nll_loss": 0.30606070160865784, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.022373421117663383, "rewards/margins": 0.018284905701875687, "rewards/rejected": -0.04065832868218422, "step": 2510 }, { "epoch": 1.8, "grad_norm": 4.40625, "learning_rate": 4.468562533769766e-06, "log_odds_chosen": 0.7910436391830444, "log_odds_ratio": -0.44537654519081116, "logits/chosen": -3.030313014984131, "logits/rejected": -3.0502493381500244, "logps/chosen": -0.4794743061065674, "logps/rejected": -0.83622807264328, "loss": 0.3418, "nll_loss": 0.30839359760284424, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.02397371456027031, "rewards/margins": 0.01783769205212593, "rewards/rejected": -0.04181140661239624, "step": 2520 }, { "epoch": 1.81, "grad_norm": 4.40625, "learning_rate": 4.4667790323881e-06, "log_odds_chosen": 0.7763202786445618, "log_odds_ratio": -0.4613625109195709, "logits/chosen": -3.0052618980407715, "logits/rejected": -3.0094947814941406, "logps/chosen": -0.5583103895187378, "logps/rejected": -0.9147736430168152, "loss": 0.3618, "nll_loss": 0.37591391801834106, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.02791551686823368, "rewards/margins": 0.01782316528260708, "rewards/rejected": -0.04573867842555046, "step": 2530 }, { "epoch": 1.81, "grad_norm": 4.53125, "learning_rate": 4.464997664806832e-06, "log_odds_chosen": 0.7353029251098633, "log_odds_ratio": -0.465457022190094, "logits/chosen": -2.990004777908325, "logits/rejected": -3.0065526962280273, "logps/chosen": -0.4965083599090576, "logps/rejected": -0.8237360119819641, "loss": 0.3189, "nll_loss": 0.2914542555809021, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.02482541836798191, "rewards/margins": 0.016361381858587265, "rewards/rejected": -0.041186802089214325, "step": 2540 }, { "epoch": 1.82, "grad_norm": 3.8125, "learning_rate": 4.463218426774518e-06, "log_odds_chosen": 0.8119770288467407, "log_odds_ratio": -0.4308405816555023, "logits/chosen": -3.023669481277466, "logits/rejected": -3.0249171257019043, "logps/chosen": -0.45973339676856995, "logps/rejected": -0.8200874328613281, "loss": 0.3189, "nll_loss": 0.3169053792953491, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.022986669093370438, "rewards/margins": 0.018017705529928207, "rewards/rejected": -0.041004374623298645, "step": 2550 }, { "epoch": 1.83, "grad_norm": 3.640625, "learning_rate": 4.461441314051561e-06, "log_odds_chosen": 0.8659493327140808, "log_odds_ratio": -0.4132528305053711, "logits/chosen": -3.0160984992980957, "logits/rejected": -3.008359432220459, "logps/chosen": -0.45377054810523987, "logps/rejected": -0.8377643823623657, "loss": 0.31, "nll_loss": 0.3230209946632385, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.022688528522849083, "rewards/margins": 0.019199693575501442, "rewards/rejected": -0.041888222098350525, "step": 2560 }, { "epoch": 1.84, "grad_norm": 4.125, "learning_rate": 4.459666322410172e-06, "log_odds_chosen": 0.8520922660827637, "log_odds_ratio": -0.4295163154602051, "logits/chosen": -2.984938859939575, "logits/rejected": -2.9917848110198975, "logps/chosen": -0.45458802580833435, "logps/rejected": -0.8432715535163879, "loss": 0.3342, "nll_loss": 0.30451488494873047, "rewards/accuracies": 0.84375, "rewards/chosen": -0.022729400545358658, "rewards/margins": 0.019434181973338127, "rewards/rejected": -0.042163580656051636, "step": 2570 }, { "epoch": 1.84, "grad_norm": 4.34375, "learning_rate": 4.457893447634326e-06, "log_odds_chosen": 0.7468984723091125, "log_odds_ratio": -0.4626026153564453, "logits/chosen": -3.044098138809204, "logits/rejected": -3.0482001304626465, "logps/chosen": -0.502227783203125, "logps/rejected": -0.8190582990646362, "loss": 0.3474, "nll_loss": 0.331245481967926, "rewards/accuracies": 0.8125, "rewards/chosen": -0.02511138655245304, "rewards/margins": 0.0158415324985981, "rewards/rejected": -0.04095291346311569, "step": 2580 }, { "epoch": 1.85, "grad_norm": 4.6875, "learning_rate": 4.456122685519721e-06, "log_odds_chosen": 0.8202278017997742, "log_odds_ratio": -0.4273694157600403, "logits/chosen": -3.041612148284912, "logits/rejected": -3.04099178314209, "logps/chosen": -0.44320958852767944, "logps/rejected": -0.7977688312530518, "loss": 0.3353, "nll_loss": 0.28820180892944336, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.02216048166155815, "rewards/margins": 0.017727959901094437, "rewards/rejected": -0.03988844156265259, "step": 2590 }, { "epoch": 1.86, "grad_norm": 3.921875, "learning_rate": 4.45435403187374e-06, "log_odds_chosen": 0.9900849461555481, "log_odds_ratio": -0.3686661422252655, "logits/chosen": -3.0403106212615967, "logits/rejected": -3.0422418117523193, "logps/chosen": -0.4134953022003174, "logps/rejected": -0.8272613286972046, "loss": 0.3304, "nll_loss": 0.2957596182823181, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.02067476511001587, "rewards/margins": 0.020688306540250778, "rewards/rejected": -0.04136306792497635, "step": 2600 }, { "epoch": 1.86, "grad_norm": 4.625, "learning_rate": 4.452587482515399e-06, "log_odds_chosen": 0.7774473428726196, "log_odds_ratio": -0.43920189142227173, "logits/chosen": -3.0550570487976074, "logits/rejected": -3.046712875366211, "logps/chosen": -0.4647607207298279, "logps/rejected": -0.8117767572402954, "loss": 0.3264, "nll_loss": 0.31224942207336426, "rewards/accuracies": 0.84375, "rewards/chosen": -0.023238036781549454, "rewards/margins": 0.01735079661011696, "rewards/rejected": -0.04058883339166641, "step": 2610 }, { "epoch": 1.87, "grad_norm": 4.5, "learning_rate": 4.450823033275315e-06, "log_odds_chosen": 0.7449740171432495, "log_odds_ratio": -0.44729799032211304, "logits/chosen": -3.0584347248077393, "logits/rejected": -3.0416550636291504, "logps/chosen": -0.4711737036705017, "logps/rejected": -0.8072509765625, "loss": 0.3499, "nll_loss": 0.33794453740119934, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.023558687418699265, "rewards/margins": 0.016803864389657974, "rewards/rejected": -0.04036254808306694, "step": 2620 }, { "epoch": 1.88, "grad_norm": 4.5625, "learning_rate": 4.4490606799956615e-06, "log_odds_chosen": 0.823355495929718, "log_odds_ratio": -0.41928333044052124, "logits/chosen": -3.0240085124969482, "logits/rejected": -3.024479866027832, "logps/chosen": -0.4763035774230957, "logps/rejected": -0.8605529069900513, "loss": 0.338, "nll_loss": 0.31633228063583374, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.023815181106328964, "rewards/margins": 0.01921246573328972, "rewards/rejected": -0.04302764683961868, "step": 2630 }, { "epoch": 1.89, "grad_norm": 5.21875, "learning_rate": 4.447300418530126e-06, "log_odds_chosen": 0.7356880307197571, "log_odds_ratio": -0.45234304666519165, "logits/chosen": -3.0536632537841797, "logits/rejected": -3.0563602447509766, "logps/chosen": -0.47896629571914673, "logps/rejected": -0.8162487149238586, "loss": 0.3509, "nll_loss": 0.3251205384731293, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.023948315531015396, "rewards/margins": 0.016864119097590446, "rewards/rejected": -0.04081243276596069, "step": 2640 }, { "epoch": 1.89, "grad_norm": 4.34375, "learning_rate": 4.4455422447438715e-06, "log_odds_chosen": 0.7540526390075684, "log_odds_ratio": -0.4576408863067627, "logits/chosen": -3.040320873260498, "logits/rejected": -3.055922746658325, "logps/chosen": -0.46336060762405396, "logps/rejected": -0.7819021940231323, "loss": 0.3386, "nll_loss": 0.30370932817459106, "rewards/accuracies": 0.84375, "rewards/chosen": -0.023168031126260757, "rewards/margins": 0.015927080065011978, "rewards/rejected": -0.039095114916563034, "step": 2650 }, { "epoch": 1.9, "grad_norm": 3.84375, "learning_rate": 4.443786154513493e-06, "log_odds_chosen": 0.8572918176651001, "log_odds_ratio": -0.40293556451797485, "logits/chosen": -3.018540859222412, "logits/rejected": -3.01985502243042, "logps/chosen": -0.4758778512477875, "logps/rejected": -0.8578616976737976, "loss": 0.3133, "nll_loss": 0.2729211747646332, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.023793894797563553, "rewards/margins": 0.019099194556474686, "rewards/rejected": -0.04289308562874794, "step": 2660 }, { "epoch": 1.91, "grad_norm": 4.46875, "learning_rate": 4.442032143726981e-06, "log_odds_chosen": 0.7635393738746643, "log_odds_ratio": -0.44135743379592896, "logits/chosen": -3.006277561187744, "logits/rejected": -3.007960796356201, "logps/chosen": -0.5131677985191345, "logps/rejected": -0.8635655641555786, "loss": 0.3421, "nll_loss": 0.33079543709754944, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.025658389553427696, "rewards/margins": 0.017519883811473846, "rewards/rejected": -0.04317827522754669, "step": 2670 }, { "epoch": 1.91, "grad_norm": 4.21875, "learning_rate": 4.440280208283675e-06, "log_odds_chosen": 0.7914074659347534, "log_odds_ratio": -0.4446859359741211, "logits/chosen": -3.0452609062194824, "logits/rejected": -3.0353732109069824, "logps/chosen": -0.448996365070343, "logps/rejected": -0.7661417722702026, "loss": 0.3259, "nll_loss": 0.29526346921920776, "rewards/accuracies": 0.84375, "rewards/chosen": -0.02244981750845909, "rewards/margins": 0.01585726998746395, "rewards/rejected": -0.03830708935856819, "step": 2680 }, { "epoch": 1.92, "grad_norm": 4.03125, "learning_rate": 4.43853034409423e-06, "log_odds_chosen": 0.8670626878738403, "log_odds_ratio": -0.4227162003517151, "logits/chosen": -3.0091984272003174, "logits/rejected": -3.0035605430603027, "logps/chosen": -0.4637516140937805, "logps/rejected": -0.8526164889335632, "loss": 0.3277, "nll_loss": 0.31113773584365845, "rewards/accuracies": 0.84375, "rewards/chosen": -0.023187585175037384, "rewards/margins": 0.019443243741989136, "rewards/rejected": -0.04263082146644592, "step": 2690 }, { "epoch": 1.93, "grad_norm": 4.0625, "learning_rate": 4.43678254708057e-06, "log_odds_chosen": 0.756043553352356, "log_odds_ratio": -0.4458427429199219, "logits/chosen": -3.025430202484131, "logits/rejected": -3.0258235931396484, "logps/chosen": -0.4689392149448395, "logps/rejected": -0.7990790605545044, "loss": 0.3374, "nll_loss": 0.31917357444763184, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.023446960374712944, "rewards/margins": 0.016506997868418694, "rewards/rejected": -0.03995395824313164, "step": 2700 }, { "epoch": 1.94, "grad_norm": 4.09375, "learning_rate": 4.435036813175853e-06, "log_odds_chosen": 0.7083145976066589, "log_odds_ratio": -0.4755108952522278, "logits/chosen": -3.0111260414123535, "logits/rejected": -3.015259265899658, "logps/chosen": -0.4857967495918274, "logps/rejected": -0.7993541955947876, "loss": 0.351, "nll_loss": 0.3297528624534607, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.024289840832352638, "rewards/margins": 0.01567787304520607, "rewards/rejected": -0.03996770828962326, "step": 2710 }, { "epoch": 1.94, "grad_norm": 4.0, "learning_rate": 4.4332931383244296e-06, "log_odds_chosen": 0.8236778974533081, "log_odds_ratio": -0.42724496126174927, "logits/chosen": -3.0053114891052246, "logits/rejected": -3.0031795501708984, "logps/chosen": -0.48205310106277466, "logps/rejected": -0.8685757517814636, "loss": 0.3341, "nll_loss": 0.3143610656261444, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.024102654308080673, "rewards/margins": 0.019326135516166687, "rewards/rejected": -0.04342878982424736, "step": 2720 }, { "epoch": 1.95, "grad_norm": 4.46875, "learning_rate": 4.431551518481802e-06, "log_odds_chosen": 0.8632327914237976, "log_odds_ratio": -0.4138604998588562, "logits/chosen": -2.9793882369995117, "logits/rejected": -2.976903200149536, "logps/chosen": -0.4733366370201111, "logps/rejected": -0.8459447622299194, "loss": 0.3229, "nll_loss": 0.28703364729881287, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.023666828870773315, "rewards/margins": 0.018630409613251686, "rewards/rejected": -0.04229723662137985, "step": 2730 }, { "epoch": 1.96, "grad_norm": 4.3125, "learning_rate": 4.429811949614588e-06, "log_odds_chosen": 0.8912683725357056, "log_odds_ratio": -0.4031354784965515, "logits/chosen": -3.026247024536133, "logits/rejected": -3.021430492401123, "logps/chosen": -0.4420657157897949, "logps/rejected": -0.8404544591903687, "loss": 0.3538, "nll_loss": 0.34370797872543335, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.022103283554315567, "rewards/margins": 0.019919438287615776, "rewards/rejected": -0.04202272370457649, "step": 2740 }, { "epoch": 1.96, "grad_norm": 3.84375, "learning_rate": 4.428074427700477e-06, "log_odds_chosen": 0.8247370719909668, "log_odds_ratio": -0.41037946939468384, "logits/chosen": -3.034496307373047, "logits/rejected": -3.035128593444824, "logps/chosen": -0.43201231956481934, "logps/rejected": -0.7829247713088989, "loss": 0.3242, "nll_loss": 0.29720133543014526, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.021600617095828056, "rewards/margins": 0.01754561997950077, "rewards/rejected": -0.03914623707532883, "step": 2750 }, { "epoch": 1.97, "grad_norm": 4.3125, "learning_rate": 4.426338948728195e-06, "log_odds_chosen": 0.7835749387741089, "log_odds_ratio": -0.445622056722641, "logits/chosen": -3.002962589263916, "logits/rejected": -2.995482921600342, "logps/chosen": -0.48915895819664, "logps/rejected": -0.8424232602119446, "loss": 0.3761, "nll_loss": 0.36115747690200806, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.02445795014500618, "rewards/margins": 0.01766321435570717, "rewards/rejected": -0.04212115705013275, "step": 2760 }, { "epoch": 1.98, "grad_norm": 3.515625, "learning_rate": 4.424605508697463e-06, "log_odds_chosen": 0.7757275700569153, "log_odds_ratio": -0.45807161927223206, "logits/chosen": -2.9874606132507324, "logits/rejected": -2.9826886653900146, "logps/chosen": -0.46530455350875854, "logps/rejected": -0.819189727306366, "loss": 0.3217, "nll_loss": 0.31295087933540344, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.023265227675437927, "rewards/margins": 0.0176942590624094, "rewards/rejected": -0.04095948860049248, "step": 2770 }, { "epoch": 1.99, "grad_norm": 4.0, "learning_rate": 4.42287410361896e-06, "log_odds_chosen": 0.7990993857383728, "log_odds_ratio": -0.4491378366947174, "logits/chosen": -2.9978973865509033, "logits/rejected": -2.988628387451172, "logps/chosen": -0.46599721908569336, "logps/rejected": -0.82252436876297, "loss": 0.2959, "nll_loss": 0.28105488419532776, "rewards/accuracies": 0.78125, "rewards/chosen": -0.023299861699342728, "rewards/margins": 0.01782635971903801, "rewards/rejected": -0.04112621396780014, "step": 2780 }, { "epoch": 1.99, "grad_norm": 4.0, "learning_rate": 4.421144729514289e-06, "log_odds_chosen": 0.7353337407112122, "log_odds_ratio": -0.44960951805114746, "logits/chosen": -3.013390064239502, "logits/rejected": -3.000366687774658, "logps/chosen": -0.47515836358070374, "logps/rejected": -0.8189493417739868, "loss": 0.3346, "nll_loss": 0.3258515000343323, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.023757919669151306, "rewards/margins": 0.017189551144838333, "rewards/rejected": -0.04094746708869934, "step": 2790 }, { "epoch": 2.0, "grad_norm": 4.21875, "learning_rate": 4.419417382415923e-06, "log_odds_chosen": 0.8415131568908691, "log_odds_ratio": -0.4145146310329437, "logits/chosen": -2.972963809967041, "logits/rejected": -2.9580955505371094, "logps/chosen": -0.44089460372924805, "logps/rejected": -0.8264049291610718, "loss": 0.323, "nll_loss": 0.29400014877319336, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.022044729441404343, "rewards/margins": 0.019275514408946037, "rewards/rejected": -0.04132024943828583, "step": 2800 }, { "epoch": 2.01, "grad_norm": 6.0, "learning_rate": 4.417692058367186e-06, "log_odds_chosen": 1.7442114353179932, "log_odds_ratio": -0.22452226281166077, "logits/chosen": -3.005614757537842, "logits/rejected": -3.0185272693634033, "logps/chosen": -0.32035571336746216, "logps/rejected": -1.0690141916275024, "loss": 0.2363, "nll_loss": 0.2332218438386917, "rewards/accuracies": 0.96875, "rewards/chosen": -0.016017789021134377, "rewards/margins": 0.037432920187711716, "rewards/rejected": -0.053450703620910645, "step": 2810 }, { "epoch": 2.01, "grad_norm": 4.375, "learning_rate": 4.415968753422204e-06, "log_odds_chosen": 2.035170078277588, "log_odds_ratio": -0.17474523186683655, "logits/chosen": -2.941028356552124, "logits/rejected": -2.966252088546753, "logps/chosen": -0.25109726190567017, "logps/rejected": -1.092346429824829, "loss": 0.2133, "nll_loss": 0.19823488593101501, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.012554863467812538, "rewards/margins": 0.04206245020031929, "rewards/rejected": -0.05461730808019638, "step": 2820 }, { "epoch": 2.02, "grad_norm": 3.3125, "learning_rate": 4.414247463645868e-06, "log_odds_chosen": 2.02545428276062, "log_odds_ratio": -0.1842285543680191, "logits/chosen": -2.951359272003174, "logits/rejected": -2.9670932292938232, "logps/chosen": -0.3034682869911194, "logps/rejected": -1.1695163249969482, "loss": 0.2249, "nll_loss": 0.21431966125965118, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.015173414722084999, "rewards/margins": 0.04330240562558174, "rewards/rejected": -0.05847581475973129, "step": 2830 }, { "epoch": 2.03, "grad_norm": 4.65625, "learning_rate": 4.4125281851137995e-06, "log_odds_chosen": 2.11085844039917, "log_odds_ratio": -0.17323088645935059, "logits/chosen": -2.9365506172180176, "logits/rejected": -2.940143585205078, "logps/chosen": -0.2617949843406677, "logps/rejected": -1.191651463508606, "loss": 0.2231, "nll_loss": 0.20334085822105408, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013089749030768871, "rewards/margins": 0.04649282246828079, "rewards/rejected": -0.059582579880952835, "step": 2840 }, { "epoch": 2.03, "grad_norm": 4.90625, "learning_rate": 4.41081091391231e-06, "log_odds_chosen": 2.124652147293091, "log_odds_ratio": -0.1659231185913086, "logits/chosen": -2.936309337615967, "logits/rejected": -2.9508724212646484, "logps/chosen": -0.27816253900527954, "logps/rejected": -1.1635429859161377, "loss": 0.2287, "nll_loss": 0.22624854743480682, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013908127322793007, "rewards/margins": 0.044269029051065445, "rewards/rejected": -0.0581771545112133, "step": 2850 }, { "epoch": 2.04, "grad_norm": 5.25, "learning_rate": 4.409095646138363e-06, "log_odds_chosen": 2.16214919090271, "log_odds_ratio": -0.16544964909553528, "logits/chosen": -2.931002140045166, "logits/rejected": -2.959987163543701, "logps/chosen": -0.2674095034599304, "logps/rejected": -1.1652556657791138, "loss": 0.2138, "nll_loss": 0.2004723995923996, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01337047666311264, "rewards/margins": 0.04489230364561081, "rewards/rejected": -0.05826277658343315, "step": 2860 }, { "epoch": 2.05, "grad_norm": 4.59375, "learning_rate": 4.4073823778995425e-06, "log_odds_chosen": 2.1190052032470703, "log_odds_ratio": -0.16538389027118683, "logits/chosen": -2.895883083343506, "logits/rejected": -2.912191390991211, "logps/chosen": -0.2597828507423401, "logps/rejected": -1.1385289430618286, "loss": 0.1955, "nll_loss": 0.18875320255756378, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.012989142909646034, "rewards/margins": 0.04393729940056801, "rewards/rejected": -0.05692644044756889, "step": 2870 }, { "epoch": 2.06, "grad_norm": 5.15625, "learning_rate": 4.405671105314009e-06, "log_odds_chosen": 2.194033622741699, "log_odds_ratio": -0.17002928256988525, "logits/chosen": -2.880614757537842, "logits/rejected": -2.8951869010925293, "logps/chosen": -0.26455721259117126, "logps/rejected": -1.1890400648117065, "loss": 0.2118, "nll_loss": 0.198002889752388, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013227862305939198, "rewards/margins": 0.04622413590550423, "rewards/rejected": -0.05945199728012085, "step": 2880 }, { "epoch": 2.06, "grad_norm": 4.375, "learning_rate": 4.4039618245104645e-06, "log_odds_chosen": 2.055791139602661, "log_odds_ratio": -0.18313267827033997, "logits/chosen": -2.94709849357605, "logits/rejected": -2.941755533218384, "logps/chosen": -0.2858212888240814, "logps/rejected": -1.1849796772003174, "loss": 0.2285, "nll_loss": 0.2383888065814972, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.014291064813733101, "rewards/margins": 0.04495792090892792, "rewards/rejected": -0.059248991310596466, "step": 2890 }, { "epoch": 2.07, "grad_norm": 5.1875, "learning_rate": 4.4022545316281195e-06, "log_odds_chosen": 1.9994585514068604, "log_odds_ratio": -0.17715023458003998, "logits/chosen": -2.916703939437866, "logits/rejected": -2.938720703125, "logps/chosen": -0.27788764238357544, "logps/rejected": -1.1122925281524658, "loss": 0.2156, "nll_loss": 0.2203611433506012, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.013894381932914257, "rewards/margins": 0.04172024130821228, "rewards/rejected": -0.05561462789773941, "step": 2900 }, { "epoch": 2.08, "grad_norm": 5.34375, "learning_rate": 4.40054922281665e-06, "log_odds_chosen": 2.3552050590515137, "log_odds_ratio": -0.1600455790758133, "logits/chosen": -2.90094256401062, "logits/rejected": -2.916411876678467, "logps/chosen": -0.2543887197971344, "logps/rejected": -1.230857253074646, "loss": 0.2469, "nll_loss": 0.23771822452545166, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01271943561732769, "rewards/margins": 0.048823438584804535, "rewards/rejected": -0.06154286861419678, "step": 2910 }, { "epoch": 2.08, "grad_norm": 4.5, "learning_rate": 4.398845894236168e-06, "log_odds_chosen": 2.13662052154541, "log_odds_ratio": -0.1536364108324051, "logits/chosen": -2.9522786140441895, "logits/rejected": -2.960909366607666, "logps/chosen": -0.26080501079559326, "logps/rejected": -1.1585144996643066, "loss": 0.2352, "nll_loss": 0.2126685082912445, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.013040250167250633, "rewards/margins": 0.04488547891378403, "rewards/rejected": -0.05792573094367981, "step": 2920 }, { "epoch": 2.09, "grad_norm": 4.375, "learning_rate": 4.397144542057179e-06, "log_odds_chosen": 1.932802438735962, "log_odds_ratio": -0.19244706630706787, "logits/chosen": -2.937756061553955, "logits/rejected": -2.952364444732666, "logps/chosen": -0.30458444356918335, "logps/rejected": -1.0755188465118408, "loss": 0.2315, "nll_loss": 0.20522145926952362, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015229222364723682, "rewards/margins": 0.038546714931726456, "rewards/rejected": -0.053775936365127563, "step": 2930 }, { "epoch": 2.1, "grad_norm": 5.25, "learning_rate": 4.395445162460548e-06, "log_odds_chosen": 2.177537441253662, "log_odds_ratio": -0.16058263182640076, "logits/chosen": -2.9258477687835693, "logits/rejected": -2.944967031478882, "logps/chosen": -0.25012099742889404, "logps/rejected": -1.1385070085525513, "loss": 0.2113, "nll_loss": 0.19516515731811523, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.012506050989031792, "rewards/margins": 0.0444192960858345, "rewards/rejected": -0.056925348937511444, "step": 2940 }, { "epoch": 2.11, "grad_norm": 3.703125, "learning_rate": 4.393747751637469e-06, "log_odds_chosen": 2.3222155570983887, "log_odds_ratio": -0.1461641937494278, "logits/chosen": -2.9448673725128174, "logits/rejected": -2.963576078414917, "logps/chosen": -0.26513126492500305, "logps/rejected": -1.2608281373977661, "loss": 0.2235, "nll_loss": 0.21424666047096252, "rewards/accuracies": 1.0, "rewards/chosen": -0.013256562873721123, "rewards/margins": 0.04978484660387039, "rewards/rejected": -0.06304140388965607, "step": 2950 }, { "epoch": 2.11, "grad_norm": 4.8125, "learning_rate": 4.392052305789416e-06, "log_odds_chosen": 2.1221888065338135, "log_odds_ratio": -0.16647639870643616, "logits/chosen": -2.88997220993042, "logits/rejected": -2.9043383598327637, "logps/chosen": -0.2757899761199951, "logps/rejected": -1.1634830236434937, "loss": 0.2451, "nll_loss": 0.22998282313346863, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.013789497315883636, "rewards/margins": 0.044384654611349106, "rewards/rejected": -0.058174144476652145, "step": 2960 }, { "epoch": 2.12, "grad_norm": 4.6875, "learning_rate": 4.390358821128123e-06, "log_odds_chosen": 2.1035635471343994, "log_odds_ratio": -0.1626226007938385, "logits/chosen": -2.942446708679199, "logits/rejected": -2.9398980140686035, "logps/chosen": -0.23974958062171936, "logps/rejected": -1.0798718929290771, "loss": 0.2239, "nll_loss": 0.22319245338439941, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.011987479403614998, "rewards/margins": 0.04200611263513565, "rewards/rejected": -0.0539935939013958, "step": 2970 }, { "epoch": 2.13, "grad_norm": 4.125, "learning_rate": 4.388667293875536e-06, "log_odds_chosen": 2.1456093788146973, "log_odds_ratio": -0.159576416015625, "logits/chosen": -2.898560047149658, "logits/rejected": -2.913954496383667, "logps/chosen": -0.2875900864601135, "logps/rejected": -1.200217604637146, "loss": 0.2174, "nll_loss": 0.21192331612110138, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.014379504136741161, "rewards/margins": 0.04563137888908386, "rewards/rejected": -0.0600108802318573, "step": 2980 }, { "epoch": 2.13, "grad_norm": 4.03125, "learning_rate": 4.386977720263786e-06, "log_odds_chosen": 2.182795286178589, "log_odds_ratio": -0.17162509262561798, "logits/chosen": -2.92887806892395, "logits/rejected": -2.9375483989715576, "logps/chosen": -0.2756284177303314, "logps/rejected": -1.2184598445892334, "loss": 0.212, "nll_loss": 0.21715661883354187, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01378142274916172, "rewards/margins": 0.04714157432317734, "rewards/rejected": -0.06092298775911331, "step": 2990 }, { "epoch": 2.14, "grad_norm": 4.6875, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 2.192145824432373, "log_odds_ratio": -0.16665682196617126, "logits/chosen": -2.893583297729492, "logits/rejected": -2.9158935546875, "logps/chosen": -0.2467484027147293, "logps/rejected": -1.158375859260559, "loss": 0.2083, "nll_loss": 0.19111524522304535, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.012337420135736465, "rewards/margins": 0.04558136314153671, "rewards/rejected": -0.05791878700256348, "step": 3000 }, { "epoch": 2.15, "grad_norm": 5.59375, "learning_rate": 4.383604418942005e-06, "log_odds_chosen": 2.275881767272949, "log_odds_ratio": -0.14857785403728485, "logits/chosen": -2.8811442852020264, "logits/rejected": -2.9039580821990967, "logps/chosen": -0.25105297565460205, "logps/rejected": -1.2186074256896973, "loss": 0.2255, "nll_loss": 0.2043989598751068, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.012552648782730103, "rewards/margins": 0.04837772995233536, "rewards/rejected": -0.06093037873506546, "step": 3010 }, { "epoch": 2.16, "grad_norm": 5.75, "learning_rate": 4.381920683746824e-06, "log_odds_chosen": 2.2012124061584473, "log_odds_ratio": -0.15712103247642517, "logits/chosen": -2.925220251083374, "logits/rejected": -2.9428842067718506, "logps/chosen": -0.28707730770111084, "logps/rejected": -1.2705217599868774, "loss": 0.2301, "nll_loss": 0.22399909794330597, "rewards/accuracies": 1.0, "rewards/chosen": -0.014353866688907146, "rewards/margins": 0.04917223006486893, "rewards/rejected": -0.06352610141038895, "step": 3020 }, { "epoch": 2.16, "grad_norm": 5.125, "learning_rate": 4.380238887222108e-06, "log_odds_chosen": 2.34934401512146, "log_odds_ratio": -0.15532293915748596, "logits/chosen": -2.9372026920318604, "logits/rejected": -2.9321727752685547, "logps/chosen": -0.24252362549304962, "logps/rejected": -1.2427626848220825, "loss": 0.2276, "nll_loss": 0.22118857502937317, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.012126180343329906, "rewards/margins": 0.050011951476335526, "rewards/rejected": -0.06213812157511711, "step": 3030 }, { "epoch": 2.17, "grad_norm": 4.59375, "learning_rate": 4.378559025650368e-06, "log_odds_chosen": 2.146629810333252, "log_odds_ratio": -0.17220303416252136, "logits/chosen": -2.9507367610931396, "logits/rejected": -2.9520468711853027, "logps/chosen": -0.2865048348903656, "logps/rejected": -1.2147105932235718, "loss": 0.2203, "nll_loss": 0.22103562951087952, "rewards/accuracies": 0.96875, "rewards/chosen": -0.014325241558253765, "rewards/margins": 0.04641028866171837, "rewards/rejected": -0.06073553487658501, "step": 3040 }, { "epoch": 2.18, "grad_norm": 4.8125, "learning_rate": 4.376881095324086e-06, "log_odds_chosen": 2.236276626586914, "log_odds_ratio": -0.15167517960071564, "logits/chosen": -2.933054208755493, "logits/rejected": -2.941648006439209, "logps/chosen": -0.23623302578926086, "logps/rejected": -1.137139081954956, "loss": 0.2082, "nll_loss": 0.19374367594718933, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.011811651289463043, "rewards/margins": 0.04504530504345894, "rewards/rejected": -0.05685695260763168, "step": 3050 }, { "epoch": 2.18, "grad_norm": 5.15625, "learning_rate": 4.375205092545683e-06, "log_odds_chosen": 2.1014246940612793, "log_odds_ratio": -0.1651487797498703, "logits/chosen": -2.888174295425415, "logits/rejected": -2.9049601554870605, "logps/chosen": -0.2655971646308899, "logps/rejected": -1.1357053518295288, "loss": 0.2235, "nll_loss": 0.19685029983520508, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01327985804527998, "rewards/margins": 0.043505407869815826, "rewards/rejected": -0.05678526684641838, "step": 3060 }, { "epoch": 2.19, "grad_norm": 4.375, "learning_rate": 4.373531013627483e-06, "log_odds_chosen": 2.0619096755981445, "log_odds_ratio": -0.18473556637763977, "logits/chosen": -2.899890661239624, "logits/rejected": -2.908219814300537, "logps/chosen": -0.27549678087234497, "logps/rejected": -1.1109269857406616, "loss": 0.2292, "nll_loss": 0.21692724525928497, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.013774837367236614, "rewards/margins": 0.04177150875329971, "rewards/rejected": -0.055546343326568604, "step": 3070 }, { "epoch": 2.2, "grad_norm": 4.46875, "learning_rate": 4.371858854891681e-06, "log_odds_chosen": 2.1072232723236084, "log_odds_ratio": -0.17573979496955872, "logits/chosen": -2.907663345336914, "logits/rejected": -2.9256844520568848, "logps/chosen": -0.28134164214134216, "logps/rejected": -1.1764047145843506, "loss": 0.2131, "nll_loss": 0.19975362718105316, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014067083597183228, "rewards/margins": 0.04475315660238266, "rewards/rejected": -0.05882024019956589, "step": 3080 }, { "epoch": 2.21, "grad_norm": 5.125, "learning_rate": 4.370188612670307e-06, "log_odds_chosen": 2.1010029315948486, "log_odds_ratio": -0.18062375485897064, "logits/chosen": -2.9517135620117188, "logits/rejected": -2.963604688644409, "logps/chosen": -0.2760263979434967, "logps/rejected": -1.2115020751953125, "loss": 0.2257, "nll_loss": 0.20056433975696564, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01380131859332323, "rewards/margins": 0.04677379131317139, "rewards/rejected": -0.060575105249881744, "step": 3090 }, { "epoch": 2.21, "grad_norm": 4.4375, "learning_rate": 4.36852028330519e-06, "log_odds_chosen": 2.000791072845459, "log_odds_ratio": -0.1811959445476532, "logits/chosen": -2.933238983154297, "logits/rejected": -2.9328596591949463, "logps/chosen": -0.31249523162841797, "logps/rejected": -1.187408447265625, "loss": 0.2445, "nll_loss": 0.23780445754528046, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015624763444066048, "rewards/margins": 0.04374566301703453, "rewards/rejected": -0.05937042832374573, "step": 3100 }, { "epoch": 2.22, "grad_norm": 4.625, "learning_rate": 4.3668538631479314e-06, "log_odds_chosen": 2.0266451835632324, "log_odds_ratio": -0.17613288760185242, "logits/chosen": -2.937640428543091, "logits/rejected": -2.94464373588562, "logps/chosen": -0.2883804738521576, "logps/rejected": -1.1112642288208008, "loss": 0.2281, "nll_loss": 0.23061653971672058, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014419023878872395, "rewards/margins": 0.04114418849349022, "rewards/rejected": -0.055563218891620636, "step": 3110 }, { "epoch": 2.23, "grad_norm": 4.625, "learning_rate": 4.365189348559864e-06, "log_odds_chosen": 2.069337844848633, "log_odds_ratio": -0.17350783944129944, "logits/chosen": -2.8976800441741943, "logits/rejected": -2.913184881210327, "logps/chosen": -0.29224854707717896, "logps/rejected": -1.2248303890228271, "loss": 0.215, "nll_loss": 0.207895427942276, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014612426050007343, "rewards/margins": 0.04662909731268883, "rewards/rejected": -0.06124152988195419, "step": 3120 }, { "epoch": 2.23, "grad_norm": 4.96875, "learning_rate": 4.363526735912025e-06, "log_odds_chosen": 2.010529041290283, "log_odds_ratio": -0.17960003018379211, "logits/chosen": -2.9376513957977295, "logits/rejected": -2.956413745880127, "logps/chosen": -0.2961011826992035, "logps/rejected": -1.169042944908142, "loss": 0.2234, "nll_loss": 0.21883544325828552, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.014805060811340809, "rewards/margins": 0.04364708811044693, "rewards/rejected": -0.058452147990465164, "step": 3130 }, { "epoch": 2.24, "grad_norm": 5.15625, "learning_rate": 4.361866021585114e-06, "log_odds_chosen": 2.253556489944458, "log_odds_ratio": -0.1558968424797058, "logits/chosen": -2.941788673400879, "logits/rejected": -2.9526584148406982, "logps/chosen": -0.26258301734924316, "logps/rejected": -1.2327936887741089, "loss": 0.2291, "nll_loss": 0.22146275639533997, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013129149563610554, "rewards/margins": 0.04851052910089493, "rewards/rejected": -0.061639685183763504, "step": 3140 }, { "epoch": 2.25, "grad_norm": 5.15625, "learning_rate": 4.360207201969474e-06, "log_odds_chosen": 2.147181987762451, "log_odds_ratio": -0.16235658526420593, "logits/chosen": -2.9329841136932373, "logits/rejected": -2.936492443084717, "logps/chosen": -0.28112685680389404, "logps/rejected": -1.2077734470367432, "loss": 0.2104, "nll_loss": 0.19907613098621368, "rewards/accuracies": 1.0, "rewards/chosen": -0.014056342653930187, "rewards/margins": 0.046332329511642456, "rewards/rejected": -0.060388676822185516, "step": 3150 }, { "epoch": 2.26, "grad_norm": 6.0, "learning_rate": 4.358550273465042e-06, "log_odds_chosen": 2.043837070465088, "log_odds_ratio": -0.18938776850700378, "logits/chosen": -2.944366693496704, "logits/rejected": -2.939744234085083, "logps/chosen": -0.2933470606803894, "logps/rejected": -1.1603938341140747, "loss": 0.2301, "nll_loss": 0.21869127452373505, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01466735266149044, "rewards/margins": 0.04335234686732292, "rewards/rejected": -0.05801969766616821, "step": 3160 }, { "epoch": 2.26, "grad_norm": 5.3125, "learning_rate": 4.356895232481328e-06, "log_odds_chosen": 2.2068166732788086, "log_odds_ratio": -0.16851142048835754, "logits/chosen": -2.9134292602539062, "logits/rejected": -2.9248287677764893, "logps/chosen": -0.2707240879535675, "logps/rejected": -1.2389049530029297, "loss": 0.2364, "nll_loss": 0.214188814163208, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013536205515265465, "rewards/margins": 0.04840904474258423, "rewards/rejected": -0.061945248395204544, "step": 3170 }, { "epoch": 2.27, "grad_norm": 4.78125, "learning_rate": 4.3552420754373795e-06, "log_odds_chosen": 2.206259250640869, "log_odds_ratio": -0.1675238311290741, "logits/chosen": -2.8840255737304688, "logits/rejected": -2.900109052658081, "logps/chosen": -0.28494498133659363, "logps/rejected": -1.2261676788330078, "loss": 0.2324, "nll_loss": 0.23279312252998352, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014247250743210316, "rewards/margins": 0.04706113040447235, "rewards/rejected": -0.06130838394165039, "step": 3180 }, { "epoch": 2.28, "grad_norm": 5.75, "learning_rate": 4.353590798761745e-06, "log_odds_chosen": 2.020906925201416, "log_odds_ratio": -0.17635580897331238, "logits/chosen": -2.8950753211975098, "logits/rejected": -2.9187989234924316, "logps/chosen": -0.2748999297618866, "logps/rejected": -1.137479305267334, "loss": 0.239, "nll_loss": 0.2283506840467453, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01374499686062336, "rewards/margins": 0.04312897473573685, "rewards/rejected": -0.05687396973371506, "step": 3190 }, { "epoch": 2.28, "grad_norm": 5.625, "learning_rate": 4.351941398892446e-06, "log_odds_chosen": 2.084432363510132, "log_odds_ratio": -0.16278493404388428, "logits/chosen": -2.901026725769043, "logits/rejected": -2.9171338081359863, "logps/chosen": -0.3120655417442322, "logps/rejected": -1.2602002620697021, "loss": 0.246, "nll_loss": 0.2599313259124756, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.015603276900947094, "rewards/margins": 0.04740674048662186, "rewards/rejected": -0.06301002204418182, "step": 3200 }, { "epoch": 2.29, "grad_norm": 5.15625, "learning_rate": 4.350293872276944e-06, "log_odds_chosen": 2.09785532951355, "log_odds_ratio": -0.17019295692443848, "logits/chosen": -2.902366876602173, "logits/rejected": -2.914095401763916, "logps/chosen": -0.27791982889175415, "logps/rejected": -1.1667200326919556, "loss": 0.2463, "nll_loss": 0.24153220653533936, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.013895990327000618, "rewards/margins": 0.04444001242518425, "rewards/rejected": -0.058336008340120316, "step": 3210 }, { "epoch": 2.3, "grad_norm": 5.03125, "learning_rate": 4.348648215372106e-06, "log_odds_chosen": 2.1732430458068848, "log_odds_ratio": -0.1618937999010086, "logits/chosen": -2.8988194465637207, "logits/rejected": -2.918598175048828, "logps/chosen": -0.26708492636680603, "logps/rejected": -1.1949737071990967, "loss": 0.2336, "nll_loss": 0.22503013908863068, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013354247435927391, "rewards/margins": 0.046394433826208115, "rewards/rejected": -0.05974868685007095, "step": 3220 }, { "epoch": 2.31, "grad_norm": 5.15625, "learning_rate": 4.347004424644176e-06, "log_odds_chosen": 2.1138107776641846, "log_odds_ratio": -0.17214186489582062, "logits/chosen": -2.922166585922241, "logits/rejected": -2.931307315826416, "logps/chosen": -0.27875009179115295, "logps/rejected": -1.1797934770584106, "loss": 0.2309, "nll_loss": 0.23376217484474182, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.013937504962086678, "rewards/margins": 0.0450521744787693, "rewards/rejected": -0.05898967385292053, "step": 3230 }, { "epoch": 2.31, "grad_norm": 4.875, "learning_rate": 4.34536249656874e-06, "log_odds_chosen": 2.177454948425293, "log_odds_ratio": -0.1669919341802597, "logits/chosen": -2.9374043941497803, "logits/rejected": -2.9587364196777344, "logps/chosen": -0.2633463740348816, "logps/rejected": -1.1948493719100952, "loss": 0.2249, "nll_loss": 0.2171686589717865, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.013167318888008595, "rewards/margins": 0.0465751513838768, "rewards/rejected": -0.059742461889982224, "step": 3240 }, { "epoch": 2.32, "grad_norm": 4.84375, "learning_rate": 4.3437224276306945e-06, "log_odds_chosen": 2.215099334716797, "log_odds_ratio": -0.15939660370349884, "logits/chosen": -2.881951093673706, "logits/rejected": -2.9153356552124023, "logps/chosen": -0.28022632002830505, "logps/rejected": -1.2117713689804077, "loss": 0.2556, "nll_loss": 0.25692808628082275, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014011316001415253, "rewards/margins": 0.04657725617289543, "rewards/rejected": -0.06058857589960098, "step": 3250 }, { "epoch": 2.33, "grad_norm": 5.6875, "learning_rate": 4.342084214324218e-06, "log_odds_chosen": 2.1894588470458984, "log_odds_ratio": -0.16253399848937988, "logits/chosen": -2.90968656539917, "logits/rejected": -2.9309780597686768, "logps/chosen": -0.29265648126602173, "logps/rejected": -1.2534782886505127, "loss": 0.2576, "nll_loss": 0.2393181025981903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01463282573968172, "rewards/margins": 0.04804108664393425, "rewards/rejected": -0.0626739114522934, "step": 3260 }, { "epoch": 2.33, "grad_norm": 4.625, "learning_rate": 4.340447853152738e-06, "log_odds_chosen": 2.091930389404297, "log_odds_ratio": -0.17941570281982422, "logits/chosen": -2.9261553287506104, "logits/rejected": -2.9507060050964355, "logps/chosen": -0.29296717047691345, "logps/rejected": -1.187831997871399, "loss": 0.233, "nll_loss": 0.21880583465099335, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014648358337581158, "rewards/margins": 0.044743236154317856, "rewards/rejected": -0.05939158797264099, "step": 3270 }, { "epoch": 2.34, "grad_norm": 5.46875, "learning_rate": 4.338813340628896e-06, "log_odds_chosen": 2.0643372535705566, "log_odds_ratio": -0.17419815063476562, "logits/chosen": -2.911599636077881, "logits/rejected": -2.923682928085327, "logps/chosen": -0.26648417115211487, "logps/rejected": -1.1481847763061523, "loss": 0.2274, "nll_loss": 0.2277383804321289, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013324208557605743, "rewards/margins": 0.04408503323793411, "rewards/rejected": -0.05740923807024956, "step": 3280 }, { "epoch": 2.35, "grad_norm": 3.8125, "learning_rate": 4.337180673274523e-06, "log_odds_chosen": 1.9233993291854858, "log_odds_ratio": -0.1962611824274063, "logits/chosen": -2.915182113647461, "logits/rejected": -2.913625955581665, "logps/chosen": -0.26856499910354614, "logps/rejected": -1.045851469039917, "loss": 0.2345, "nll_loss": 0.23353508114814758, "rewards/accuracies": 0.96875, "rewards/chosen": -0.013428251259028912, "rewards/margins": 0.03886432573199272, "rewards/rejected": -0.05229257419705391, "step": 3290 }, { "epoch": 2.36, "grad_norm": 6.5, "learning_rate": 4.3355498476206e-06, "log_odds_chosen": 2.2630984783172607, "log_odds_ratio": -0.15485426783561707, "logits/chosen": -2.911195755004883, "logits/rejected": -2.931729793548584, "logps/chosen": -0.2658364772796631, "logps/rejected": -1.2245619297027588, "loss": 0.2423, "nll_loss": 0.23118266463279724, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01329182367771864, "rewards/margins": 0.047936275601387024, "rewards/rejected": -0.06122808903455734, "step": 3300 }, { "epoch": 2.36, "grad_norm": 4.84375, "learning_rate": 4.333920860207238e-06, "log_odds_chosen": 2.060558319091797, "log_odds_ratio": -0.18207843601703644, "logits/chosen": -2.9397740364074707, "logits/rejected": -2.9511916637420654, "logps/chosen": -0.30885249376296997, "logps/rejected": -1.188513159751892, "loss": 0.2367, "nll_loss": 0.2180083692073822, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015442626550793648, "rewards/margins": 0.04398304224014282, "rewards/rejected": -0.05942566320300102, "step": 3310 }, { "epoch": 2.37, "grad_norm": 4.9375, "learning_rate": 4.332293707583636e-06, "log_odds_chosen": 2.2353901863098145, "log_odds_ratio": -0.1515626609325409, "logits/chosen": -2.9267566204071045, "logits/rejected": -2.9455149173736572, "logps/chosen": -0.253523588180542, "logps/rejected": -1.1828291416168213, "loss": 0.2525, "nll_loss": 0.245566725730896, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.01267617754638195, "rewards/margins": 0.04646528139710426, "rewards/rejected": -0.059141457080841064, "step": 3320 }, { "epoch": 2.38, "grad_norm": 4.46875, "learning_rate": 4.330668386308059e-06, "log_odds_chosen": 2.0343799591064453, "log_odds_ratio": -0.1873868852853775, "logits/chosen": -3.0006039142608643, "logits/rejected": -2.9913387298583984, "logps/chosen": -0.2894473373889923, "logps/rejected": -1.190096139907837, "loss": 0.2459, "nll_loss": 0.2212885320186615, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014472367241978645, "rewards/margins": 0.04503244161605835, "rewards/rejected": -0.059504806995391846, "step": 3330 }, { "epoch": 2.38, "grad_norm": 5.28125, "learning_rate": 4.329044892947799e-06, "log_odds_chosen": 2.3476614952087402, "log_odds_ratio": -0.14223968982696533, "logits/chosen": -2.9360368251800537, "logits/rejected": -2.9362521171569824, "logps/chosen": -0.2786175608634949, "logps/rejected": -1.3310235738754272, "loss": 0.2135, "nll_loss": 0.20772752165794373, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.01393087673932314, "rewards/margins": 0.052620310336351395, "rewards/rejected": -0.06655117124319077, "step": 3340 }, { "epoch": 2.39, "grad_norm": 5.1875, "learning_rate": 4.327423224079155e-06, "log_odds_chosen": 2.1379871368408203, "log_odds_ratio": -0.16850581765174866, "logits/chosen": -2.9171721935272217, "logits/rejected": -2.941715717315674, "logps/chosen": -0.2577691972255707, "logps/rejected": -1.1481659412384033, "loss": 0.2221, "nll_loss": 0.21284492313861847, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01288845855742693, "rewards/margins": 0.04451984167098999, "rewards/rejected": -0.05740829557180405, "step": 3350 }, { "epoch": 2.4, "grad_norm": 5.59375, "learning_rate": 4.325803376287392e-06, "log_odds_chosen": 2.0512311458587646, "log_odds_ratio": -0.18327447772026062, "logits/chosen": -2.9439637660980225, "logits/rejected": -2.9505419731140137, "logps/chosen": -0.27385398745536804, "logps/rejected": -1.1267859935760498, "loss": 0.2476, "nll_loss": 0.255187451839447, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.013692701235413551, "rewards/margins": 0.042646609246730804, "rewards/rejected": -0.05633930489420891, "step": 3360 }, { "epoch": 2.41, "grad_norm": 5.34375, "learning_rate": 4.32418534616672e-06, "log_odds_chosen": 2.1584973335266113, "log_odds_ratio": -0.1700998991727829, "logits/chosen": -2.954423427581787, "logits/rejected": -2.9609222412109375, "logps/chosen": -0.2891232371330261, "logps/rejected": -1.2464900016784668, "loss": 0.227, "nll_loss": 0.22372078895568848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014456162229180336, "rewards/margins": 0.04786834493279457, "rewards/rejected": -0.06232450157403946, "step": 3370 }, { "epoch": 2.41, "grad_norm": 5.8125, "learning_rate": 4.322569130320256e-06, "log_odds_chosen": 2.1953792572021484, "log_odds_ratio": -0.153684601187706, "logits/chosen": -2.9256153106689453, "logits/rejected": -2.934563159942627, "logps/chosen": -0.2697674632072449, "logps/rejected": -1.2171337604522705, "loss": 0.2279, "nll_loss": 0.23045894503593445, "rewards/accuracies": 1.0, "rewards/chosen": -0.013488374650478363, "rewards/margins": 0.04736831784248352, "rewards/rejected": -0.060856692492961884, "step": 3380 }, { "epoch": 2.42, "grad_norm": 4.9375, "learning_rate": 4.320954725359999e-06, "log_odds_chosen": 2.2562520503997803, "log_odds_ratio": -0.1594361513853073, "logits/chosen": -2.9553780555725098, "logits/rejected": -2.973090648651123, "logps/chosen": -0.28375449776649475, "logps/rejected": -1.2374476194381714, "loss": 0.249, "nll_loss": 0.249527245759964, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014187723398208618, "rewards/margins": 0.04768466204404831, "rewards/rejected": -0.06187238171696663, "step": 3390 }, { "epoch": 2.43, "grad_norm": 4.46875, "learning_rate": 4.319342127906801e-06, "log_odds_chosen": 2.0723280906677246, "log_odds_ratio": -0.1904068887233734, "logits/chosen": -2.935558319091797, "logits/rejected": -2.9476749897003174, "logps/chosen": -0.29698675870895386, "logps/rejected": -1.168005347251892, "loss": 0.2175, "nll_loss": 0.2165364772081375, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.014849339611828327, "rewards/margins": 0.04355093091726303, "rewards/rejected": -0.05840027332305908, "step": 3400 }, { "epoch": 2.43, "grad_norm": 5.875, "learning_rate": 4.317731334590332e-06, "log_odds_chosen": 2.097609043121338, "log_odds_ratio": -0.17266616225242615, "logits/chosen": -2.8817784786224365, "logits/rejected": -2.905477523803711, "logps/chosen": -0.2604636549949646, "logps/rejected": -1.1006355285644531, "loss": 0.2233, "nll_loss": 0.20613804459571838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01302318461239338, "rewards/margins": 0.042008593678474426, "rewards/rejected": -0.055031776428222656, "step": 3410 }, { "epoch": 2.44, "grad_norm": 4.65625, "learning_rate": 4.316122342049056e-06, "log_odds_chosen": 2.262716770172119, "log_odds_ratio": -0.15888556838035583, "logits/chosen": -2.930333375930786, "logits/rejected": -2.9403672218322754, "logps/chosen": -0.2604190409183502, "logps/rejected": -1.234446406364441, "loss": 0.2291, "nll_loss": 0.2312559336423874, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0130209531635046, "rewards/margins": 0.048701368272304535, "rewards/rejected": -0.061722319573163986, "step": 3420 }, { "epoch": 2.45, "grad_norm": 5.875, "learning_rate": 4.314515146930197e-06, "log_odds_chosen": 2.2008256912231445, "log_odds_ratio": -0.1563161164522171, "logits/chosen": -2.9282329082489014, "logits/rejected": -2.9527525901794434, "logps/chosen": -0.2750408351421356, "logps/rejected": -1.2426904439926147, "loss": 0.2161, "nll_loss": 0.20492339134216309, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.013752041384577751, "rewards/margins": 0.048382483422756195, "rewards/rejected": -0.062134526669979095, "step": 3430 }, { "epoch": 2.46, "grad_norm": 4.75, "learning_rate": 4.312909745889715e-06, "log_odds_chosen": 1.970311164855957, "log_odds_ratio": -0.18553660809993744, "logits/chosen": -2.9028189182281494, "logits/rejected": -2.9310402870178223, "logps/chosen": -0.3335861265659332, "logps/rejected": -1.2309215068817139, "loss": 0.222, "nll_loss": 0.2111777812242508, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0166793055832386, "rewards/margins": 0.04486677423119545, "rewards/rejected": -0.06154607981443405, "step": 3440 }, { "epoch": 2.46, "grad_norm": 5.21875, "learning_rate": 4.311306135592269e-06, "log_odds_chosen": 2.143489360809326, "log_odds_ratio": -0.17285478115081787, "logits/chosen": -2.926811933517456, "logits/rejected": -2.9482407569885254, "logps/chosen": -0.2610476315021515, "logps/rejected": -1.172515630722046, "loss": 0.2149, "nll_loss": 0.2024037390947342, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.013052381575107574, "rewards/margins": 0.0455734021961689, "rewards/rejected": -0.058625780045986176, "step": 3450 }, { "epoch": 2.47, "grad_norm": 5.4375, "learning_rate": 4.309704312711197e-06, "log_odds_chosen": 1.9968293905258179, "log_odds_ratio": -0.18701621890068054, "logits/chosen": -2.9047210216522217, "logits/rejected": -2.929392099380493, "logps/chosen": -0.29451262950897217, "logps/rejected": -1.130751132965088, "loss": 0.2495, "nll_loss": 0.2290346622467041, "rewards/accuracies": 0.96875, "rewards/chosen": -0.014725630171597004, "rewards/margins": 0.041811924427747726, "rewards/rejected": -0.056537557393312454, "step": 3460 }, { "epoch": 2.48, "grad_norm": 5.15625, "learning_rate": 4.3081042739284794e-06, "log_odds_chosen": 1.987897276878357, "log_odds_ratio": -0.1887591928243637, "logits/chosen": -2.938721179962158, "logits/rejected": -2.953474760055542, "logps/chosen": -0.30027034878730774, "logps/rejected": -1.1636924743652344, "loss": 0.222, "nll_loss": 0.20906376838684082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015013515949249268, "rewards/margins": 0.04317110404372215, "rewards/rejected": -0.05818462371826172, "step": 3470 }, { "epoch": 2.48, "grad_norm": 5.65625, "learning_rate": 4.306506015934716e-06, "log_odds_chosen": 2.015403985977173, "log_odds_ratio": -0.1880418211221695, "logits/chosen": -2.9391512870788574, "logits/rejected": -2.935800075531006, "logps/chosen": -0.3008590340614319, "logps/rejected": -1.1385343074798584, "loss": 0.2314, "nll_loss": 0.21299763023853302, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015042951330542564, "rewards/margins": 0.04188377410173416, "rewards/rejected": -0.05692671984434128, "step": 3480 }, { "epoch": 2.49, "grad_norm": 5.6875, "learning_rate": 4.304909535429091e-06, "log_odds_chosen": 2.0902674198150635, "log_odds_ratio": -0.16483107209205627, "logits/chosen": -2.907440662384033, "logits/rejected": -2.9203438758850098, "logps/chosen": -0.2806297242641449, "logps/rejected": -1.2069978713989258, "loss": 0.2289, "nll_loss": 0.21812649071216583, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.014031486585736275, "rewards/margins": 0.0463184118270874, "rewards/rejected": -0.06034989282488823, "step": 3490 }, { "epoch": 2.5, "grad_norm": 3.90625, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 1.9934600591659546, "log_odds_ratio": -0.19454535841941833, "logits/chosen": -2.8893580436706543, "logits/rejected": -2.9071896076202393, "logps/chosen": -0.28396016359329224, "logps/rejected": -1.1285146474838257, "loss": 0.2238, "nll_loss": 0.20403584837913513, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014198007993400097, "rewards/margins": 0.04222772270441055, "rewards/rejected": -0.056425731629133224, "step": 3500 }, { "epoch": 2.51, "grad_norm": 4.53125, "learning_rate": 4.301721893721773e-06, "log_odds_chosen": 2.1158764362335205, "log_odds_ratio": -0.16996172070503235, "logits/chosen": -2.926219940185547, "logits/rejected": -2.93003511428833, "logps/chosen": -0.27365607023239136, "logps/rejected": -1.182794213294983, "loss": 0.2373, "nll_loss": 0.26615434885025024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.013682805001735687, "rewards/margins": 0.045456912368535995, "rewards/rejected": -0.05913971737027168, "step": 3510 }, { "epoch": 2.51, "grad_norm": 4.5625, "learning_rate": 4.300130725961134e-06, "log_odds_chosen": 2.0993151664733887, "log_odds_ratio": -0.18101660907268524, "logits/chosen": -2.9107916355133057, "logits/rejected": -2.9079277515411377, "logps/chosen": -0.28615525364875793, "logps/rejected": -1.183293342590332, "loss": 0.222, "nll_loss": 0.22606563568115234, "rewards/accuracies": 0.96875, "rewards/chosen": -0.014307759702205658, "rewards/margins": 0.044856905937194824, "rewards/rejected": -0.05916466563940048, "step": 3520 }, { "epoch": 2.52, "grad_norm": 4.40625, "learning_rate": 4.298541322570686e-06, "log_odds_chosen": 2.079677104949951, "log_odds_ratio": -0.17266994714736938, "logits/chosen": -2.8908724784851074, "logits/rejected": -2.920081615447998, "logps/chosen": -0.26691848039627075, "logps/rejected": -1.142862319946289, "loss": 0.2083, "nll_loss": 0.20062704384326935, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013345924206078053, "rewards/margins": 0.043797191232442856, "rewards/rejected": -0.05714312940835953, "step": 3530 }, { "epoch": 2.53, "grad_norm": 5.09375, "learning_rate": 4.296953680292129e-06, "log_odds_chosen": 2.0655577182769775, "log_odds_ratio": -0.1739802211523056, "logits/chosen": -2.8886682987213135, "logits/rejected": -2.9118006229400635, "logps/chosen": -0.284760981798172, "logps/rejected": -1.1525925397872925, "loss": 0.2331, "nll_loss": 0.23845157027244568, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014238047413527966, "rewards/margins": 0.04339158535003662, "rewards/rejected": -0.05762963369488716, "step": 3540 }, { "epoch": 2.53, "grad_norm": 5.3125, "learning_rate": 4.295367795875578e-06, "log_odds_chosen": 2.1380605697631836, "log_odds_ratio": -0.176556795835495, "logits/chosen": -2.8978960514068604, "logits/rejected": -2.899569034576416, "logps/chosen": -0.29269880056381226, "logps/rejected": -1.2450029850006104, "loss": 0.2397, "nll_loss": 0.2633494734764099, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014634939841926098, "rewards/margins": 0.047615211457014084, "rewards/rejected": -0.06225014850497246, "step": 3550 }, { "epoch": 2.54, "grad_norm": 5.90625, "learning_rate": 4.293783666079539e-06, "log_odds_chosen": 2.104384183883667, "log_odds_ratio": -0.17723865807056427, "logits/chosen": -2.8846993446350098, "logits/rejected": -2.907013416290283, "logps/chosen": -0.2507666349411011, "logps/rejected": -1.0877444744110107, "loss": 0.2258, "nll_loss": 0.21293959021568298, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.012538331560790539, "rewards/margins": 0.041848890483379364, "rewards/rejected": -0.054387230426073074, "step": 3560 }, { "epoch": 2.55, "grad_norm": 5.4375, "learning_rate": 4.292201287670881e-06, "log_odds_chosen": 2.007575511932373, "log_odds_ratio": -0.19995658099651337, "logits/chosen": -2.9402010440826416, "logits/rejected": -2.9410014152526855, "logps/chosen": -0.3186001181602478, "logps/rejected": -1.2102280855178833, "loss": 0.2423, "nll_loss": 0.22782094776630402, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01593000628054142, "rewards/margins": 0.04458140209317207, "rewards/rejected": -0.06051139906048775, "step": 3570 }, { "epoch": 2.56, "grad_norm": 5.15625, "learning_rate": 4.2906206574248056e-06, "log_odds_chosen": 1.961155891418457, "log_odds_ratio": -0.19278380274772644, "logits/chosen": -2.93312406539917, "logits/rejected": -2.9524636268615723, "logps/chosen": -0.3163544535636902, "logps/rejected": -1.1956135034561157, "loss": 0.2465, "nll_loss": 0.22848501801490784, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.015817726030945778, "rewards/margins": 0.04396295174956322, "rewards/rejected": -0.059780675917863846, "step": 3580 }, { "epoch": 2.56, "grad_norm": 4.71875, "learning_rate": 4.289041772124823e-06, "log_odds_chosen": 1.896654486656189, "log_odds_ratio": -0.20070913434028625, "logits/chosen": -2.9290380477905273, "logits/rejected": -2.9414310455322266, "logps/chosen": -0.3589678406715393, "logps/rejected": -1.1963083744049072, "loss": 0.2485, "nll_loss": 0.23321954905986786, "rewards/accuracies": 0.96875, "rewards/chosen": -0.017948392778635025, "rewards/margins": 0.04186702519655228, "rewards/rejected": -0.0598154179751873, "step": 3590 }, { "epoch": 2.57, "grad_norm": 4.15625, "learning_rate": 4.2874646285627205e-06, "log_odds_chosen": 2.0257813930511475, "log_odds_ratio": -0.16877955198287964, "logits/chosen": -2.9236552715301514, "logits/rejected": -2.9332711696624756, "logps/chosen": -0.28219860792160034, "logps/rejected": -1.1740574836730957, "loss": 0.23, "nll_loss": 0.22192803025245667, "rewards/accuracies": 1.0, "rewards/chosen": -0.014109930023550987, "rewards/margins": 0.04459294304251671, "rewards/rejected": -0.05870287865400314, "step": 3600 }, { "epoch": 2.58, "grad_norm": 4.09375, "learning_rate": 4.2858892235385405e-06, "log_odds_chosen": 2.039332866668701, "log_odds_ratio": -0.17890916764736176, "logits/chosen": -2.9258196353912354, "logits/rejected": -2.9339518547058105, "logps/chosen": -0.278897225856781, "logps/rejected": -1.153327465057373, "loss": 0.2475, "nll_loss": 0.22460360825061798, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01394486241042614, "rewards/margins": 0.0437215119600296, "rewards/rejected": -0.05766637995839119, "step": 3610 }, { "epoch": 2.58, "grad_norm": 5.0, "learning_rate": 4.2843155538605454e-06, "log_odds_chosen": 1.980555534362793, "log_odds_ratio": -0.1820518672466278, "logits/chosen": -2.942025661468506, "logits/rejected": -2.938452959060669, "logps/chosen": -0.29569873213768005, "logps/rejected": -1.1704866886138916, "loss": 0.2455, "nll_loss": 0.2306048572063446, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.014784934930503368, "rewards/margins": 0.04373939707875252, "rewards/rejected": -0.05852434039115906, "step": 3620 }, { "epoch": 2.59, "grad_norm": 6.0625, "learning_rate": 4.2827436163452e-06, "log_odds_chosen": 1.9946845769882202, "log_odds_ratio": -0.18808093667030334, "logits/chosen": -2.935342311859131, "logits/rejected": -2.940308094024658, "logps/chosen": -0.30601242184638977, "logps/rejected": -1.178787350654602, "loss": 0.2493, "nll_loss": 0.24989327788352966, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.015300621278584003, "rewards/margins": 0.043638743460178375, "rewards/rejected": -0.0589393675327301, "step": 3630 }, { "epoch": 2.6, "grad_norm": 5.25, "learning_rate": 4.2811734078171365e-06, "log_odds_chosen": 2.064699649810791, "log_odds_ratio": -0.1740376055240631, "logits/chosen": -2.90887713432312, "logits/rejected": -2.9222257137298584, "logps/chosen": -0.25412940979003906, "logps/rejected": -1.119011640548706, "loss": 0.2456, "nll_loss": 0.2369098663330078, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.012706471607089043, "rewards/margins": 0.04324410855770111, "rewards/rejected": -0.055950574576854706, "step": 3640 }, { "epoch": 2.61, "grad_norm": 6.15625, "learning_rate": 4.27960492510913e-06, "log_odds_chosen": 2.057417392730713, "log_odds_ratio": -0.18223950266838074, "logits/chosen": -2.8936500549316406, "logits/rejected": -2.91770601272583, "logps/chosen": -0.2895287573337555, "logps/rejected": -1.1759768724441528, "loss": 0.2359, "nll_loss": 0.22096876800060272, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01447643805295229, "rewards/margins": 0.044322412461042404, "rewards/rejected": -0.05879884958267212, "step": 3650 }, { "epoch": 2.61, "grad_norm": 6.8125, "learning_rate": 4.278038165062074e-06, "log_odds_chosen": 2.0553503036499023, "log_odds_ratio": -0.1832961142063141, "logits/chosen": -2.8744332790374756, "logits/rejected": -2.884823799133301, "logps/chosen": -0.29930487275123596, "logps/rejected": -1.1658499240875244, "loss": 0.2364, "nll_loss": 0.2239643633365631, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014965241774916649, "rewards/margins": 0.04332726448774338, "rewards/rejected": -0.058292508125305176, "step": 3660 }, { "epoch": 2.62, "grad_norm": 4.75, "learning_rate": 4.276473124524951e-06, "log_odds_chosen": 2.067509174346924, "log_odds_ratio": -0.18179437518119812, "logits/chosen": -2.920393466949463, "logits/rejected": -2.933912754058838, "logps/chosen": -0.2947308421134949, "logps/rejected": -1.1749290227890015, "loss": 0.258, "nll_loss": 0.24315175414085388, "rewards/accuracies": 0.96875, "rewards/chosen": -0.014736543409526348, "rewards/margins": 0.04400990530848503, "rewards/rejected": -0.058746449649333954, "step": 3670 }, { "epoch": 2.63, "grad_norm": 5.0, "learning_rate": 4.274909800354809e-06, "log_odds_chosen": 2.0636065006256104, "log_odds_ratio": -0.18221907317638397, "logits/chosen": -2.913560390472412, "logits/rejected": -2.898419141769409, "logps/chosen": -0.29892921447753906, "logps/rejected": -1.2026621103286743, "loss": 0.2298, "nll_loss": 0.24287386238574982, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014946460723876953, "rewards/margins": 0.04518664628267288, "rewards/rejected": -0.060133107006549835, "step": 3680 }, { "epoch": 2.63, "grad_norm": 4.90625, "learning_rate": 4.27334818941673e-06, "log_odds_chosen": 2.065413475036621, "log_odds_ratio": -0.17128421366214752, "logits/chosen": -2.9174418449401855, "logits/rejected": -2.9187283515930176, "logps/chosen": -0.31790125370025635, "logps/rejected": -1.2642498016357422, "loss": 0.2479, "nll_loss": 0.23799021542072296, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015895063057541847, "rewards/margins": 0.047317422926425934, "rewards/rejected": -0.06321249902248383, "step": 3690 }, { "epoch": 2.64, "grad_norm": 5.5625, "learning_rate": 4.271788288583805e-06, "log_odds_chosen": 2.0177149772644043, "log_odds_ratio": -0.18031322956085205, "logits/chosen": -2.8912901878356934, "logits/rejected": -2.906595230102539, "logps/chosen": -0.3026626706123352, "logps/rejected": -1.2138030529022217, "loss": 0.2464, "nll_loss": 0.24549183249473572, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01513313502073288, "rewards/margins": 0.04555702209472656, "rewards/rejected": -0.060690153390169144, "step": 3700 }, { "epoch": 2.65, "grad_norm": 5.625, "learning_rate": 4.270230094737115e-06, "log_odds_chosen": 2.204523801803589, "log_odds_ratio": -0.16268154978752136, "logits/chosen": -2.8965342044830322, "logits/rejected": -2.9044742584228516, "logps/chosen": -0.2737334370613098, "logps/rejected": -1.2588539123535156, "loss": 0.2117, "nll_loss": 0.20202577114105225, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01368667371571064, "rewards/margins": 0.049256034195423126, "rewards/rejected": -0.06294270604848862, "step": 3710 }, { "epoch": 2.66, "grad_norm": 5.9375, "learning_rate": 4.268673604765692e-06, "log_odds_chosen": 2.133755922317505, "log_odds_ratio": -0.16594740748405457, "logits/chosen": -2.9114432334899902, "logits/rejected": -2.9318737983703613, "logps/chosen": -0.2706742584705353, "logps/rejected": -1.159444808959961, "loss": 0.2416, "nll_loss": 0.257633775472641, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.013533711433410645, "rewards/margins": 0.04443852975964546, "rewards/rejected": -0.05797224119305611, "step": 3720 }, { "epoch": 2.66, "grad_norm": 4.71875, "learning_rate": 4.267118815566505e-06, "log_odds_chosen": 2.304955005645752, "log_odds_ratio": -0.1501806378364563, "logits/chosen": -2.910193920135498, "logits/rejected": -2.9351253509521484, "logps/chosen": -0.2745073437690735, "logps/rejected": -1.3218328952789307, "loss": 0.2258, "nll_loss": 0.21883317828178406, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.013725368306040764, "rewards/margins": 0.05236627906560898, "rewards/rejected": -0.06609164923429489, "step": 3730 }, { "epoch": 2.67, "grad_norm": 6.78125, "learning_rate": 4.265565724044426e-06, "log_odds_chosen": 2.0478031635284424, "log_odds_ratio": -0.1753019243478775, "logits/chosen": -2.9269707202911377, "logits/rejected": -2.9262709617614746, "logps/chosen": -0.30243998765945435, "logps/rejected": -1.2427728176116943, "loss": 0.2337, "nll_loss": 0.22658483684062958, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015122001059353352, "rewards/margins": 0.047016650438308716, "rewards/rejected": -0.06213865429162979, "step": 3740 }, { "epoch": 2.68, "grad_norm": 5.34375, "learning_rate": 4.264014327112208e-06, "log_odds_chosen": 2.0601630210876465, "log_odds_ratio": -0.17402124404907227, "logits/chosen": -2.9387831687927246, "logits/rejected": -2.9434120655059814, "logps/chosen": -0.2765277028083801, "logps/rejected": -1.1455837488174438, "loss": 0.2231, "nll_loss": 0.19860777258872986, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01382638793438673, "rewards/margins": 0.04345279186964035, "rewards/rejected": -0.057279180735349655, "step": 3750 }, { "epoch": 2.68, "grad_norm": 5.5, "learning_rate": 4.26246462169046e-06, "log_odds_chosen": 2.146266460418701, "log_odds_ratio": -0.16794727742671967, "logits/chosen": -2.956972360610962, "logits/rejected": -2.962459087371826, "logps/chosen": -0.27213069796562195, "logps/rejected": -1.1795165538787842, "loss": 0.2451, "nll_loss": 0.23898108303546906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.013606536202132702, "rewards/margins": 0.04536928981542587, "rewards/rejected": -0.05897582694888115, "step": 3760 }, { "epoch": 2.69, "grad_norm": 4.625, "learning_rate": 4.260916604707614e-06, "log_odds_chosen": 1.895447015762329, "log_odds_ratio": -0.1990579068660736, "logits/chosen": -2.922109603881836, "logits/rejected": -2.921861410140991, "logps/chosen": -0.2980845868587494, "logps/rejected": -1.1166658401489258, "loss": 0.2235, "nll_loss": 0.231489896774292, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.014904230833053589, "rewards/margins": 0.04092906787991524, "rewards/rejected": -0.05583329126238823, "step": 3770 }, { "epoch": 2.7, "grad_norm": 5.0625, "learning_rate": 4.25937027309991e-06, "log_odds_chosen": 2.003633975982666, "log_odds_ratio": -0.18709906935691833, "logits/chosen": -2.9259610176086426, "logits/rejected": -2.9269964694976807, "logps/chosen": -0.31452521681785583, "logps/rejected": -1.1990089416503906, "loss": 0.2623, "nll_loss": 0.2658897042274475, "rewards/accuracies": 0.96875, "rewards/chosen": -0.015726260840892792, "rewards/margins": 0.04422418028116226, "rewards/rejected": -0.05995044857263565, "step": 3780 }, { "epoch": 2.71, "grad_norm": 5.25, "learning_rate": 4.257825623811364e-06, "log_odds_chosen": 2.155564785003662, "log_odds_ratio": -0.17152473330497742, "logits/chosen": -2.9300765991210938, "logits/rejected": -2.937422275543213, "logps/chosen": -0.2750275135040283, "logps/rejected": -1.208417534828186, "loss": 0.2323, "nll_loss": 0.23154130578041077, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013751375488936901, "rewards/margins": 0.046669501811265945, "rewards/rejected": -0.06042087823152542, "step": 3790 }, { "epoch": 2.71, "grad_norm": 4.28125, "learning_rate": 4.256282653793743e-06, "log_odds_chosen": 1.9499279260635376, "log_odds_ratio": -0.17946331202983856, "logits/chosen": -2.891995668411255, "logits/rejected": -2.9052202701568604, "logps/chosen": -0.302379846572876, "logps/rejected": -1.131711721420288, "loss": 0.2387, "nll_loss": 0.22251956164836884, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015118993818759918, "rewards/margins": 0.04146658629179001, "rewards/rejected": -0.05658558011054993, "step": 3800 }, { "epoch": 2.72, "grad_norm": 7.90625, "learning_rate": 4.254741360006543e-06, "log_odds_chosen": 1.972935438156128, "log_odds_ratio": -0.1962296962738037, "logits/chosen": -2.9211554527282715, "logits/rejected": -2.918924331665039, "logps/chosen": -0.291885107755661, "logps/rejected": -1.1757776737213135, "loss": 0.2119, "nll_loss": 0.194951131939888, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01459425687789917, "rewards/margins": 0.04419463127851486, "rewards/rejected": -0.058788884431123734, "step": 3810 }, { "epoch": 2.73, "grad_norm": 4.8125, "learning_rate": 4.25320173941696e-06, "log_odds_chosen": 2.04215931892395, "log_odds_ratio": -0.17209404706954956, "logits/chosen": -2.9103143215179443, "logits/rejected": -2.911741256713867, "logps/chosen": -0.3265768587589264, "logps/rejected": -1.2739063501358032, "loss": 0.2268, "nll_loss": 0.22781126201152802, "rewards/accuracies": 1.0, "rewards/chosen": -0.0163288414478302, "rewards/margins": 0.04736647382378578, "rewards/rejected": -0.06369531899690628, "step": 3820 }, { "epoch": 2.73, "grad_norm": 4.28125, "learning_rate": 4.251663788999866e-06, "log_odds_chosen": 2.139819860458374, "log_odds_ratio": -0.17184752225875854, "logits/chosen": -2.9420456886291504, "logits/rejected": -2.955049753189087, "logps/chosen": -0.30578261613845825, "logps/rejected": -1.2684471607208252, "loss": 0.2322, "nll_loss": 0.22287222743034363, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015289132483303547, "rewards/margins": 0.048133231699466705, "rewards/rejected": -0.06342236697673798, "step": 3830 }, { "epoch": 2.74, "grad_norm": 6.3125, "learning_rate": 4.250127505737787e-06, "log_odds_chosen": 2.155581474304199, "log_odds_ratio": -0.16084398329257965, "logits/chosen": -2.900238275527954, "logits/rejected": -2.923945903778076, "logps/chosen": -0.27417129278182983, "logps/rejected": -1.2404823303222656, "loss": 0.2415, "nll_loss": 0.22194568812847137, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.013708564452826977, "rewards/margins": 0.04831555113196373, "rewards/rejected": -0.06202411651611328, "step": 3840 }, { "epoch": 2.75, "grad_norm": 4.96875, "learning_rate": 4.2485928866208736e-06, "log_odds_chosen": 2.0622453689575195, "log_odds_ratio": -0.1771204173564911, "logits/chosen": -2.9182658195495605, "logits/rejected": -2.9299063682556152, "logps/chosen": -0.2977674901485443, "logps/rejected": -1.1828943490982056, "loss": 0.2409, "nll_loss": 0.22787527740001678, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014888373203575611, "rewards/margins": 0.04425634443759918, "rewards/rejected": -0.059144724160432816, "step": 3850 }, { "epoch": 2.76, "grad_norm": 4.375, "learning_rate": 4.247059928646881e-06, "log_odds_chosen": 2.0154402256011963, "log_odds_ratio": -0.1942734271287918, "logits/chosen": -2.9291112422943115, "logits/rejected": -2.9103519916534424, "logps/chosen": -0.281333327293396, "logps/rejected": -1.153630018234253, "loss": 0.2252, "nll_loss": 0.21655559539794922, "rewards/accuracies": 0.96875, "rewards/chosen": -0.0140666663646698, "rewards/margins": 0.043614838272333145, "rewards/rejected": -0.057681500911712646, "step": 3860 }, { "epoch": 2.76, "grad_norm": 6.0625, "learning_rate": 4.245528628821135e-06, "log_odds_chosen": 2.113131046295166, "log_odds_ratio": -0.18273907899856567, "logits/chosen": -2.917736530303955, "logits/rejected": -2.92810320854187, "logps/chosen": -0.29141664505004883, "logps/rejected": -1.1947263479232788, "loss": 0.2206, "nll_loss": 0.23844237625598907, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014570832252502441, "rewards/margins": 0.045165497809648514, "rewards/rejected": -0.059736330062150955, "step": 3870 }, { "epoch": 2.77, "grad_norm": 5.25, "learning_rate": 4.243998984156526e-06, "log_odds_chosen": 2.0922975540161133, "log_odds_ratio": -0.17185820639133453, "logits/chosen": -2.8744683265686035, "logits/rejected": -2.9042606353759766, "logps/chosen": -0.2935090661048889, "logps/rejected": -1.2336745262145996, "loss": 0.2394, "nll_loss": 0.22349922358989716, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.014675453305244446, "rewards/margins": 0.04700827598571777, "rewards/rejected": -0.06168372556567192, "step": 3880 }, { "epoch": 2.78, "grad_norm": 4.96875, "learning_rate": 4.242470991673459e-06, "log_odds_chosen": 1.9983400106430054, "log_odds_ratio": -0.19769485294818878, "logits/chosen": -2.859917402267456, "logits/rejected": -2.871030330657959, "logps/chosen": -0.30384406447410583, "logps/rejected": -1.1578266620635986, "loss": 0.2376, "nll_loss": 0.21438777446746826, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.015192203223705292, "rewards/margins": 0.04269913211464882, "rewards/rejected": -0.05789133161306381, "step": 3890 }, { "epoch": 2.78, "grad_norm": 5.03125, "learning_rate": 4.240944648399854e-06, "log_odds_chosen": 2.0351266860961914, "log_odds_ratio": -0.18826396763324738, "logits/chosen": -2.9190921783447266, "logits/rejected": -2.923105001449585, "logps/chosen": -0.28536805510520935, "logps/rejected": -1.1602951288223267, "loss": 0.2491, "nll_loss": 0.25211653113365173, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.014268402941524982, "rewards/margins": 0.04374634847044945, "rewards/rejected": -0.05801475793123245, "step": 3900 }, { "epoch": 2.79, "grad_norm": 4.40625, "learning_rate": 4.239419951371107e-06, "log_odds_chosen": 1.8209683895111084, "log_odds_ratio": -0.21575722098350525, "logits/chosen": -2.902409553527832, "logits/rejected": -2.9143335819244385, "logps/chosen": -0.3155234158039093, "logps/rejected": -1.0607740879058838, "loss": 0.2109, "nll_loss": 0.21318945288658142, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.015776170417666435, "rewards/margins": 0.03726252540946007, "rewards/rejected": -0.05303869768977165, "step": 3910 }, { "epoch": 2.8, "grad_norm": 4.5, "learning_rate": 4.237896897630065e-06, "log_odds_chosen": 2.0626044273376465, "log_odds_ratio": -0.17133717238903046, "logits/chosen": -2.9341623783111572, "logits/rejected": -2.967869281768799, "logps/chosen": -0.28656071424484253, "logps/rejected": -1.1975888013839722, "loss": 0.2267, "nll_loss": 0.21498951315879822, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014328035525977612, "rewards/margins": 0.04555141180753708, "rewards/rejected": -0.05987944453954697, "step": 3920 }, { "epoch": 2.81, "grad_norm": 4.4375, "learning_rate": 4.2363754842270135e-06, "log_odds_chosen": 2.000506639480591, "log_odds_ratio": -0.1933794468641281, "logits/chosen": -2.8884902000427246, "logits/rejected": -2.8795971870422363, "logps/chosen": -0.30968278646469116, "logps/rejected": -1.1907284259796143, "loss": 0.2348, "nll_loss": 0.2107667624950409, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015484139323234558, "rewards/margins": 0.04405229166150093, "rewards/rejected": -0.05953642725944519, "step": 3930 }, { "epoch": 2.81, "grad_norm": 6.65625, "learning_rate": 4.23485570821964e-06, "log_odds_chosen": 2.1047844886779785, "log_odds_ratio": -0.16426527500152588, "logits/chosen": -2.906376361846924, "logits/rejected": -2.91554594039917, "logps/chosen": -0.28010639548301697, "logps/rejected": -1.2043761014938354, "loss": 0.2411, "nll_loss": 0.23691853880882263, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014005321078002453, "rewards/margins": 0.046213485300540924, "rewards/rejected": -0.06021880358457565, "step": 3940 }, { "epoch": 2.82, "grad_norm": 5.5625, "learning_rate": 4.233337566673017e-06, "log_odds_chosen": 1.9270137548446655, "log_odds_ratio": -0.19595381617546082, "logits/chosen": -2.867866039276123, "logits/rejected": -2.8823580741882324, "logps/chosen": -0.29533451795578003, "logps/rejected": -1.118667721748352, "loss": 0.2218, "nll_loss": 0.20369569957256317, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014766724780201912, "rewards/margins": 0.04116666316986084, "rewards/rejected": -0.0559333935379982, "step": 3950 }, { "epoch": 2.83, "grad_norm": 4.9375, "learning_rate": 4.2318210566595795e-06, "log_odds_chosen": 1.8993085622787476, "log_odds_ratio": -0.19441360235214233, "logits/chosen": -2.917694568634033, "logits/rejected": -2.926478147506714, "logps/chosen": -0.3067987859249115, "logps/rejected": -1.1037228107452393, "loss": 0.24, "nll_loss": 0.24077454209327698, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015339940786361694, "rewards/margins": 0.03984620422124863, "rewards/rejected": -0.05518614128232002, "step": 3960 }, { "epoch": 2.83, "grad_norm": 5.46875, "learning_rate": 4.230306175259094e-06, "log_odds_chosen": 2.1352219581604004, "log_odds_ratio": -0.17009180784225464, "logits/chosen": -2.898860454559326, "logits/rejected": -2.910273551940918, "logps/chosen": -0.291853666305542, "logps/rejected": -1.2408645153045654, "loss": 0.239, "nll_loss": 0.23617322742938995, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01459268294274807, "rewards/margins": 0.04745054244995117, "rewards/rejected": -0.06204322725534439, "step": 3970 }, { "epoch": 2.84, "grad_norm": 6.84375, "learning_rate": 4.228792919558642e-06, "log_odds_chosen": 1.9759700298309326, "log_odds_ratio": -0.1895657181739807, "logits/chosen": -2.8813636302948, "logits/rejected": -2.897648334503174, "logps/chosen": -0.28039294481277466, "logps/rejected": -1.106951355934143, "loss": 0.236, "nll_loss": 0.2396521121263504, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.014019647613167763, "rewards/margins": 0.04132791981101036, "rewards/rejected": -0.05534757301211357, "step": 3980 }, { "epoch": 2.85, "grad_norm": 5.09375, "learning_rate": 4.227281286652593e-06, "log_odds_chosen": 2.006622791290283, "log_odds_ratio": -0.18658286333084106, "logits/chosen": -2.9210548400878906, "logits/rejected": -2.9346868991851807, "logps/chosen": -0.2725484371185303, "logps/rejected": -1.111262321472168, "loss": 0.231, "nll_loss": 0.213303804397583, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.013627422973513603, "rewards/margins": 0.041935697197914124, "rewards/rejected": -0.05556311458349228, "step": 3990 }, { "epoch": 2.86, "grad_norm": 5.09375, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 1.8391090631484985, "log_odds_ratio": -0.21399907767772675, "logits/chosen": -2.8890693187713623, "logits/rejected": -2.8955116271972656, "logps/chosen": -0.3432529866695404, "logps/rejected": -1.1629167795181274, "loss": 0.2349, "nll_loss": 0.2137899398803711, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.01716265082359314, "rewards/margins": 0.04098319262266159, "rewards/rejected": -0.05814583972096443, "step": 4000 }, { "epoch": 2.86, "grad_norm": 4.09375, "learning_rate": 4.224262877637488e-06, "log_odds_chosen": 1.8755204677581787, "log_odds_ratio": -0.20459318161010742, "logits/chosen": -2.8525354862213135, "logits/rejected": -2.8731565475463867, "logps/chosen": -0.3197447955608368, "logps/rejected": -1.1406848430633545, "loss": 0.2335, "nll_loss": 0.22804567217826843, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01598723977804184, "rewards/margins": 0.04104700684547424, "rewards/rejected": -0.057034242898225784, "step": 4010 }, { "epoch": 2.87, "grad_norm": 4.5, "learning_rate": 4.2227560957534054e-06, "log_odds_chosen": 2.0266966819763184, "log_odds_ratio": -0.19052095711231232, "logits/chosen": -2.8965086936950684, "logits/rejected": -2.896414041519165, "logps/chosen": -0.3156454265117645, "logps/rejected": -1.208319902420044, "loss": 0.2316, "nll_loss": 0.2188856601715088, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.015782270580530167, "rewards/margins": 0.04463372379541397, "rewards/rejected": -0.06041599065065384, "step": 4020 }, { "epoch": 2.88, "grad_norm": 5.0, "learning_rate": 4.221250925113625e-06, "log_odds_chosen": 1.875878930091858, "log_odds_ratio": -0.21098235249519348, "logits/chosen": -2.9176907539367676, "logits/rejected": -2.933192253112793, "logps/chosen": -0.30482417345046997, "logps/rejected": -1.1155188083648682, "loss": 0.2427, "nll_loss": 0.22833077609539032, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.015241208486258984, "rewards/margins": 0.04053472727537155, "rewards/rejected": -0.05577593296766281, "step": 4030 }, { "epoch": 2.88, "grad_norm": 5.3125, "learning_rate": 4.219747362848612e-06, "log_odds_chosen": 2.0864899158477783, "log_odds_ratio": -0.19180326163768768, "logits/chosen": -2.8865394592285156, "logits/rejected": -2.8893821239471436, "logps/chosen": -0.2769967019557953, "logps/rejected": -1.1684348583221436, "loss": 0.2404, "nll_loss": 0.238722562789917, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.013849836774170399, "rewards/margins": 0.04457191750407219, "rewards/rejected": -0.05842175334692001, "step": 4040 }, { "epoch": 2.89, "grad_norm": 5.34375, "learning_rate": 4.2182454060959784e-06, "log_odds_chosen": 2.0901265144348145, "log_odds_ratio": -0.16994525492191315, "logits/chosen": -2.930729389190674, "logits/rejected": -2.9224894046783447, "logps/chosen": -0.29616624116897583, "logps/rejected": -1.2133194208145142, "loss": 0.2475, "nll_loss": 0.24481268227100372, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014808312058448792, "rewards/margins": 0.04585765302181244, "rewards/rejected": -0.06066596508026123, "step": 4050 }, { "epoch": 2.9, "grad_norm": 5.03125, "learning_rate": 4.216745052000467e-06, "log_odds_chosen": 1.9087632894515991, "log_odds_ratio": -0.20587584376335144, "logits/chosen": -2.8943722248077393, "logits/rejected": -2.9035511016845703, "logps/chosen": -0.2947707772254944, "logps/rejected": -1.1172235012054443, "loss": 0.2132, "nll_loss": 0.20221516489982605, "rewards/accuracies": 0.96875, "rewards/chosen": -0.0147385373711586, "rewards/margins": 0.041122641414403915, "rewards/rejected": -0.055861182510852814, "step": 4060 }, { "epoch": 2.91, "grad_norm": 4.875, "learning_rate": 4.21524629771392e-06, "log_odds_chosen": 1.9671287536621094, "log_odds_ratio": -0.1902194321155548, "logits/chosen": -2.9334776401519775, "logits/rejected": -2.963710308074951, "logps/chosen": -0.2739933133125305, "logps/rejected": -1.1077688932418823, "loss": 0.2348, "nll_loss": 0.2265622317790985, "rewards/accuracies": 0.96875, "rewards/chosen": -0.013699667528271675, "rewards/margins": 0.04168878495693207, "rewards/rejected": -0.055388450622558594, "step": 4070 }, { "epoch": 2.91, "grad_norm": 4.875, "learning_rate": 4.213749140395264e-06, "log_odds_chosen": 2.1268839836120605, "log_odds_ratio": -0.16453871130943298, "logits/chosen": -2.893812656402588, "logits/rejected": -2.9219605922698975, "logps/chosen": -0.2740415930747986, "logps/rejected": -1.2125803232192993, "loss": 0.2362, "nll_loss": 0.21871864795684814, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.01370207965373993, "rewards/margins": 0.04692693427205086, "rewards/rejected": -0.06062900274991989, "step": 4080 }, { "epoch": 2.92, "grad_norm": 5.40625, "learning_rate": 4.2122535772104825e-06, "log_odds_chosen": 2.1293797492980957, "log_odds_ratio": -0.18520024418830872, "logits/chosen": -2.948119878768921, "logits/rejected": -2.940035581588745, "logps/chosen": -0.26464852690696716, "logps/rejected": -1.1528732776641846, "loss": 0.2205, "nll_loss": 0.19622497260570526, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013232424855232239, "rewards/margins": 0.04441123828291893, "rewards/rejected": -0.057643670588731766, "step": 4090 }, { "epoch": 2.93, "grad_norm": 5.09375, "learning_rate": 4.2107596053325946e-06, "log_odds_chosen": 1.943811058998108, "log_odds_ratio": -0.19135324656963348, "logits/chosen": -2.927293300628662, "logits/rejected": -2.942063570022583, "logps/chosen": -0.3171078562736511, "logps/rejected": -1.1890798807144165, "loss": 0.2319, "nll_loss": 0.24939313530921936, "rewards/accuracies": 0.96875, "rewards/chosen": -0.015855394303798676, "rewards/margins": 0.04359859973192215, "rewards/rejected": -0.05945398658514023, "step": 4100 }, { "epoch": 2.93, "grad_norm": 5.4375, "learning_rate": 4.209267221941637e-06, "log_odds_chosen": 1.955772042274475, "log_odds_ratio": -0.19047009944915771, "logits/chosen": -2.933640956878662, "logits/rejected": -2.9383158683776855, "logps/chosen": -0.3040629029273987, "logps/rejected": -1.129675030708313, "loss": 0.2534, "nll_loss": 0.24320121109485626, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.015203145332634449, "rewards/margins": 0.041280604898929596, "rewards/rejected": -0.05648375302553177, "step": 4110 }, { "epoch": 2.94, "grad_norm": 5.6875, "learning_rate": 4.207776424224631e-06, "log_odds_chosen": 2.1329073905944824, "log_odds_ratio": -0.16541200876235962, "logits/chosen": -2.928443431854248, "logits/rejected": -2.9326348304748535, "logps/chosen": -0.2823640704154968, "logps/rejected": -1.2579089403152466, "loss": 0.2212, "nll_loss": 0.2003752887248993, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.014118203893303871, "rewards/margins": 0.04877724125981331, "rewards/rejected": -0.06289544701576233, "step": 4120 }, { "epoch": 2.95, "grad_norm": 5.40625, "learning_rate": 4.206287209375573e-06, "log_odds_chosen": 1.9787099361419678, "log_odds_ratio": -0.19640588760375977, "logits/chosen": -2.931318759918213, "logits/rejected": -2.939894437789917, "logps/chosen": -0.3001793920993805, "logps/rejected": -1.1403203010559082, "loss": 0.2251, "nll_loss": 0.2245466709136963, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01500896830111742, "rewards/margins": 0.042007043957710266, "rewards/rejected": -0.05701601505279541, "step": 4130 }, { "epoch": 2.96, "grad_norm": 4.28125, "learning_rate": 4.204799574595403e-06, "log_odds_chosen": 2.0686066150665283, "log_odds_ratio": -0.17827217280864716, "logits/chosen": -2.9051029682159424, "logits/rejected": -2.918928623199463, "logps/chosen": -0.28031125664711, "logps/rejected": -1.1746606826782227, "loss": 0.219, "nll_loss": 0.19026514887809753, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014015564695000648, "rewards/margins": 0.04471747949719429, "rewards/rejected": -0.05873303860425949, "step": 4140 }, { "epoch": 2.96, "grad_norm": 5.1875, "learning_rate": 4.203313517091987e-06, "log_odds_chosen": 2.162344217300415, "log_odds_ratio": -0.15621480345726013, "logits/chosen": -2.88960599899292, "logits/rejected": -2.909137487411499, "logps/chosen": -0.26011472940444946, "logps/rejected": -1.1822528839111328, "loss": 0.226, "nll_loss": 0.19404032826423645, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.013005738146603107, "rewards/margins": 0.046106912195682526, "rewards/rejected": -0.05911264941096306, "step": 4150 }, { "epoch": 2.97, "grad_norm": 4.65625, "learning_rate": 4.201829034080091e-06, "log_odds_chosen": 2.0039446353912354, "log_odds_ratio": -0.192692369222641, "logits/chosen": -2.8722825050354004, "logits/rejected": -2.8747358322143555, "logps/chosen": -0.3029446005821228, "logps/rejected": -1.1576130390167236, "loss": 0.224, "nll_loss": 0.23223236203193665, "rewards/accuracies": 0.96875, "rewards/chosen": -0.01514722965657711, "rewards/margins": 0.04273342341184616, "rewards/rejected": -0.057880647480487823, "step": 4160 }, { "epoch": 2.98, "grad_norm": 6.53125, "learning_rate": 4.200346122781363e-06, "log_odds_chosen": 2.091663360595703, "log_odds_ratio": -0.18267402052879333, "logits/chosen": -2.900761604309082, "logits/rejected": -2.91227388381958, "logps/chosen": -0.26256534457206726, "logps/rejected": -1.1385129690170288, "loss": 0.229, "nll_loss": 0.20695538818836212, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.013128268532454967, "rewards/margins": 0.04379738122224808, "rewards/rejected": -0.05692564696073532, "step": 4170 }, { "epoch": 2.98, "grad_norm": 5.28125, "learning_rate": 4.1988647804243155e-06, "log_odds_chosen": 2.0570085048675537, "log_odds_ratio": -0.17320160567760468, "logits/chosen": -2.8818447589874268, "logits/rejected": -2.9016239643096924, "logps/chosen": -0.3113471567630768, "logps/rejected": -1.2070872783660889, "loss": 0.2463, "nll_loss": 0.23814329504966736, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.015567359514534473, "rewards/margins": 0.04478701204061508, "rewards/rejected": -0.06035437062382698, "step": 4180 }, { "epoch": 2.99, "grad_norm": 4.0, "learning_rate": 4.197385004244289e-06, "log_odds_chosen": 2.086322069168091, "log_odds_ratio": -0.1687101125717163, "logits/chosen": -2.9205222129821777, "logits/rejected": -2.924051284790039, "logps/chosen": -0.2813686430454254, "logps/rejected": -1.1617660522460938, "loss": 0.2372, "nll_loss": 0.2399667501449585, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01406843215227127, "rewards/margins": 0.04401987046003342, "rewards/rejected": -0.058088310062885284, "step": 4190 }, { "epoch": 3.0, "grad_norm": 4.90625, "learning_rate": 4.195906791483446e-06, "log_odds_chosen": 1.8499743938446045, "log_odds_ratio": -0.22145910561084747, "logits/chosen": -2.91683030128479, "logits/rejected": -2.925769805908203, "logps/chosen": -0.32796710729599, "logps/rejected": -1.1194093227386475, "loss": 0.257, "nll_loss": 0.2433611899614334, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.01639835350215435, "rewards/margins": 0.03957211226224899, "rewards/rejected": -0.05597046762704849, "step": 4200 }, { "epoch": 3.0, "step": 4200, "total_flos": 0.0, "train_loss": 0.33665566980838774, "train_runtime": 52171.861, "train_samples_per_second": 2.577, "train_steps_per_second": 0.081 } ], "logging_steps": 10, "max_steps": 4200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }