{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9989289539450197, "eval_steps": 500, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 6.3125, "learning_rate": 4.997501873438867e-06, "log_odds_chosen": 0.027651220560073853, "log_odds_ratio": -0.7304251194000244, "logits/chosen": -3.0225749015808105, "logits/rejected": -3.0150020122528076, "logps/chosen": -0.815263569355011, "logps/rejected": -0.8314116597175598, "loss": 1.153, "nll_loss": 1.0209743976593018, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1630527228116989, "rewards/margins": 0.0032296099234372377, "rewards/rejected": -0.16628232598304749, "step": 10 }, { "epoch": 0.01, "grad_norm": 5.40625, "learning_rate": 4.995007487521836e-06, "log_odds_chosen": 0.12160005420446396, "log_odds_ratio": -0.6930473446846008, "logits/chosen": -3.121931791305542, "logits/rejected": -3.1250596046447754, "logps/chosen": -0.7088677883148193, "logps/rejected": -0.7795363664627075, "loss": 0.6108, "nll_loss": 0.46274280548095703, "rewards/accuracies": 0.5625, "rewards/chosen": -0.14177358150482178, "rewards/margins": 0.014133691787719727, "rewards/rejected": -0.15590724349021912, "step": 20 }, { "epoch": 0.02, "grad_norm": 6.9375, "learning_rate": 4.992516832922945e-06, "log_odds_chosen": 0.03587382286787033, "log_odds_ratio": -0.7343233227729797, "logits/chosen": -3.130347728729248, "logits/rejected": -3.114506244659424, "logps/chosen": -0.7532758116722107, "logps/rejected": -0.7803520560264587, "loss": 0.6097, "nll_loss": 0.462548166513443, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.15065516531467438, "rewards/margins": 0.005415262188762426, "rewards/rejected": -0.15607044100761414, "step": 30 }, { "epoch": 0.03, "grad_norm": 5.65625, "learning_rate": 4.990029900348746e-06, "log_odds_chosen": 0.06624144315719604, "log_odds_ratio": -0.7134107351303101, "logits/chosen": -3.125946521759033, "logits/rejected": -3.1197519302368164, "logps/chosen": -0.7543585896492004, "logps/rejected": -0.8100128173828125, "loss": 0.595, "nll_loss": 0.47750869393348694, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.15087170898914337, "rewards/margins": 0.011130847968161106, "rewards/rejected": -0.1620025634765625, "step": 40 }, { "epoch": 0.04, "grad_norm": 5.21875, "learning_rate": 4.987546680538165e-06, "log_odds_chosen": 0.14277830719947815, "log_odds_ratio": -0.6764869093894958, "logits/chosen": -3.073333263397217, "logits/rejected": -3.081392288208008, "logps/chosen": -0.7409430146217346, "logps/rejected": -0.831874668598175, "loss": 0.6027, "nll_loss": 0.4561688005924225, "rewards/accuracies": 0.5625, "rewards/chosen": -0.14818862080574036, "rewards/margins": 0.018186338245868683, "rewards/rejected": -0.16637495160102844, "step": 50 }, { "epoch": 0.04, "grad_norm": 5.65625, "learning_rate": 4.985067164262359e-06, "log_odds_chosen": 0.09204573929309845, "log_odds_ratio": -0.7011358737945557, "logits/chosen": -3.172628879547119, "logits/rejected": -3.173214912414551, "logps/chosen": -0.7226709127426147, "logps/rejected": -0.7736715078353882, "loss": 0.5972, "nll_loss": 0.4593353867530823, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1445341557264328, "rewards/margins": 0.010200141929090023, "rewards/rejected": -0.15473432838916779, "step": 60 }, { "epoch": 0.05, "grad_norm": 5.59375, "learning_rate": 4.98259134232457e-06, "log_odds_chosen": 0.09953074157238007, "log_odds_ratio": -0.6920744776725769, "logits/chosen": -3.1694746017456055, "logits/rejected": -3.185354471206665, "logps/chosen": -0.7267014980316162, "logps/rejected": -0.7774870991706848, "loss": 0.6119, "nll_loss": 0.46830782294273376, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.14534030854701996, "rewards/margins": 0.010157106444239616, "rewards/rejected": -0.15549740195274353, "step": 70 }, { "epoch": 0.06, "grad_norm": 5.53125, "learning_rate": 4.980119205559974e-06, "log_odds_chosen": 0.07614605873823166, "log_odds_ratio": -0.7038400769233704, "logits/chosen": -3.1640992164611816, "logits/rejected": -3.1609697341918945, "logps/chosen": -0.7791558504104614, "logps/rejected": -0.8135568499565125, "loss": 0.6066, "nll_loss": 0.461737722158432, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.15583118796348572, "rewards/margins": 0.006880161818116903, "rewards/rejected": -0.16271135210990906, "step": 80 }, { "epoch": 0.06, "grad_norm": 5.3125, "learning_rate": 4.977650744835555e-06, "log_odds_chosen": 0.07529701292514801, "log_odds_ratio": -0.6971887946128845, "logits/chosen": -3.1437430381774902, "logits/rejected": -3.14058518409729, "logps/chosen": -0.7142292261123657, "logps/rejected": -0.7589999437332153, "loss": 0.5784, "nll_loss": 0.4407345652580261, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.14284583926200867, "rewards/margins": 0.008954131975769997, "rewards/rejected": -0.1517999768257141, "step": 90 }, { "epoch": 0.07, "grad_norm": 5.375, "learning_rate": 4.975185951049947e-06, "log_odds_chosen": 0.06737220287322998, "log_odds_ratio": -0.7220064401626587, "logits/chosen": -3.1887149810791016, "logits/rejected": -3.1891226768493652, "logps/chosen": -0.7800450921058655, "logps/rejected": -0.8231565356254578, "loss": 0.551, "nll_loss": 0.4563975930213928, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1560090035200119, "rewards/margins": 0.008622287772595882, "rewards/rejected": -0.16463130712509155, "step": 100 }, { "epoch": 0.08, "grad_norm": 5.9375, "learning_rate": 4.972724815133302e-06, "log_odds_chosen": 0.09414532780647278, "log_odds_ratio": -0.7080024480819702, "logits/chosen": -3.1148273944854736, "logits/rejected": -3.1315975189208984, "logps/chosen": -0.7162569761276245, "logps/rejected": -0.7553704977035522, "loss": 0.5325, "nll_loss": 0.43158403038978577, "rewards/accuracies": 0.5, "rewards/chosen": -0.14325138926506042, "rewards/margins": 0.00782269798219204, "rewards/rejected": -0.1510740965604782, "step": 110 }, { "epoch": 0.09, "grad_norm": 5.8125, "learning_rate": 4.970267328047151e-06, "log_odds_chosen": 0.10901321470737457, "log_odds_ratio": -0.6950778961181641, "logits/chosen": -3.14462947845459, "logits/rejected": -3.137329578399658, "logps/chosen": -0.7042136192321777, "logps/rejected": -0.7720807194709778, "loss": 0.5508, "nll_loss": 0.40320760011672974, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.1408427208662033, "rewards/margins": 0.0135734211653471, "rewards/rejected": -0.15441615879535675, "step": 120 }, { "epoch": 0.09, "grad_norm": 5.65625, "learning_rate": 4.9678134807842575e-06, "log_odds_chosen": 0.07895953953266144, "log_odds_ratio": -0.7133638262748718, "logits/chosen": -3.125828266143799, "logits/rejected": -3.1391282081604004, "logps/chosen": -0.7350977063179016, "logps/rejected": -0.7833020091056824, "loss": 0.5682, "nll_loss": 0.41112464666366577, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.14701953530311584, "rewards/margins": 0.009640867821872234, "rewards/rejected": -0.15666040778160095, "step": 130 }, { "epoch": 0.1, "grad_norm": 6.84375, "learning_rate": 4.965363264368484e-06, "log_odds_chosen": 0.12207289040088654, "log_odds_ratio": -0.6769169569015503, "logits/chosen": -3.14564847946167, "logits/rejected": -3.1439242362976074, "logps/chosen": -0.6801249980926514, "logps/rejected": -0.7512098550796509, "loss": 0.5738, "nll_loss": 0.4280276298522949, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.13602499663829803, "rewards/margins": 0.014216972514986992, "rewards/rejected": -0.15024198591709137, "step": 140 }, { "epoch": 0.11, "grad_norm": 5.71875, "learning_rate": 4.962916669854652e-06, "log_odds_chosen": 0.1224343553185463, "log_odds_ratio": -0.6913517713546753, "logits/chosen": -3.1227478981018066, "logits/rejected": -3.1305549144744873, "logps/chosen": -0.7392982840538025, "logps/rejected": -0.8002193570137024, "loss": 0.5956, "nll_loss": 0.43565186858177185, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14785966277122498, "rewards/margins": 0.01218421570956707, "rewards/rejected": -0.1600438803434372, "step": 150 }, { "epoch": 0.11, "grad_norm": 5.96875, "learning_rate": 4.960473688328407e-06, "log_odds_chosen": 0.0026047558058053255, "log_odds_ratio": -0.7348340153694153, "logits/chosen": -3.0425188541412354, "logits/rejected": -3.0503063201904297, "logps/chosen": -0.7252613306045532, "logps/rejected": -0.7286208271980286, "loss": 0.5688, "nll_loss": 0.41286152601242065, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1450522541999817, "rewards/margins": 0.0006719084340147674, "rewards/rejected": -0.14572417736053467, "step": 160 }, { "epoch": 0.12, "grad_norm": 5.1875, "learning_rate": 4.95803431090608e-06, "log_odds_chosen": 0.16818314790725708, "log_odds_ratio": -0.6584939956665039, "logits/chosen": -3.142646074295044, "logits/rejected": -3.161475419998169, "logps/chosen": -0.7002934217453003, "logps/rejected": -0.7836463451385498, "loss": 0.5958, "nll_loss": 0.4563008248806, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14005866646766663, "rewards/margins": 0.016670597717165947, "rewards/rejected": -0.15672926604747772, "step": 170 }, { "epoch": 0.13, "grad_norm": 5.4375, "learning_rate": 4.955598528734554e-06, "log_odds_chosen": 0.1538088172674179, "log_odds_ratio": -0.6675732135772705, "logits/chosen": -3.100310802459717, "logits/rejected": -3.117645740509033, "logps/chosen": -0.6987863183021545, "logps/rejected": -0.7786335945129395, "loss": 0.5698, "nll_loss": 0.45257002115249634, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.13975724577903748, "rewards/margins": 0.01596945896744728, "rewards/rejected": -0.15572671592235565, "step": 180 }, { "epoch": 0.14, "grad_norm": 6.28125, "learning_rate": 4.953166332991125e-06, "log_odds_chosen": 0.12078976631164551, "log_odds_ratio": -0.6859411001205444, "logits/chosen": -3.134028434753418, "logits/rejected": -3.137901782989502, "logps/chosen": -0.6982489824295044, "logps/rejected": -0.7687091827392578, "loss": 0.5676, "nll_loss": 0.43058595061302185, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13964977860450745, "rewards/margins": 0.014092043042182922, "rewards/rejected": -0.15374182164669037, "step": 190 }, { "epoch": 0.14, "grad_norm": 6.28125, "learning_rate": 4.950737714883372e-06, "log_odds_chosen": 0.17639026045799255, "log_odds_ratio": -0.655586838722229, "logits/chosen": -3.102017641067505, "logits/rejected": -3.1128413677215576, "logps/chosen": -0.6888889670372009, "logps/rejected": -0.7917199730873108, "loss": 0.5431, "nll_loss": 0.39507898688316345, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13777779042720795, "rewards/margins": 0.020566195249557495, "rewards/rejected": -0.15834400057792664, "step": 200 }, { "epoch": 0.15, "grad_norm": 6.21875, "learning_rate": 4.948312665649022e-06, "log_odds_chosen": 0.00438351184129715, "log_odds_ratio": -0.731238603591919, "logits/chosen": -3.072948932647705, "logits/rejected": -3.0700042247772217, "logps/chosen": -0.7648705244064331, "logps/rejected": -0.7703164219856262, "loss": 0.5744, "nll_loss": 0.41652363538742065, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.15297409892082214, "rewards/margins": 0.001089166384190321, "rewards/rejected": -0.15406326949596405, "step": 210 }, { "epoch": 0.16, "grad_norm": 5.0, "learning_rate": 4.945891176555817e-06, "log_odds_chosen": 0.09887596219778061, "log_odds_ratio": -0.6929382085800171, "logits/chosen": -3.109269618988037, "logits/rejected": -3.0964112281799316, "logps/chosen": -0.7023528218269348, "logps/rejected": -0.7567712068557739, "loss": 0.5629, "nll_loss": 0.4561777710914612, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14047057926654816, "rewards/margins": 0.010883677750825882, "rewards/rejected": -0.15135423839092255, "step": 220 }, { "epoch": 0.16, "grad_norm": 6.5, "learning_rate": 4.943473238901383e-06, "log_odds_chosen": 0.12996645271778107, "log_odds_ratio": -0.6799687147140503, "logits/chosen": -3.0217673778533936, "logits/rejected": -3.0064945220947266, "logps/chosen": -0.6958892345428467, "logps/rejected": -0.7603468298912048, "loss": 0.5877, "nll_loss": 0.43383994698524475, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1391778290271759, "rewards/margins": 0.012891518883407116, "rewards/rejected": -0.15206937491893768, "step": 230 }, { "epoch": 0.17, "grad_norm": 6.40625, "learning_rate": 4.941058844013094e-06, "log_odds_chosen": 0.15019556879997253, "log_odds_ratio": -0.665575385093689, "logits/chosen": -3.062437057495117, "logits/rejected": -3.067084789276123, "logps/chosen": -0.7049697041511536, "logps/rejected": -0.7816177606582642, "loss": 0.6147, "nll_loss": 0.48391756415367126, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.14099393784999847, "rewards/margins": 0.015329602174460888, "rewards/rejected": -0.15632352232933044, "step": 240 }, { "epoch": 0.18, "grad_norm": 6.9375, "learning_rate": 4.938647983247949e-06, "log_odds_chosen": 0.0968349426984787, "log_odds_ratio": -0.6998175382614136, "logits/chosen": -2.946869134902954, "logits/rejected": -2.954613208770752, "logps/chosen": -0.7149877548217773, "logps/rejected": -0.7582697868347168, "loss": 0.592, "nll_loss": 0.4492533206939697, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.14299754798412323, "rewards/margins": 0.008656383492052555, "rewards/rejected": -0.1516539454460144, "step": 250 }, { "epoch": 0.19, "grad_norm": 5.4375, "learning_rate": 4.936240647992436e-06, "log_odds_chosen": 0.13414113223552704, "log_odds_ratio": -0.670508861541748, "logits/chosen": -3.0646378993988037, "logits/rejected": -3.0543510913848877, "logps/chosen": -0.686463475227356, "logps/rejected": -0.7646616697311401, "loss": 0.577, "nll_loss": 0.4495324194431305, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.13729269802570343, "rewards/margins": 0.015639634802937508, "rewards/rejected": -0.1529323160648346, "step": 260 }, { "epoch": 0.19, "grad_norm": 5.65625, "learning_rate": 4.933836829662409e-06, "log_odds_chosen": 0.11647520214319229, "log_odds_ratio": -0.6881713271141052, "logits/chosen": -2.9797616004943848, "logits/rejected": -2.9717679023742676, "logps/chosen": -0.6882697939872742, "logps/rejected": -0.7442315220832825, "loss": 0.5775, "nll_loss": 0.42454642057418823, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13765396177768707, "rewards/margins": 0.01119234412908554, "rewards/rejected": -0.14884629845619202, "step": 270 }, { "epoch": 0.2, "grad_norm": 5.40625, "learning_rate": 4.9314365197029475e-06, "log_odds_chosen": 0.12243346869945526, "log_odds_ratio": -0.6820363998413086, "logits/chosen": -3.0487539768218994, "logits/rejected": -3.0383288860321045, "logps/chosen": -0.6958305239677429, "logps/rejected": -0.7610360980033875, "loss": 0.5734, "nll_loss": 0.4472366273403168, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.13916611671447754, "rewards/margins": 0.013041108846664429, "rewards/rejected": -0.15220721065998077, "step": 280 }, { "epoch": 0.21, "grad_norm": 5.46875, "learning_rate": 4.9290397095882446e-06, "log_odds_chosen": 0.02017252705991268, "log_odds_ratio": -0.7348771095275879, "logits/chosen": -2.9625377655029297, "logits/rejected": -2.9633870124816895, "logps/chosen": -0.7289597988128662, "logps/rejected": -0.7401062250137329, "loss": 0.5777, "nll_loss": 0.47131404280662537, "rewards/accuracies": 0.53125, "rewards/chosen": -0.14579197764396667, "rewards/margins": 0.0022292803041636944, "rewards/rejected": -0.14802125096321106, "step": 290 }, { "epoch": 0.21, "grad_norm": 5.625, "learning_rate": 4.9266463908214664e-06, "log_odds_chosen": 0.13380487263202667, "log_odds_ratio": -0.6815378069877625, "logits/chosen": -3.0238821506500244, "logits/rejected": -3.012429714202881, "logps/chosen": -0.6980360746383667, "logps/rejected": -0.772484540939331, "loss": 0.5416, "nll_loss": 0.3923744261264801, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.13960722088813782, "rewards/margins": 0.014889700338244438, "rewards/rejected": -0.1544969230890274, "step": 300 }, { "epoch": 0.22, "grad_norm": 4.8125, "learning_rate": 4.924256554934632e-06, "log_odds_chosen": 0.10960634052753448, "log_odds_ratio": -0.6801453828811646, "logits/chosen": -2.9736227989196777, "logits/rejected": -2.9606356620788574, "logps/chosen": -0.7132256031036377, "logps/rejected": -0.7729824781417847, "loss": 0.5434, "nll_loss": 0.38369157910346985, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.14264513552188873, "rewards/margins": 0.01195137295871973, "rewards/rejected": -0.1545964926481247, "step": 310 }, { "epoch": 0.23, "grad_norm": 5.84375, "learning_rate": 4.9218701934884865e-06, "log_odds_chosen": 0.15664125978946686, "log_odds_ratio": -0.6632459759712219, "logits/chosen": -3.026071071624756, "logits/rejected": -3.011920690536499, "logps/chosen": -0.7300186157226562, "logps/rejected": -0.8273404836654663, "loss": 0.57, "nll_loss": 0.42472705245018005, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.14600373804569244, "rewards/margins": 0.01946437731385231, "rewards/rejected": -0.16546811163425446, "step": 320 }, { "epoch": 0.24, "grad_norm": 6.65625, "learning_rate": 4.919487298072377e-06, "log_odds_chosen": 0.16137509047985077, "log_odds_ratio": -0.672458291053772, "logits/chosen": -2.949476718902588, "logits/rejected": -2.9505724906921387, "logps/chosen": -0.6964123249053955, "logps/rejected": -0.7866235971450806, "loss": 0.5429, "nll_loss": 0.4337022304534912, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1392824798822403, "rewards/margins": 0.018042229115962982, "rewards/rejected": -0.15732471644878387, "step": 330 }, { "epoch": 0.24, "grad_norm": 6.25, "learning_rate": 4.917107860304125e-06, "log_odds_chosen": 0.12605342268943787, "log_odds_ratio": -0.672644317150116, "logits/chosen": -3.1077914237976074, "logits/rejected": -3.0998547077178955, "logps/chosen": -0.6887668967247009, "logps/rejected": -0.7507665753364563, "loss": 0.5602, "nll_loss": 0.40362483263015747, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.13775338232517242, "rewards/margins": 0.012399917468428612, "rewards/rejected": -0.15015330910682678, "step": 340 }, { "epoch": 0.25, "grad_norm": 5.625, "learning_rate": 4.914731871829905e-06, "log_odds_chosen": 0.0800851434469223, "log_odds_ratio": -0.6991033554077148, "logits/chosen": -3.1771771907806396, "logits/rejected": -3.183584451675415, "logps/chosen": -0.7016042470932007, "logps/rejected": -0.7426560521125793, "loss": 0.5623, "nll_loss": 0.38557925820350647, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -0.14032086730003357, "rewards/margins": 0.008210362866520882, "rewards/rejected": -0.1485312283039093, "step": 350 }, { "epoch": 0.26, "grad_norm": 5.625, "learning_rate": 4.912359324324121e-06, "log_odds_chosen": 0.17609532177448273, "log_odds_ratio": -0.6462699174880981, "logits/chosen": -3.1602416038513184, "logits/rejected": -3.1689493656158447, "logps/chosen": -0.6804074048995972, "logps/rejected": -0.7664049863815308, "loss": 0.5685, "nll_loss": 0.4449498653411865, "rewards/accuracies": 0.625, "rewards/chosen": -0.1360814869403839, "rewards/margins": 0.01719951257109642, "rewards/rejected": -0.15328100323677063, "step": 360 }, { "epoch": 0.26, "grad_norm": 5.84375, "learning_rate": 4.909990209489284e-06, "log_odds_chosen": 0.09661266207695007, "log_odds_ratio": -0.6989172697067261, "logits/chosen": -3.1702992916107178, "logits/rejected": -3.1816422939300537, "logps/chosen": -0.6909779906272888, "logps/rejected": -0.7484806776046753, "loss": 0.5448, "nll_loss": 0.3755747377872467, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.13819561898708344, "rewards/margins": 0.011500529944896698, "rewards/rejected": -0.14969615638256073, "step": 370 }, { "epoch": 0.27, "grad_norm": 5.4375, "learning_rate": 4.907624519055888e-06, "log_odds_chosen": 0.053890157490968704, "log_odds_ratio": -0.7154654860496521, "logits/chosen": -3.1567158699035645, "logits/rejected": -3.165017604827881, "logps/chosen": -0.7759397625923157, "logps/rejected": -0.8097392320632935, "loss": 0.5697, "nll_loss": 0.42835354804992676, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1551879346370697, "rewards/margins": 0.0067598940804600716, "rewards/rejected": -0.1619478464126587, "step": 380 }, { "epoch": 0.28, "grad_norm": 5.75, "learning_rate": 4.905262244782294e-06, "log_odds_chosen": 0.10774964094161987, "log_odds_ratio": -0.6780427694320679, "logits/chosen": -3.1538772583007812, "logits/rejected": -3.139371395111084, "logps/chosen": -0.72501140832901, "logps/rejected": -0.7887372970581055, "loss": 0.5558, "nll_loss": 0.4034719467163086, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14500227570533752, "rewards/margins": 0.01274518109858036, "rewards/rejected": -0.15774747729301453, "step": 390 }, { "epoch": 0.29, "grad_norm": 6.4375, "learning_rate": 4.902903378454601e-06, "log_odds_chosen": 0.013013715855777264, "log_odds_ratio": -0.7487560510635376, "logits/chosen": -3.1208155155181885, "logits/rejected": -3.1211535930633545, "logps/chosen": -0.7125126719474792, "logps/rejected": -0.7153986692428589, "loss": 0.5383, "nll_loss": 0.3862176537513733, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.142502561211586, "rewards/margins": 0.0005772082367911935, "rewards/rejected": -0.1430797278881073, "step": 400 }, { "epoch": 0.29, "grad_norm": 4.53125, "learning_rate": 4.900547911886537e-06, "log_odds_chosen": 0.14288419485092163, "log_odds_ratio": -0.6765764951705933, "logits/chosen": -3.208242416381836, "logits/rejected": -3.2231342792510986, "logps/chosen": -0.7180224657058716, "logps/rejected": -0.7839994430541992, "loss": 0.5571, "nll_loss": 0.4236370921134949, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.14360451698303223, "rewards/margins": 0.013195383362472057, "rewards/rejected": -0.15679989755153656, "step": 410 }, { "epoch": 0.3, "grad_norm": 4.875, "learning_rate": 4.898195836919327e-06, "log_odds_chosen": 0.03629889339208603, "log_odds_ratio": -0.7236992716789246, "logits/chosen": -3.2515082359313965, "logits/rejected": -3.2554900646209717, "logps/chosen": -0.7256291508674622, "logps/rejected": -0.7619522213935852, "loss": 0.5552, "nll_loss": 0.41900959610939026, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.14512582123279572, "rewards/margins": 0.0072646364569664, "rewards/rejected": -0.1523904651403427, "step": 420 }, { "epoch": 0.31, "grad_norm": 5.21875, "learning_rate": 4.895847145421587e-06, "log_odds_chosen": 0.18708305060863495, "log_odds_ratio": -0.6497930288314819, "logits/chosen": -3.263317108154297, "logits/rejected": -3.263544797897339, "logps/chosen": -0.6555394530296326, "logps/rejected": -0.7548009753227234, "loss": 0.5175, "nll_loss": 0.386501282453537, "rewards/accuracies": 0.625, "rewards/chosen": -0.1311078816652298, "rewards/margins": 0.01985231228172779, "rewards/rejected": -0.15096020698547363, "step": 430 }, { "epoch": 0.31, "grad_norm": 5.46875, "learning_rate": 4.893501829289195e-06, "log_odds_chosen": 0.10671345144510269, "log_odds_ratio": -0.696262001991272, "logits/chosen": -3.2451579570770264, "logits/rejected": -3.2367336750030518, "logps/chosen": -0.711258590221405, "logps/rejected": -0.7748275995254517, "loss": 0.5643, "nll_loss": 0.435871422290802, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14225171506404877, "rewards/margins": 0.012713788077235222, "rewards/rejected": -0.15496549010276794, "step": 440 }, { "epoch": 0.32, "grad_norm": 5.15625, "learning_rate": 4.891159880445185e-06, "log_odds_chosen": -0.009761805646121502, "log_odds_ratio": -0.7526240348815918, "logits/chosen": -3.3140366077423096, "logits/rejected": -3.3279337882995605, "logps/chosen": -0.7342787981033325, "logps/rejected": -0.7224690318107605, "loss": 0.5655, "nll_loss": 0.3994046151638031, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.14685577154159546, "rewards/margins": -0.002361953491345048, "rewards/rejected": -0.14449380338191986, "step": 450 }, { "epoch": 0.33, "grad_norm": 5.3125, "learning_rate": 4.888821290839617e-06, "log_odds_chosen": 0.08875492960214615, "log_odds_ratio": -0.7100639343261719, "logits/chosen": -3.2668724060058594, "logits/rejected": -3.266616106033325, "logps/chosen": -0.6991187334060669, "logps/rejected": -0.7543323040008545, "loss": 0.5456, "nll_loss": 0.39074766635894775, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.13982374966144562, "rewards/margins": 0.011042733676731586, "rewards/rejected": -0.15086647868156433, "step": 460 }, { "epoch": 0.34, "grad_norm": 4.84375, "learning_rate": 4.886486052449469e-06, "log_odds_chosen": 0.04209429770708084, "log_odds_ratio": -0.7359583973884583, "logits/chosen": -3.2799735069274902, "logits/rejected": -3.2890267372131348, "logps/chosen": -0.7149407267570496, "logps/rejected": -0.7467728853225708, "loss": 0.5785, "nll_loss": 0.3849171996116638, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.1429881602525711, "rewards/margins": 0.006366416811943054, "rewards/rejected": -0.14935457706451416, "step": 470 }, { "epoch": 0.34, "grad_norm": 5.0, "learning_rate": 4.8841541572785224e-06, "log_odds_chosen": 0.08705852925777435, "log_odds_ratio": -0.7012760043144226, "logits/chosen": -3.229048490524292, "logits/rejected": -3.2325732707977295, "logps/chosen": -0.7453920245170593, "logps/rejected": -0.7918086051940918, "loss": 0.6028, "nll_loss": 0.49805039167404175, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14907841384410858, "rewards/margins": 0.009283310733735561, "rewards/rejected": -0.15836171805858612, "step": 480 }, { "epoch": 0.35, "grad_norm": 5.71875, "learning_rate": 4.881825597357242e-06, "log_odds_chosen": 0.12286486476659775, "log_odds_ratio": -0.6923102736473083, "logits/chosen": -3.2311508655548096, "logits/rejected": -3.241130828857422, "logps/chosen": -0.7206076979637146, "logps/rejected": -0.7855247259140015, "loss": 0.5515, "nll_loss": 0.4401041865348816, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.14412155747413635, "rewards/margins": 0.012983402237296104, "rewards/rejected": -0.15710493922233582, "step": 490 }, { "epoch": 0.36, "grad_norm": 6.03125, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 0.08800844103097916, "log_odds_ratio": -0.71002596616745, "logits/chosen": -3.240417957305908, "logits/rejected": -3.249009609222412, "logps/chosen": -0.7202855348587036, "logps/rejected": -0.7685345411300659, "loss": 0.593, "nll_loss": 0.4520091116428375, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.14405712485313416, "rewards/margins": 0.00964980386197567, "rewards/rejected": -0.15370693802833557, "step": 500 }, { "epoch": 0.36, "grad_norm": 5.46875, "learning_rate": 4.877178451518289e-06, "log_odds_chosen": 0.017683375626802444, "log_odds_ratio": -0.7400511503219604, "logits/chosen": -3.2630767822265625, "logits/rejected": -3.2636375427246094, "logps/chosen": -0.7481754422187805, "logps/rejected": -0.7609735727310181, "loss": 0.5502, "nll_loss": 0.403683602809906, "rewards/accuracies": 0.5, "rewards/chosen": -0.14963507652282715, "rewards/margins": 0.0025596513878554106, "rewards/rejected": -0.15219472348690033, "step": 510 }, { "epoch": 0.37, "grad_norm": 6.5, "learning_rate": 4.8748598497939494e-06, "log_odds_chosen": 0.08491934835910797, "log_odds_ratio": -0.6992539763450623, "logits/chosen": -3.2163689136505127, "logits/rejected": -3.2333626747131348, "logps/chosen": -0.7194134593009949, "logps/rejected": -0.7636415958404541, "loss": 0.5908, "nll_loss": 0.4157852232456207, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.14388270676136017, "rewards/margins": 0.008845620788633823, "rewards/rejected": -0.15272831916809082, "step": 520 }, { "epoch": 0.38, "grad_norm": 4.84375, "learning_rate": 4.872544551705718e-06, "log_odds_chosen": 0.12621939182281494, "log_odds_ratio": -0.6822875738143921, "logits/chosen": -3.219284772872925, "logits/rejected": -3.2353568077087402, "logps/chosen": -0.7198256254196167, "logps/rejected": -0.770844578742981, "loss": 0.5348, "nll_loss": 0.38905125856399536, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.14396515488624573, "rewards/margins": 0.010203775018453598, "rewards/rejected": -0.15416888892650604, "step": 530 }, { "epoch": 0.39, "grad_norm": 5.8125, "learning_rate": 4.870232549415787e-06, "log_odds_chosen": 0.039147090166807175, "log_odds_ratio": -0.7419449687004089, "logits/chosen": -3.215195894241333, "logits/rejected": -3.216196060180664, "logps/chosen": -0.7427582144737244, "logps/rejected": -0.7526665925979614, "loss": 0.5612, "nll_loss": 0.4239347577095032, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.14855164289474487, "rewards/margins": 0.001981673762202263, "rewards/rejected": -0.15053331851959229, "step": 540 }, { "epoch": 0.39, "grad_norm": 5.1875, "learning_rate": 4.867923835112355e-06, "log_odds_chosen": 0.12316079437732697, "log_odds_ratio": -0.6851805448532104, "logits/chosen": -3.174053907394409, "logits/rejected": -3.1798911094665527, "logps/chosen": -0.6940979957580566, "logps/rejected": -0.7675840258598328, "loss": 0.5704, "nll_loss": 0.46909135580062866, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1388196051120758, "rewards/margins": 0.01469721831381321, "rewards/rejected": -0.15351681411266327, "step": 550 }, { "epoch": 0.4, "grad_norm": 5.4375, "learning_rate": 4.865618401009519e-06, "log_odds_chosen": 0.06827814131975174, "log_odds_ratio": -0.7105705738067627, "logits/chosen": -3.1506597995758057, "logits/rejected": -3.1666228771209717, "logps/chosen": -0.749689519405365, "logps/rejected": -0.7806918621063232, "loss": 0.5884, "nll_loss": 0.44312652945518494, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14993791282176971, "rewards/margins": 0.0062004560604691505, "rewards/rejected": -0.1561383754014969, "step": 560 }, { "epoch": 0.41, "grad_norm": 4.96875, "learning_rate": 4.863316239347163e-06, "log_odds_chosen": 0.12389856576919556, "log_odds_ratio": -0.6848828196525574, "logits/chosen": -3.1486778259277344, "logits/rejected": -3.143662452697754, "logps/chosen": -0.6603912115097046, "logps/rejected": -0.7258492112159729, "loss": 0.5353, "nll_loss": 0.3620496392250061, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.13207824528217316, "rewards/margins": 0.013091601431369781, "rewards/rejected": -0.14516983926296234, "step": 570 }, { "epoch": 0.41, "grad_norm": 5.0, "learning_rate": 4.861017342390847e-06, "log_odds_chosen": 0.14408716559410095, "log_odds_ratio": -0.6706200242042542, "logits/chosen": -3.1252732276916504, "logits/rejected": -3.120234966278076, "logps/chosen": -0.700475811958313, "logps/rejected": -0.7832350730895996, "loss": 0.5601, "nll_loss": 0.41865190863609314, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14009517431259155, "rewards/margins": 0.016551848500967026, "rewards/rejected": -0.15664701163768768, "step": 580 }, { "epoch": 0.42, "grad_norm": 5.4375, "learning_rate": 4.858721702431704e-06, "log_odds_chosen": 0.19907112419605255, "log_odds_ratio": -0.6365267634391785, "logits/chosen": -3.1869750022888184, "logits/rejected": -3.1916017532348633, "logps/chosen": -0.6768631339073181, "logps/rejected": -0.7860314249992371, "loss": 0.5486, "nll_loss": 0.4141230583190918, "rewards/accuracies": 0.65625, "rewards/chosen": -0.13537263870239258, "rewards/margins": 0.02183363400399685, "rewards/rejected": -0.15720628201961517, "step": 590 }, { "epoch": 0.43, "grad_norm": 5.1875, "learning_rate": 4.856429311786322e-06, "log_odds_chosen": 0.20475217700004578, "log_odds_ratio": -0.6470059752464294, "logits/chosen": -3.0911927223205566, "logits/rejected": -3.088820219039917, "logps/chosen": -0.7005254030227661, "logps/rejected": -0.8171119689941406, "loss": 0.5372, "nll_loss": 0.4075283110141754, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.14010508358478546, "rewards/margins": 0.023317309096455574, "rewards/rejected": -0.1634223908185959, "step": 600 }, { "epoch": 0.44, "grad_norm": 5.3125, "learning_rate": 4.8541401627966426e-06, "log_odds_chosen": 0.16298583149909973, "log_odds_ratio": -0.6658545732498169, "logits/chosen": -3.041769504547119, "logits/rejected": -3.043297290802002, "logps/chosen": -0.689085066318512, "logps/rejected": -0.7849712371826172, "loss": 0.5575, "nll_loss": 0.4643743634223938, "rewards/accuracies": 0.625, "rewards/chosen": -0.13781702518463135, "rewards/margins": 0.01917722448706627, "rewards/rejected": -0.15699425339698792, "step": 610 }, { "epoch": 0.44, "grad_norm": 4.78125, "learning_rate": 4.85185424782985e-06, "log_odds_chosen": 0.12417051941156387, "log_odds_ratio": -0.68137526512146, "logits/chosen": -2.961411714553833, "logits/rejected": -2.9629132747650146, "logps/chosen": -0.6715599298477173, "logps/rejected": -0.7414580583572388, "loss": 0.5462, "nll_loss": 0.3696933686733246, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1343119889497757, "rewards/margins": 0.013979624025523663, "rewards/rejected": -0.14829161763191223, "step": 620 }, { "epoch": 0.45, "grad_norm": 5.28125, "learning_rate": 4.8495715592782715e-06, "log_odds_chosen": 0.129594087600708, "log_odds_ratio": -0.6814179420471191, "logits/chosen": -3.0220203399658203, "logits/rejected": -3.007021427154541, "logps/chosen": -0.7019616365432739, "logps/rejected": -0.7783767580986023, "loss": 0.581, "nll_loss": 0.43684515357017517, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14039233326911926, "rewards/margins": 0.0152830109000206, "rewards/rejected": -0.15567535161972046, "step": 630 }, { "epoch": 0.46, "grad_norm": 5.34375, "learning_rate": 4.847292089559258e-06, "log_odds_chosen": 0.14772093296051025, "log_odds_ratio": -0.6776904463768005, "logits/chosen": -3.015796661376953, "logits/rejected": -2.998617649078369, "logps/chosen": -0.6798470616340637, "logps/rejected": -0.7485564351081848, "loss": 0.5124, "nll_loss": 0.3927190601825714, "rewards/accuracies": 0.59375, "rewards/chosen": -0.13596941530704498, "rewards/margins": 0.01374187134206295, "rewards/rejected": -0.14971129596233368, "step": 640 }, { "epoch": 0.46, "grad_norm": 5.8125, "learning_rate": 4.845015831115093e-06, "log_odds_chosen": 0.22233247756958008, "log_odds_ratio": -0.6407415866851807, "logits/chosen": -3.0206289291381836, "logits/rejected": -2.993927478790283, "logps/chosen": -0.7096582055091858, "logps/rejected": -0.8297877311706543, "loss": 0.5503, "nll_loss": 0.4199894070625305, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1419316530227661, "rewards/margins": 0.024025900289416313, "rewards/rejected": -0.16595754027366638, "step": 650 }, { "epoch": 0.47, "grad_norm": 7.5, "learning_rate": 4.842742776412874e-06, "log_odds_chosen": 0.19071760773658752, "log_odds_ratio": -0.6609346270561218, "logits/chosen": -2.93975567817688, "logits/rejected": -2.927455425262451, "logps/chosen": -0.6851130723953247, "logps/rejected": -0.7924318313598633, "loss": 0.5098, "nll_loss": 0.39896029233932495, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.13702259957790375, "rewards/margins": 0.02146376296877861, "rewards/rejected": -0.15848635137081146, "step": 660 }, { "epoch": 0.48, "grad_norm": 5.03125, "learning_rate": 4.840472917944417e-06, "log_odds_chosen": 0.035539038479328156, "log_odds_ratio": -0.7433319687843323, "logits/chosen": -2.91545033454895, "logits/rejected": -2.904900074005127, "logps/chosen": -0.7592023015022278, "logps/rejected": -0.778312087059021, "loss": 0.5748, "nll_loss": 0.43543463945388794, "rewards/accuracies": 0.53125, "rewards/chosen": -0.151840478181839, "rewards/margins": 0.0038219629786908627, "rewards/rejected": -0.1556624323129654, "step": 670 }, { "epoch": 0.49, "grad_norm": 5.0, "learning_rate": 4.838206248226147e-06, "log_odds_chosen": 0.12000226974487305, "log_odds_ratio": -0.6970423460006714, "logits/chosen": -2.918330430984497, "logits/rejected": -2.909175395965576, "logps/chosen": -0.7081884145736694, "logps/rejected": -0.7808458209037781, "loss": 0.5926, "nll_loss": 0.4864211976528168, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1416376680135727, "rewards/margins": 0.01453147642314434, "rewards/rejected": -0.15616916120052338, "step": 680 }, { "epoch": 0.49, "grad_norm": 5.125, "learning_rate": 4.835942759799002e-06, "log_odds_chosen": 0.12994246184825897, "log_odds_ratio": -0.679962694644928, "logits/chosen": -2.9056477546691895, "logits/rejected": -2.907723903656006, "logps/chosen": -0.6788502335548401, "logps/rejected": -0.7463597059249878, "loss": 0.5609, "nll_loss": 0.4340883791446686, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1357700526714325, "rewards/margins": 0.013501903042197227, "rewards/rejected": -0.14927193522453308, "step": 690 }, { "epoch": 0.5, "grad_norm": 4.6875, "learning_rate": 4.833682445228318e-06, "log_odds_chosen": 0.1527804434299469, "log_odds_ratio": -0.6715155243873596, "logits/chosen": -2.8825762271881104, "logits/rejected": -2.878715991973877, "logps/chosen": -0.7160866856575012, "logps/rejected": -0.7908951044082642, "loss": 0.52, "nll_loss": 0.3986133933067322, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14321734011173248, "rewards/margins": 0.014961689710617065, "rewards/rejected": -0.15817902982234955, "step": 700 }, { "epoch": 0.51, "grad_norm": 4.84375, "learning_rate": 4.831425297103738e-06, "log_odds_chosen": 0.07408870756626129, "log_odds_ratio": -0.7036418318748474, "logits/chosen": -2.915374279022217, "logits/rejected": -2.8941776752471924, "logps/chosen": -0.7258971929550171, "logps/rejected": -0.776419460773468, "loss": 0.5574, "nll_loss": 0.4164521098136902, "rewards/accuracies": 0.5, "rewards/chosen": -0.14517942070960999, "rewards/margins": 0.010104473680257797, "rewards/rejected": -0.1552838832139969, "step": 710 }, { "epoch": 0.51, "grad_norm": 5.25, "learning_rate": 4.829171308039099e-06, "log_odds_chosen": 0.11705788224935532, "log_odds_ratio": -0.6934496164321899, "logits/chosen": -2.8689353466033936, "logits/rejected": -2.855515480041504, "logps/chosen": -0.7816129326820374, "logps/rejected": -0.8508423566818237, "loss": 0.5618, "nll_loss": 0.41237854957580566, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.15632258355617523, "rewards/margins": 0.013845880515873432, "rewards/rejected": -0.1701684594154358, "step": 720 }, { "epoch": 0.52, "grad_norm": 4.90625, "learning_rate": 4.826920470672344e-06, "log_odds_chosen": 0.14946161210536957, "log_odds_ratio": -0.6791409254074097, "logits/chosen": -2.920727252960205, "logits/rejected": -2.9093258380889893, "logps/chosen": -0.721591055393219, "logps/rejected": -0.8128422498703003, "loss": 0.5434, "nll_loss": 0.4294136166572571, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.14431820809841156, "rewards/margins": 0.018250230699777603, "rewards/rejected": -0.16256846487522125, "step": 730 }, { "epoch": 0.53, "grad_norm": 4.96875, "learning_rate": 4.824672777665406e-06, "log_odds_chosen": 0.1518145054578781, "log_odds_ratio": -0.6733521819114685, "logits/chosen": -2.8987679481506348, "logits/rejected": -2.8861145973205566, "logps/chosen": -0.7268251180648804, "logps/rejected": -0.8102623224258423, "loss": 0.592, "nll_loss": 0.4659670889377594, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.14536501467227936, "rewards/margins": 0.01668746955692768, "rewards/rejected": -0.1620524823665619, "step": 740 }, { "epoch": 0.54, "grad_norm": 5.46875, "learning_rate": 4.822428221704122e-06, "log_odds_chosen": 0.10895228385925293, "log_odds_ratio": -0.6847666501998901, "logits/chosen": -2.9176247119903564, "logits/rejected": -2.9068551063537598, "logps/chosen": -0.6662013530731201, "logps/rejected": -0.7180477976799011, "loss": 0.5466, "nll_loss": 0.4058578908443451, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.13324028253555298, "rewards/margins": 0.010369287803769112, "rewards/rejected": -0.14360955357551575, "step": 750 }, { "epoch": 0.54, "grad_norm": 5.5, "learning_rate": 4.820186795498119e-06, "log_odds_chosen": 0.07982759177684784, "log_odds_ratio": -0.6936464905738831, "logits/chosen": -2.900437831878662, "logits/rejected": -2.9012367725372314, "logps/chosen": -0.684615433216095, "logps/rejected": -0.7419982552528381, "loss": 0.5572, "nll_loss": 0.4167535901069641, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.13692307472229004, "rewards/margins": 0.011476578190922737, "rewards/rejected": -0.14839965105056763, "step": 760 }, { "epoch": 0.55, "grad_norm": 5.5625, "learning_rate": 4.817948491780728e-06, "log_odds_chosen": 0.07513515651226044, "log_odds_ratio": -0.700663685798645, "logits/chosen": -2.92631196975708, "logits/rejected": -2.9068970680236816, "logps/chosen": -0.701633095741272, "logps/rejected": -0.7442451119422913, "loss": 0.5573, "nll_loss": 0.41745367646217346, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1403266191482544, "rewards/margins": 0.008522395975887775, "rewards/rejected": -0.1488490104675293, "step": 770 }, { "epoch": 0.56, "grad_norm": 5.5625, "learning_rate": 4.815713303308872e-06, "log_odds_chosen": 0.07833166420459747, "log_odds_ratio": -0.7058992385864258, "logits/chosen": -2.877963066101074, "logits/rejected": -2.8587679862976074, "logps/chosen": -0.7175201773643494, "logps/rejected": -0.7572144865989685, "loss": 0.558, "nll_loss": 0.4125480055809021, "rewards/accuracies": 0.5625, "rewards/chosen": -0.1435040533542633, "rewards/margins": 0.00793885625898838, "rewards/rejected": -0.15144291520118713, "step": 780 }, { "epoch": 0.56, "grad_norm": 4.84375, "learning_rate": 4.813481222862981e-06, "log_odds_chosen": 0.11849744617938995, "log_odds_ratio": -0.6830799579620361, "logits/chosen": -2.9524035453796387, "logits/rejected": -2.9341073036193848, "logps/chosen": -0.6966606378555298, "logps/rejected": -0.7586569786071777, "loss": 0.5619, "nll_loss": 0.43152493238449097, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1393321305513382, "rewards/margins": 0.012399254366755486, "rewards/rejected": -0.15173138678073883, "step": 790 }, { "epoch": 0.57, "grad_norm": 5.53125, "learning_rate": 4.811252243246881e-06, "log_odds_chosen": 0.135534405708313, "log_odds_ratio": -0.6746788620948792, "logits/chosen": -2.8978843688964844, "logits/rejected": -2.8979437351226807, "logps/chosen": -0.7271771430969238, "logps/rejected": -0.7996577024459839, "loss": 0.5428, "nll_loss": 0.4260443150997162, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.14543543756008148, "rewards/margins": 0.01449611783027649, "rewards/rejected": -0.15993155539035797, "step": 800 }, { "epoch": 0.58, "grad_norm": 4.3125, "learning_rate": 4.809026357287709e-06, "log_odds_chosen": 0.11346729844808578, "log_odds_ratio": -0.6924766898155212, "logits/chosen": -2.9434874057769775, "logits/rejected": -2.931755781173706, "logps/chosen": -0.6964095830917358, "logps/rejected": -0.763741135597229, "loss": 0.5738, "nll_loss": 0.4434167444705963, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.13928191363811493, "rewards/margins": 0.013466304168105125, "rewards/rejected": -0.1527482271194458, "step": 810 }, { "epoch": 0.59, "grad_norm": 5.65625, "learning_rate": 4.806803557835802e-06, "log_odds_chosen": 0.053010594099760056, "log_odds_ratio": -0.7078055143356323, "logits/chosen": -2.951209545135498, "logits/rejected": -2.9389567375183105, "logps/chosen": -0.7215791344642639, "logps/rejected": -0.7523022890090942, "loss": 0.5626, "nll_loss": 0.45763856172561646, "rewards/accuracies": 0.5, "rewards/chosen": -0.14431582391262054, "rewards/margins": 0.0061446288600564, "rewards/rejected": -0.15046045184135437, "step": 820 }, { "epoch": 0.59, "grad_norm": 5.0625, "learning_rate": 4.804583837764616e-06, "log_odds_chosen": 0.12964418530464172, "log_odds_ratio": -0.6815978288650513, "logits/chosen": -2.977019786834717, "logits/rejected": -2.9383797645568848, "logps/chosen": -0.7318437099456787, "logps/rejected": -0.8120439648628235, "loss": 0.5538, "nll_loss": 0.42555293440818787, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.14636874198913574, "rewards/margins": 0.01604006253182888, "rewards/rejected": -0.16240879893302917, "step": 830 }, { "epoch": 0.6, "grad_norm": 5.75, "learning_rate": 4.802367189970616e-06, "log_odds_chosen": 0.12411677837371826, "log_odds_ratio": -0.6854800581932068, "logits/chosen": -2.913602828979492, "logits/rejected": -2.901832342147827, "logps/chosen": -0.7199221849441528, "logps/rejected": -0.792190432548523, "loss": 0.5405, "nll_loss": 0.42834392189979553, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.14398446679115295, "rewards/margins": 0.014453647658228874, "rewards/rejected": -0.15843810141086578, "step": 840 }, { "epoch": 0.61, "grad_norm": 5.3125, "learning_rate": 4.8001536073731936e-06, "log_odds_chosen": 0.1903192102909088, "log_odds_ratio": -0.6434763669967651, "logits/chosen": -2.9723453521728516, "logits/rejected": -2.952861785888672, "logps/chosen": -0.6776723265647888, "logps/rejected": -0.7700678110122681, "loss": 0.5276, "nll_loss": 0.4004877209663391, "rewards/accuracies": 0.625, "rewards/chosen": -0.13553445041179657, "rewards/margins": 0.018479080870747566, "rewards/rejected": -0.15401355922222137, "step": 850 }, { "epoch": 0.61, "grad_norm": 4.84375, "learning_rate": 4.797943082914558e-06, "log_odds_chosen": 0.1074991226196289, "log_odds_ratio": -0.6918960809707642, "logits/chosen": -2.931121349334717, "logits/rejected": -2.9277279376983643, "logps/chosen": -0.7141292691230774, "logps/rejected": -0.7773590087890625, "loss": 0.5386, "nll_loss": 0.4104904532432556, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.1428258717060089, "rewards/margins": 0.012645942158997059, "rewards/rejected": -0.1554718017578125, "step": 860 }, { "epoch": 0.62, "grad_norm": 5.0625, "learning_rate": 4.795735609559657e-06, "log_odds_chosen": 0.1122872605919838, "log_odds_ratio": -0.6973878145217896, "logits/chosen": -2.918083429336548, "logits/rejected": -2.9120912551879883, "logps/chosen": -0.749970555305481, "logps/rejected": -0.8086145520210266, "loss": 0.5636, "nll_loss": 0.4259462356567383, "rewards/accuracies": 0.53125, "rewards/chosen": -0.1499941051006317, "rewards/margins": 0.011728787794709206, "rewards/rejected": -0.16172286868095398, "step": 870 }, { "epoch": 0.63, "grad_norm": 5.375, "learning_rate": 4.793531180296065e-06, "log_odds_chosen": 0.17277751863002777, "log_odds_ratio": -0.6646739840507507, "logits/chosen": -2.9382827281951904, "logits/rejected": -2.936652421951294, "logps/chosen": -0.7452108860015869, "logps/rejected": -0.8357378840446472, "loss": 0.5698, "nll_loss": 0.42103928327560425, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14904220402240753, "rewards/margins": 0.018105393275618553, "rewards/rejected": -0.16714756190776825, "step": 880 }, { "epoch": 0.64, "grad_norm": 5.3125, "learning_rate": 4.7913297881339085e-06, "log_odds_chosen": 0.23507757484912872, "log_odds_ratio": -0.6391969323158264, "logits/chosen": -2.9719607830047607, "logits/rejected": -2.9441778659820557, "logps/chosen": -0.6970862746238708, "logps/rejected": -0.8258684873580933, "loss": 0.5789, "nll_loss": 0.4702020287513733, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.13941726088523865, "rewards/margins": 0.02575644478201866, "rewards/rejected": -0.1651737093925476, "step": 890 }, { "epoch": 0.64, "grad_norm": 5.40625, "learning_rate": 4.789131426105757e-06, "log_odds_chosen": 0.1419156789779663, "log_odds_ratio": -0.681240975856781, "logits/chosen": -2.9537596702575684, "logits/rejected": -2.9550156593322754, "logps/chosen": -0.6770733594894409, "logps/rejected": -0.7640056610107422, "loss": 0.5749, "nll_loss": 0.40316563844680786, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.13541468977928162, "rewards/margins": 0.01738644763827324, "rewards/rejected": -0.15280112624168396, "step": 900 }, { "epoch": 0.65, "grad_norm": 5.1875, "learning_rate": 4.786936087266542e-06, "log_odds_chosen": 0.16488385200500488, "log_odds_ratio": -0.6757220029830933, "logits/chosen": -2.9237289428710938, "logits/rejected": -2.918750047683716, "logps/chosen": -0.6665478944778442, "logps/rejected": -0.7528942823410034, "loss": 0.5518, "nll_loss": 0.4029026925563812, "rewards/accuracies": 0.53125, "rewards/chosen": -0.13330957293510437, "rewards/margins": 0.017269287258386612, "rewards/rejected": -0.15057885646820068, "step": 910 }, { "epoch": 0.66, "grad_norm": 4.53125, "learning_rate": 4.784743764693455e-06, "log_odds_chosen": 0.1091703400015831, "log_odds_ratio": -0.6948757767677307, "logits/chosen": -2.9501147270202637, "logits/rejected": -2.9290926456451416, "logps/chosen": -0.7138906717300415, "logps/rejected": -0.7786569595336914, "loss": 0.5813, "nll_loss": 0.45604902505874634, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.14277812838554382, "rewards/margins": 0.012953246012330055, "rewards/rejected": -0.15573139488697052, "step": 920 }, { "epoch": 0.66, "grad_norm": 4.78125, "learning_rate": 4.7825544514858655e-06, "log_odds_chosen": 0.12012086063623428, "log_odds_ratio": -0.6921867728233337, "logits/chosen": -2.9667954444885254, "logits/rejected": -2.9569287300109863, "logps/chosen": -0.7008910179138184, "logps/rejected": -0.7661630511283875, "loss": 0.5731, "nll_loss": 0.44391852617263794, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.14017818868160248, "rewards/margins": 0.013054423034191132, "rewards/rejected": -0.153232604265213, "step": 930 }, { "epoch": 0.67, "grad_norm": 5.875, "learning_rate": 4.780368140765222e-06, "log_odds_chosen": 0.0706237182021141, "log_odds_ratio": -0.714358925819397, "logits/chosen": -2.932490348815918, "logits/rejected": -2.9120583534240723, "logps/chosen": -0.6620159149169922, "logps/rejected": -0.7102792859077454, "loss": 0.5259, "nll_loss": 0.40104350447654724, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1324031949043274, "rewards/margins": 0.009652670472860336, "rewards/rejected": -0.14205586910247803, "step": 940 }, { "epoch": 0.68, "grad_norm": 4.875, "learning_rate": 4.778184825674966e-06, "log_odds_chosen": 0.17702895402908325, "log_odds_ratio": -0.6546781063079834, "logits/chosen": -2.94228196144104, "logits/rejected": -2.9221675395965576, "logps/chosen": -0.6707956194877625, "logps/rejected": -0.7657662630081177, "loss": 0.5465, "nll_loss": 0.39552420377731323, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.1341591328382492, "rewards/margins": 0.01899414323270321, "rewards/rejected": -0.15315327048301697, "step": 950 }, { "epoch": 0.69, "grad_norm": 5.09375, "learning_rate": 4.776004499380439e-06, "log_odds_chosen": 0.019583452492952347, "log_odds_ratio": -0.7346916794776917, "logits/chosen": -2.9015583992004395, "logits/rejected": -2.9088730812072754, "logps/chosen": -0.7036567330360413, "logps/rejected": -0.7168689966201782, "loss": 0.5455, "nll_loss": 0.3951405882835388, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -0.14073136448860168, "rewards/margins": 0.002642437582835555, "rewards/rejected": -0.1433737874031067, "step": 960 }, { "epoch": 0.69, "grad_norm": 5.40625, "learning_rate": 4.773827155068793e-06, "log_odds_chosen": 0.06732301414012909, "log_odds_ratio": -0.7026981115341187, "logits/chosen": -2.9094488620758057, "logits/rejected": -2.902775287628174, "logps/chosen": -0.7069900631904602, "logps/rejected": -0.7331331372261047, "loss": 0.5518, "nll_loss": 0.3954068422317505, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.14139801263809204, "rewards/margins": 0.005228628404438496, "rewards/rejected": -0.14662663638591766, "step": 970 }, { "epoch": 0.7, "grad_norm": 4.9375, "learning_rate": 4.771652785948902e-06, "log_odds_chosen": 0.11821585893630981, "log_odds_ratio": -0.6919859647750854, "logits/chosen": -2.9587864875793457, "logits/rejected": -2.948873519897461, "logps/chosen": -0.7012097835540771, "logps/rejected": -0.7723513841629028, "loss": 0.5381, "nll_loss": 0.38139674067497253, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14024195075035095, "rewards/margins": 0.014228323474526405, "rewards/rejected": -0.154470294713974, "step": 980 }, { "epoch": 0.71, "grad_norm": 5.09375, "learning_rate": 4.769481385251275e-06, "log_odds_chosen": 0.10404877364635468, "log_odds_ratio": -0.7043722867965698, "logits/chosen": -2.9584336280822754, "logits/rejected": -2.9519834518432617, "logps/chosen": -0.6719237565994263, "logps/rejected": -0.7380385994911194, "loss": 0.5493, "nll_loss": 0.43441280722618103, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.13438475131988525, "rewards/margins": 0.013222972862422466, "rewards/rejected": -0.1476077288389206, "step": 990 }, { "epoch": 0.71, "grad_norm": 5.53125, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.12437696754932404, "log_odds_ratio": -0.6797572374343872, "logits/chosen": -2.9542737007141113, "logits/rejected": -2.944174289703369, "logps/chosen": -0.6895853877067566, "logps/rejected": -0.7687771916389465, "loss": 0.5247, "nll_loss": 0.397632360458374, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.13791708648204803, "rewards/margins": 0.01583835482597351, "rewards/rejected": -0.15375544130802155, "step": 1000 }, { "epoch": 0.72, "grad_norm": 5.59375, "learning_rate": 4.765147462152471e-06, "log_odds_chosen": 0.03156871721148491, "log_odds_ratio": -0.7198628187179565, "logits/chosen": -2.9656319618225098, "logits/rejected": -2.9585325717926025, "logps/chosen": -0.7327839732170105, "logps/rejected": -0.7419841885566711, "loss": 0.5595, "nll_loss": 0.45351147651672363, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.1465567946434021, "rewards/margins": 0.0018400519620627165, "rewards/rejected": -0.148396834731102, "step": 1010 }, { "epoch": 0.73, "grad_norm": 6.03125, "learning_rate": 4.762984926319677e-06, "log_odds_chosen": 0.1008574515581131, "log_odds_ratio": -0.7012881636619568, "logits/chosen": -2.900951623916626, "logits/rejected": -2.913351058959961, "logps/chosen": -0.7476619482040405, "logps/rejected": -0.7988759279251099, "loss": 0.5313, "nll_loss": 0.3712484538555145, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.14953239262104034, "rewards/margins": 0.01024280209094286, "rewards/rejected": -0.15977518260478973, "step": 1020 }, { "epoch": 0.74, "grad_norm": 4.875, "learning_rate": 4.760825332045738e-06, "log_odds_chosen": 0.1647627353668213, "log_odds_ratio": -0.6773719787597656, "logits/chosen": -2.916574239730835, "logits/rejected": -2.9114716053009033, "logps/chosen": -0.732185959815979, "logps/rejected": -0.820814311504364, "loss": 0.563, "nll_loss": 0.40981560945510864, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.14643718302249908, "rewards/margins": 0.01772570051252842, "rewards/rejected": -0.16416288912296295, "step": 1030 }, { "epoch": 0.74, "grad_norm": 4.59375, "learning_rate": 4.758668672668006e-06, "log_odds_chosen": -0.007136444561183453, "log_odds_ratio": -0.7459918260574341, "logits/chosen": -2.950622797012329, "logits/rejected": -2.935281276702881, "logps/chosen": -0.7756951451301575, "logps/rejected": -0.7845336198806763, "loss": 0.5498, "nll_loss": 0.4428860545158386, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.1551390290260315, "rewards/margins": 0.001767703564837575, "rewards/rejected": -0.15690675377845764, "step": 1040 }, { "epoch": 0.75, "grad_norm": 5.3125, "learning_rate": 4.756514941544941e-06, "log_odds_chosen": 0.22850975394248962, "log_odds_ratio": -0.6434667110443115, "logits/chosen": -2.916316509246826, "logits/rejected": -2.900111675262451, "logps/chosen": -0.7090237736701965, "logps/rejected": -0.8277368545532227, "loss": 0.5347, "nll_loss": 0.39760535955429077, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.1418047398328781, "rewards/margins": 0.02374262548983097, "rewards/rejected": -0.16554740071296692, "step": 1050 }, { "epoch": 0.76, "grad_norm": 5.0, "learning_rate": 4.754364132056025e-06, "log_odds_chosen": 0.23557403683662415, "log_odds_ratio": -0.6366820931434631, "logits/chosen": -2.9666476249694824, "logits/rejected": -2.964963436126709, "logps/chosen": -0.6721023917198181, "logps/rejected": -0.7975376844406128, "loss": 0.5546, "nll_loss": 0.43326544761657715, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.1344204843044281, "rewards/margins": 0.025087062269449234, "rewards/rejected": -0.15950754284858704, "step": 1060 }, { "epoch": 0.76, "grad_norm": 4.375, "learning_rate": 4.752216237601676e-06, "log_odds_chosen": 0.17871122062206268, "log_odds_ratio": -0.6607886552810669, "logits/chosen": -2.9594788551330566, "logits/rejected": -2.949906349182129, "logps/chosen": -0.6754817366600037, "logps/rejected": -0.761304497718811, "loss": 0.5284, "nll_loss": 0.41694697737693787, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.13509634137153625, "rewards/margins": 0.0171645637601614, "rewards/rejected": -0.1522609144449234, "step": 1070 }, { "epoch": 0.77, "grad_norm": 5.21875, "learning_rate": 4.750071251603165e-06, "log_odds_chosen": 0.10223817825317383, "log_odds_ratio": -0.6892545819282532, "logits/chosen": -2.91923189163208, "logits/rejected": -2.9072306156158447, "logps/chosen": -0.700354814529419, "logps/rejected": -0.7621926069259644, "loss": 0.5481, "nll_loss": 0.4226892590522766, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.14007095992565155, "rewards/margins": 0.01236753724515438, "rewards/rejected": -0.15243850648403168, "step": 1080 }, { "epoch": 0.78, "grad_norm": 5.5625, "learning_rate": 4.7479291675025314e-06, "log_odds_chosen": 0.16114802658557892, "log_odds_ratio": -0.6601124405860901, "logits/chosen": -2.9457671642303467, "logits/rejected": -2.9389617443084717, "logps/chosen": -0.6818099617958069, "logps/rejected": -0.7652055025100708, "loss": 0.5778, "nll_loss": 0.4147875905036926, "rewards/accuracies": 0.59375, "rewards/chosen": -0.1363619863986969, "rewards/margins": 0.01667911931872368, "rewards/rejected": -0.15304109454154968, "step": 1090 }, { "epoch": 0.79, "grad_norm": 4.625, "learning_rate": 4.745789978762496e-06, "log_odds_chosen": 0.08631005138158798, "log_odds_ratio": -0.7082911729812622, "logits/chosen": -2.927237033843994, "logits/rejected": -2.929316997528076, "logps/chosen": -0.7573009729385376, "logps/rejected": -0.8193367719650269, "loss": 0.5399, "nll_loss": 0.4314785897731781, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.151460200548172, "rewards/margins": 0.012407159432768822, "rewards/rejected": -0.16386735439300537, "step": 1100 }, { "epoch": 0.79, "grad_norm": 4.5625, "learning_rate": 4.7436536788663765e-06, "log_odds_chosen": 0.07461805641651154, "log_odds_ratio": -0.710638701915741, "logits/chosen": -2.954481840133667, "logits/rejected": -2.947251796722412, "logps/chosen": -0.7126928567886353, "logps/rejected": -0.7473545074462891, "loss": 0.5331, "nll_loss": 0.39857620000839233, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.14253857731819153, "rewards/margins": 0.006932335905730724, "rewards/rejected": -0.14947089552879333, "step": 1110 }, { "epoch": 0.8, "grad_norm": 5.59375, "learning_rate": 4.74152026131801e-06, "log_odds_chosen": 0.06558915972709656, "log_odds_ratio": -0.7165695428848267, "logits/chosen": -2.9481871128082275, "logits/rejected": -2.934065341949463, "logps/chosen": -0.7117661237716675, "logps/rejected": -0.7454390525817871, "loss": 0.5532, "nll_loss": 0.41643962264060974, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.14235322177410126, "rewards/margins": 0.006734585855156183, "rewards/rejected": -0.1490878164768219, "step": 1120 }, { "epoch": 0.81, "grad_norm": 5.25, "learning_rate": 4.739389719641665e-06, "log_odds_chosen": 0.038577549159526825, "log_odds_ratio": -0.7274538278579712, "logits/chosen": -2.9647698402404785, "logits/rejected": -2.953193187713623, "logps/chosen": -0.7552378177642822, "logps/rejected": -0.767500102519989, "loss": 0.5665, "nll_loss": 0.42185306549072266, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.15104755759239197, "rewards/margins": 0.002452465472742915, "rewards/rejected": -0.1535000503063202, "step": 1130 }, { "epoch": 0.81, "grad_norm": 5.78125, "learning_rate": 4.7372620473819615e-06, "log_odds_chosen": 0.14568562805652618, "log_odds_ratio": -0.6789036989212036, "logits/chosen": -2.9319028854370117, "logits/rejected": -2.9229094982147217, "logps/chosen": -0.6954749822616577, "logps/rejected": -0.7834473848342896, "loss": 0.5403, "nll_loss": 0.4035823345184326, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1390949785709381, "rewards/margins": 0.01759449765086174, "rewards/rejected": -0.15668947994709015, "step": 1140 }, { "epoch": 0.82, "grad_norm": 5.09375, "learning_rate": 4.735137238103785e-06, "log_odds_chosen": -0.04453912377357483, "log_odds_ratio": -0.7690579295158386, "logits/chosen": -2.9604084491729736, "logits/rejected": -2.9683547019958496, "logps/chosen": -0.7146620750427246, "logps/rejected": -0.6833995580673218, "loss": 0.5543, "nll_loss": 0.4193571209907532, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.14293241500854492, "rewards/margins": -0.006252489052712917, "rewards/rejected": -0.13667991757392883, "step": 1150 }, { "epoch": 0.83, "grad_norm": 4.59375, "learning_rate": 4.7330152853922064e-06, "log_odds_chosen": 0.10607735067605972, "log_odds_ratio": -0.6870900988578796, "logits/chosen": -2.933436870574951, "logits/rejected": -2.926907777786255, "logps/chosen": -0.707848846912384, "logps/rejected": -0.7633363008499146, "loss": 0.5204, "nll_loss": 0.40916410088539124, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.14156976342201233, "rewards/margins": 0.011097497306764126, "rewards/rejected": -0.15266726911067963, "step": 1160 }, { "epoch": 0.84, "grad_norm": 5.03125, "learning_rate": 4.730896182852409e-06, "log_odds_chosen": 0.11698383092880249, "log_odds_ratio": -0.6935534477233887, "logits/chosen": -2.9313931465148926, "logits/rejected": -2.911740779876709, "logps/chosen": -0.7008348107337952, "logps/rejected": -0.7741026282310486, "loss": 0.5431, "nll_loss": 0.4040239453315735, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1401669681072235, "rewards/margins": 0.014653565362095833, "rewards/rejected": -0.1548205316066742, "step": 1170 }, { "epoch": 0.84, "grad_norm": 4.28125, "learning_rate": 4.72877992410959e-06, "log_odds_chosen": 0.10059946775436401, "log_odds_ratio": -0.7010030150413513, "logits/chosen": -2.946838855743408, "logits/rejected": -2.9204533100128174, "logps/chosen": -0.6578399538993835, "logps/rejected": -0.7220724821090698, "loss": 0.5386, "nll_loss": 0.3835596740245819, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.13156801462173462, "rewards/margins": 0.012846499681472778, "rewards/rejected": -0.1444145143032074, "step": 1180 }, { "epoch": 0.85, "grad_norm": 4.84375, "learning_rate": 4.7266665028088985e-06, "log_odds_chosen": 0.08397103101015091, "log_odds_ratio": -0.7051702737808228, "logits/chosen": -2.93192982673645, "logits/rejected": -2.9253973960876465, "logps/chosen": -0.6880505681037903, "logps/rejected": -0.7314938306808472, "loss": 0.5324, "nll_loss": 0.3771182596683502, "rewards/accuracies": 0.53125, "rewards/chosen": -0.13761012256145477, "rewards/margins": 0.008688644506037235, "rewards/rejected": -0.14629876613616943, "step": 1190 }, { "epoch": 0.86, "grad_norm": 5.0, "learning_rate": 4.72455591261534e-06, "log_odds_chosen": 0.10587283223867416, "log_odds_ratio": -0.6958147287368774, "logits/chosen": -2.9291391372680664, "logits/rejected": -2.931337833404541, "logps/chosen": -0.6905062794685364, "logps/rejected": -0.7565279603004456, "loss": 0.5432, "nll_loss": 0.36201146245002747, "rewards/accuracies": 0.518750011920929, "rewards/chosen": -0.138101264834404, "rewards/margins": 0.013204338029026985, "rewards/rejected": -0.15130558609962463, "step": 1200 }, { "epoch": 0.86, "grad_norm": 5.28125, "learning_rate": 4.722448147213712e-06, "log_odds_chosen": 0.16628727316856384, "log_odds_ratio": -0.6715134382247925, "logits/chosen": -2.9251606464385986, "logits/rejected": -2.924400806427002, "logps/chosen": -0.7159217000007629, "logps/rejected": -0.7908354997634888, "loss": 0.5327, "nll_loss": 0.4329577088356018, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1431843340396881, "rewards/margins": 0.01498276274651289, "rewards/rejected": -0.15816709399223328, "step": 1210 }, { "epoch": 0.87, "grad_norm": 5.34375, "learning_rate": 4.720343200308507e-06, "log_odds_chosen": 0.09120135009288788, "log_odds_ratio": -0.7072020769119263, "logits/chosen": -2.918830394744873, "logits/rejected": -2.9098925590515137, "logps/chosen": -0.7446082830429077, "logps/rejected": -0.7945619821548462, "loss": 0.5726, "nll_loss": 0.4186829924583435, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.1489216834306717, "rewards/margins": 0.009990743361413479, "rewards/rejected": -0.15891240537166595, "step": 1220 }, { "epoch": 0.88, "grad_norm": 5.15625, "learning_rate": 4.7182410656238484e-06, "log_odds_chosen": 0.08639942854642868, "log_odds_ratio": -0.7023371458053589, "logits/chosen": -2.883063793182373, "logits/rejected": -2.876861095428467, "logps/chosen": -0.7035251259803772, "logps/rejected": -0.7428823113441467, "loss": 0.5529, "nll_loss": 0.3646206855773926, "rewards/accuracies": 0.53125, "rewards/chosen": -0.14070501923561096, "rewards/margins": 0.007871445268392563, "rewards/rejected": -0.14857646822929382, "step": 1230 }, { "epoch": 0.89, "grad_norm": 5.03125, "learning_rate": 4.716141736903407e-06, "log_odds_chosen": 0.17533931136131287, "log_odds_ratio": -0.6885548830032349, "logits/chosen": -2.8661389350891113, "logits/rejected": -2.851670980453491, "logps/chosen": -0.7199305295944214, "logps/rejected": -0.8365727663040161, "loss": 0.5332, "nll_loss": 0.41772204637527466, "rewards/accuracies": 0.5625, "rewards/chosen": -0.14398609101772308, "rewards/margins": 0.02332843840122223, "rewards/rejected": -0.1673145294189453, "step": 1240 }, { "epoch": 0.89, "grad_norm": 5.125, "learning_rate": 4.714045207910318e-06, "log_odds_chosen": 0.13246873021125793, "log_odds_ratio": -0.6706294417381287, "logits/chosen": -2.8975651264190674, "logits/rejected": -2.872434616088867, "logps/chosen": -0.6649607419967651, "logps/rejected": -0.7357169389724731, "loss": 0.5467, "nll_loss": 0.41414278745651245, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.13299211859703064, "rewards/margins": 0.014151250943541527, "rewards/rejected": -0.1471433937549591, "step": 1250 }, { "epoch": 0.9, "grad_norm": 5.0625, "learning_rate": 4.71195147242711e-06, "log_odds_chosen": 0.040602125227451324, "log_odds_ratio": -0.7303158640861511, "logits/chosen": -2.8778679370880127, "logits/rejected": -2.8803482055664062, "logps/chosen": -0.7029106020927429, "logps/rejected": -0.7276321649551392, "loss": 0.559, "nll_loss": 0.4467160701751709, "rewards/accuracies": 0.5, "rewards/chosen": -0.1405821144580841, "rewards/margins": 0.004944324027746916, "rewards/rejected": -0.1455264538526535, "step": 1260 }, { "epoch": 0.91, "grad_norm": 8.0, "learning_rate": 4.709860524255622e-06, "log_odds_chosen": 0.09002572298049927, "log_odds_ratio": -0.7032243609428406, "logits/chosen": -2.8805668354034424, "logits/rejected": -2.860121965408325, "logps/chosen": -0.718974232673645, "logps/rejected": -0.7800055742263794, "loss": 0.5319, "nll_loss": 0.3927960991859436, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14379484951496124, "rewards/margins": 0.012206263840198517, "rewards/rejected": -0.15600110590457916, "step": 1270 }, { "epoch": 0.91, "grad_norm": 5.125, "learning_rate": 4.707772357216934e-06, "log_odds_chosen": 0.2139289379119873, "log_odds_ratio": -0.6507673859596252, "logits/chosen": -2.9283065795898438, "logits/rejected": -2.909491777420044, "logps/chosen": -0.666816234588623, "logps/rejected": -0.7714813947677612, "loss": 0.5357, "nll_loss": 0.4001480042934418, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.1333632469177246, "rewards/margins": 0.020933035761117935, "rewards/rejected": -0.15429629385471344, "step": 1280 }, { "epoch": 0.92, "grad_norm": 5.21875, "learning_rate": 4.705686965151282e-06, "log_odds_chosen": 0.08337760716676712, "log_odds_ratio": -0.6977395415306091, "logits/chosen": -2.866161584854126, "logits/rejected": -2.8568389415740967, "logps/chosen": -0.6980268955230713, "logps/rejected": -0.7350268959999084, "loss": 0.5858, "nll_loss": 0.44848671555519104, "rewards/accuracies": 0.5625, "rewards/chosen": -0.13960537314414978, "rewards/margins": 0.007400007452815771, "rewards/rejected": -0.14700539410114288, "step": 1290 }, { "epoch": 0.93, "grad_norm": 5.09375, "learning_rate": 4.703604341917987e-06, "log_odds_chosen": 0.16698376834392548, "log_odds_ratio": -0.6709255576133728, "logits/chosen": -2.8881678581237793, "logits/rejected": -2.8717174530029297, "logps/chosen": -0.6411119699478149, "logps/rejected": -0.7108407616615295, "loss": 0.5404, "nll_loss": 0.39268070459365845, "rewards/accuracies": 0.5625, "rewards/chosen": -0.12822240591049194, "rewards/margins": 0.01394575648009777, "rewards/rejected": -0.14216816425323486, "step": 1300 }, { "epoch": 0.94, "grad_norm": 5.125, "learning_rate": 4.701524481395374e-06, "log_odds_chosen": 0.04918034002184868, "log_odds_ratio": -0.7179148197174072, "logits/chosen": -2.8727264404296875, "logits/rejected": -2.858916759490967, "logps/chosen": -0.7131275534629822, "logps/rejected": -0.7290714979171753, "loss": 0.5486, "nll_loss": 0.37463003396987915, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14262551069259644, "rewards/margins": 0.0031887758523225784, "rewards/rejected": -0.14581426978111267, "step": 1310 }, { "epoch": 0.94, "grad_norm": 4.875, "learning_rate": 4.699447377480703e-06, "log_odds_chosen": 0.1447034478187561, "log_odds_ratio": -0.6646226644515991, "logits/chosen": -2.8924479484558105, "logits/rejected": -2.8799147605895996, "logps/chosen": -0.7005911469459534, "logps/rejected": -0.773948073387146, "loss": 0.5247, "nll_loss": 0.38711321353912354, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.14011821150779724, "rewards/margins": 0.01467139832675457, "rewards/rejected": -0.15478962659835815, "step": 1320 }, { "epoch": 0.95, "grad_norm": 5.125, "learning_rate": 4.6973730240900876e-06, "log_odds_chosen": 0.18853728473186493, "log_odds_ratio": -0.6440411806106567, "logits/chosen": -2.9185266494750977, "logits/rejected": -2.888824939727783, "logps/chosen": -0.6820311546325684, "logps/rejected": -0.7798157334327698, "loss": 0.5488, "nll_loss": 0.4185566008090973, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.13640624284744263, "rewards/margins": 0.019556904211640358, "rewards/rejected": -0.15596315264701843, "step": 1330 }, { "epoch": 0.96, "grad_norm": 5.3125, "learning_rate": 4.695301415158426e-06, "log_odds_chosen": 0.10568451881408691, "log_odds_ratio": -0.7053590416908264, "logits/chosen": -2.9057796001434326, "logits/rejected": -2.896315813064575, "logps/chosen": -0.7402433753013611, "logps/rejected": -0.7852991223335266, "loss": 0.5757, "nll_loss": 0.44732385873794556, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.14804866909980774, "rewards/margins": 0.00901114847511053, "rewards/rejected": -0.15705981850624084, "step": 1340 }, { "epoch": 0.96, "grad_norm": 4.9375, "learning_rate": 4.693232544639321e-06, "log_odds_chosen": 0.1334969699382782, "log_odds_ratio": -0.6749390959739685, "logits/chosen": -2.9262282848358154, "logits/rejected": -2.9031224250793457, "logps/chosen": -0.7033289670944214, "logps/rejected": -0.7720553874969482, "loss": 0.5335, "nll_loss": 0.38161200284957886, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.14066579937934875, "rewards/margins": 0.013745295815169811, "rewards/rejected": -0.15441109240055084, "step": 1350 }, { "epoch": 0.97, "grad_norm": 5.53125, "learning_rate": 4.691166406505011e-06, "log_odds_chosen": 0.20113444328308105, "log_odds_ratio": -0.6593700051307678, "logits/chosen": -2.8720855712890625, "logits/rejected": -2.865278959274292, "logps/chosen": -0.6543204188346863, "logps/rejected": -0.7732547521591187, "loss": 0.5593, "nll_loss": 0.4312739372253418, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.13086409866809845, "rewards/margins": 0.023786863312125206, "rewards/rejected": -0.1546509563922882, "step": 1360 }, { "epoch": 0.98, "grad_norm": 5.5, "learning_rate": 4.689102994746289e-06, "log_odds_chosen": 0.11934226751327515, "log_odds_ratio": -0.6696735620498657, "logits/chosen": -2.879538059234619, "logits/rejected": -2.8727288246154785, "logps/chosen": -0.673319399356842, "logps/rejected": -0.7296326756477356, "loss": 0.5975, "nll_loss": 0.44223251938819885, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.1346638798713684, "rewards/margins": 0.011262651532888412, "rewards/rejected": -0.14592652022838593, "step": 1370 }, { "epoch": 0.99, "grad_norm": 5.5, "learning_rate": 4.687042303372439e-06, "log_odds_chosen": 0.11940214782953262, "log_odds_ratio": -0.6887474060058594, "logits/chosen": -2.9131243228912354, "logits/rejected": -2.9096319675445557, "logps/chosen": -0.6941181421279907, "logps/rejected": -0.7482253313064575, "loss": 0.5402, "nll_loss": 0.40905994176864624, "rewards/accuracies": 0.5625, "rewards/chosen": -0.13882364332675934, "rewards/margins": 0.010821421630680561, "rewards/rejected": -0.14964506030082703, "step": 1380 }, { "epoch": 0.99, "grad_norm": 4.25, "learning_rate": 4.684984326411154e-06, "log_odds_chosen": 0.09887897968292236, "log_odds_ratio": -0.6886764764785767, "logits/chosen": -2.8908066749572754, "logits/rejected": -2.8787262439727783, "logps/chosen": -0.679020881652832, "logps/rejected": -0.7298253774642944, "loss": 0.5263, "nll_loss": 0.39224857091903687, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.1358041763305664, "rewards/margins": 0.010160907171666622, "rewards/rejected": -0.1459650695323944, "step": 1390 }, { "epoch": 1.0, "grad_norm": 4.65625, "learning_rate": 4.68292905790847e-06, "log_odds_chosen": 0.2600487768650055, "log_odds_ratio": -0.6203187704086304, "logits/chosen": -2.9187633991241455, "logits/rejected": -2.9031124114990234, "logps/chosen": -0.635817289352417, "logps/rejected": -0.757480800151825, "loss": 0.5639, "nll_loss": 0.42110905051231384, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.12716346979141235, "rewards/margins": 0.02433270588517189, "rewards/rejected": -0.15149617195129395, "step": 1400 }, { "epoch": 1.01, "grad_norm": 6.03125, "learning_rate": 4.6808764919286885e-06, "log_odds_chosen": 0.8848162889480591, "log_odds_ratio": -0.37814536690711975, "logits/chosen": -2.9017772674560547, "logits/rejected": -2.8844223022460938, "logps/chosen": -0.5128706097602844, "logps/rejected": -0.9588940739631653, "loss": 0.4309, "nll_loss": 0.3441932797431946, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10257412493228912, "rewards/margins": 0.08920470625162125, "rewards/rejected": -0.19177880883216858, "step": 1410 }, { "epoch": 1.01, "grad_norm": 4.53125, "learning_rate": 4.678826622554307e-06, "log_odds_chosen": 0.9211248159408569, "log_odds_ratio": -0.3970174491405487, "logits/chosen": -2.8897476196289062, "logits/rejected": -2.877735137939453, "logps/chosen": -0.47735509276390076, "logps/rejected": -0.9233940839767456, "loss": 0.4206, "nll_loss": 0.31756311655044556, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.09547100961208344, "rewards/margins": 0.08920779824256897, "rewards/rejected": -0.1846788227558136, "step": 1420 }, { "epoch": 1.02, "grad_norm": 4.9375, "learning_rate": 4.676779443885949e-06, "log_odds_chosen": 1.0763452053070068, "log_odds_ratio": -0.3526865839958191, "logits/chosen": -2.8960213661193848, "logits/rejected": -2.8801820278167725, "logps/chosen": -0.512055516242981, "logps/rejected": -1.0564101934432983, "loss": 0.3946, "nll_loss": 0.3245953619480133, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.10241111367940903, "rewards/margins": 0.1088709607720375, "rewards/rejected": -0.21128205955028534, "step": 1430 }, { "epoch": 1.03, "grad_norm": 5.4375, "learning_rate": 4.674734950042287e-06, "log_odds_chosen": 1.138904333114624, "log_odds_ratio": -0.33328309655189514, "logits/chosen": -2.8903861045837402, "logits/rejected": -2.884766101837158, "logps/chosen": -0.4885633885860443, "logps/rejected": -1.0382764339447021, "loss": 0.3688, "nll_loss": 0.31521254777908325, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0977126881480217, "rewards/margins": 0.10994259268045425, "rewards/rejected": -0.20765526592731476, "step": 1440 }, { "epoch": 1.04, "grad_norm": 4.78125, "learning_rate": 4.672693135159978e-06, "log_odds_chosen": 1.0414109230041504, "log_odds_ratio": -0.3578895330429077, "logits/chosen": -2.8687214851379395, "logits/rejected": -2.853811264038086, "logps/chosen": -0.5137967467308044, "logps/rejected": -1.0513535737991333, "loss": 0.3982, "nll_loss": 0.3380189538002014, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10275934636592865, "rewards/margins": 0.10751136392354965, "rewards/rejected": -0.2102707177400589, "step": 1450 }, { "epoch": 1.04, "grad_norm": 6.65625, "learning_rate": 4.67065399339359e-06, "log_odds_chosen": 1.1234166622161865, "log_odds_ratio": -0.35121458768844604, "logits/chosen": -2.861860752105713, "logits/rejected": -2.8768668174743652, "logps/chosen": -0.49000921845436096, "logps/rejected": -1.0480070114135742, "loss": 0.3922, "nll_loss": 0.31293317675590515, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09800183773040771, "rewards/margins": 0.11159957945346832, "rewards/rejected": -0.20960143208503723, "step": 1460 }, { "epoch": 1.05, "grad_norm": 6.96875, "learning_rate": 4.668617518915533e-06, "log_odds_chosen": 1.04032301902771, "log_odds_ratio": -0.35156646370887756, "logits/chosen": -2.8752763271331787, "logits/rejected": -2.8781304359436035, "logps/chosen": -0.5017446279525757, "logps/rejected": -1.0297907590866089, "loss": 0.3846, "nll_loss": 0.31080394983291626, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10034892708063126, "rewards/margins": 0.1056092381477356, "rewards/rejected": -0.20595815777778625, "step": 1470 }, { "epoch": 1.06, "grad_norm": 5.84375, "learning_rate": 4.666583705915985e-06, "log_odds_chosen": 1.1882513761520386, "log_odds_ratio": -0.32196754217147827, "logits/chosen": -2.867017984390259, "logits/rejected": -2.8608901500701904, "logps/chosen": -0.4866812825202942, "logps/rejected": -1.108413577079773, "loss": 0.4034, "nll_loss": 0.30916067957878113, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.09733626246452332, "rewards/margins": 0.12434647977352142, "rewards/rejected": -0.22168274223804474, "step": 1480 }, { "epoch": 1.06, "grad_norm": 5.90625, "learning_rate": 4.664552548602825e-06, "log_odds_chosen": 1.2755037546157837, "log_odds_ratio": -0.3192000985145569, "logits/chosen": -2.8904194831848145, "logits/rejected": -2.887146472930908, "logps/chosen": -0.4415016174316406, "logps/rejected": -1.0573655366897583, "loss": 0.375, "nll_loss": 0.30936262011528015, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.08830033242702484, "rewards/margins": 0.12317276000976562, "rewards/rejected": -0.21147307753562927, "step": 1490 }, { "epoch": 1.07, "grad_norm": 6.3125, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 1.2209784984588623, "log_odds_ratio": -0.31655097007751465, "logits/chosen": -2.8983540534973145, "logits/rejected": -2.876600742340088, "logps/chosen": -0.5172096490859985, "logps/rejected": -1.1844242811203003, "loss": 0.416, "nll_loss": 0.3785460293292999, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.10344193130731583, "rewards/margins": 0.1334429532289505, "rewards/rejected": -0.23688487708568573, "step": 1500 }, { "epoch": 1.08, "grad_norm": 6.1875, "learning_rate": 4.660498177955291e-06, "log_odds_chosen": 1.1757891178131104, "log_odds_ratio": -0.316308856010437, "logits/chosen": -2.8644537925720215, "logits/rejected": -2.8410959243774414, "logps/chosen": -0.49577221274375916, "logps/rejected": -1.1109702587127686, "loss": 0.4093, "nll_loss": 0.3233625888824463, "rewards/accuracies": 0.96875, "rewards/chosen": -0.09915443509817123, "rewards/margins": 0.12303964048624039, "rewards/rejected": -0.22219407558441162, "step": 1510 }, { "epoch": 1.09, "grad_norm": 4.8125, "learning_rate": 4.658474953124562e-06, "log_odds_chosen": 1.1299588680267334, "log_odds_ratio": -0.3389926254749298, "logits/chosen": -2.9047207832336426, "logits/rejected": -2.9168930053710938, "logps/chosen": -0.4910566806793213, "logps/rejected": -1.0821782350540161, "loss": 0.3979, "nll_loss": 0.36464497447013855, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.09821134060621262, "rewards/margins": 0.11822430789470673, "rewards/rejected": -0.21643562614917755, "step": 1520 }, { "epoch": 1.09, "grad_norm": 6.25, "learning_rate": 4.656454360987378e-06, "log_odds_chosen": 1.2231553792953491, "log_odds_ratio": -0.30703750252723694, "logits/chosen": -2.8862717151641846, "logits/rejected": -2.880195140838623, "logps/chosen": -0.45338621735572815, "logps/rejected": -1.0659048557281494, "loss": 0.3926, "nll_loss": 0.32782015204429626, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.09067724645137787, "rewards/margins": 0.12250375747680664, "rewards/rejected": -0.21318098902702332, "step": 1530 }, { "epoch": 1.1, "grad_norm": 5.90625, "learning_rate": 4.654436395839094e-06, "log_odds_chosen": 1.2722887992858887, "log_odds_ratio": -0.3012635111808777, "logits/chosen": -2.8365585803985596, "logits/rejected": -2.8223185539245605, "logps/chosen": -0.498770147562027, "logps/rejected": -1.186909556388855, "loss": 0.3876, "nll_loss": 0.34141403436660767, "rewards/accuracies": 0.96875, "rewards/chosen": -0.09975402057170868, "rewards/margins": 0.13762789964675903, "rewards/rejected": -0.23738190531730652, "step": 1540 }, { "epoch": 1.11, "grad_norm": 5.25, "learning_rate": 4.652421051992354e-06, "log_odds_chosen": 1.367235541343689, "log_odds_ratio": -0.2841276526451111, "logits/chosen": -2.8690426349639893, "logits/rejected": -2.860100507736206, "logps/chosen": -0.42585864663124084, "logps/rejected": -1.0907236337661743, "loss": 0.3664, "nll_loss": 0.2977783679962158, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.08517173677682877, "rewards/margins": 0.13297298550605774, "rewards/rejected": -0.2181447297334671, "step": 1550 }, { "epoch": 1.11, "grad_norm": 6.9375, "learning_rate": 4.650408323777029e-06, "log_odds_chosen": 1.1410921812057495, "log_odds_ratio": -0.3362571597099304, "logits/chosen": -2.853015184402466, "logits/rejected": -2.8460183143615723, "logps/chosen": -0.5159534215927124, "logps/rejected": -1.1035113334655762, "loss": 0.392, "nll_loss": 0.33157429099082947, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10319069772958755, "rewards/margins": 0.11751158535480499, "rewards/rejected": -0.22070229053497314, "step": 1560 }, { "epoch": 1.12, "grad_norm": 4.875, "learning_rate": 4.6483982055401415e-06, "log_odds_chosen": 1.3359148502349854, "log_odds_ratio": -0.3214777410030365, "logits/chosen": -2.807814359664917, "logits/rejected": -2.8036892414093018, "logps/chosen": -0.48363837599754333, "logps/rejected": -1.1991592645645142, "loss": 0.3751, "nll_loss": 0.2966926693916321, "rewards/accuracies": 0.9375, "rewards/chosen": -0.09672766178846359, "rewards/margins": 0.1431041657924652, "rewards/rejected": -0.23983187973499298, "step": 1570 }, { "epoch": 1.13, "grad_norm": 5.65625, "learning_rate": 4.646390691645805e-06, "log_odds_chosen": 1.219293236732483, "log_odds_ratio": -0.34068965911865234, "logits/chosen": -2.826866626739502, "logits/rejected": -2.8263888359069824, "logps/chosen": -0.4953809678554535, "logps/rejected": -1.1199510097503662, "loss": 0.3645, "nll_loss": 0.2939409017562866, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09907619655132294, "rewards/margins": 0.1249140128493309, "rewards/rejected": -0.22399020195007324, "step": 1580 }, { "epoch": 1.14, "grad_norm": 5.875, "learning_rate": 4.644385776475159e-06, "log_odds_chosen": 1.1653202772140503, "log_odds_ratio": -0.3451498746871948, "logits/chosen": -2.8366851806640625, "logits/rejected": -2.835325002670288, "logps/chosen": -0.5062024593353271, "logps/rejected": -1.1195242404937744, "loss": 0.4054, "nll_loss": 0.3258489966392517, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10124049335718155, "rewards/margins": 0.12266434729099274, "rewards/rejected": -0.2239048182964325, "step": 1590 }, { "epoch": 1.14, "grad_norm": 6.75, "learning_rate": 4.642383454426297e-06, "log_odds_chosen": 1.174912452697754, "log_odds_ratio": -0.3339281678199768, "logits/chosen": -2.861581325531006, "logits/rejected": -2.8604111671447754, "logps/chosen": -0.5135446786880493, "logps/rejected": -1.1017265319824219, "loss": 0.396, "nll_loss": 0.3384125828742981, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10270893573760986, "rewards/margins": 0.11763636022806168, "rewards/rejected": -0.22034530341625214, "step": 1600 }, { "epoch": 1.15, "grad_norm": 8.125, "learning_rate": 4.640383719914205e-06, "log_odds_chosen": 1.2484989166259766, "log_odds_ratio": -0.30592483282089233, "logits/chosen": -2.807584285736084, "logits/rejected": -2.813006639480591, "logps/chosen": -0.5197979807853699, "logps/rejected": -1.181796669960022, "loss": 0.4215, "nll_loss": 0.3560883402824402, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10395960509777069, "rewards/margins": 0.13239972293376923, "rewards/rejected": -0.23635932803153992, "step": 1610 }, { "epoch": 1.16, "grad_norm": 6.25, "learning_rate": 4.638386567370694e-06, "log_odds_chosen": 1.2706489562988281, "log_odds_ratio": -0.3202516436576843, "logits/chosen": -2.8386385440826416, "logits/rejected": -2.835495710372925, "logps/chosen": -0.5026634931564331, "logps/rejected": -1.1739028692245483, "loss": 0.3759, "nll_loss": 0.31424498558044434, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10053269565105438, "rewards/margins": 0.13424786925315857, "rewards/rejected": -0.23478057980537415, "step": 1620 }, { "epoch": 1.16, "grad_norm": 5.3125, "learning_rate": 4.636391991244338e-06, "log_odds_chosen": 1.2696077823638916, "log_odds_ratio": -0.3153325915336609, "logits/chosen": -2.858908176422119, "logits/rejected": -2.8612444400787354, "logps/chosen": -0.48242902755737305, "logps/rejected": -1.126427173614502, "loss": 0.366, "nll_loss": 0.295919805765152, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.09648582339286804, "rewards/margins": 0.12879963219165802, "rewards/rejected": -0.22528544068336487, "step": 1630 }, { "epoch": 1.17, "grad_norm": 6.4375, "learning_rate": 4.634399986000405e-06, "log_odds_chosen": 1.3514255285263062, "log_odds_ratio": -0.2864634096622467, "logits/chosen": -2.892820358276367, "logits/rejected": -2.8774783611297607, "logps/chosen": -0.4613906741142273, "logps/rejected": -1.1687535047531128, "loss": 0.3837, "nll_loss": 0.35217028856277466, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.0922781378030777, "rewards/margins": 0.14147259294986725, "rewards/rejected": -0.23375073075294495, "step": 1640 }, { "epoch": 1.18, "grad_norm": 5.90625, "learning_rate": 4.632410546120794e-06, "log_odds_chosen": 1.1376657485961914, "log_odds_ratio": -0.3578290343284607, "logits/chosen": -2.820157527923584, "logits/rejected": -2.8282933235168457, "logps/chosen": -0.5289143323898315, "logps/rejected": -1.1361706256866455, "loss": 0.4242, "nll_loss": 0.36498746275901794, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10578285157680511, "rewards/margins": 0.12145128101110458, "rewards/rejected": -0.2272341251373291, "step": 1650 }, { "epoch": 1.19, "grad_norm": 6.5, "learning_rate": 4.6304236661039765e-06, "log_odds_chosen": 1.2930752038955688, "log_odds_ratio": -0.3022990822792053, "logits/chosen": -2.8277323246002197, "logits/rejected": -2.814790725708008, "logps/chosen": -0.471322625875473, "logps/rejected": -1.1453874111175537, "loss": 0.3746, "nll_loss": 0.298636794090271, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.09426452219486237, "rewards/margins": 0.13481295108795166, "rewards/rejected": -0.2290775030851364, "step": 1660 }, { "epoch": 1.19, "grad_norm": 6.875, "learning_rate": 4.628439340464919e-06, "log_odds_chosen": 1.1731585264205933, "log_odds_ratio": -0.33878791332244873, "logits/chosen": -2.8189854621887207, "logits/rejected": -2.822537660598755, "logps/chosen": -0.5024202466011047, "logps/rejected": -1.1166616678237915, "loss": 0.3923, "nll_loss": 0.34138697385787964, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.10048405081033707, "rewards/margins": 0.12284828722476959, "rewards/rejected": -0.22333233058452606, "step": 1670 }, { "epoch": 1.2, "grad_norm": 6.21875, "learning_rate": 4.626457563735034e-06, "log_odds_chosen": 1.2489228248596191, "log_odds_ratio": -0.30081456899642944, "logits/chosen": -2.8132882118225098, "logits/rejected": -2.8195571899414062, "logps/chosen": -0.501319408416748, "logps/rejected": -1.1651036739349365, "loss": 0.3777, "nll_loss": 0.3397257328033447, "rewards/accuracies": 0.96875, "rewards/chosen": -0.10026389360427856, "rewards/margins": 0.13275685906410217, "rewards/rejected": -0.23302075266838074, "step": 1680 }, { "epoch": 1.21, "grad_norm": 6.5, "learning_rate": 4.624478330462108e-06, "log_odds_chosen": 1.2629085779190063, "log_odds_ratio": -0.3190768361091614, "logits/chosen": -2.808572769165039, "logits/rejected": -2.8040456771850586, "logps/chosen": -0.52824467420578, "logps/rejected": -1.2037794589996338, "loss": 0.4016, "nll_loss": 0.3477191627025604, "rewards/accuracies": 0.9375, "rewards/chosen": -0.105648934841156, "rewards/margins": 0.13510698080062866, "rewards/rejected": -0.24075591564178467, "step": 1690 }, { "epoch": 1.21, "grad_norm": 5.3125, "learning_rate": 4.622501635210244e-06, "log_odds_chosen": 1.199979305267334, "log_odds_ratio": -0.32586055994033813, "logits/chosen": -2.811058759689331, "logits/rejected": -2.797663927078247, "logps/chosen": -0.5032976269721985, "logps/rejected": -1.1406500339508057, "loss": 0.412, "nll_loss": 0.35429102182388306, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10065951198339462, "rewards/margins": 0.1274704784154892, "rewards/rejected": -0.2281300127506256, "step": 1700 }, { "epoch": 1.22, "grad_norm": 6.625, "learning_rate": 4.62052747255979e-06, "log_odds_chosen": 1.2002490758895874, "log_odds_ratio": -0.3310433030128479, "logits/chosen": -2.8232879638671875, "logits/rejected": -2.8157012462615967, "logps/chosen": -0.5096195936203003, "logps/rejected": -1.1472256183624268, "loss": 0.4145, "nll_loss": 0.33412426710128784, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.10192392021417618, "rewards/margins": 0.12752120196819305, "rewards/rejected": -0.22944512963294983, "step": 1710 }, { "epoch": 1.23, "grad_norm": 6.25, "learning_rate": 4.61855583710729e-06, "log_odds_chosen": 1.200430154800415, "log_odds_ratio": -0.3242012560367584, "logits/chosen": -2.807443141937256, "logits/rejected": -2.812464475631714, "logps/chosen": -0.4960111975669861, "logps/rejected": -1.1150873899459839, "loss": 0.3998, "nll_loss": 0.3255448639392853, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0992022305727005, "rewards/margins": 0.12381526082754135, "rewards/rejected": -0.22301749885082245, "step": 1720 }, { "epoch": 1.24, "grad_norm": 6.15625, "learning_rate": 4.616586723465408e-06, "log_odds_chosen": 1.2508338689804077, "log_odds_ratio": -0.31589239835739136, "logits/chosen": -2.7934987545013428, "logits/rejected": -2.798473358154297, "logps/chosen": -0.46048063039779663, "logps/rejected": -1.0900535583496094, "loss": 0.3718, "nll_loss": 0.2973485589027405, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.09209613502025604, "rewards/margins": 0.12591460347175598, "rewards/rejected": -0.21801073849201202, "step": 1730 }, { "epoch": 1.24, "grad_norm": 5.5, "learning_rate": 4.614620126262875e-06, "log_odds_chosen": 1.2658488750457764, "log_odds_ratio": -0.31101447343826294, "logits/chosen": -2.7943694591522217, "logits/rejected": -2.810157299041748, "logps/chosen": -0.5024830102920532, "logps/rejected": -1.1770861148834229, "loss": 0.3848, "nll_loss": 0.3470260202884674, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.10049660503864288, "rewards/margins": 0.1349206268787384, "rewards/rejected": -0.2354172170162201, "step": 1740 }, { "epoch": 1.25, "grad_norm": 5.71875, "learning_rate": 4.6126560401444256e-06, "log_odds_chosen": 1.2377068996429443, "log_odds_ratio": -0.32577404379844666, "logits/chosen": -2.8102307319641113, "logits/rejected": -2.816279411315918, "logps/chosen": -0.4699520170688629, "logps/rejected": -1.1359916925430298, "loss": 0.3875, "nll_loss": 0.3352469503879547, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09399040788412094, "rewards/margins": 0.13320791721343994, "rewards/rejected": -0.22719831764698029, "step": 1750 }, { "epoch": 1.26, "grad_norm": 5.3125, "learning_rate": 4.610694459770736e-06, "log_odds_chosen": 1.142558217048645, "log_odds_ratio": -0.3530025780200958, "logits/chosen": -2.8239128589630127, "logits/rejected": -2.8176794052124023, "logps/chosen": -0.5302969217300415, "logps/rejected": -1.1179604530334473, "loss": 0.424, "nll_loss": 0.36241117119789124, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.10605937242507935, "rewards/margins": 0.11753270775079727, "rewards/rejected": -0.22359208762645721, "step": 1760 }, { "epoch": 1.26, "grad_norm": 6.71875, "learning_rate": 4.6087353798183585e-06, "log_odds_chosen": 1.2016445398330688, "log_odds_ratio": -0.338972806930542, "logits/chosen": -2.8231258392333984, "logits/rejected": -2.8218886852264404, "logps/chosen": -0.49100199341773987, "logps/rejected": -1.0961363315582275, "loss": 0.3851, "nll_loss": 0.34230172634124756, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.09820040315389633, "rewards/margins": 0.1210268959403038, "rewards/rejected": -0.21922728419303894, "step": 1770 }, { "epoch": 1.27, "grad_norm": 8.0, "learning_rate": 4.606778794979673e-06, "log_odds_chosen": 1.2381914854049683, "log_odds_ratio": -0.32014578580856323, "logits/chosen": -2.7523140907287598, "logits/rejected": -2.769972801208496, "logps/chosen": -0.48253411054611206, "logps/rejected": -1.137094259262085, "loss": 0.414, "nll_loss": 0.3594844937324524, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.09650683403015137, "rewards/margins": 0.13091202080249786, "rewards/rejected": -0.22741885483264923, "step": 1780 }, { "epoch": 1.28, "grad_norm": 5.375, "learning_rate": 4.604824699962814e-06, "log_odds_chosen": 1.2986476421356201, "log_odds_ratio": -0.31768113374710083, "logits/chosen": -2.8044631481170654, "logits/rejected": -2.8003344535827637, "logps/chosen": -0.48466992378234863, "logps/rejected": -1.1684885025024414, "loss": 0.4, "nll_loss": 0.32119065523147583, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.09693397581577301, "rewards/margins": 0.13676372170448303, "rewards/rejected": -0.23369769752025604, "step": 1790 }, { "epoch": 1.29, "grad_norm": 5.75, "learning_rate": 4.602873089491618e-06, "log_odds_chosen": 1.3184231519699097, "log_odds_ratio": -0.3061246871948242, "logits/chosen": -2.7834739685058594, "logits/rejected": -2.7912349700927734, "logps/chosen": -0.4672764837741852, "logps/rejected": -1.1388607025146484, "loss": 0.3609, "nll_loss": 0.312313973903656, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.09345529973506927, "rewards/margins": 0.1343168318271637, "rewards/rejected": -0.22777214646339417, "step": 1800 }, { "epoch": 1.29, "grad_norm": 5.40625, "learning_rate": 4.600923958305558e-06, "log_odds_chosen": 1.221590280532837, "log_odds_ratio": -0.325833261013031, "logits/chosen": -2.7938928604125977, "logits/rejected": -2.787433624267578, "logps/chosen": -0.49871402978897095, "logps/rejected": -1.1466445922851562, "loss": 0.391, "nll_loss": 0.3510339856147766, "rewards/accuracies": 0.90625, "rewards/chosen": -0.09974280744791031, "rewards/margins": 0.1295861303806305, "rewards/rejected": -0.2293289452791214, "step": 1810 }, { "epoch": 1.3, "grad_norm": 6.0, "learning_rate": 4.59897730115969e-06, "log_odds_chosen": 1.3827050924301147, "log_odds_ratio": -0.30398255586624146, "logits/chosen": -2.811122417449951, "logits/rejected": -2.817436933517456, "logps/chosen": -0.4758991599082947, "logps/rejected": -1.212922215461731, "loss": 0.3903, "nll_loss": 0.32494717836380005, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.09517984092235565, "rewards/margins": 0.14740462601184845, "rewards/rejected": -0.2425844967365265, "step": 1820 }, { "epoch": 1.31, "grad_norm": 5.5625, "learning_rate": 4.597033112824591e-06, "log_odds_chosen": 1.2106988430023193, "log_odds_ratio": -0.3271104395389557, "logits/chosen": -2.798574924468994, "logits/rejected": -2.78853702545166, "logps/chosen": -0.5198577642440796, "logps/rejected": -1.1642177104949951, "loss": 0.3965, "nll_loss": 0.3389652669429779, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10397156327962875, "rewards/margins": 0.12887199223041534, "rewards/rejected": -0.2328435480594635, "step": 1830 }, { "epoch": 1.31, "grad_norm": 6.21875, "learning_rate": 4.595091388086298e-06, "log_odds_chosen": 1.1952093839645386, "log_odds_ratio": -0.3166283071041107, "logits/chosen": -2.8241782188415527, "logits/rejected": -2.8152718544006348, "logps/chosen": -0.5200030207633972, "logps/rejected": -1.1538721323013306, "loss": 0.3864, "nll_loss": 0.30587464570999146, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10400060564279556, "rewards/margins": 0.12677384912967682, "rewards/rejected": -0.23077444732189178, "step": 1840 }, { "epoch": 1.32, "grad_norm": 6.96875, "learning_rate": 4.593152121746254e-06, "log_odds_chosen": 1.168222427368164, "log_odds_ratio": -0.3400697410106659, "logits/chosen": -2.816959857940674, "logits/rejected": -2.813157081604004, "logps/chosen": -0.4970974326133728, "logps/rejected": -1.108543038368225, "loss": 0.411, "nll_loss": 0.31471529603004456, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0994194895029068, "rewards/margins": 0.12228912115097046, "rewards/rejected": -0.22170861065387726, "step": 1850 }, { "epoch": 1.33, "grad_norm": 5.59375, "learning_rate": 4.591215308621242e-06, "log_odds_chosen": 1.2203196287155151, "log_odds_ratio": -0.3189007639884949, "logits/chosen": -2.8120200634002686, "logits/rejected": -2.791141986846924, "logps/chosen": -0.4980306029319763, "logps/rejected": -1.1655288934707642, "loss": 0.3934, "nll_loss": 0.31481805443763733, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09960611909627914, "rewards/margins": 0.1334996372461319, "rewards/rejected": -0.23310574889183044, "step": 1860 }, { "epoch": 1.34, "grad_norm": 6.25, "learning_rate": 4.5892809435433355e-06, "log_odds_chosen": 1.245577096939087, "log_odds_ratio": -0.32619625329971313, "logits/chosen": -2.8177731037139893, "logits/rejected": -2.8102052211761475, "logps/chosen": -0.4943556785583496, "logps/rejected": -1.1462085247039795, "loss": 0.3925, "nll_loss": 0.3532278537750244, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.0988711342215538, "rewards/margins": 0.13037055730819702, "rewards/rejected": -0.2292417287826538, "step": 1870 }, { "epoch": 1.34, "grad_norm": 6.09375, "learning_rate": 4.587349021359836e-06, "log_odds_chosen": 1.1849457025527954, "log_odds_ratio": -0.3331480026245117, "logits/chosen": -2.8479719161987305, "logits/rejected": -2.8441169261932373, "logps/chosen": -0.5096966624259949, "logps/rejected": -1.1546456813812256, "loss": 0.4052, "nll_loss": 0.32981541752815247, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10193934291601181, "rewards/margins": 0.1289898157119751, "rewards/rejected": -0.23092913627624512, "step": 1880 }, { "epoch": 1.35, "grad_norm": 5.75, "learning_rate": 4.585419536933215e-06, "log_odds_chosen": 1.235563039779663, "log_odds_ratio": -0.3173638880252838, "logits/chosen": -2.8725390434265137, "logits/rejected": -2.859806776046753, "logps/chosen": -0.4742043614387512, "logps/rejected": -1.1231261491775513, "loss": 0.4115, "nll_loss": 0.365268349647522, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0948408767580986, "rewards/margins": 0.12978434562683105, "rewards/rejected": -0.22462522983551025, "step": 1890 }, { "epoch": 1.36, "grad_norm": 5.4375, "learning_rate": 4.583492485141056e-06, "log_odds_chosen": 1.3153822422027588, "log_odds_ratio": -0.3131297528743744, "logits/chosen": -2.783536434173584, "logits/rejected": -2.803969144821167, "logps/chosen": -0.4793526530265808, "logps/rejected": -1.1401029825210571, "loss": 0.3573, "nll_loss": 0.2991887629032135, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.09587053954601288, "rewards/margins": 0.1321500688791275, "rewards/rejected": -0.2280205935239792, "step": 1900 }, { "epoch": 1.36, "grad_norm": 6.25, "learning_rate": 4.581567860876004e-06, "log_odds_chosen": 1.2473056316375732, "log_odds_ratio": -0.3080199658870697, "logits/chosen": -2.8462700843811035, "logits/rejected": -2.8370962142944336, "logps/chosen": -0.48333874344825745, "logps/rejected": -1.148425817489624, "loss": 0.4128, "nll_loss": 0.3549380302429199, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.09666775166988373, "rewards/margins": 0.13301745057106018, "rewards/rejected": -0.2296852171421051, "step": 1910 }, { "epoch": 1.37, "grad_norm": 6.75, "learning_rate": 4.579645659045699e-06, "log_odds_chosen": 1.227070689201355, "log_odds_ratio": -0.3165205121040344, "logits/chosen": -2.8399529457092285, "logits/rejected": -2.8465752601623535, "logps/chosen": -0.46384549140930176, "logps/rejected": -1.0851547718048096, "loss": 0.401, "nll_loss": 0.3482987880706787, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.09276910126209259, "rewards/margins": 0.12426185607910156, "rewards/rejected": -0.21703095734119415, "step": 1920 }, { "epoch": 1.38, "grad_norm": 4.46875, "learning_rate": 4.577725874572724e-06, "log_odds_chosen": 1.1611140966415405, "log_odds_ratio": -0.34230470657348633, "logits/chosen": -2.807835340499878, "logits/rejected": -2.817835807800293, "logps/chosen": -0.49586695432662964, "logps/rejected": -1.093207836151123, "loss": 0.3975, "nll_loss": 0.31580257415771484, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0991733968257904, "rewards/margins": 0.11946818977594376, "rewards/rejected": -0.21864160895347595, "step": 1930 }, { "epoch": 1.39, "grad_norm": 6.21875, "learning_rate": 4.575808502394551e-06, "log_odds_chosen": 1.2263875007629395, "log_odds_ratio": -0.3336968421936035, "logits/chosen": -2.8074028491973877, "logits/rejected": -2.808117628097534, "logps/chosen": -0.5058302879333496, "logps/rejected": -1.1664202213287354, "loss": 0.3892, "nll_loss": 0.32398825883865356, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.10116605460643768, "rewards/margins": 0.13211797177791595, "rewards/rejected": -0.23328404128551483, "step": 1940 }, { "epoch": 1.39, "grad_norm": 5.65625, "learning_rate": 4.573893537463482e-06, "log_odds_chosen": 1.1256129741668701, "log_odds_ratio": -0.3365449905395508, "logits/chosen": -2.799619197845459, "logits/rejected": -2.8067538738250732, "logps/chosen": -0.501841127872467, "logps/rejected": -1.0864284038543701, "loss": 0.3919, "nll_loss": 0.3244914412498474, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10036821663379669, "rewards/margins": 0.11691747605800629, "rewards/rejected": -0.21728567779064178, "step": 1950 }, { "epoch": 1.4, "grad_norm": 7.71875, "learning_rate": 4.5719809747465946e-06, "log_odds_chosen": 1.2488094568252563, "log_odds_ratio": -0.3095954358577728, "logits/chosen": -2.80102801322937, "logits/rejected": -2.786306381225586, "logps/chosen": -0.48401910066604614, "logps/rejected": -1.1524279117584229, "loss": 0.391, "nll_loss": 0.32259485125541687, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09680382162332535, "rewards/margins": 0.1336817443370819, "rewards/rejected": -0.23048558831214905, "step": 1960 }, { "epoch": 1.41, "grad_norm": 6.65625, "learning_rate": 4.570070809225682e-06, "log_odds_chosen": 1.185505986213684, "log_odds_ratio": -0.3236411213874817, "logits/chosen": -2.8083627223968506, "logits/rejected": -2.8073079586029053, "logps/chosen": -0.497401624917984, "logps/rejected": -1.1157503128051758, "loss": 0.3984, "nll_loss": 0.32068413496017456, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.09948031604290009, "rewards/margins": 0.12366974353790283, "rewards/rejected": -0.2231500893831253, "step": 1970 }, { "epoch": 1.41, "grad_norm": 7.0625, "learning_rate": 4.568163035897205e-06, "log_odds_chosen": 1.2525742053985596, "log_odds_ratio": -0.31062158942222595, "logits/chosen": -2.852297782897949, "logits/rejected": -2.858063220977783, "logps/chosen": -0.5127692222595215, "logps/rejected": -1.180938482284546, "loss": 0.4053, "nll_loss": 0.36488935351371765, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10255385935306549, "rewards/margins": 0.1336338222026825, "rewards/rejected": -0.23618769645690918, "step": 1980 }, { "epoch": 1.42, "grad_norm": 6.5625, "learning_rate": 4.566257649772231e-06, "log_odds_chosen": 1.2319920063018799, "log_odds_ratio": -0.32411015033721924, "logits/chosen": -2.8400864601135254, "logits/rejected": -2.835186004638672, "logps/chosen": -0.5001600384712219, "logps/rejected": -1.1353777647018433, "loss": 0.4052, "nll_loss": 0.346365749835968, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10003199428319931, "rewards/margins": 0.12704357504844666, "rewards/rejected": -0.22707557678222656, "step": 1990 }, { "epoch": 1.43, "grad_norm": 5.78125, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 1.2825465202331543, "log_odds_ratio": -0.31023693084716797, "logits/chosen": -2.7589869499206543, "logits/rejected": -2.7817113399505615, "logps/chosen": -0.48962241411209106, "logps/rejected": -1.1758294105529785, "loss": 0.3808, "nll_loss": 0.3050033152103424, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.09792448580265045, "rewards/margins": 0.137241393327713, "rewards/rejected": -0.23516587913036346, "step": 2000 }, { "epoch": 1.44, "grad_norm": 6.5625, "learning_rate": 4.562454019249786e-06, "log_odds_chosen": 1.1062853336334229, "log_odds_ratio": -0.34708550572395325, "logits/chosen": -2.8106868267059326, "logits/rejected": -2.802447557449341, "logps/chosen": -0.5518588423728943, "logps/rejected": -1.1463332176208496, "loss": 0.4399, "nll_loss": 0.3327658772468567, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.11037178337574005, "rewards/margins": 0.11889486014842987, "rewards/rejected": -0.22926661372184753, "step": 2010 }, { "epoch": 1.44, "grad_norm": 6.65625, "learning_rate": 4.560555764947004e-06, "log_odds_chosen": 1.2340061664581299, "log_odds_ratio": -0.32612934708595276, "logits/chosen": -2.822478771209717, "logits/rejected": -2.828272819519043, "logps/chosen": -0.5015519857406616, "logps/rejected": -1.164368987083435, "loss": 0.3998, "nll_loss": 0.37030792236328125, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.10031040757894516, "rewards/margins": 0.13256338238716125, "rewards/rejected": -0.2328738272190094, "step": 2020 }, { "epoch": 1.45, "grad_norm": 5.5625, "learning_rate": 4.5586598780369954e-06, "log_odds_chosen": 1.1390058994293213, "log_odds_ratio": -0.35169175267219543, "logits/chosen": -2.8428843021392822, "logits/rejected": -2.846796989440918, "logps/chosen": -0.5042189359664917, "logps/rejected": -1.092398762702942, "loss": 0.3971, "nll_loss": 0.3557208478450775, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.10084378719329834, "rewards/margins": 0.11763594299554825, "rewards/rejected": -0.21847975254058838, "step": 2030 }, { "epoch": 1.46, "grad_norm": 6.5625, "learning_rate": 4.556766353603058e-06, "log_odds_chosen": 1.2142447233200073, "log_odds_ratio": -0.3221642076969147, "logits/chosen": -2.7607929706573486, "logits/rejected": -2.769547939300537, "logps/chosen": -0.5120490789413452, "logps/rejected": -1.1542648077011108, "loss": 0.4132, "nll_loss": 0.31557202339172363, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10240981727838516, "rewards/margins": 0.1284431517124176, "rewards/rejected": -0.23085296154022217, "step": 2040 }, { "epoch": 1.46, "grad_norm": 6.46875, "learning_rate": 4.55487518674277e-06, "log_odds_chosen": 1.2244555950164795, "log_odds_ratio": -0.3199111521244049, "logits/chosen": -2.848184108734131, "logits/rejected": -2.853646755218506, "logps/chosen": -0.4814947545528412, "logps/rejected": -1.1112531423568726, "loss": 0.4191, "nll_loss": 0.33359044790267944, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.0962989553809166, "rewards/margins": 0.12595167756080627, "rewards/rejected": -0.22225065529346466, "step": 2050 }, { "epoch": 1.47, "grad_norm": 5.34375, "learning_rate": 4.552986372567943e-06, "log_odds_chosen": 1.2204258441925049, "log_odds_ratio": -0.3285110592842102, "logits/chosen": -2.826977252960205, "logits/rejected": -2.8170952796936035, "logps/chosen": -0.518255352973938, "logps/rejected": -1.1798815727233887, "loss": 0.3758, "nll_loss": 0.335254967212677, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10365106910467148, "rewards/margins": 0.13232523202896118, "rewards/rejected": -0.23597629368305206, "step": 2060 }, { "epoch": 1.48, "grad_norm": 5.65625, "learning_rate": 4.5510999062045625e-06, "log_odds_chosen": 1.268137812614441, "log_odds_ratio": -0.3236086964607239, "logits/chosen": -2.7644591331481934, "logits/rejected": -2.761575222015381, "logps/chosen": -0.5131622552871704, "logps/rejected": -1.2004690170288086, "loss": 0.3893, "nll_loss": 0.33567947149276733, "rewards/accuracies": 0.90625, "rewards/chosen": -0.10263246297836304, "rewards/margins": 0.1374613493680954, "rewards/rejected": -0.24009379744529724, "step": 2070 }, { "epoch": 1.49, "grad_norm": 6.09375, "learning_rate": 4.5492157827927435e-06, "log_odds_chosen": 1.2079408168792725, "log_odds_ratio": -0.33384907245635986, "logits/chosen": -2.86165714263916, "logits/rejected": -2.8367888927459717, "logps/chosen": -0.4977470338344574, "logps/rejected": -1.1325632333755493, "loss": 0.3736, "nll_loss": 0.32109367847442627, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.09954941272735596, "rewards/margins": 0.12696322798728943, "rewards/rejected": -0.22651264071464539, "step": 2080 }, { "epoch": 1.49, "grad_norm": 7.21875, "learning_rate": 4.54733399748667e-06, "log_odds_chosen": 1.2205212116241455, "log_odds_ratio": -0.3305164873600006, "logits/chosen": -2.7856006622314453, "logits/rejected": -2.7885613441467285, "logps/chosen": -0.5106269717216492, "logps/rejected": -1.1481513977050781, "loss": 0.3855, "nll_loss": 0.30324143171310425, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10212540626525879, "rewards/margins": 0.12750491499900818, "rewards/rejected": -0.22963032126426697, "step": 2090 }, { "epoch": 1.5, "grad_norm": 6.75, "learning_rate": 4.5454545454545455e-06, "log_odds_chosen": 1.2237420082092285, "log_odds_ratio": -0.32826218008995056, "logits/chosen": -2.820686101913452, "logits/rejected": -2.8283371925354004, "logps/chosen": -0.5093476176261902, "logps/rejected": -1.168192744255066, "loss": 0.3887, "nll_loss": 0.3230911195278168, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10186950862407684, "rewards/margins": 0.13176901638507843, "rewards/rejected": -0.23363855481147766, "step": 2100 }, { "epoch": 1.51, "grad_norm": 6.3125, "learning_rate": 4.543577421878542e-06, "log_odds_chosen": 1.2703464031219482, "log_odds_ratio": -0.31365537643432617, "logits/chosen": -2.851209878921509, "logits/rejected": -2.8478894233703613, "logps/chosen": -0.4876154363155365, "logps/rejected": -1.1669528484344482, "loss": 0.4003, "nll_loss": 0.32528436183929443, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.09752309322357178, "rewards/margins": 0.13586744666099548, "rewards/rejected": -0.23339056968688965, "step": 2110 }, { "epoch": 1.51, "grad_norm": 6.46875, "learning_rate": 4.541702621954749e-06, "log_odds_chosen": 1.1352488994598389, "log_odds_ratio": -0.35285863280296326, "logits/chosen": -2.8669912815093994, "logits/rejected": -2.872535228729248, "logps/chosen": -0.5130313634872437, "logps/rejected": -1.1068975925445557, "loss": 0.4053, "nll_loss": 0.33999350666999817, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.10260625928640366, "rewards/margins": 0.11877324432134628, "rewards/rejected": -0.22137951850891113, "step": 2120 }, { "epoch": 1.52, "grad_norm": 5.71875, "learning_rate": 4.539830140893113e-06, "log_odds_chosen": 1.4157606363296509, "log_odds_ratio": -0.30813926458358765, "logits/chosen": -2.8120830059051514, "logits/rejected": -2.8225760459899902, "logps/chosen": -0.4803242087364197, "logps/rejected": -1.2175981998443604, "loss": 0.3963, "nll_loss": 0.3383215367794037, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.09606485068798065, "rewards/margins": 0.14745476841926575, "rewards/rejected": -0.2435196191072464, "step": 2130 }, { "epoch": 1.53, "grad_norm": 7.625, "learning_rate": 4.537959973917404e-06, "log_odds_chosen": 1.2107295989990234, "log_odds_ratio": -0.3288795053958893, "logits/chosen": -2.809863567352295, "logits/rejected": -2.8031539916992188, "logps/chosen": -0.49784055352211, "logps/rejected": -1.1504080295562744, "loss": 0.3939, "nll_loss": 0.3284316062927246, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.09956810623407364, "rewards/margins": 0.1305135190486908, "rewards/rejected": -0.23008163273334503, "step": 2140 }, { "epoch": 1.54, "grad_norm": 7.46875, "learning_rate": 4.536092116265145e-06, "log_odds_chosen": 1.1781766414642334, "log_odds_ratio": -0.33598729968070984, "logits/chosen": -2.8239569664001465, "logits/rejected": -2.8153862953186035, "logps/chosen": -0.5003775954246521, "logps/rejected": -1.1214288473129272, "loss": 0.3922, "nll_loss": 0.3246839940547943, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10007552057504654, "rewards/margins": 0.12421026080846786, "rewards/rejected": -0.2242857962846756, "step": 2150 }, { "epoch": 1.54, "grad_norm": 6.46875, "learning_rate": 4.534226563187573e-06, "log_odds_chosen": 1.1511073112487793, "log_odds_ratio": -0.3392142653465271, "logits/chosen": -2.8518738746643066, "logits/rejected": -2.854708671569824, "logps/chosen": -0.5298787355422974, "logps/rejected": -1.1405723094940186, "loss": 0.3987, "nll_loss": 0.37272533774375916, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10597574710845947, "rewards/margins": 0.12213869392871857, "rewards/rejected": -0.22811445593833923, "step": 2160 }, { "epoch": 1.55, "grad_norm": 6.3125, "learning_rate": 4.532363309949585e-06, "log_odds_chosen": 1.0746400356292725, "log_odds_ratio": -0.3582301139831543, "logits/chosen": -2.7901675701141357, "logits/rejected": -2.7982590198516846, "logps/chosen": -0.5267711877822876, "logps/rejected": -1.0863219499588013, "loss": 0.4085, "nll_loss": 0.3383466601371765, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10535424947738647, "rewards/margins": 0.1119101420044899, "rewards/rejected": -0.21726438403129578, "step": 2170 }, { "epoch": 1.56, "grad_norm": 7.375, "learning_rate": 4.530502351829687e-06, "log_odds_chosen": 1.2173881530761719, "log_odds_ratio": -0.33332520723342896, "logits/chosen": -2.816040515899658, "logits/rejected": -2.8241238594055176, "logps/chosen": -0.49707546830177307, "logps/rejected": -1.1558698415756226, "loss": 0.3933, "nll_loss": 0.29985347390174866, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.09941510856151581, "rewards/margins": 0.13175883889198303, "rewards/rejected": -0.23117394745349884, "step": 2180 }, { "epoch": 1.56, "grad_norm": 6.0, "learning_rate": 4.528643684119943e-06, "log_odds_chosen": 1.2386395931243896, "log_odds_ratio": -0.3029495179653168, "logits/chosen": -2.780738115310669, "logits/rejected": -2.78902530670166, "logps/chosen": -0.49545398354530334, "logps/rejected": -1.158929705619812, "loss": 0.3934, "nll_loss": 0.3328419327735901, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.09909079968929291, "rewards/margins": 0.13269512355327606, "rewards/rejected": -0.23178592324256897, "step": 2190 }, { "epoch": 1.57, "grad_norm": 7.34375, "learning_rate": 4.526787302125927e-06, "log_odds_chosen": 1.2402592897415161, "log_odds_ratio": -0.3278099596500397, "logits/chosen": -2.8122589588165283, "logits/rejected": -2.816236972808838, "logps/chosen": -0.5384883284568787, "logps/rejected": -1.2339115142822266, "loss": 0.4106, "nll_loss": 0.3580947518348694, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10769768059253693, "rewards/margins": 0.1390846073627472, "rewards/rejected": -0.2467823028564453, "step": 2200 }, { "epoch": 1.58, "grad_norm": 6.125, "learning_rate": 4.524933201166673e-06, "log_odds_chosen": 1.1136146783828735, "log_odds_ratio": -0.3713721036911011, "logits/chosen": -2.8021061420440674, "logits/rejected": -2.813220739364624, "logps/chosen": -0.5083444714546204, "logps/rejected": -1.091552495956421, "loss": 0.4274, "nll_loss": 0.36474576592445374, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.10166887938976288, "rewards/margins": 0.11664160341024399, "rewards/rejected": -0.21831050515174866, "step": 2210 }, { "epoch": 1.59, "grad_norm": 8.125, "learning_rate": 4.523081376574626e-06, "log_odds_chosen": 1.2216575145721436, "log_odds_ratio": -0.3291280269622803, "logits/chosen": -2.777296304702759, "logits/rejected": -2.7747302055358887, "logps/chosen": -0.5027607679367065, "logps/rejected": -1.1558706760406494, "loss": 0.4256, "nll_loss": 0.3766383230686188, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.10055215656757355, "rewards/margins": 0.1306219846010208, "rewards/rejected": -0.23117414116859436, "step": 2220 }, { "epoch": 1.59, "grad_norm": 5.875, "learning_rate": 4.521231823695586e-06, "log_odds_chosen": 1.131630539894104, "log_odds_ratio": -0.34842541813850403, "logits/chosen": -2.8320116996765137, "logits/rejected": -2.8329334259033203, "logps/chosen": -0.5016427636146545, "logps/rejected": -1.0832736492156982, "loss": 0.3947, "nll_loss": 0.3312470316886902, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10032854229211807, "rewards/margins": 0.11632619798183441, "rewards/rejected": -0.21665474772453308, "step": 2230 }, { "epoch": 1.6, "grad_norm": 6.4375, "learning_rate": 4.519384537888671e-06, "log_odds_chosen": 1.2074496746063232, "log_odds_ratio": -0.341810017824173, "logits/chosen": -2.805079698562622, "logits/rejected": -2.7929458618164062, "logps/chosen": -0.4954722821712494, "logps/rejected": -1.1329338550567627, "loss": 0.4051, "nll_loss": 0.3333224356174469, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.099094457924366, "rewards/margins": 0.12749230861663818, "rewards/rejected": -0.2265867441892624, "step": 2240 }, { "epoch": 1.61, "grad_norm": 6.59375, "learning_rate": 4.517539514526257e-06, "log_odds_chosen": 1.3590493202209473, "log_odds_ratio": -0.316969096660614, "logits/chosen": -2.803619861602783, "logits/rejected": -2.77778959274292, "logps/chosen": -0.5187050700187683, "logps/rejected": -1.2949590682983398, "loss": 0.4183, "nll_loss": 0.34726181626319885, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10374102741479874, "rewards/margins": 0.15525080263614655, "rewards/rejected": -0.2589918076992035, "step": 2250 }, { "epoch": 1.61, "grad_norm": 6.375, "learning_rate": 4.515696748993935e-06, "log_odds_chosen": 1.159447193145752, "log_odds_ratio": -0.3347181975841522, "logits/chosen": -2.809387683868408, "logits/rejected": -2.8114075660705566, "logps/chosen": -0.5045318007469177, "logps/rejected": -1.105101466178894, "loss": 0.4172, "nll_loss": 0.3646569848060608, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.1009063571691513, "rewards/margins": 0.12011395394802094, "rewards/rejected": -0.22102029621601105, "step": 2260 }, { "epoch": 1.62, "grad_norm": 6.53125, "learning_rate": 4.513856236690462e-06, "log_odds_chosen": 1.1499160528182983, "log_odds_ratio": -0.34458065032958984, "logits/chosen": -2.81234073638916, "logits/rejected": -2.8134610652923584, "logps/chosen": -0.5100609660148621, "logps/rejected": -1.1153619289398193, "loss": 0.4097, "nll_loss": 0.3220939040184021, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10201219469308853, "rewards/margins": 0.12106022983789444, "rewards/rejected": -0.22307243943214417, "step": 2270 }, { "epoch": 1.63, "grad_norm": 6.75, "learning_rate": 4.51201797302771e-06, "log_odds_chosen": 1.307038426399231, "log_odds_ratio": -0.32670050859451294, "logits/chosen": -2.855238676071167, "logits/rejected": -2.830141544342041, "logps/chosen": -0.5477741360664368, "logps/rejected": -1.310255765914917, "loss": 0.4111, "nll_loss": 0.3261423110961914, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.10955484211444855, "rewards/margins": 0.152496337890625, "rewards/rejected": -0.26205116510391235, "step": 2280 }, { "epoch": 1.64, "grad_norm": 7.5, "learning_rate": 4.510181953430622e-06, "log_odds_chosen": 1.170881986618042, "log_odds_ratio": -0.3537355363368988, "logits/chosen": -2.7898900508880615, "logits/rejected": -2.8005497455596924, "logps/chosen": -0.5161569714546204, "logps/rejected": -1.1166584491729736, "loss": 0.4116, "nll_loss": 0.33776649832725525, "rewards/accuracies": 0.90625, "rewards/chosen": -0.10323138535022736, "rewards/margins": 0.12010029703378677, "rewards/rejected": -0.22333166003227234, "step": 2290 }, { "epoch": 1.64, "grad_norm": 6.59375, "learning_rate": 4.508348173337162e-06, "log_odds_chosen": 1.1670069694519043, "log_odds_ratio": -0.3370336890220642, "logits/chosen": -2.8468751907348633, "logits/rejected": -2.8393032550811768, "logps/chosen": -0.5258538722991943, "logps/rejected": -1.1647305488586426, "loss": 0.4118, "nll_loss": 0.3415473699569702, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10517077147960663, "rewards/margins": 0.12777534127235413, "rewards/rejected": -0.23294611275196075, "step": 2300 }, { "epoch": 1.65, "grad_norm": 7.21875, "learning_rate": 4.5065166281982656e-06, "log_odds_chosen": 1.2587502002716064, "log_odds_ratio": -0.31747183203697205, "logits/chosen": -2.833258867263794, "logits/rejected": -2.828791379928589, "logps/chosen": -0.5313466191291809, "logps/rejected": -1.2055209875106812, "loss": 0.3804, "nll_loss": 0.3351854085922241, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.10626931488513947, "rewards/margins": 0.1348349153995514, "rewards/rejected": -0.24110420048236847, "step": 2310 }, { "epoch": 1.66, "grad_norm": 6.375, "learning_rate": 4.5046873134777955e-06, "log_odds_chosen": 1.1143945455551147, "log_odds_ratio": -0.3569260239601135, "logits/chosen": -2.8429629802703857, "logits/rejected": -2.8426883220672607, "logps/chosen": -0.5169599652290344, "logps/rejected": -1.0884437561035156, "loss": 0.4024, "nll_loss": 0.3241182565689087, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10339200496673584, "rewards/margins": 0.11429674923419952, "rewards/rejected": -0.21768875420093536, "step": 2320 }, { "epoch": 1.66, "grad_norm": 6.125, "learning_rate": 4.5028602246524934e-06, "log_odds_chosen": 1.1783859729766846, "log_odds_ratio": -0.3340161442756653, "logits/chosen": -2.806804895401001, "logits/rejected": -2.7988064289093018, "logps/chosen": -0.5388362407684326, "logps/rejected": -1.1798697710037231, "loss": 0.3924, "nll_loss": 0.34538668394088745, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1077672466635704, "rewards/margins": 0.12820670008659363, "rewards/rejected": -0.23597395420074463, "step": 2330 }, { "epoch": 1.67, "grad_norm": 7.09375, "learning_rate": 4.5010353572119344e-06, "log_odds_chosen": 1.2434608936309814, "log_odds_ratio": -0.3279341459274292, "logits/chosen": -2.8133628368377686, "logits/rejected": -2.803882360458374, "logps/chosen": -0.5466577410697937, "logps/rejected": -1.2451903820037842, "loss": 0.4016, "nll_loss": 0.32733532786369324, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.10933154821395874, "rewards/margins": 0.13970652222633362, "rewards/rejected": -0.24903810024261475, "step": 2340 }, { "epoch": 1.68, "grad_norm": 6.65625, "learning_rate": 4.499212706658476e-06, "log_odds_chosen": 1.2468292713165283, "log_odds_ratio": -0.33085036277770996, "logits/chosen": -2.7604432106018066, "logits/rejected": -2.767918586730957, "logps/chosen": -0.5312229990959167, "logps/rejected": -1.232725977897644, "loss": 0.3905, "nll_loss": 0.28453925251960754, "rewards/accuracies": 0.90625, "rewards/chosen": -0.10624460875988007, "rewards/margins": 0.14030058681964874, "rewards/rejected": -0.24654516577720642, "step": 2350 }, { "epoch": 1.69, "grad_norm": 6.15625, "learning_rate": 4.497392268507216e-06, "log_odds_chosen": 1.3811371326446533, "log_odds_ratio": -0.3187825083732605, "logits/chosen": -2.7945587635040283, "logits/rejected": -2.793541431427002, "logps/chosen": -0.5095025897026062, "logps/rejected": -1.2895538806915283, "loss": 0.3888, "nll_loss": 0.33172607421875, "rewards/accuracies": 0.90625, "rewards/chosen": -0.10190053284168243, "rewards/margins": 0.15601028501987457, "rewards/rejected": -0.2579107880592346, "step": 2360 }, { "epoch": 1.69, "grad_norm": 8.1875, "learning_rate": 4.495574038285945e-06, "log_odds_chosen": 1.2981445789337158, "log_odds_ratio": -0.3155536949634552, "logits/chosen": -2.8101601600646973, "logits/rejected": -2.8108277320861816, "logps/chosen": -0.5573509335517883, "logps/rejected": -1.3329538106918335, "loss": 0.3898, "nll_loss": 0.3312874436378479, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11147018522024155, "rewards/margins": 0.1551206111907959, "rewards/rejected": -0.26659080386161804, "step": 2370 }, { "epoch": 1.7, "grad_norm": 8.5625, "learning_rate": 4.493758011535097e-06, "log_odds_chosen": 1.2831408977508545, "log_odds_ratio": -0.3103974163532257, "logits/chosen": -2.823028564453125, "logits/rejected": -2.813875436782837, "logps/chosen": -0.5119468569755554, "logps/rejected": -1.2149946689605713, "loss": 0.4083, "nll_loss": 0.32668930292129517, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.1023893728852272, "rewards/margins": 0.1406095325946808, "rewards/rejected": -0.2429989129304886, "step": 2380 }, { "epoch": 1.71, "grad_norm": 9.375, "learning_rate": 4.491944183807709e-06, "log_odds_chosen": 1.1459182500839233, "log_odds_ratio": -0.3687919080257416, "logits/chosen": -2.850087881088257, "logits/rejected": -2.8486053943634033, "logps/chosen": -0.5623646974563599, "logps/rejected": -1.2140840291976929, "loss": 0.3556, "nll_loss": 0.285260409116745, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.11247295141220093, "rewards/margins": 0.13034388422966003, "rewards/rejected": -0.24281683564186096, "step": 2390 }, { "epoch": 1.71, "grad_norm": 9.375, "learning_rate": 4.490132550669373e-06, "log_odds_chosen": 1.240378975868225, "log_odds_ratio": -0.3130456507205963, "logits/chosen": -2.9485726356506348, "logits/rejected": -2.9329330921173096, "logps/chosen": -0.5569415092468262, "logps/rejected": -1.2546156644821167, "loss": 0.4249, "nll_loss": 0.3691002428531647, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.11138832569122314, "rewards/margins": 0.1395348310470581, "rewards/rejected": -0.25092315673828125, "step": 2400 }, { "epoch": 1.72, "grad_norm": 7.0625, "learning_rate": 4.488323107698186e-06, "log_odds_chosen": 1.3868653774261475, "log_odds_ratio": -0.2894250750541687, "logits/chosen": -2.9048142433166504, "logits/rejected": -2.893167018890381, "logps/chosen": -0.4791548252105713, "logps/rejected": -1.2176661491394043, "loss": 0.4143, "nll_loss": 0.3569006025791168, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.09583096206188202, "rewards/margins": 0.14770226180553436, "rewards/rejected": -0.24353322386741638, "step": 2410 }, { "epoch": 1.73, "grad_norm": 7.21875, "learning_rate": 4.486515850484713e-06, "log_odds_chosen": 1.1212716102600098, "log_odds_ratio": -0.3596242070198059, "logits/chosen": -2.9160571098327637, "logits/rejected": -2.9128189086914062, "logps/chosen": -0.5376850366592407, "logps/rejected": -1.1596351861953735, "loss": 0.4265, "nll_loss": 0.33928748965263367, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10753700882196426, "rewards/margins": 0.12439004331827164, "rewards/rejected": -0.2319270670413971, "step": 2420 }, { "epoch": 1.74, "grad_norm": 8.5, "learning_rate": 4.484710774631934e-06, "log_odds_chosen": 1.241854190826416, "log_odds_ratio": -0.3230142295360565, "logits/chosen": -2.8752593994140625, "logits/rejected": -2.8711416721343994, "logps/chosen": -0.5400632619857788, "logps/rejected": -1.2231850624084473, "loss": 0.397, "nll_loss": 0.3497290313243866, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.10801263898611069, "rewards/margins": 0.13662435114383698, "rewards/rejected": -0.24463701248168945, "step": 2430 }, { "epoch": 1.74, "grad_norm": 7.28125, "learning_rate": 4.482907875755205e-06, "log_odds_chosen": 1.3686769008636475, "log_odds_ratio": -0.2961091697216034, "logits/chosen": -2.8909826278686523, "logits/rejected": -2.897916078567505, "logps/chosen": -0.5009250640869141, "logps/rejected": -1.2302018404006958, "loss": 0.4144, "nll_loss": 0.36144185066223145, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.10018502175807953, "rewards/margins": 0.14585533738136292, "rewards/rejected": -0.24604037404060364, "step": 2440 }, { "epoch": 1.75, "grad_norm": 7.40625, "learning_rate": 4.481107149482208e-06, "log_odds_chosen": 1.3370695114135742, "log_odds_ratio": -0.30031710863113403, "logits/chosen": -2.8956050872802734, "logits/rejected": -2.8868050575256348, "logps/chosen": -0.5050898194313049, "logps/rejected": -1.255013108253479, "loss": 0.3929, "nll_loss": 0.3568061888217926, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10101795196533203, "rewards/margins": 0.14998464286327362, "rewards/rejected": -0.25100260972976685, "step": 2450 }, { "epoch": 1.76, "grad_norm": 8.6875, "learning_rate": 4.4793085914529136e-06, "log_odds_chosen": 1.1334112882614136, "log_odds_ratio": -0.3533148169517517, "logits/chosen": -2.8731157779693604, "logits/rejected": -2.8705520629882812, "logps/chosen": -0.5255860090255737, "logps/rejected": -1.1402482986450195, "loss": 0.3965, "nll_loss": 0.34123849868774414, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10511720180511475, "rewards/margins": 0.12293247878551483, "rewards/rejected": -0.22804968059062958, "step": 2460 }, { "epoch": 1.76, "grad_norm": 10.6875, "learning_rate": 4.477512197319528e-06, "log_odds_chosen": 1.3563247919082642, "log_odds_ratio": -0.32014063000679016, "logits/chosen": -2.8848843574523926, "logits/rejected": -2.879897356033325, "logps/chosen": -0.5252963900566101, "logps/rejected": -1.3075590133666992, "loss": 0.3723, "nll_loss": 0.30930250883102417, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10505928099155426, "rewards/margins": 0.15645255148410797, "rewards/rejected": -0.26151180267333984, "step": 2470 }, { "epoch": 1.77, "grad_norm": 19.25, "learning_rate": 4.475717962746456e-06, "log_odds_chosen": 1.2710864543914795, "log_odds_ratio": -0.3212870657444, "logits/chosen": -2.875734806060791, "logits/rejected": -2.869267702102661, "logps/chosen": -0.5439091920852661, "logps/rejected": -1.2747299671173096, "loss": 0.3726, "nll_loss": 0.30996406078338623, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1087818369269371, "rewards/margins": 0.1461641490459442, "rewards/rejected": -0.2549459636211395, "step": 2480 }, { "epoch": 1.78, "grad_norm": 8.1875, "learning_rate": 4.4739258834102515e-06, "log_odds_chosen": 1.2608749866485596, "log_odds_ratio": -0.346863329410553, "logits/chosen": -2.85408878326416, "logits/rejected": -2.849069595336914, "logps/chosen": -0.5514413118362427, "logps/rejected": -1.3013412952423096, "loss": 0.3812, "nll_loss": 0.33392244577407837, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11028827726840973, "rewards/margins": 0.14997997879981995, "rewards/rejected": -0.26026827096939087, "step": 2490 }, { "epoch": 1.79, "grad_norm": 7.125, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 1.5626336336135864, "log_odds_ratio": -0.27938312292099, "logits/chosen": -2.928284168243408, "logits/rejected": -2.90639328956604, "logps/chosen": -0.5721508860588074, "logps/rejected": -1.5081751346588135, "loss": 0.3923, "nll_loss": 0.3527207374572754, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.11443017423152924, "rewards/margins": 0.18720485270023346, "rewards/rejected": -0.3016350269317627, "step": 2500 }, { "epoch": 1.79, "grad_norm": 6.65625, "learning_rate": 4.470348173215168e-06, "log_odds_chosen": 1.3578099012374878, "log_odds_ratio": -0.3152514100074768, "logits/chosen": -2.8871371746063232, "logits/rejected": -2.874263286590576, "logps/chosen": -0.5261938571929932, "logps/rejected": -1.3208293914794922, "loss": 0.3849, "nll_loss": 0.33318930864334106, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10523878037929535, "rewards/margins": 0.15892712771892548, "rewards/rejected": -0.26416587829589844, "step": 2510 }, { "epoch": 1.8, "grad_norm": 12.5625, "learning_rate": 4.468562533769766e-06, "log_odds_chosen": 1.367193579673767, "log_odds_ratio": -0.3165552616119385, "logits/chosen": -2.8899295330047607, "logits/rejected": -2.890939950942993, "logps/chosen": -0.5429189205169678, "logps/rejected": -1.3376638889312744, "loss": 0.4111, "nll_loss": 0.33587661385536194, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10858378559350967, "rewards/margins": 0.15894900262355804, "rewards/rejected": -0.2675327956676483, "step": 2520 }, { "epoch": 1.81, "grad_norm": 12.8125, "learning_rate": 4.4667790323881e-06, "log_odds_chosen": 1.2805125713348389, "log_odds_ratio": -0.33233442902565, "logits/chosen": -2.871812105178833, "logits/rejected": -2.852710723876953, "logps/chosen": -0.6220626831054688, "logps/rejected": -1.3669788837432861, "loss": 0.4336, "nll_loss": 0.4037063717842102, "rewards/accuracies": 0.9312499761581421, "rewards/chosen": -0.12441255897283554, "rewards/margins": 0.14898324012756348, "rewards/rejected": -0.2733957767486572, "step": 2530 }, { "epoch": 1.81, "grad_norm": 7.0625, "learning_rate": 4.464997664806832e-06, "log_odds_chosen": 1.1691948175430298, "log_odds_ratio": -0.35392698645591736, "logits/chosen": -2.857517719268799, "logits/rejected": -2.862380266189575, "logps/chosen": -0.5593948364257812, "logps/rejected": -1.201662302017212, "loss": 0.3871, "nll_loss": 0.31616219878196716, "rewards/accuracies": 0.893750011920929, "rewards/chosen": -0.11187896877527237, "rewards/margins": 0.12845350801944733, "rewards/rejected": -0.2403324842453003, "step": 2540 }, { "epoch": 1.82, "grad_norm": 6.4375, "learning_rate": 4.463218426774518e-06, "log_odds_chosen": 1.4263107776641846, "log_odds_ratio": -0.3046194911003113, "logits/chosen": -2.891172170639038, "logits/rejected": -2.8799169063568115, "logps/chosen": -0.5329869389533997, "logps/rejected": -1.3692753314971924, "loss": 0.3873, "nll_loss": 0.3430352509021759, "rewards/accuracies": 0.9375, "rewards/chosen": -0.10659738630056381, "rewards/margins": 0.16725768148899078, "rewards/rejected": -0.273855060338974, "step": 2550 }, { "epoch": 1.83, "grad_norm": 6.40625, "learning_rate": 4.461441314051561e-06, "log_odds_chosen": 1.4294536113739014, "log_odds_ratio": -0.2968199849128723, "logits/chosen": -2.9138002395629883, "logits/rejected": -2.8921501636505127, "logps/chosen": -0.4996632933616638, "logps/rejected": -1.3187425136566162, "loss": 0.3745, "nll_loss": 0.3486596941947937, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.09993267059326172, "rewards/margins": 0.16381582617759705, "rewards/rejected": -0.2637484669685364, "step": 2560 }, { "epoch": 1.84, "grad_norm": 7.59375, "learning_rate": 4.459666322410172e-06, "log_odds_chosen": 1.3704941272735596, "log_odds_ratio": -0.3186499774456024, "logits/chosen": -2.883667469024658, "logits/rejected": -2.8775885105133057, "logps/chosen": -0.5285792350769043, "logps/rejected": -1.3211987018585205, "loss": 0.4048, "nll_loss": 0.3316243290901184, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10571584850549698, "rewards/margins": 0.15852385759353638, "rewards/rejected": -0.26423972845077515, "step": 2570 }, { "epoch": 1.84, "grad_norm": 9.1875, "learning_rate": 4.457893447634326e-06, "log_odds_chosen": 1.2892141342163086, "log_odds_ratio": -0.33191436529159546, "logits/chosen": -2.9250831604003906, "logits/rejected": -2.902353286743164, "logps/chosen": -0.580119788646698, "logps/rejected": -1.3289637565612793, "loss": 0.4149, "nll_loss": 0.35767120122909546, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1160239577293396, "rewards/margins": 0.14976876974105835, "rewards/rejected": -0.26579275727272034, "step": 2580 }, { "epoch": 1.85, "grad_norm": 10.5625, "learning_rate": 4.456122685519721e-06, "log_odds_chosen": 1.394887089729309, "log_odds_ratio": -0.3056487441062927, "logits/chosen": -2.8950932025909424, "logits/rejected": -2.873574733734131, "logps/chosen": -0.5190961956977844, "logps/rejected": -1.3081409931182861, "loss": 0.4005, "nll_loss": 0.31329116225242615, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.10381922870874405, "rewards/margins": 0.15780898928642273, "rewards/rejected": -0.2616282105445862, "step": 2590 }, { "epoch": 1.86, "grad_norm": 7.09375, "learning_rate": 4.45435403187374e-06, "log_odds_chosen": 1.507157325744629, "log_odds_ratio": -0.2677215039730072, "logits/chosen": -2.8969600200653076, "logits/rejected": -2.88423752784729, "logps/chosen": -0.4688204824924469, "logps/rejected": -1.299645185470581, "loss": 0.3953, "nll_loss": 0.32218068838119507, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.09376410394906998, "rewards/margins": 0.16616491973400116, "rewards/rejected": -0.25992903113365173, "step": 2600 }, { "epoch": 1.86, "grad_norm": 9.4375, "learning_rate": 4.452587482515399e-06, "log_odds_chosen": 1.3385193347930908, "log_odds_ratio": -0.31111469864845276, "logits/chosen": -2.907179355621338, "logits/rejected": -2.8795833587646484, "logps/chosen": -0.5588836669921875, "logps/rejected": -1.3447484970092773, "loss": 0.398, "nll_loss": 0.3411378264427185, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.11177675426006317, "rewards/margins": 0.15717296302318573, "rewards/rejected": -0.2689497172832489, "step": 2610 }, { "epoch": 1.87, "grad_norm": 9.5625, "learning_rate": 4.450823033275315e-06, "log_odds_chosen": 1.338440179824829, "log_odds_ratio": -0.3163678050041199, "logits/chosen": -2.9387764930725098, "logits/rejected": -2.9056990146636963, "logps/chosen": -0.5297420024871826, "logps/rejected": -1.322826862335205, "loss": 0.422, "nll_loss": 0.3693048357963562, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.10594840347766876, "rewards/margins": 0.15861697494983673, "rewards/rejected": -0.2645653784275055, "step": 2620 }, { "epoch": 1.88, "grad_norm": 7.65625, "learning_rate": 4.4490606799956615e-06, "log_odds_chosen": 1.4915597438812256, "log_odds_ratio": -0.28770941495895386, "logits/chosen": -2.8788347244262695, "logits/rejected": -2.8595736026763916, "logps/chosen": -0.5416139364242554, "logps/rejected": -1.4380385875701904, "loss": 0.4005, "nll_loss": 0.34102195501327515, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10832278430461884, "rewards/margins": 0.1792849451303482, "rewards/rejected": -0.28760772943496704, "step": 2630 }, { "epoch": 1.89, "grad_norm": 9.5625, "learning_rate": 4.447300418530126e-06, "log_odds_chosen": 1.3638993501663208, "log_odds_ratio": -0.32332590222358704, "logits/chosen": -2.916489601135254, "logits/rejected": -2.8951425552368164, "logps/chosen": -0.5407701730728149, "logps/rejected": -1.3687996864318848, "loss": 0.4186, "nll_loss": 0.350581556558609, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10815402120351791, "rewards/margins": 0.16560593247413635, "rewards/rejected": -0.27375996112823486, "step": 2640 }, { "epoch": 1.89, "grad_norm": 8.4375, "learning_rate": 4.4455422447438715e-06, "log_odds_chosen": 1.3353352546691895, "log_odds_ratio": -0.3252139091491699, "logits/chosen": -2.880558729171753, "logits/rejected": -2.8813929557800293, "logps/chosen": -0.5225256681442261, "logps/rejected": -1.2812443971633911, "loss": 0.4082, "nll_loss": 0.32984524965286255, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.10450513660907745, "rewards/margins": 0.15174375474452972, "rewards/rejected": -0.25624892115592957, "step": 2650 }, { "epoch": 1.9, "grad_norm": 9.0625, "learning_rate": 4.443786154513493e-06, "log_odds_chosen": 1.541635513305664, "log_odds_ratio": -0.2674759328365326, "logits/chosen": -2.883507490158081, "logits/rejected": -2.8745486736297607, "logps/chosen": -0.5493952035903931, "logps/rejected": -1.4771674871444702, "loss": 0.3756, "nll_loss": 0.2988761067390442, "rewards/accuracies": 0.956250011920929, "rewards/chosen": -0.1098790392279625, "rewards/margins": 0.18555445969104767, "rewards/rejected": -0.2954334616661072, "step": 2660 }, { "epoch": 1.91, "grad_norm": 12.9375, "learning_rate": 4.442032143726981e-06, "log_odds_chosen": 1.3634321689605713, "log_odds_ratio": -0.30965596437454224, "logits/chosen": -2.8866159915924072, "logits/rejected": -2.8669233322143555, "logps/chosen": -0.602931022644043, "logps/rejected": -1.4159696102142334, "loss": 0.4085, "nll_loss": 0.362027645111084, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.12058620154857635, "rewards/margins": 0.16260775923728943, "rewards/rejected": -0.283193975687027, "step": 2670 }, { "epoch": 1.91, "grad_norm": 13.9375, "learning_rate": 4.440280208283675e-06, "log_odds_chosen": 1.3336797952651978, "log_odds_ratio": -0.32034236192703247, "logits/chosen": -2.9205827713012695, "logits/rejected": -2.8959076404571533, "logps/chosen": -0.5290694236755371, "logps/rejected": -1.265318512916565, "loss": 0.3972, "nll_loss": 0.3247922360897064, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.1058138832449913, "rewards/margins": 0.14724981784820557, "rewards/rejected": -0.25306370854377747, "step": 2680 }, { "epoch": 1.92, "grad_norm": 12.0625, "learning_rate": 4.43853034409423e-06, "log_odds_chosen": 1.4389830827713013, "log_odds_ratio": -0.308421790599823, "logits/chosen": -2.900420904159546, "logits/rejected": -2.879181385040283, "logps/chosen": -0.5513266324996948, "logps/rejected": -1.4156787395477295, "loss": 0.3989, "nll_loss": 0.3391249179840088, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.11026531457901001, "rewards/margins": 0.1728704273700714, "rewards/rejected": -0.2831357419490814, "step": 2690 }, { "epoch": 1.93, "grad_norm": 7.46875, "learning_rate": 4.43678254708057e-06, "log_odds_chosen": 1.2590786218643188, "log_odds_ratio": -0.3339710831642151, "logits/chosen": -2.8714280128479004, "logits/rejected": -2.8531947135925293, "logps/chosen": -0.5529037117958069, "logps/rejected": -1.291151762008667, "loss": 0.4083, "nll_loss": 0.34685009717941284, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.11058074235916138, "rewards/margins": 0.1476496458053589, "rewards/rejected": -0.25823038816452026, "step": 2700 }, { "epoch": 1.94, "grad_norm": 10.8125, "learning_rate": 4.435036813175853e-06, "log_odds_chosen": 1.296800971031189, "log_odds_ratio": -0.3468873202800751, "logits/chosen": -2.9166648387908936, "logits/rejected": -2.905029773712158, "logps/chosen": -0.5468270778656006, "logps/rejected": -1.318878412246704, "loss": 0.4244, "nll_loss": 0.3611888289451599, "rewards/accuracies": 0.875, "rewards/chosen": -0.10936541855335236, "rewards/margins": 0.15441028773784637, "rewards/rejected": -0.26377567648887634, "step": 2710 }, { "epoch": 1.94, "grad_norm": 7.59375, "learning_rate": 4.4332931383244296e-06, "log_odds_chosen": 1.5938103199005127, "log_odds_ratio": -0.2786775231361389, "logits/chosen": -2.84563946723938, "logits/rejected": -2.81329345703125, "logps/chosen": -0.5737396478652954, "logps/rejected": -1.5809630155563354, "loss": 0.4009, "nll_loss": 0.3476629853248596, "rewards/accuracies": 0.9375, "rewards/chosen": -0.11474792659282684, "rewards/margins": 0.20144470036029816, "rewards/rejected": -0.316192626953125, "step": 2720 }, { "epoch": 1.95, "grad_norm": 9.125, "learning_rate": 4.431551518481802e-06, "log_odds_chosen": 1.4222679138183594, "log_odds_ratio": -0.30235162377357483, "logits/chosen": -2.859123468399048, "logits/rejected": -2.8459434509277344, "logps/chosen": -0.5479883551597595, "logps/rejected": -1.3775622844696045, "loss": 0.3895, "nll_loss": 0.31317585706710815, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.10959766805171967, "rewards/margins": 0.16591483354568481, "rewards/rejected": -0.2755124866962433, "step": 2730 }, { "epoch": 1.96, "grad_norm": 8.1875, "learning_rate": 4.429811949614588e-06, "log_odds_chosen": 1.3053306341171265, "log_odds_ratio": -0.3146135210990906, "logits/chosen": -2.9177818298339844, "logits/rejected": -2.8973758220672607, "logps/chosen": -0.5131221413612366, "logps/rejected": -1.2459287643432617, "loss": 0.4246, "nll_loss": 0.37119024991989136, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.10262443125247955, "rewards/margins": 0.14656129479408264, "rewards/rejected": -0.2491857260465622, "step": 2740 }, { "epoch": 1.96, "grad_norm": 6.4375, "learning_rate": 4.428074427700477e-06, "log_odds_chosen": 1.3217928409576416, "log_odds_ratio": -0.30279672145843506, "logits/chosen": -2.877779245376587, "logits/rejected": -2.860002279281616, "logps/chosen": -0.5184148550033569, "logps/rejected": -1.2421989440917969, "loss": 0.3943, "nll_loss": 0.32501065731048584, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.1036829724907875, "rewards/margins": 0.14475682377815247, "rewards/rejected": -0.24843978881835938, "step": 2750 }, { "epoch": 1.97, "grad_norm": 16.125, "learning_rate": 4.426338948728195e-06, "log_odds_chosen": 1.3941218852996826, "log_odds_ratio": -0.3162110447883606, "logits/chosen": -2.8602135181427, "logits/rejected": -2.820308208465576, "logps/chosen": -0.5790046453475952, "logps/rejected": -1.4153722524642944, "loss": 0.4456, "nll_loss": 0.3927566409111023, "rewards/accuracies": 0.9437500238418579, "rewards/chosen": -0.11580093950033188, "rewards/margins": 0.16727350652217865, "rewards/rejected": -0.28307443857192993, "step": 2760 }, { "epoch": 1.98, "grad_norm": 7.15625, "learning_rate": 4.424605508697463e-06, "log_odds_chosen": 1.4302750825881958, "log_odds_ratio": -0.3106474280357361, "logits/chosen": -2.8623061180114746, "logits/rejected": -2.825331449508667, "logps/chosen": -0.5542712211608887, "logps/rejected": -1.4113256931304932, "loss": 0.3867, "nll_loss": 0.34051352739334106, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.11085423082113266, "rewards/margins": 0.17141090333461761, "rewards/rejected": -0.2822651267051697, "step": 2770 }, { "epoch": 1.99, "grad_norm": 11.4375, "learning_rate": 4.42287410361896e-06, "log_odds_chosen": 1.4414396286010742, "log_odds_ratio": -0.32269230484962463, "logits/chosen": -2.8639965057373047, "logits/rejected": -2.8246994018554688, "logps/chosen": -0.5694876313209534, "logps/rejected": -1.4324452877044678, "loss": 0.3603, "nll_loss": 0.307388037443161, "rewards/accuracies": 0.8812500238418579, "rewards/chosen": -0.11389752477407455, "rewards/margins": 0.17259155213832855, "rewards/rejected": -0.2864890694618225, "step": 2780 }, { "epoch": 1.99, "grad_norm": 12.0625, "learning_rate": 4.421144729514289e-06, "log_odds_chosen": 1.4677602052688599, "log_odds_ratio": -0.3058595061302185, "logits/chosen": -2.8635823726654053, "logits/rejected": -2.80998158454895, "logps/chosen": -0.5519368052482605, "logps/rejected": -1.4589126110076904, "loss": 0.4043, "nll_loss": 0.3577142357826233, "rewards/accuracies": 0.918749988079071, "rewards/chosen": -0.11038736253976822, "rewards/margins": 0.18139515817165375, "rewards/rejected": -0.2917824983596802, "step": 2790 }, { "epoch": 2.0, "grad_norm": 12.75, "learning_rate": 4.419417382415923e-06, "log_odds_chosen": 1.4039043188095093, "log_odds_ratio": -0.313946932554245, "logits/chosen": -2.8399224281311035, "logits/rejected": -2.791377544403076, "logps/chosen": -0.5001746416091919, "logps/rejected": -1.3304522037506104, "loss": 0.3924, "nll_loss": 0.3202509880065918, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.1000349372625351, "rewards/margins": 0.16605551540851593, "rewards/rejected": -0.26609042286872864, "step": 2800 }, { "epoch": 2.01, "grad_norm": 5.3125, "learning_rate": 4.417692058367186e-06, "log_odds_chosen": 2.910543441772461, "log_odds_ratio": -0.10447581857442856, "logits/chosen": -2.8468985557556152, "logits/rejected": -2.7128536701202393, "logps/chosen": -0.390835702419281, "logps/rejected": -2.187709331512451, "loss": 0.2834, "nll_loss": 0.2709491550922394, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0781671404838562, "rewards/margins": 0.35937467217445374, "rewards/rejected": -0.43754181265830994, "step": 2810 }, { "epoch": 2.01, "grad_norm": 9.5625, "learning_rate": 4.415968753422204e-06, "log_odds_chosen": 3.4466099739074707, "log_odds_ratio": -0.07032948732376099, "logits/chosen": -2.8091824054718018, "logits/rejected": -2.6169466972351074, "logps/chosen": -0.31530073285102844, "logps/rejected": -2.4634060859680176, "loss": 0.2586, "nll_loss": 0.2396393120288849, "rewards/accuracies": 1.0, "rewards/chosen": -0.06306014955043793, "rewards/margins": 0.4296211302280426, "rewards/rejected": -0.4926813244819641, "step": 2820 }, { "epoch": 2.02, "grad_norm": 4.65625, "learning_rate": 4.414247463645868e-06, "log_odds_chosen": 3.7165610790252686, "log_odds_ratio": -0.06657154858112335, "logits/chosen": -2.7692294120788574, "logits/rejected": -2.4765193462371826, "logps/chosen": -0.37618759274482727, "logps/rejected": -2.8358898162841797, "loss": 0.2718, "nll_loss": 0.2537682354450226, "rewards/accuracies": 1.0, "rewards/chosen": -0.07523752003908157, "rewards/margins": 0.491940438747406, "rewards/rejected": -0.5671780109405518, "step": 2830 }, { "epoch": 2.03, "grad_norm": 6.1875, "learning_rate": 4.4125281851137995e-06, "log_odds_chosen": 5.014394283294678, "log_odds_ratio": -0.07537925243377686, "logits/chosen": -2.721604108810425, "logits/rejected": -2.253276824951172, "logps/chosen": -0.3364471197128296, "logps/rejected": -4.104693412780762, "loss": 0.2853, "nll_loss": 0.24688100814819336, "rewards/accuracies": 1.0, "rewards/chosen": -0.06728943437337875, "rewards/margins": 0.7536492943763733, "rewards/rejected": -0.820938766002655, "step": 2840 }, { "epoch": 2.03, "grad_norm": 7.8125, "learning_rate": 4.41081091391231e-06, "log_odds_chosen": 3.7566096782684326, "log_odds_ratio": -0.06575653702020645, "logits/chosen": -2.7676377296447754, "logits/rejected": -2.4655532836914062, "logps/chosen": -0.3585343360900879, "logps/rejected": -2.8366215229034424, "loss": 0.2814, "nll_loss": 0.27807971835136414, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0717068687081337, "rewards/margins": 0.49561747908592224, "rewards/rejected": -0.5673243403434753, "step": 2850 }, { "epoch": 2.04, "grad_norm": 13.875, "learning_rate": 4.409095646138363e-06, "log_odds_chosen": 3.4027340412139893, "log_odds_ratio": -0.08646047860383987, "logits/chosen": -2.78098726272583, "logits/rejected": -2.575989246368408, "logps/chosen": -0.3481005132198334, "logps/rejected": -2.515467405319214, "loss": 0.2687, "nll_loss": 0.2460239678621292, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06962011754512787, "rewards/margins": 0.4334734380245209, "rewards/rejected": -0.5030934810638428, "step": 2860 }, { "epoch": 2.05, "grad_norm": 5.5625, "learning_rate": 4.4073823778995425e-06, "log_odds_chosen": 3.673715114593506, "log_odds_ratio": -0.06583552062511444, "logits/chosen": -2.7850005626678467, "logits/rejected": -2.458684206008911, "logps/chosen": -0.3202974200248718, "logps/rejected": -2.65244197845459, "loss": 0.2405, "nll_loss": 0.2267676144838333, "rewards/accuracies": 1.0, "rewards/chosen": -0.06405948847532272, "rewards/margins": 0.4664289355278015, "rewards/rejected": -0.5304883718490601, "step": 2870 }, { "epoch": 2.06, "grad_norm": 12.8125, "learning_rate": 4.405671105314009e-06, "log_odds_chosen": 4.673083782196045, "log_odds_ratio": -0.05653030425310135, "logits/chosen": -2.6835262775421143, "logits/rejected": -2.0964083671569824, "logps/chosen": -0.336531400680542, "logps/rejected": -3.6344618797302246, "loss": 0.2548, "nll_loss": 0.23812773823738098, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0673062726855278, "rewards/margins": 0.6595860719680786, "rewards/rejected": -0.726892352104187, "step": 2880 }, { "epoch": 2.06, "grad_norm": 5.71875, "learning_rate": 4.4039618245104645e-06, "log_odds_chosen": 5.218452453613281, "log_odds_ratio": -0.06279017776250839, "logits/chosen": -2.765897274017334, "logits/rejected": -2.0521953105926514, "logps/chosen": -0.3615598678588867, "logps/rejected": -4.352395534515381, "loss": 0.2837, "nll_loss": 0.2878434956073761, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07231196761131287, "rewards/margins": 0.7981672286987305, "rewards/rejected": -0.8704792261123657, "step": 2890 }, { "epoch": 2.07, "grad_norm": 9.125, "learning_rate": 4.4022545316281195e-06, "log_odds_chosen": 5.200108051300049, "log_odds_ratio": -0.05653291940689087, "logits/chosen": -2.7568302154541016, "logits/rejected": -2.0886034965515137, "logps/chosen": -0.34464651346206665, "logps/rejected": -4.230252742767334, "loss": 0.2593, "nll_loss": 0.26374679803848267, "rewards/accuracies": 1.0, "rewards/chosen": -0.06892929971218109, "rewards/margins": 0.7771213054656982, "rewards/rejected": -0.8460506200790405, "step": 2900 }, { "epoch": 2.08, "grad_norm": 9.375, "learning_rate": 4.40054922281665e-06, "log_odds_chosen": 6.3368120193481445, "log_odds_ratio": -0.04714951664209366, "logits/chosen": -2.7386841773986816, "logits/rejected": -2.0858142375946045, "logps/chosen": -0.342684805393219, "logps/rejected": -5.299139499664307, "loss": 0.2966, "nll_loss": 0.28953665494918823, "rewards/accuracies": 1.0, "rewards/chosen": -0.06853695958852768, "rewards/margins": 0.9912910461425781, "rewards/rejected": -1.0598279237747192, "step": 2910 }, { "epoch": 2.08, "grad_norm": 10.125, "learning_rate": 4.398845894236168e-06, "log_odds_chosen": 6.120891094207764, "log_odds_ratio": -0.045149557292461395, "logits/chosen": -2.792410135269165, "logits/rejected": -2.072402000427246, "logps/chosen": -0.3363955020904541, "logps/rejected": -5.071963787078857, "loss": 0.2825, "nll_loss": 0.25938451290130615, "rewards/accuracies": 1.0, "rewards/chosen": -0.06727909296751022, "rewards/margins": 0.9471136927604675, "rewards/rejected": -1.014392614364624, "step": 2920 }, { "epoch": 2.09, "grad_norm": 8.875, "learning_rate": 4.397144542057179e-06, "log_odds_chosen": 4.736415863037109, "log_odds_ratio": -0.06085144728422165, "logits/chosen": -2.801853656768799, "logits/rejected": -2.182584047317505, "logps/chosen": -0.36493486166000366, "logps/rejected": -3.7831199169158936, "loss": 0.2756, "nll_loss": 0.2436126172542572, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07298697531223297, "rewards/margins": 0.6836371421813965, "rewards/rejected": -0.7566241025924683, "step": 2930 }, { "epoch": 2.1, "grad_norm": 7.75, "learning_rate": 4.395445162460548e-06, "log_odds_chosen": 6.897581577301025, "log_odds_ratio": -0.03966455161571503, "logits/chosen": -2.73842191696167, "logits/rejected": -1.9335638284683228, "logps/chosen": -0.3176586329936981, "logps/rejected": -5.78334903717041, "loss": 0.251, "nll_loss": 0.23550419509410858, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06353173404932022, "rewards/margins": 1.0931380987167358, "rewards/rejected": -1.1566698551177979, "step": 2940 }, { "epoch": 2.11, "grad_norm": 5.59375, "learning_rate": 4.393747751637469e-06, "log_odds_chosen": 10.120447158813477, "log_odds_ratio": -0.03257293254137039, "logits/chosen": -2.7335376739501953, "logits/rejected": -1.6728357076644897, "logps/chosen": -0.32205477356910706, "logps/rejected": -8.999835968017578, "loss": 0.2672, "nll_loss": 0.2610648274421692, "rewards/accuracies": 1.0, "rewards/chosen": -0.06441095471382141, "rewards/margins": 1.7355563640594482, "rewards/rejected": -1.7999674081802368, "step": 2950 }, { "epoch": 2.11, "grad_norm": 14.3125, "learning_rate": 4.392052305789416e-06, "log_odds_chosen": 9.041637420654297, "log_odds_ratio": -0.039078570902347565, "logits/chosen": -2.7224085330963135, "logits/rejected": -1.6541340351104736, "logps/chosen": -0.3390257954597473, "logps/rejected": -7.973293304443359, "loss": 0.2984, "nll_loss": 0.2727344036102295, "rewards/accuracies": 1.0, "rewards/chosen": -0.06780517101287842, "rewards/margins": 1.5268534421920776, "rewards/rejected": -1.594658613204956, "step": 2960 }, { "epoch": 2.12, "grad_norm": 10.8125, "learning_rate": 4.390358821128123e-06, "log_odds_chosen": 9.127324104309082, "log_odds_ratio": -0.04081900417804718, "logits/chosen": -2.6818902492523193, "logits/rejected": -1.3260700702667236, "logps/chosen": -0.3540460765361786, "logps/rejected": -8.071810722351074, "loss": 0.2843, "nll_loss": 0.28363126516342163, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07080921530723572, "rewards/margins": 1.5435529947280884, "rewards/rejected": -1.6143620014190674, "step": 2970 }, { "epoch": 2.13, "grad_norm": 18.0, "learning_rate": 4.388667293875536e-06, "log_odds_chosen": 5.46827507019043, "log_odds_ratio": -0.13300354778766632, "logits/chosen": -2.7808876037597656, "logits/rejected": -2.0889151096343994, "logps/chosen": -0.4834941327571869, "logps/rejected": -4.695062160491943, "loss": 0.275, "nll_loss": 0.26671329140663147, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0966988280415535, "rewards/margins": 0.8423136472702026, "rewards/rejected": -0.9390126466751099, "step": 2980 }, { "epoch": 2.13, "grad_norm": 169.0, "learning_rate": 4.386977720263786e-06, "log_odds_chosen": 6.041357040405273, "log_odds_ratio": -0.05618007108569145, "logits/chosen": -2.7575278282165527, "logits/rejected": -1.7493776082992554, "logps/chosen": -0.3787495195865631, "logps/rejected": -5.060299873352051, "loss": 0.2629, "nll_loss": 0.26606082916259766, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.07574990391731262, "rewards/margins": 0.9363101124763489, "rewards/rejected": -1.0120599269866943, "step": 2990 }, { "epoch": 2.14, "grad_norm": 5.9375, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 4.69273567199707, "log_odds_ratio": -0.05490034073591232, "logits/chosen": -2.8089425563812256, "logits/rejected": -2.2398369312286377, "logps/chosen": -0.30732420086860657, "logps/rejected": -3.626305103302002, "loss": 0.2486, "nll_loss": 0.2282293289899826, "rewards/accuracies": 1.0, "rewards/chosen": -0.061464838683605194, "rewards/margins": 0.663796067237854, "rewards/rejected": -0.7252610325813293, "step": 3000 }, { "epoch": 2.15, "grad_norm": 12.6875, "learning_rate": 4.383604418942005e-06, "log_odds_chosen": 6.116213321685791, "log_odds_ratio": -0.03878642991185188, "logits/chosen": -2.6938247680664062, "logits/rejected": -1.8152358531951904, "logps/chosen": -0.35414764285087585, "logps/rejected": -5.104779243469238, "loss": 0.2722, "nll_loss": 0.252914160490036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07082952558994293, "rewards/margins": 0.9501264691352844, "rewards/rejected": -1.0209559202194214, "step": 3010 }, { "epoch": 2.16, "grad_norm": 6.125, "learning_rate": 4.381920683746824e-06, "log_odds_chosen": 5.905968189239502, "log_odds_ratio": -0.0426829531788826, "logits/chosen": -2.756247043609619, "logits/rejected": -2.1121325492858887, "logps/chosen": -0.3639851212501526, "logps/rejected": -4.953127384185791, "loss": 0.2756, "nll_loss": 0.27040037512779236, "rewards/accuracies": 1.0, "rewards/chosen": -0.0727970153093338, "rewards/margins": 0.9178284406661987, "rewards/rejected": -0.9906253814697266, "step": 3020 }, { "epoch": 2.16, "grad_norm": 5.8125, "learning_rate": 4.380238887222108e-06, "log_odds_chosen": 6.093326568603516, "log_odds_ratio": -0.04192028567194939, "logits/chosen": -2.7782235145568848, "logits/rejected": -1.9548759460449219, "logps/chosen": -0.31508156657218933, "logps/rejected": -4.972496032714844, "loss": 0.2734, "nll_loss": 0.2683493494987488, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.06301631033420563, "rewards/margins": 0.9314829707145691, "rewards/rejected": -0.9944992065429688, "step": 3030 }, { "epoch": 2.17, "grad_norm": 6.03125, "learning_rate": 4.378559025650368e-06, "log_odds_chosen": 7.272418022155762, "log_odds_ratio": -0.056223928928375244, "logits/chosen": -2.7609264850616455, "logits/rejected": -1.9744220972061157, "logps/chosen": -0.3785443902015686, "logps/rejected": -6.300936698913574, "loss": 0.2611, "nll_loss": 0.26268187165260315, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.07570887356996536, "rewards/margins": 1.184478521347046, "rewards/rejected": -1.2601875066757202, "step": 3040 }, { "epoch": 2.18, "grad_norm": 5.21875, "learning_rate": 4.376881095324086e-06, "log_odds_chosen": 8.435240745544434, "log_odds_ratio": -0.032564710825681686, "logits/chosen": -2.803131580352783, "logits/rejected": -1.676296591758728, "logps/chosen": -0.3182581067085266, "logps/rejected": -7.271533012390137, "loss": 0.2522, "nll_loss": 0.23994970321655273, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06365162134170532, "rewards/margins": 1.3906551599502563, "rewards/rejected": -1.454306721687317, "step": 3050 }, { "epoch": 2.18, "grad_norm": 7.5625, "learning_rate": 4.375205092545683e-06, "log_odds_chosen": 8.230413436889648, "log_odds_ratio": -0.05366669222712517, "logits/chosen": -2.786719560623169, "logits/rejected": -1.8391005992889404, "logps/chosen": -0.3586600422859192, "logps/rejected": -7.209918022155762, "loss": 0.2779, "nll_loss": 0.2507212460041046, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07173201441764832, "rewards/margins": 1.3702516555786133, "rewards/rejected": -1.4419835805892944, "step": 3060 }, { "epoch": 2.19, "grad_norm": 5.125, "learning_rate": 4.373531013627483e-06, "log_odds_chosen": 8.445428848266602, "log_odds_ratio": -0.03792678564786911, "logits/chosen": -2.7394258975982666, "logits/rejected": -1.4675519466400146, "logps/chosen": -0.3647417426109314, "logps/rejected": -7.469712734222412, "loss": 0.2809, "nll_loss": 0.26669415831565857, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07294835150241852, "rewards/margins": 1.4209941625595093, "rewards/rejected": -1.4939426183700562, "step": 3070 }, { "epoch": 2.2, "grad_norm": 170.0, "learning_rate": 4.371858854891681e-06, "log_odds_chosen": 6.993101596832275, "log_odds_ratio": -0.06284736096858978, "logits/chosen": -2.7770023345947266, "logits/rejected": -1.9066228866577148, "logps/chosen": -0.36158767342567444, "logps/rejected": -6.016913414001465, "loss": 0.2731, "nll_loss": 0.25095489621162415, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07231752574443817, "rewards/margins": 1.1310651302337646, "rewards/rejected": -1.2033826112747192, "step": 3080 }, { "epoch": 2.21, "grad_norm": 20.375, "learning_rate": 4.370188612670307e-06, "log_odds_chosen": 6.399539947509766, "log_odds_ratio": -0.05658464506268501, "logits/chosen": -2.7655324935913086, "logits/rejected": -1.907060980796814, "logps/chosen": -0.3928866684436798, "logps/rejected": -5.48469877243042, "loss": 0.2814, "nll_loss": 0.2612989544868469, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07857732474803925, "rewards/margins": 1.0183625221252441, "rewards/rejected": -1.0969398021697998, "step": 3090 }, { "epoch": 2.21, "grad_norm": 10.8125, "learning_rate": 4.36852028330519e-06, "log_odds_chosen": 6.503146171569824, "log_odds_ratio": -0.04645160958170891, "logits/chosen": -2.803072929382324, "logits/rejected": -1.9909429550170898, "logps/chosen": -0.38660162687301636, "logps/rejected": -5.571995735168457, "loss": 0.2914, "nll_loss": 0.2930335998535156, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07732032984495163, "rewards/margins": 1.0370789766311646, "rewards/rejected": -1.1143993139266968, "step": 3100 }, { "epoch": 2.22, "grad_norm": 7.46875, "learning_rate": 4.3668538631479314e-06, "log_odds_chosen": 6.382928848266602, "log_odds_ratio": -0.04722817987203598, "logits/chosen": -2.793405055999756, "logits/rejected": -1.897592544555664, "logps/chosen": -0.36716800928115845, "logps/rejected": -5.360346794128418, "loss": 0.2719, "nll_loss": 0.2766880989074707, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07343360781669617, "rewards/margins": 0.9986356496810913, "rewards/rejected": -1.0720694065093994, "step": 3110 }, { "epoch": 2.23, "grad_norm": 7.53125, "learning_rate": 4.365189348559864e-06, "log_odds_chosen": 7.294475555419922, "log_odds_ratio": -0.04280845448374748, "logits/chosen": -2.794233560562134, "logits/rejected": -1.9600608348846436, "logps/chosen": -0.3509625494480133, "logps/rejected": -6.329296112060547, "loss": 0.26, "nll_loss": 0.24876642227172852, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07019250839948654, "rewards/margins": 1.1956669092178345, "rewards/rejected": -1.2658593654632568, "step": 3120 }, { "epoch": 2.23, "grad_norm": 105.0, "learning_rate": 4.363526735912025e-06, "log_odds_chosen": 8.40985107421875, "log_odds_ratio": -0.03858170285820961, "logits/chosen": -2.7838852405548096, "logits/rejected": -1.66619873046875, "logps/chosen": -0.39186400175094604, "logps/rejected": -7.510270118713379, "loss": 0.2711, "nll_loss": 0.27240419387817383, "rewards/accuracies": 1.0, "rewards/chosen": -0.07837279886007309, "rewards/margins": 1.423681378364563, "rewards/rejected": -1.5020540952682495, "step": 3130 }, { "epoch": 2.24, "grad_norm": 48.25, "learning_rate": 4.361866021585114e-06, "log_odds_chosen": 8.032949447631836, "log_odds_ratio": -0.03759379684925079, "logits/chosen": -2.8125336170196533, "logits/rejected": -1.9667034149169922, "logps/chosen": -0.3394584059715271, "logps/rejected": -6.949484348297119, "loss": 0.277, "nll_loss": 0.2678987979888916, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0678916722536087, "rewards/margins": 1.322005271911621, "rewards/rejected": -1.3898969888687134, "step": 3140 }, { "epoch": 2.25, "grad_norm": 9.625, "learning_rate": 4.360207201969474e-06, "log_odds_chosen": 6.880602836608887, "log_odds_ratio": -0.040961816906929016, "logits/chosen": -2.7715115547180176, "logits/rejected": -2.049297571182251, "logps/chosen": -0.34252291917800903, "logps/rejected": -5.822251319885254, "loss": 0.2519, "nll_loss": 0.23448018729686737, "rewards/accuracies": 1.0, "rewards/chosen": -0.06850457936525345, "rewards/margins": 1.0959457159042358, "rewards/rejected": -1.1644504070281982, "step": 3150 }, { "epoch": 2.26, "grad_norm": 120.0, "learning_rate": 4.358550273465042e-06, "log_odds_chosen": 7.489804267883301, "log_odds_ratio": -0.030518781393766403, "logits/chosen": -2.8056952953338623, "logits/rejected": -1.9100799560546875, "logps/chosen": -0.3443801999092102, "logps/rejected": -6.379244804382324, "loss": 0.267, "nll_loss": 0.25786659121513367, "rewards/accuracies": 1.0, "rewards/chosen": -0.06887603551149368, "rewards/margins": 1.2069729566574097, "rewards/rejected": -1.2758489847183228, "step": 3160 }, { "epoch": 2.26, "grad_norm": 6.875, "learning_rate": 4.356895232481328e-06, "log_odds_chosen": 8.904826164245605, "log_odds_ratio": -0.02953409031033516, "logits/chosen": -2.8109521865844727, "logits/rejected": -1.7148170471191406, "logps/chosen": -0.32581400871276855, "logps/rejected": -7.794666290283203, "loss": 0.2778, "nll_loss": 0.2546003758907318, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.065162792801857, "rewards/margins": 1.4937704801559448, "rewards/rejected": -1.5589332580566406, "step": 3170 }, { "epoch": 2.27, "grad_norm": 64.5, "learning_rate": 4.3552420754373795e-06, "log_odds_chosen": 9.826112747192383, "log_odds_ratio": -0.03542652726173401, "logits/chosen": -2.755399465560913, "logits/rejected": -1.6918747425079346, "logps/chosen": -0.37103694677352905, "logps/rejected": -8.79539680480957, "loss": 0.2858, "nll_loss": 0.27970394492149353, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07420738786458969, "rewards/margins": 1.6848719120025635, "rewards/rejected": -1.7590793371200562, "step": 3180 }, { "epoch": 2.28, "grad_norm": 71.0, "learning_rate": 4.353590798761745e-06, "log_odds_chosen": 7.645397186279297, "log_odds_ratio": -0.041773177683353424, "logits/chosen": -2.7663168907165527, "logits/rejected": -1.89877188205719, "logps/chosen": -0.3466086685657501, "logps/rejected": -6.663360595703125, "loss": 0.2852, "nll_loss": 0.2754463255405426, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06932173669338226, "rewards/margins": 1.263350248336792, "rewards/rejected": -1.3326722383499146, "step": 3190 }, { "epoch": 2.28, "grad_norm": 8.0, "learning_rate": 4.351941398892446e-06, "log_odds_chosen": 10.17717170715332, "log_odds_ratio": -0.02503364160656929, "logits/chosen": -2.7787466049194336, "logits/rejected": -1.7833493947982788, "logps/chosen": -0.38776397705078125, "logps/rejected": -9.250219345092773, "loss": 0.291, "nll_loss": 0.3181122839450836, "rewards/accuracies": 1.0, "rewards/chosen": -0.07755279541015625, "rewards/margins": 1.7724910974502563, "rewards/rejected": -1.8500440120697021, "step": 3200 }, { "epoch": 2.29, "grad_norm": 4.6875, "learning_rate": 4.350293872276944e-06, "log_odds_chosen": 8.712306022644043, "log_odds_ratio": -0.028896396979689598, "logits/chosen": -2.7661192417144775, "logits/rejected": -1.6439266204833984, "logps/chosen": -0.3586076498031616, "logps/rejected": -7.733296871185303, "loss": 0.2939, "nll_loss": 0.28883105516433716, "rewards/accuracies": 1.0, "rewards/chosen": -0.07172153890132904, "rewards/margins": 1.474937915802002, "rewards/rejected": -1.5466594696044922, "step": 3210 }, { "epoch": 2.3, "grad_norm": 14.3125, "learning_rate": 4.348648215372106e-06, "log_odds_chosen": 9.273715019226074, "log_odds_ratio": -0.025510499253869057, "logits/chosen": -2.7492473125457764, "logits/rejected": -1.7804412841796875, "logps/chosen": -0.3289060592651367, "logps/rejected": -8.170428276062012, "loss": 0.287, "nll_loss": 0.2702774405479431, "rewards/accuracies": 1.0, "rewards/chosen": -0.06578120589256287, "rewards/margins": 1.568304419517517, "rewards/rejected": -1.6340856552124023, "step": 3220 }, { "epoch": 2.31, "grad_norm": 5.53125, "learning_rate": 4.347004424644176e-06, "log_odds_chosen": 9.255958557128906, "log_odds_ratio": -0.04089302942156792, "logits/chosen": -2.817354202270508, "logits/rejected": -1.8950843811035156, "logps/chosen": -0.32880938053131104, "logps/rejected": -8.185022354125977, "loss": 0.2918, "nll_loss": 0.2752782106399536, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06576187163591385, "rewards/margins": 1.5712426900863647, "rewards/rejected": -1.6370046138763428, "step": 3230 }, { "epoch": 2.31, "grad_norm": 6.40625, "learning_rate": 4.34536249656874e-06, "log_odds_chosen": 8.917261123657227, "log_odds_ratio": -0.04923417791724205, "logits/chosen": -2.776374101638794, "logits/rejected": -1.8802967071533203, "logps/chosen": -0.42338424921035767, "logps/rejected": -7.919134616851807, "loss": 0.2844, "nll_loss": 0.29589223861694336, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0846768468618393, "rewards/margins": 1.499150037765503, "rewards/rejected": -1.583827018737793, "step": 3240 }, { "epoch": 2.32, "grad_norm": 6.5, "learning_rate": 4.3437224276306945e-06, "log_odds_chosen": 7.567285060882568, "log_odds_ratio": -0.028490770608186722, "logits/chosen": -2.786663770675659, "logits/rejected": -1.9782333374023438, "logps/chosen": -0.33940792083740234, "logps/rejected": -6.447556495666504, "loss": 0.3091, "nll_loss": 0.3036688566207886, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06788158416748047, "rewards/margins": 1.2216296195983887, "rewards/rejected": -1.2895113229751587, "step": 3250 }, { "epoch": 2.33, "grad_norm": 17.875, "learning_rate": 4.342084214324218e-06, "log_odds_chosen": 7.1914873123168945, "log_odds_ratio": -0.0445329025387764, "logits/chosen": -2.756364345550537, "logits/rejected": -2.037761926651001, "logps/chosen": -0.3597845435142517, "logps/rejected": -6.1982855796813965, "loss": 0.3067, "nll_loss": 0.2898246645927429, "rewards/accuracies": 1.0, "rewards/chosen": -0.07195691019296646, "rewards/margins": 1.1677000522613525, "rewards/rejected": -1.2396571636199951, "step": 3260 }, { "epoch": 2.33, "grad_norm": 13.0625, "learning_rate": 4.340447853152738e-06, "log_odds_chosen": 8.178131103515625, "log_odds_ratio": -0.05221225693821907, "logits/chosen": -2.783968925476074, "logits/rejected": -1.8930259943008423, "logps/chosen": -0.36348551511764526, "logps/rejected": -7.173810005187988, "loss": 0.2735, "nll_loss": 0.258183091878891, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07269710302352905, "rewards/margins": 1.3620648384094238, "rewards/rejected": -1.434761881828308, "step": 3270 }, { "epoch": 2.34, "grad_norm": 5.71875, "learning_rate": 4.338813340628896e-06, "log_odds_chosen": 9.553061485290527, "log_odds_ratio": -0.037361737340688705, "logits/chosen": -2.797703266143799, "logits/rejected": -1.5537981986999512, "logps/chosen": -0.33459705114364624, "logps/rejected": -8.510927200317383, "loss": 0.2714, "nll_loss": 0.27574431896209717, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06691941618919373, "rewards/margins": 1.6352659463882446, "rewards/rejected": -1.7021853923797607, "step": 3280 }, { "epoch": 2.35, "grad_norm": 12.9375, "learning_rate": 4.337180673274523e-06, "log_odds_chosen": 8.518575668334961, "log_odds_ratio": -0.04755989462137222, "logits/chosen": -2.8275094032287598, "logits/rejected": -1.775490403175354, "logps/chosen": -0.33448418974876404, "logps/rejected": -7.48303747177124, "loss": 0.2834, "nll_loss": 0.27971869707107544, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06689684092998505, "rewards/margins": 1.4297107458114624, "rewards/rejected": -1.4966075420379639, "step": 3290 }, { "epoch": 2.36, "grad_norm": 18.625, "learning_rate": 4.3355498476206e-06, "log_odds_chosen": 9.549077033996582, "log_odds_ratio": -0.0418349914252758, "logits/chosen": -2.8091912269592285, "logits/rejected": -1.7388540506362915, "logps/chosen": -0.3228936493396759, "logps/rejected": -8.422323226928711, "loss": 0.2922, "nll_loss": 0.28200000524520874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06457872688770294, "rewards/margins": 1.6198861598968506, "rewards/rejected": -1.6844650506973267, "step": 3300 }, { "epoch": 2.36, "grad_norm": 5.375, "learning_rate": 4.333920860207238e-06, "log_odds_chosen": 9.765109062194824, "log_odds_ratio": -0.03472736105322838, "logits/chosen": -2.820134162902832, "logits/rejected": -1.793215036392212, "logps/chosen": -0.4387420117855072, "logps/rejected": -8.810163497924805, "loss": 0.2826, "nll_loss": 0.2727591395378113, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08774841576814651, "rewards/margins": 1.6742843389511108, "rewards/rejected": -1.7620328664779663, "step": 3310 }, { "epoch": 2.37, "grad_norm": 9.0625, "learning_rate": 4.332293707583636e-06, "log_odds_chosen": 9.640665054321289, "log_odds_ratio": -0.03734064847230911, "logits/chosen": -2.774531841278076, "logits/rejected": -1.7828537225723267, "logps/chosen": -0.41496315598487854, "logps/rejected": -8.623726844787598, "loss": 0.3401, "nll_loss": 0.3533519506454468, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08299262821674347, "rewards/margins": 1.6417526006698608, "rewards/rejected": -1.7247453927993774, "step": 3320 }, { "epoch": 2.38, "grad_norm": 48.75, "learning_rate": 4.330668386308059e-06, "log_odds_chosen": 8.390340805053711, "log_odds_ratio": -0.03434724360704422, "logits/chosen": -2.824404716491699, "logits/rejected": -1.6728851795196533, "logps/chosen": -0.36389368772506714, "logps/rejected": -7.420858860015869, "loss": 0.2978, "nll_loss": 0.28047677874565125, "rewards/accuracies": 1.0, "rewards/chosen": -0.07277874648571014, "rewards/margins": 1.4113929271697998, "rewards/rejected": -1.4841716289520264, "step": 3330 }, { "epoch": 2.38, "grad_norm": 27.25, "learning_rate": 4.329044892947799e-06, "log_odds_chosen": 7.926604270935059, "log_odds_ratio": -0.04124735668301582, "logits/chosen": -2.8356728553771973, "logits/rejected": -1.8978474140167236, "logps/chosen": -0.3497302830219269, "logps/rejected": -6.923464775085449, "loss": 0.258, "nll_loss": 0.24766549468040466, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06994606554508209, "rewards/margins": 1.3147468566894531, "rewards/rejected": -1.384692907333374, "step": 3340 }, { "epoch": 2.39, "grad_norm": 56.0, "learning_rate": 4.327423224079155e-06, "log_odds_chosen": 6.804099082946777, "log_odds_ratio": -0.05955817550420761, "logits/chosen": -2.8271944522857666, "logits/rejected": -2.0780460834503174, "logps/chosen": -0.3145996928215027, "logps/rejected": -5.751158237457275, "loss": 0.2661, "nll_loss": 0.25528645515441895, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06291993707418442, "rewards/margins": 1.087311863899231, "rewards/rejected": -1.1502315998077393, "step": 3350 }, { "epoch": 2.4, "grad_norm": 21.5, "learning_rate": 4.325803376287392e-06, "log_odds_chosen": 8.253396987915039, "log_odds_ratio": -0.0472552627325058, "logits/chosen": -2.797362804412842, "logits/rejected": -1.7111046314239502, "logps/chosen": -0.35209912061691284, "logps/rejected": -7.188366889953613, "loss": 0.2921, "nll_loss": 0.3038533329963684, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07041982561349869, "rewards/margins": 1.3672535419464111, "rewards/rejected": -1.4376734495162964, "step": 3360 }, { "epoch": 2.41, "grad_norm": 8.6875, "learning_rate": 4.32418534616672e-06, "log_odds_chosen": 10.038507461547852, "log_odds_ratio": -0.025366192683577538, "logits/chosen": -2.804220676422119, "logits/rejected": -1.6713545322418213, "logps/chosen": -0.35934942960739136, "logps/rejected": -9.00309944152832, "loss": 0.2658, "nll_loss": 0.26834237575531006, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0718698799610138, "rewards/margins": 1.728750228881836, "rewards/rejected": -1.8006200790405273, "step": 3370 }, { "epoch": 2.41, "grad_norm": 6.0, "learning_rate": 4.322569130320256e-06, "log_odds_chosen": 9.466753005981445, "log_odds_ratio": -0.03348467871546745, "logits/chosen": -2.8029136657714844, "logits/rejected": -1.865025520324707, "logps/chosen": -0.38565486669540405, "logps/rejected": -8.445741653442383, "loss": 0.2768, "nll_loss": 0.2976570129394531, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07713097333908081, "rewards/margins": 1.6120173931121826, "rewards/rejected": -1.6891483068466187, "step": 3380 }, { "epoch": 2.42, "grad_norm": 13.5, "learning_rate": 4.320954725359999e-06, "log_odds_chosen": 9.79299259185791, "log_odds_ratio": -0.027992457151412964, "logits/chosen": -2.841531753540039, "logits/rejected": -1.9643710851669312, "logps/chosen": -0.35286644101142883, "logps/rejected": -8.675617218017578, "loss": 0.2939, "nll_loss": 0.3023623824119568, "rewards/accuracies": 1.0, "rewards/chosen": -0.07057328522205353, "rewards/margins": 1.6645504236221313, "rewards/rejected": -1.735123634338379, "step": 3390 }, { "epoch": 2.43, "grad_norm": 41.25, "learning_rate": 4.319342127906801e-06, "log_odds_chosen": 9.294361114501953, "log_odds_ratio": -0.045621536672115326, "logits/chosen": -2.8277316093444824, "logits/rejected": -1.8078911304473877, "logps/chosen": -0.36391180753707886, "logps/rejected": -8.31618595123291, "loss": 0.2618, "nll_loss": 0.25773972272872925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07278236001729965, "rewards/margins": 1.5904548168182373, "rewards/rejected": -1.66323721408844, "step": 3400 }, { "epoch": 2.43, "grad_norm": 5.65625, "learning_rate": 4.317731334590332e-06, "log_odds_chosen": 9.953015327453613, "log_odds_ratio": -0.04586916044354439, "logits/chosen": -2.7547903060913086, "logits/rejected": -1.697317361831665, "logps/chosen": -0.3400295674800873, "logps/rejected": -8.843751907348633, "loss": 0.2692, "nll_loss": 0.24716739356517792, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.06800590455532074, "rewards/margins": 1.700744390487671, "rewards/rejected": -1.7687504291534424, "step": 3410 }, { "epoch": 2.44, "grad_norm": 6.78125, "learning_rate": 4.316122342049056e-06, "log_odds_chosen": 11.804905891418457, "log_odds_ratio": -0.023801427334547043, "logits/chosen": -2.755643844604492, "logits/rejected": -1.4954235553741455, "logps/chosen": -0.4441162943840027, "logps/rejected": -10.793535232543945, "loss": 0.2901, "nll_loss": 0.3134029507637024, "rewards/accuracies": 1.0, "rewards/chosen": -0.08882326632738113, "rewards/margins": 2.0698838233947754, "rewards/rejected": -2.1587071418762207, "step": 3420 }, { "epoch": 2.45, "grad_norm": 7.03125, "learning_rate": 4.314515146930197e-06, "log_odds_chosen": 9.51766586303711, "log_odds_ratio": -0.029458215460181236, "logits/chosen": -2.814960241317749, "logits/rejected": -1.9240309000015259, "logps/chosen": -0.41664624214172363, "logps/rejected": -8.569226264953613, "loss": 0.2687, "nll_loss": 0.25920039415359497, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08332924544811249, "rewards/margins": 1.6305160522460938, "rewards/rejected": -1.7138452529907227, "step": 3430 }, { "epoch": 2.46, "grad_norm": 5.875, "learning_rate": 4.312909745889715e-06, "log_odds_chosen": 7.934914588928223, "log_odds_ratio": -0.033872656524181366, "logits/chosen": -2.7913076877593994, "logits/rejected": -1.9354534149169922, "logps/chosen": -0.411059707403183, "logps/rejected": -7.0791335105896, "loss": 0.2602, "nll_loss": 0.24955859780311584, "rewards/accuracies": 1.0, "rewards/chosen": -0.082211934030056, "rewards/margins": 1.3336145877838135, "rewards/rejected": -1.4158265590667725, "step": 3440 }, { "epoch": 2.46, "grad_norm": 37.75, "learning_rate": 4.311306135592269e-06, "log_odds_chosen": 8.438997268676758, "log_odds_ratio": -0.04086356610059738, "logits/chosen": -2.8126413822174072, "logits/rejected": -1.8445075750350952, "logps/chosen": -0.3259912133216858, "logps/rejected": -7.326315879821777, "loss": 0.2547, "nll_loss": 0.24355188012123108, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06519824266433716, "rewards/margins": 1.4000650644302368, "rewards/rejected": -1.4652631282806396, "step": 3450 }, { "epoch": 2.47, "grad_norm": 11.5625, "learning_rate": 4.309704312711197e-06, "log_odds_chosen": 7.0475311279296875, "log_odds_ratio": -0.04972859099507332, "logits/chosen": -2.7898261547088623, "logits/rejected": -2.0793161392211914, "logps/chosen": -0.3466077148914337, "logps/rejected": -5.998042106628418, "loss": 0.2998, "nll_loss": 0.27084097266197205, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06932154297828674, "rewards/margins": 1.1302868127822876, "rewards/rejected": -1.199608564376831, "step": 3460 }, { "epoch": 2.48, "grad_norm": 25.0, "learning_rate": 4.3081042739284794e-06, "log_odds_chosen": 9.243393898010254, "log_odds_ratio": -0.03272037208080292, "logits/chosen": -2.832883834838867, "logits/rejected": -1.8257993459701538, "logps/chosen": -0.3495597839355469, "logps/rejected": -8.209525108337402, "loss": 0.264, "nll_loss": 0.24593329429626465, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06991195678710938, "rewards/margins": 1.5719932317733765, "rewards/rejected": -1.6419051885604858, "step": 3470 }, { "epoch": 2.48, "grad_norm": 11.125, "learning_rate": 4.306506015934716e-06, "log_odds_chosen": 8.312199592590332, "log_odds_ratio": -0.024369016289711, "logits/chosen": -2.8706321716308594, "logits/rejected": -1.9261465072631836, "logps/chosen": -0.3649846911430359, "logps/rejected": -7.237170219421387, "loss": 0.2681, "nll_loss": 0.25165751576423645, "rewards/accuracies": 1.0, "rewards/chosen": -0.07299693673849106, "rewards/margins": 1.3744370937347412, "rewards/rejected": -1.4474341869354248, "step": 3480 }, { "epoch": 2.49, "grad_norm": 58.5, "learning_rate": 4.304909535429091e-06, "log_odds_chosen": 10.501726150512695, "log_odds_ratio": -0.029145091772079468, "logits/chosen": -2.8260130882263184, "logits/rejected": -1.6204798221588135, "logps/chosen": -0.3936752378940582, "logps/rejected": -9.544812202453613, "loss": 0.2738, "nll_loss": 0.2682205140590668, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07873504608869553, "rewards/margins": 1.8302274942398071, "rewards/rejected": -1.9089622497558594, "step": 3490 }, { "epoch": 2.5, "grad_norm": 15.9375, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 10.030715942382812, "log_odds_ratio": -0.04658619314432144, "logits/chosen": -2.796156406402588, "logits/rejected": -1.8100440502166748, "logps/chosen": -0.3424500823020935, "logps/rejected": -8.992310523986816, "loss": 0.2764, "nll_loss": 0.24179503321647644, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06849001348018646, "rewards/margins": 1.7299721240997314, "rewards/rejected": -1.798462152481079, "step": 3500 }, { "epoch": 2.51, "grad_norm": 122.5, "learning_rate": 4.301721893721773e-06, "log_odds_chosen": 10.208802223205566, "log_odds_ratio": -0.04213592782616615, "logits/chosen": -2.797102212905884, "logits/rejected": -1.4663350582122803, "logps/chosen": -0.3950093686580658, "logps/rejected": -9.20128345489502, "loss": 0.301, "nll_loss": 0.3496857285499573, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07900188118219376, "rewards/margins": 1.7612546682357788, "rewards/rejected": -1.840256690979004, "step": 3510 }, { "epoch": 2.51, "grad_norm": 10.25, "learning_rate": 4.300130725961134e-06, "log_odds_chosen": 6.270288467407227, "log_odds_ratio": -0.05212852358818054, "logits/chosen": -2.8619461059570312, "logits/rejected": -2.255450487136841, "logps/chosen": -0.36604180932044983, "logps/rejected": -5.2709221839904785, "loss": 0.265, "nll_loss": 0.26578956842422485, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07320836186408997, "rewards/margins": 0.9809761047363281, "rewards/rejected": -1.0541845560073853, "step": 3520 }, { "epoch": 2.52, "grad_norm": 10.125, "learning_rate": 4.298541322570686e-06, "log_odds_chosen": 6.3298749923706055, "log_odds_ratio": -0.03688116371631622, "logits/chosen": -2.834110736846924, "logits/rejected": -2.0629196166992188, "logps/chosen": -0.3773978352546692, "logps/rejected": -5.346269130706787, "loss": 0.247, "nll_loss": 0.24589797854423523, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07547955214977264, "rewards/margins": 0.9937742948532104, "rewards/rejected": -1.0692538022994995, "step": 3530 }, { "epoch": 2.53, "grad_norm": 136.0, "learning_rate": 4.296953680292129e-06, "log_odds_chosen": 7.077627658843994, "log_odds_ratio": -0.06007321551442146, "logits/chosen": -2.754781484603882, "logits/rejected": -1.8707349300384521, "logps/chosen": -0.41770386695861816, "logps/rejected": -6.089178085327148, "loss": 0.3063, "nll_loss": 0.3370429575443268, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08354077488183975, "rewards/margins": 1.134294867515564, "rewards/rejected": -1.2178356647491455, "step": 3540 }, { "epoch": 2.53, "grad_norm": 7.1875, "learning_rate": 4.295367795875578e-06, "log_odds_chosen": 4.167150974273682, "log_odds_ratio": -0.06860674917697906, "logits/chosen": -2.8772263526916504, "logits/rejected": -2.3703644275665283, "logps/chosen": -0.3609137237071991, "logps/rejected": -3.2382397651672363, "loss": 0.2878, "nll_loss": 0.3157772421836853, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07218273729085922, "rewards/margins": 0.575465202331543, "rewards/rejected": -0.6476479172706604, "step": 3550 }, { "epoch": 2.54, "grad_norm": 7.9375, "learning_rate": 4.293783666079539e-06, "log_odds_chosen": 6.502009391784668, "log_odds_ratio": -0.03050677478313446, "logits/chosen": -2.6957783699035645, "logits/rejected": -1.993639349937439, "logps/chosen": -0.31069567799568176, "logps/rejected": -5.301680088043213, "loss": 0.2675, "nll_loss": 0.2550848424434662, "rewards/accuracies": 1.0, "rewards/chosen": -0.06213913485407829, "rewards/margins": 0.9981969594955444, "rewards/rejected": -1.0603359937667847, "step": 3560 }, { "epoch": 2.55, "grad_norm": 8.875, "learning_rate": 4.292201287670881e-06, "log_odds_chosen": 7.6693291664123535, "log_odds_ratio": -0.04327741265296936, "logits/chosen": -2.78936505317688, "logits/rejected": -1.8133246898651123, "logps/chosen": -0.42709359526634216, "logps/rejected": -6.817948818206787, "loss": 0.3028, "nll_loss": 0.2974828779697418, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0854187160730362, "rewards/margins": 1.2781710624694824, "rewards/rejected": -1.363589882850647, "step": 3570 }, { "epoch": 2.56, "grad_norm": 7.90625, "learning_rate": 4.2906206574248056e-06, "log_odds_chosen": 7.7335991859436035, "log_odds_ratio": -0.03188318759202957, "logits/chosen": -2.823167085647583, "logits/rejected": -1.8172123432159424, "logps/chosen": -0.39690691232681274, "logps/rejected": -6.871090888977051, "loss": 0.2829, "nll_loss": 0.26852673292160034, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07938139140605927, "rewards/margins": 1.2948367595672607, "rewards/rejected": -1.3742179870605469, "step": 3580 }, { "epoch": 2.56, "grad_norm": 8.375, "learning_rate": 4.289041772124823e-06, "log_odds_chosen": 7.018429756164551, "log_odds_ratio": -0.04488766938447952, "logits/chosen": -2.8090367317199707, "logits/rejected": -2.016038417816162, "logps/chosen": -0.4279165267944336, "logps/rejected": -6.177850246429443, "loss": 0.2934, "nll_loss": 0.27853289246559143, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.08558329194784164, "rewards/margins": 1.1499868631362915, "rewards/rejected": -1.235569953918457, "step": 3590 }, { "epoch": 2.57, "grad_norm": 7.84375, "learning_rate": 4.2874646285627205e-06, "log_odds_chosen": 7.609469413757324, "log_odds_ratio": -0.029052983969449997, "logits/chosen": -2.792881727218628, "logits/rejected": -1.8695917129516602, "logps/chosen": -0.35575172305107117, "logps/rejected": -6.599902153015137, "loss": 0.2679, "nll_loss": 0.26382261514663696, "rewards/accuracies": 1.0, "rewards/chosen": -0.07115034759044647, "rewards/margins": 1.2488303184509277, "rewards/rejected": -1.3199807405471802, "step": 3600 }, { "epoch": 2.58, "grad_norm": 11.0625, "learning_rate": 4.2858892235385405e-06, "log_odds_chosen": 9.122172355651855, "log_odds_ratio": -0.02934536337852478, "logits/chosen": -2.7800889015197754, "logits/rejected": -1.775040626525879, "logps/chosen": -0.40309590101242065, "logps/rejected": -8.151556968688965, "loss": 0.2977, "nll_loss": 0.28484228253364563, "rewards/accuracies": 1.0, "rewards/chosen": -0.08061918616294861, "rewards/margins": 1.5496922731399536, "rewards/rejected": -1.630311369895935, "step": 3610 }, { "epoch": 2.58, "grad_norm": 5.03125, "learning_rate": 4.2843155538605454e-06, "log_odds_chosen": 9.74396800994873, "log_odds_ratio": -0.03267233446240425, "logits/chosen": -2.8342878818511963, "logits/rejected": -1.560414433479309, "logps/chosen": -0.358869731426239, "logps/rejected": -8.761539459228516, "loss": 0.286, "nll_loss": 0.2713310122489929, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.071773961186409, "rewards/margins": 1.680533766746521, "rewards/rejected": -1.7523078918457031, "step": 3620 }, { "epoch": 2.59, "grad_norm": 8.3125, "learning_rate": 4.2827436163452e-06, "log_odds_chosen": 8.96192741394043, "log_odds_ratio": -0.025754928588867188, "logits/chosen": -2.807020664215088, "logits/rejected": -1.7215229272842407, "logps/chosen": -0.37024766206741333, "logps/rejected": -7.972418785095215, "loss": 0.2918, "nll_loss": 0.2985715866088867, "rewards/accuracies": 1.0, "rewards/chosen": -0.07404953241348267, "rewards/margins": 1.5204341411590576, "rewards/rejected": -1.5944838523864746, "step": 3630 }, { "epoch": 2.6, "grad_norm": 27.375, "learning_rate": 4.2811734078171365e-06, "log_odds_chosen": 9.322843551635742, "log_odds_ratio": -0.02989858388900757, "logits/chosen": -2.800328016281128, "logits/rejected": -1.6582973003387451, "logps/chosen": -0.33669382333755493, "logps/rejected": -8.228538513183594, "loss": 0.2924, "nll_loss": 0.294689804315567, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06733877211809158, "rewards/margins": 1.578368902206421, "rewards/rejected": -1.6457077264785767, "step": 3640 }, { "epoch": 2.61, "grad_norm": 83.5, "learning_rate": 4.27960492510913e-06, "log_odds_chosen": 10.60124397277832, "log_odds_ratio": -0.03527144342660904, "logits/chosen": -2.7842440605163574, "logits/rejected": -1.780975103378296, "logps/chosen": -0.35910865664482117, "logps/rejected": -9.61752986907959, "loss": 0.2886, "nll_loss": 0.26610469818115234, "rewards/accuracies": 1.0, "rewards/chosen": -0.07182172685861588, "rewards/margins": 1.851684331893921, "rewards/rejected": -1.9235061407089233, "step": 3650 }, { "epoch": 2.61, "grad_norm": 11.6875, "learning_rate": 4.278038165062074e-06, "log_odds_chosen": 9.46485424041748, "log_odds_ratio": -0.030444592237472534, "logits/chosen": -2.7923178672790527, "logits/rejected": -1.7434918880462646, "logps/chosen": -0.36185431480407715, "logps/rejected": -8.415678977966309, "loss": 0.2739, "nll_loss": 0.2679918110370636, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07237086445093155, "rewards/margins": 1.6107648611068726, "rewards/rejected": -1.6831356287002563, "step": 3660 }, { "epoch": 2.62, "grad_norm": 11.875, "learning_rate": 4.276473124524951e-06, "log_odds_chosen": 10.846599578857422, "log_odds_ratio": -0.0410008430480957, "logits/chosen": -2.814073324203491, "logits/rejected": -1.7574056386947632, "logps/chosen": -0.3752827048301697, "logps/rejected": -9.809711456298828, "loss": 0.3038, "nll_loss": 0.2877393960952759, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.0750565379858017, "rewards/margins": 1.886885643005371, "rewards/rejected": -1.9619423151016235, "step": 3670 }, { "epoch": 2.63, "grad_norm": 80.5, "learning_rate": 4.274909800354809e-06, "log_odds_chosen": 8.6845703125, "log_odds_ratio": -0.04010792821645737, "logits/chosen": -2.8353562355041504, "logits/rejected": -1.9117534160614014, "logps/chosen": -0.36866775155067444, "logps/rejected": -7.723712921142578, "loss": 0.2716, "nll_loss": 0.28985413908958435, "rewards/accuracies": 1.0, "rewards/chosen": -0.07373355329036713, "rewards/margins": 1.4710088968276978, "rewards/rejected": -1.5447423458099365, "step": 3680 }, { "epoch": 2.63, "grad_norm": 25.5, "learning_rate": 4.27334818941673e-06, "log_odds_chosen": 11.139304161071777, "log_odds_ratio": -0.02806129679083824, "logits/chosen": -2.829277515411377, "logits/rejected": -1.7117398977279663, "logps/chosen": -0.43500715494155884, "logps/rejected": -10.296022415161133, "loss": 0.298, "nll_loss": 0.30640918016433716, "rewards/accuracies": 1.0, "rewards/chosen": -0.08700142055749893, "rewards/margins": 1.972203254699707, "rewards/rejected": -2.0592048168182373, "step": 3690 }, { "epoch": 2.64, "grad_norm": 14.0625, "learning_rate": 4.271788288583805e-06, "log_odds_chosen": 9.920567512512207, "log_odds_ratio": -0.0430777445435524, "logits/chosen": -2.800262928009033, "logits/rejected": -1.6749203205108643, "logps/chosen": -0.3537217676639557, "logps/rejected": -8.953767776489258, "loss": 0.292, "nll_loss": 0.28976768255233765, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.0707443580031395, "rewards/margins": 1.720009207725525, "rewards/rejected": -1.7907536029815674, "step": 3700 }, { "epoch": 2.65, "grad_norm": 71.0, "learning_rate": 4.270230094737115e-06, "log_odds_chosen": 10.269146919250488, "log_odds_ratio": -0.03263562172651291, "logits/chosen": -2.8407130241394043, "logits/rejected": -1.8693710565567017, "logps/chosen": -0.3560786247253418, "logps/rejected": -9.248159408569336, "loss": 0.2528, "nll_loss": 0.24508798122406006, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.07121572643518448, "rewards/margins": 1.7784162759780884, "rewards/rejected": -1.8496320247650146, "step": 3710 }, { "epoch": 2.66, "grad_norm": 163.0, "learning_rate": 4.268673604765692e-06, "log_odds_chosen": 8.313056945800781, "log_odds_ratio": -0.04137984663248062, "logits/chosen": -2.8410933017730713, "logits/rejected": -1.973070502281189, "logps/chosen": -0.3569386601448059, "logps/rejected": -7.325540065765381, "loss": 0.2936, "nll_loss": 0.3143910765647888, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07138773798942566, "rewards/margins": 1.3937203884124756, "rewards/rejected": -1.4651081562042236, "step": 3720 }, { "epoch": 2.66, "grad_norm": 69.0, "learning_rate": 4.267118815566505e-06, "log_odds_chosen": 11.062416076660156, "log_odds_ratio": -0.02929757535457611, "logits/chosen": -2.8256027698516846, "logits/rejected": -1.8267700672149658, "logps/chosen": -0.34090739488601685, "logps/rejected": -10.022697448730469, "loss": 0.2669, "nll_loss": 0.2610389292240143, "rewards/accuracies": 1.0, "rewards/chosen": -0.06818147003650665, "rewards/margins": 1.936357855796814, "rewards/rejected": -2.0045394897460938, "step": 3730 }, { "epoch": 2.67, "grad_norm": 7.21875, "learning_rate": 4.265565724044426e-06, "log_odds_chosen": 9.198521614074707, "log_odds_ratio": -0.03330180048942566, "logits/chosen": -2.8640224933624268, "logits/rejected": -1.98549485206604, "logps/chosen": -0.41554588079452515, "logps/rejected": -8.330385208129883, "loss": 0.2867, "nll_loss": 0.29421791434288025, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08310917764902115, "rewards/margins": 1.58296799659729, "rewards/rejected": -1.6660770177841187, "step": 3740 }, { "epoch": 2.68, "grad_norm": 26.125, "learning_rate": 4.264014327112208e-06, "log_odds_chosen": 8.376938819885254, "log_odds_ratio": -0.03763898089528084, "logits/chosen": -2.8337912559509277, "logits/rejected": -1.9237620830535889, "logps/chosen": -0.33510148525238037, "logps/rejected": -7.281014919281006, "loss": 0.2649, "nll_loss": 0.23882155120372772, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06702030450105667, "rewards/margins": 1.389182686805725, "rewards/rejected": -1.4562032222747803, "step": 3750 }, { "epoch": 2.68, "grad_norm": 6.375, "learning_rate": 4.26246462169046e-06, "log_odds_chosen": 7.407693386077881, "log_odds_ratio": -0.0290813185274601, "logits/chosen": -2.852565050125122, "logits/rejected": -2.0294179916381836, "logps/chosen": -0.3306184411048889, "logps/rejected": -6.291057586669922, "loss": 0.2842, "nll_loss": 0.28192299604415894, "rewards/accuracies": 1.0, "rewards/chosen": -0.06612369418144226, "rewards/margins": 1.1920878887176514, "rewards/rejected": -1.2582114934921265, "step": 3760 }, { "epoch": 2.69, "grad_norm": 27.5, "learning_rate": 4.260916604707614e-06, "log_odds_chosen": 8.492695808410645, "log_odds_ratio": -0.040566060692071915, "logits/chosen": -2.8278088569641113, "logits/rejected": -1.880413293838501, "logps/chosen": -0.40683627128601074, "logps/rejected": -7.554902076721191, "loss": 0.269, "nll_loss": 0.2897428870201111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.08136724680662155, "rewards/margins": 1.4296132326126099, "rewards/rejected": -1.5109803676605225, "step": 3770 }, { "epoch": 2.7, "grad_norm": 5.75, "learning_rate": 4.25937027309991e-06, "log_odds_chosen": 7.113955497741699, "log_odds_ratio": -0.034956224262714386, "logits/chosen": -2.863619089126587, "logits/rejected": -2.0861170291900635, "logps/chosen": -0.3817751407623291, "logps/rejected": -6.190094947814941, "loss": 0.3086, "nll_loss": 0.32031726837158203, "rewards/accuracies": 1.0, "rewards/chosen": -0.07635502517223358, "rewards/margins": 1.1616640090942383, "rewards/rejected": -1.2380189895629883, "step": 3780 }, { "epoch": 2.71, "grad_norm": 11.5625, "learning_rate": 4.257825623811364e-06, "log_odds_chosen": 6.6032867431640625, "log_odds_ratio": -0.04721903055906296, "logits/chosen": -2.870551824569702, "logits/rejected": -2.1494574546813965, "logps/chosen": -0.3325832486152649, "logps/rejected": -5.56205940246582, "loss": 0.2733, "nll_loss": 0.2754889130592346, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06651665270328522, "rewards/margins": 1.0458953380584717, "rewards/rejected": -1.1124117374420166, "step": 3790 }, { "epoch": 2.71, "grad_norm": 13.625, "learning_rate": 4.256282653793743e-06, "log_odds_chosen": 9.226226806640625, "log_odds_ratio": -0.04071826487779617, "logits/chosen": -2.8239336013793945, "logits/rejected": -1.799629807472229, "logps/chosen": -0.37041693925857544, "logps/rejected": -8.24487018585205, "loss": 0.2801, "nll_loss": 0.2625892162322998, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07408339530229568, "rewards/margins": 1.5748907327651978, "rewards/rejected": -1.648974061012268, "step": 3800 }, { "epoch": 2.72, "grad_norm": 43.0, "learning_rate": 4.254741360006543e-06, "log_odds_chosen": 7.27142858505249, "log_odds_ratio": -0.04866673797369003, "logits/chosen": -2.8658313751220703, "logits/rejected": -2.249227523803711, "logps/chosen": -0.3399750590324402, "logps/rejected": -6.300660133361816, "loss": 0.2534, "nll_loss": 0.22867396473884583, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06799499690532684, "rewards/margins": 1.1921371221542358, "rewards/rejected": -1.2601318359375, "step": 3810 }, { "epoch": 2.73, "grad_norm": 7.5625, "learning_rate": 4.25320173941696e-06, "log_odds_chosen": 8.403349876403809, "log_odds_ratio": -0.03414331004023552, "logits/chosen": -2.849536657333374, "logits/rejected": -2.041189193725586, "logps/chosen": -0.3983810842037201, "logps/rejected": -7.541253566741943, "loss": 0.277, "nll_loss": 0.2703934609889984, "rewards/accuracies": 1.0, "rewards/chosen": -0.07967622578144073, "rewards/margins": 1.428574562072754, "rewards/rejected": -1.5082508325576782, "step": 3820 }, { "epoch": 2.73, "grad_norm": 11.6875, "learning_rate": 4.251663788999866e-06, "log_odds_chosen": 8.267216682434082, "log_odds_ratio": -0.03860500827431679, "logits/chosen": -2.8517775535583496, "logits/rejected": -2.037395477294922, "logps/chosen": -0.35646042227745056, "logps/rejected": -7.292947292327881, "loss": 0.2727, "nll_loss": 0.262928307056427, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07129208743572235, "rewards/margins": 1.3872973918914795, "rewards/rejected": -1.4585894346237183, "step": 3830 }, { "epoch": 2.74, "grad_norm": 24.625, "learning_rate": 4.250127505737787e-06, "log_odds_chosen": 9.158185958862305, "log_odds_ratio": -0.02719098888337612, "logits/chosen": -2.81072735786438, "logits/rejected": -1.8471095561981201, "logps/chosen": -0.3558461368083954, "logps/rejected": -8.152399063110352, "loss": 0.2867, "nll_loss": 0.2748354375362396, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07116921991109848, "rewards/margins": 1.5593106746673584, "rewards/rejected": -1.6304798126220703, "step": 3840 }, { "epoch": 2.75, "grad_norm": 6.21875, "learning_rate": 4.2485928866208736e-06, "log_odds_chosen": 5.923229217529297, "log_odds_ratio": -0.0449279360473156, "logits/chosen": -2.8480160236358643, "logits/rejected": -2.3773159980773926, "logps/chosen": -0.3430297374725342, "logps/rejected": -4.906435012817383, "loss": 0.2786, "nll_loss": 0.26425907015800476, "rewards/accuracies": 1.0, "rewards/chosen": -0.0686059445142746, "rewards/margins": 0.9126811027526855, "rewards/rejected": -0.981286883354187, "step": 3850 }, { "epoch": 2.76, "grad_norm": 11.9375, "learning_rate": 4.247059928646881e-06, "log_odds_chosen": 9.403549194335938, "log_odds_ratio": -0.04165271669626236, "logits/chosen": -2.783756971359253, "logits/rejected": -1.7871586084365845, "logps/chosen": -0.44730639457702637, "logps/rejected": -8.516668319702148, "loss": 0.3078, "nll_loss": 0.2746645212173462, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08946128189563751, "rewards/margins": 1.6138725280761719, "rewards/rejected": -1.7033336162567139, "step": 3860 }, { "epoch": 2.76, "grad_norm": 39.25, "learning_rate": 4.245528628821135e-06, "log_odds_chosen": 8.89954662322998, "log_odds_ratio": -0.02781747281551361, "logits/chosen": -2.8360252380371094, "logits/rejected": -1.8986726999282837, "logps/chosen": -0.3417971730232239, "logps/rejected": -7.838662147521973, "loss": 0.2597, "nll_loss": 0.28470245003700256, "rewards/accuracies": 1.0, "rewards/chosen": -0.06835943460464478, "rewards/margins": 1.4993728399276733, "rewards/rejected": -1.5677324533462524, "step": 3870 }, { "epoch": 2.77, "grad_norm": 203.0, "learning_rate": 4.243998984156526e-06, "log_odds_chosen": 9.094325065612793, "log_odds_ratio": -0.033297426998615265, "logits/chosen": -2.8292903900146484, "logits/rejected": -2.0152010917663574, "logps/chosen": -0.35362544655799866, "logps/rejected": -8.119251251220703, "loss": 0.2861, "nll_loss": 0.26851797103881836, "rewards/accuracies": 1.0, "rewards/chosen": -0.07072508335113525, "rewards/margins": 1.5531253814697266, "rewards/rejected": -1.6238505840301514, "step": 3880 }, { "epoch": 2.78, "grad_norm": 7.0, "learning_rate": 4.242470991673459e-06, "log_odds_chosen": 8.757810592651367, "log_odds_ratio": -0.02989915944635868, "logits/chosen": -2.8213298320770264, "logits/rejected": -2.0070536136627197, "logps/chosen": -0.4033467173576355, "logps/rejected": -7.789436340332031, "loss": 0.2885, "nll_loss": 0.27011099457740784, "rewards/accuracies": 1.0, "rewards/chosen": -0.0806693509221077, "rewards/margins": 1.4772179126739502, "rewards/rejected": -1.5578871965408325, "step": 3890 }, { "epoch": 2.78, "grad_norm": 247.0, "learning_rate": 4.240944648399854e-06, "log_odds_chosen": 9.707173347473145, "log_odds_ratio": -0.030115026980638504, "logits/chosen": -2.812969923019409, "logits/rejected": -1.7806400060653687, "logps/chosen": -0.3756954073905945, "logps/rejected": -8.687716484069824, "loss": 0.2924, "nll_loss": 0.2944888770580292, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07513907551765442, "rewards/margins": 1.6624042987823486, "rewards/rejected": -1.7375433444976807, "step": 3900 }, { "epoch": 2.79, "grad_norm": 7.3125, "learning_rate": 4.239419951371107e-06, "log_odds_chosen": 7.855190277099609, "log_odds_ratio": -0.049606241285800934, "logits/chosen": -2.8382956981658936, "logits/rejected": -1.9690237045288086, "logps/chosen": -0.3678521513938904, "logps/rejected": -6.8809919357299805, "loss": 0.246, "nll_loss": 0.24365882575511932, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.07357043772935867, "rewards/margins": 1.3026279211044312, "rewards/rejected": -1.3761985301971436, "step": 3910 }, { "epoch": 2.8, "grad_norm": 8.0625, "learning_rate": 4.237896897630065e-06, "log_odds_chosen": 9.103668212890625, "log_odds_ratio": -0.01939046010375023, "logits/chosen": -2.8209850788116455, "logits/rejected": -1.7593700885772705, "logps/chosen": -0.3552018702030182, "logps/rejected": -8.111700057983398, "loss": 0.2691, "nll_loss": 0.26674580574035645, "rewards/accuracies": 1.0, "rewards/chosen": -0.07104037702083588, "rewards/margins": 1.5512996912002563, "rewards/rejected": -1.6223399639129639, "step": 3920 }, { "epoch": 2.81, "grad_norm": 6.625, "learning_rate": 4.2363754842270135e-06, "log_odds_chosen": 8.898262023925781, "log_odds_ratio": -0.03263109177350998, "logits/chosen": -2.8059921264648438, "logits/rejected": -1.9720518589019775, "logps/chosen": -0.3654606342315674, "logps/rejected": -7.9605865478515625, "loss": 0.2722, "nll_loss": 0.24651777744293213, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07309212535619736, "rewards/margins": 1.5190250873565674, "rewards/rejected": -1.592117190361023, "step": 3930 }, { "epoch": 2.81, "grad_norm": 10.9375, "learning_rate": 4.23485570821964e-06, "log_odds_chosen": 10.18175220489502, "log_odds_ratio": -0.029537459835410118, "logits/chosen": -2.805284023284912, "logits/rejected": -1.7588096857070923, "logps/chosen": -0.3637697100639343, "logps/rejected": -9.15390682220459, "loss": 0.2921, "nll_loss": 0.28845328092575073, "rewards/accuracies": 1.0, "rewards/chosen": -0.07275393605232239, "rewards/margins": 1.7580273151397705, "rewards/rejected": -1.8307812213897705, "step": 3940 }, { "epoch": 2.82, "grad_norm": 276.0, "learning_rate": 4.233337566673017e-06, "log_odds_chosen": 10.128081321716309, "log_odds_ratio": -0.05651165917515755, "logits/chosen": -2.7642838954925537, "logits/rejected": -1.8633878231048584, "logps/chosen": -0.4686155915260315, "logps/rejected": -9.309432029724121, "loss": 0.299, "nll_loss": 0.2807057797908783, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0937231183052063, "rewards/margins": 1.7681634426116943, "rewards/rejected": -1.8618863821029663, "step": 3950 }, { "epoch": 2.83, "grad_norm": 9.0625, "learning_rate": 4.2318210566595795e-06, "log_odds_chosen": 10.621389389038086, "log_odds_ratio": -0.01841096207499504, "logits/chosen": -2.835549831390381, "logits/rejected": -1.8261913061141968, "logps/chosen": -0.3856840133666992, "logps/rejected": -9.641094207763672, "loss": 0.2804, "nll_loss": 0.28866028785705566, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.0771368071436882, "rewards/margins": 1.851082444190979, "rewards/rejected": -1.928219199180603, "step": 3960 }, { "epoch": 2.83, "grad_norm": 50.5, "learning_rate": 4.230306175259094e-06, "log_odds_chosen": 8.465916633605957, "log_odds_ratio": -0.029557716101408005, "logits/chosen": -2.8356223106384277, "logits/rejected": -2.1212961673736572, "logps/chosen": -0.3706379532814026, "logps/rejected": -7.479179382324219, "loss": 0.2831, "nll_loss": 0.28696635365486145, "rewards/accuracies": 1.0, "rewards/chosen": -0.07412759214639664, "rewards/margins": 1.421708345413208, "rewards/rejected": -1.495835781097412, "step": 3970 }, { "epoch": 2.84, "grad_norm": 9.375, "learning_rate": 4.228792919558642e-06, "log_odds_chosen": 9.524942398071289, "log_odds_ratio": -0.03312790021300316, "logits/chosen": -2.817082405090332, "logits/rejected": -1.7293701171875, "logps/chosen": -0.35598936676979065, "logps/rejected": -8.479887008666992, "loss": 0.2787, "nll_loss": 0.28613555431365967, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07119788229465485, "rewards/margins": 1.6247799396514893, "rewards/rejected": -1.6959775686264038, "step": 3980 }, { "epoch": 2.85, "grad_norm": 8.625, "learning_rate": 4.227281286652593e-06, "log_odds_chosen": 9.673505783081055, "log_odds_ratio": -0.027351032942533493, "logits/chosen": -2.8485827445983887, "logits/rejected": -1.9277604818344116, "logps/chosen": -0.36449387669563293, "logps/rejected": -8.6371431350708, "loss": 0.2689, "nll_loss": 0.25819873809814453, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07289878278970718, "rewards/margins": 1.6545298099517822, "rewards/rejected": -1.727428674697876, "step": 3990 }, { "epoch": 2.86, "grad_norm": 11.875, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 10.454000473022461, "log_odds_ratio": -0.021729234606027603, "logits/chosen": -2.8186697959899902, "logits/rejected": -1.75783371925354, "logps/chosen": -0.4553149342536926, "logps/rejected": -9.653711318969727, "loss": 0.2844, "nll_loss": 0.27239570021629333, "rewards/accuracies": 1.0, "rewards/chosen": -0.0910629853606224, "rewards/margins": 1.8396793603897095, "rewards/rejected": -1.9307425022125244, "step": 4000 }, { "epoch": 2.86, "grad_norm": 4.3125, "learning_rate": 4.224262877637488e-06, "log_odds_chosen": 9.088384628295898, "log_odds_ratio": -0.044943638145923615, "logits/chosen": -2.779998302459717, "logits/rejected": -1.8532028198242188, "logps/chosen": -0.4339783191680908, "logps/rejected": -8.246506690979004, "loss": 0.2768, "nll_loss": 0.27718260884284973, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08679567277431488, "rewards/margins": 1.5625057220458984, "rewards/rejected": -1.649301290512085, "step": 4010 }, { "epoch": 2.87, "grad_norm": 8.125, "learning_rate": 4.2227560957534054e-06, "log_odds_chosen": 12.664664268493652, "log_odds_ratio": -0.027644852176308632, "logits/chosen": -2.8048336505889893, "logits/rejected": -1.6057932376861572, "logps/chosen": -0.3959696888923645, "logps/rejected": -11.737017631530762, "loss": 0.3086, "nll_loss": 0.2660156190395355, "rewards/accuracies": 1.0, "rewards/chosen": -0.07919393479824066, "rewards/margins": 2.26820969581604, "rewards/rejected": -2.3474037647247314, "step": 4020 }, { "epoch": 2.88, "grad_norm": 109.0, "learning_rate": 4.221250925113625e-06, "log_odds_chosen": 9.165689468383789, "log_odds_ratio": -0.07480543851852417, "logits/chosen": -2.8110146522521973, "logits/rejected": -1.914781928062439, "logps/chosen": -0.40431445837020874, "logps/rejected": -8.292974472045898, "loss": 0.2893, "nll_loss": 0.2831747233867645, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.08086288720369339, "rewards/margins": 1.5777318477630615, "rewards/rejected": -1.6585948467254639, "step": 4030 }, { "epoch": 2.88, "grad_norm": 7.65625, "learning_rate": 4.219747362848612e-06, "log_odds_chosen": 11.30323314666748, "log_odds_ratio": -0.03717976063489914, "logits/chosen": -2.7821929454803467, "logits/rejected": -1.768080472946167, "logps/chosen": -0.3442782461643219, "logps/rejected": -10.223400115966797, "loss": 0.2818, "nll_loss": 0.2807686924934387, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0688556432723999, "rewards/margins": 1.9758243560791016, "rewards/rejected": -2.044680118560791, "step": 4040 }, { "epoch": 2.89, "grad_norm": 9.0, "learning_rate": 4.2182454060959784e-06, "log_odds_chosen": 11.626094818115234, "log_odds_ratio": -0.02243855968117714, "logits/chosen": -2.83784818649292, "logits/rejected": -1.7854284048080444, "logps/chosen": -0.375316321849823, "logps/rejected": -10.655095100402832, "loss": 0.2895, "nll_loss": 0.29837626218795776, "rewards/accuracies": 1.0, "rewards/chosen": -0.07506327331066132, "rewards/margins": 2.055955648422241, "rewards/rejected": -2.131019115447998, "step": 4050 }, { "epoch": 2.9, "grad_norm": 6.25, "learning_rate": 4.216745052000467e-06, "log_odds_chosen": 9.441912651062012, "log_odds_ratio": -0.07335133850574493, "logits/chosen": -2.814119815826416, "logits/rejected": -2.0087761878967285, "logps/chosen": -0.40029245615005493, "logps/rejected": -8.542582511901855, "loss": 0.253, "nll_loss": 0.24284212291240692, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.0800584927201271, "rewards/margins": 1.6284582614898682, "rewards/rejected": -1.7085163593292236, "step": 4060 }, { "epoch": 2.91, "grad_norm": 8.5, "learning_rate": 4.21524629771392e-06, "log_odds_chosen": 11.409785270690918, "log_odds_ratio": -0.032930441200733185, "logits/chosen": -2.830761194229126, "logits/rejected": -1.863943338394165, "logps/chosen": -0.3692954480648041, "logps/rejected": -10.434585571289062, "loss": 0.288, "nll_loss": 0.2768712639808655, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07385909557342529, "rewards/margins": 2.0130581855773926, "rewards/rejected": -2.0869174003601074, "step": 4070 }, { "epoch": 2.91, "grad_norm": 5.15625, "learning_rate": 4.213749140395264e-06, "log_odds_chosen": 10.989635467529297, "log_odds_ratio": -0.0375329926609993, "logits/chosen": -2.829820394515991, "logits/rejected": -2.0580878257751465, "logps/chosen": -0.35255834460258484, "logps/rejected": -9.971551895141602, "loss": 0.2844, "nll_loss": 0.26232025027275085, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.07051166892051697, "rewards/margins": 1.9237985610961914, "rewards/rejected": -1.9943103790283203, "step": 4080 }, { "epoch": 2.92, "grad_norm": 72.5, "learning_rate": 4.2122535772104825e-06, "log_odds_chosen": 8.69615364074707, "log_odds_ratio": -0.056691087782382965, "logits/chosen": -2.840405225753784, "logits/rejected": -1.9989845752716064, "logps/chosen": -0.3394516110420227, "logps/rejected": -7.6478590965271, "loss": 0.2743, "nll_loss": 0.2390586882829666, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.06789031624794006, "rewards/margins": 1.4616813659667969, "rewards/rejected": -1.529571771621704, "step": 4090 }, { "epoch": 2.93, "grad_norm": 6.90625, "learning_rate": 4.2107596053325946e-06, "log_odds_chosen": 8.241374969482422, "log_odds_ratio": -0.04560881108045578, "logits/chosen": -2.870729446411133, "logits/rejected": -2.089569568634033, "logps/chosen": -0.3773348033428192, "logps/rejected": -7.359114170074463, "loss": 0.2822, "nll_loss": 0.29517418146133423, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.07546695321798325, "rewards/margins": 1.3963558673858643, "rewards/rejected": -1.471822738647461, "step": 4100 }, { "epoch": 2.93, "grad_norm": 5.8125, "learning_rate": 4.209267221941637e-06, "log_odds_chosen": 8.46093463897705, "log_odds_ratio": -0.050915759056806564, "logits/chosen": -2.8458335399627686, "logits/rejected": -1.9423186779022217, "logps/chosen": -0.3869299292564392, "logps/rejected": -7.5012969970703125, "loss": 0.2991, "nll_loss": 0.2835760712623596, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07738598436117172, "rewards/margins": 1.4228734970092773, "rewards/rejected": -1.5002593994140625, "step": 4110 }, { "epoch": 2.94, "grad_norm": 22.25, "learning_rate": 4.207776424224631e-06, "log_odds_chosen": 10.923059463500977, "log_odds_ratio": -0.023947065696120262, "logits/chosen": -2.816763401031494, "logits/rejected": -1.7249367237091064, "logps/chosen": -0.3471135199069977, "logps/rejected": -9.89926815032959, "loss": 0.2673, "nll_loss": 0.25374776124954224, "rewards/accuracies": 1.0, "rewards/chosen": -0.06942270696163177, "rewards/margins": 1.910431146621704, "rewards/rejected": -1.979853868484497, "step": 4120 }, { "epoch": 2.95, "grad_norm": 7.0625, "learning_rate": 4.206287209375573e-06, "log_odds_chosen": 9.080406188964844, "log_odds_ratio": -0.05416526645421982, "logits/chosen": -2.8165950775146484, "logits/rejected": -1.8180831670761108, "logps/chosen": -0.363316148519516, "logps/rejected": -8.077341079711914, "loss": 0.2679, "nll_loss": 0.26229244470596313, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.07266323268413544, "rewards/margins": 1.5428051948547363, "rewards/rejected": -1.6154683828353882, "step": 4130 }, { "epoch": 2.96, "grad_norm": 6.53125, "learning_rate": 4.204799574595403e-06, "log_odds_chosen": 6.818055629730225, "log_odds_ratio": -0.05800958722829819, "logits/chosen": -2.8333072662353516, "logits/rejected": -2.186479330062866, "logps/chosen": -0.3609169125556946, "logps/rejected": -5.922616958618164, "loss": 0.2654, "nll_loss": 0.2335742712020874, "rewards/accuracies": 1.0, "rewards/chosen": -0.07218338549137115, "rewards/margins": 1.112339973449707, "rewards/rejected": -1.184523344039917, "step": 4140 }, { "epoch": 2.96, "grad_norm": 15.625, "learning_rate": 4.203313517091987e-06, "log_odds_chosen": 6.995954990386963, "log_odds_ratio": -0.04991995543241501, "logits/chosen": -2.8100647926330566, "logits/rejected": -2.1644890308380127, "logps/chosen": -0.3308708071708679, "logps/rejected": -5.982227325439453, "loss": 0.2739, "nll_loss": 0.23660406470298767, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.06617416441440582, "rewards/margins": 1.130271553993225, "rewards/rejected": -1.1964454650878906, "step": 4150 }, { "epoch": 2.97, "grad_norm": 24.5, "learning_rate": 4.201829034080091e-06, "log_odds_chosen": 9.040159225463867, "log_odds_ratio": -0.048347409814596176, "logits/chosen": -2.8145790100097656, "logits/rejected": -1.903846025466919, "logps/chosen": -0.40995389223098755, "logps/rejected": -8.149190902709961, "loss": 0.2742, "nll_loss": 0.28532686829566956, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.08199077844619751, "rewards/margins": 1.5478473901748657, "rewards/rejected": -1.629838228225708, "step": 4160 }, { "epoch": 2.98, "grad_norm": 9.0625, "learning_rate": 4.200346122781363e-06, "log_odds_chosen": 6.394429683685303, "log_odds_ratio": -0.048393916338682175, "logits/chosen": -2.858705997467041, "logits/rejected": -2.1757659912109375, "logps/chosen": -0.36599108576774597, "logps/rejected": -5.35428524017334, "loss": 0.2785, "nll_loss": 0.2647210955619812, "rewards/accuracies": 0.981249988079071, "rewards/chosen": -0.07319821417331696, "rewards/margins": 0.9976588487625122, "rewards/rejected": -1.0708571672439575, "step": 4170 }, { "epoch": 2.98, "grad_norm": 8.0625, "learning_rate": 4.1988647804243155e-06, "log_odds_chosen": 7.234747409820557, "log_odds_ratio": -0.03427041321992874, "logits/chosen": -2.8021435737609863, "logits/rejected": -2.0722737312316895, "logps/chosen": -0.4443022310733795, "logps/rejected": -6.377936840057373, "loss": 0.2923, "nll_loss": 0.2961937487125397, "rewards/accuracies": 1.0, "rewards/chosen": -0.08886045217514038, "rewards/margins": 1.1867269277572632, "rewards/rejected": -1.2755874395370483, "step": 4180 }, { "epoch": 2.99, "grad_norm": 15.5, "learning_rate": 4.197385004244289e-06, "log_odds_chosen": 8.025124549865723, "log_odds_ratio": -0.023739898577332497, "logits/chosen": -2.865403413772583, "logits/rejected": -2.047375202178955, "logps/chosen": -0.3514173626899719, "logps/rejected": -6.971765995025635, "loss": 0.2778, "nll_loss": 0.2846386432647705, "rewards/accuracies": 1.0, "rewards/chosen": -0.07028347253799438, "rewards/margins": 1.3240697383880615, "rewards/rejected": -1.3943531513214111, "step": 4190 }, { "epoch": 3.0, "grad_norm": 6.8125, "learning_rate": 4.195906791483446e-06, "log_odds_chosen": 9.17123031616211, "log_odds_ratio": -0.03721408173441887, "logits/chosen": -2.8432440757751465, "logits/rejected": -1.8437057733535767, "logps/chosen": -0.40266337990760803, "logps/rejected": -8.306893348693848, "loss": 0.2994, "nll_loss": 0.289571076631546, "rewards/accuracies": 0.9937499761581421, "rewards/chosen": -0.08053267002105713, "rewards/margins": 1.580845832824707, "rewards/rejected": -1.6613785028457642, "step": 4200 }, { "epoch": 3.0, "step": 4200, "total_flos": 0.0, "train_loss": 0.4132561104070573, "train_runtime": 52660.5661, "train_samples_per_second": 2.553, "train_steps_per_second": 0.08 } ], "logging_steps": 10, "max_steps": 4200, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }