{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008, "grad_norm": 0.4211425877451838, "learning_rate": 4.999912270696202e-05, "log_odds_chosen": -0.0004254445375408977, "log_odds_ratio": -0.6933605670928955, "logits/chosen": -2.876610279083252, "logits/chosen_prompt": -2.844738245010376, "logits/rejected": -2.8758692741394043, "logits/rejected_prompt": -2.8239073753356934, "logps/chosen": -1.9094527959823608, "logps/chosen_both": -1.9286587238311768, "logps/chosen_prompt": -3.189321756362915, "logps/rejected": -1.9090824127197266, "logps/rejected_both": -1.9364073276519775, "logps/rejected_prompt": -3.4751086235046387, "loss": 2.325, "nll_loss": 1.928330421447754, "rewards/accuracies": 0.5, "rewards/chosen": -0.7637811899185181, "rewards/margins": -0.00014820098294876516, "rewards/rejected": -0.7636328935623169, "step": 10 }, { "epoch": 0.016, "grad_norm": 0.19485166995413405, "learning_rate": 4.9996490889419514e-05, "log_odds_chosen": 0.0011974871158599854, "log_odds_ratio": -0.6925489902496338, "logits/chosen": -2.9591917991638184, "logits/chosen_prompt": -2.8109309673309326, "logits/rejected": -2.9579415321350098, "logits/rejected_prompt": -2.789308547973633, "logps/chosen": -2.084634304046631, "logps/chosen_both": -2.0863680839538574, "logps/chosen_prompt": -2.1795780658721924, "logps/rejected": -2.0856688022613525, "logps/rejected_both": -2.0941364765167236, "logps/rejected_prompt": -2.347795009613037, "loss": 2.2922, "nll_loss": 2.08614182472229, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8338537216186523, "rewards/margins": 0.0004138052463531494, "rewards/rejected": -0.8342674970626831, "step": 20 }, { "epoch": 0.024, "grad_norm": 0.16144893961648712, "learning_rate": 4.99921047320825e-05, "log_odds_chosen": 0.003194092307239771, "log_odds_ratio": -0.6915546655654907, "logits/chosen": -2.9421558380126953, "logits/chosen_prompt": -2.7285828590393066, "logits/rejected": -2.939770221710205, "logits/rejected_prompt": -2.70296311378479, "logps/chosen": -2.0509393215179443, "logps/chosen_both": -2.0457570552825928, "logps/chosen_prompt": -1.5747671127319336, "logps/rejected": -2.0534369945526123, "logps/rejected_both": -2.0497002601623535, "logps/rejected_prompt": -1.6531193256378174, "loss": 2.2795, "nll_loss": 2.04412841796875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8203758001327515, "rewards/margins": 0.0009990095859393477, "rewards/rejected": -0.821374773979187, "step": 30 }, { "epoch": 0.032, "grad_norm": 0.16210904759452727, "learning_rate": 4.9985964542786614e-05, "log_odds_chosen": 0.0012136728037148714, "log_odds_ratio": -0.6925405859947205, "logits/chosen": -2.92653226852417, "logits/chosen_prompt": -2.7136194705963135, "logits/rejected": -2.925443172454834, "logits/rejected_prompt": -2.700766086578369, "logps/chosen": -2.0835628509521484, "logps/chosen_both": -2.070845365524292, "logps/chosen_prompt": -1.1743593215942383, "logps/rejected": -2.084618330001831, "logps/rejected_both": -2.076547384262085, "logps/rejected_prompt": -1.2668603658676147, "loss": 2.2852, "nll_loss": 2.070385694503784, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8334251642227173, "rewards/margins": 0.00042223333730362356, "rewards/rejected": -0.8338474035263062, "step": 40 }, { "epoch": 0.04, "grad_norm": 0.1829717877342827, "learning_rate": 4.997807075247146e-05, "log_odds_chosen": 0.000906852656044066, "log_odds_ratio": -0.6926941871643066, "logits/chosen": -2.8913445472717285, "logits/chosen_prompt": -2.6892333030700684, "logits/rejected": -2.8896098136901855, "logits/rejected_prompt": -2.6766159534454346, "logps/chosen": -2.009531259536743, "logps/chosen_both": -1.9982995986938477, "logps/chosen_prompt": -1.053348422050476, "logps/rejected": -2.0103189945220947, "logps/rejected_both": -2.0013835430145264, "logps/rejected_prompt": -1.2616751194000244, "loss": 2.2716, "nll_loss": 1.996681571006775, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8038125038146973, "rewards/margins": 0.0003150761185679585, "rewards/rejected": -0.80412757396698, "step": 50 }, { "epoch": 0.048, "grad_norm": 0.1926273569998765, "learning_rate": 4.996842391515044e-05, "log_odds_chosen": 0.0007017262396402657, "log_odds_ratio": -0.6927965879440308, "logits/chosen": -2.9328999519348145, "logits/chosen_prompt": -2.684788227081299, "logits/rejected": -2.93101167678833, "logits/rejected_prompt": -2.659271240234375, "logps/chosen": -1.9513660669326782, "logps/chosen_both": -1.93800950050354, "logps/chosen_prompt": -0.95411616563797, "logps/rejected": -1.9519250392913818, "logps/rejected_both": -1.9419523477554321, "logps/rejected_prompt": -1.0883800983428955, "loss": 2.2492, "nll_loss": 1.9371274709701538, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7805464863777161, "rewards/margins": 0.00022354423708748072, "rewards/rejected": -0.7807700634002686, "step": 60 }, { "epoch": 0.056, "grad_norm": 0.1815660976282933, "learning_rate": 4.9957024707871806e-05, "log_odds_chosen": 0.0007978074136190116, "log_odds_ratio": -0.6927486062049866, "logits/chosen": -3.0125765800476074, "logits/chosen_prompt": -2.6774511337280273, "logits/rejected": -3.0124025344848633, "logits/rejected_prompt": -2.6662356853485107, "logps/chosen": -2.0494558811187744, "logps/chosen_both": -2.0350148677825928, "logps/chosen_prompt": -0.9741342663764954, "logps/rejected": -2.050143003463745, "logps/rejected_both": -2.042119264602661, "logps/rejected_prompt": -1.1199967861175537, "loss": 2.2682, "nll_loss": 2.0335299968719482, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8197824358940125, "rewards/margins": 0.00027483105077408254, "rewards/rejected": -0.820057213306427, "step": 70 }, { "epoch": 0.064, "grad_norm": 0.18993029983534432, "learning_rate": 4.994387393067117e-05, "log_odds_chosen": 0.0014978877734392881, "log_odds_ratio": -0.6923991441726685, "logits/chosen": -2.9860825538635254, "logits/chosen_prompt": -2.6699416637420654, "logits/rejected": -2.9854748249053955, "logits/rejected_prompt": -2.6453309059143066, "logps/chosen": -2.025066614151001, "logps/chosen_both": -2.0116593837738037, "logps/chosen_prompt": -1.0876951217651367, "logps/rejected": -2.0263991355895996, "logps/rejected_both": -2.0129716396331787, "logps/rejected_prompt": -1.1680071353912354, "loss": 2.2805, "nll_loss": 2.008460283279419, "rewards/accuracies": 1.0, "rewards/chosen": -0.8100266456604004, "rewards/margins": 0.0005330622079782188, "rewards/rejected": -0.8105596303939819, "step": 80 }, { "epoch": 0.072, "grad_norm": 0.19392806669970095, "learning_rate": 4.992897250651535e-05, "log_odds_chosen": 0.0007344387704506516, "log_odds_ratio": -0.6927801370620728, "logits/chosen": -2.998304605484009, "logits/chosen_prompt": -2.7530579566955566, "logits/rejected": -2.9966633319854736, "logits/rejected_prompt": -2.726839542388916, "logps/chosen": -1.9492180347442627, "logps/chosen_both": -1.9305731058120728, "logps/chosen_prompt": -0.871951699256897, "logps/rejected": -1.9498412609100342, "logps/rejected_both": -1.9371519088745117, "logps/rejected_prompt": -1.0174219608306885, "loss": 2.2152, "nll_loss": 1.929351806640625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7796871662139893, "rewards/margins": 0.000249391800025478, "rewards/rejected": -0.779936671257019, "step": 90 }, { "epoch": 0.08, "grad_norm": 0.18477584362829488, "learning_rate": 4.991232148123761e-05, "log_odds_chosen": 0.0013153791660442948, "log_odds_ratio": -0.6924898624420166, "logits/chosen": -2.959036350250244, "logits/chosen_prompt": -2.6582894325256348, "logits/rejected": -2.959897518157959, "logits/rejected_prompt": -2.656588077545166, "logps/chosen": -1.980985403060913, "logps/chosen_both": -1.965191125869751, "logps/chosen_prompt": -0.8711269497871399, "logps/rejected": -1.9821256399154663, "logps/rejected_both": -1.9721254110336304, "logps/rejected_prompt": -0.9294773936271667, "loss": 2.2517, "nll_loss": 1.964665412902832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7923941612243652, "rewards/margins": 0.00045606493949890137, "rewards/rejected": -0.7928503155708313, "step": 100 }, { "epoch": 0.088, "grad_norm": 0.19924379886100949, "learning_rate": 4.9893922023464236e-05, "log_odds_chosen": 0.002966083586215973, "log_odds_ratio": -0.6916661858558655, "logits/chosen": -3.0152981281280518, "logits/chosen_prompt": -2.685716152191162, "logits/rejected": -3.0145790576934814, "logits/rejected_prompt": -2.6468653678894043, "logps/chosen": -1.8295310735702515, "logps/chosen_both": -1.8159011602401733, "logps/chosen_prompt": -1.0153570175170898, "logps/rejected": -1.8320270776748657, "logps/rejected_both": -1.8261594772338867, "logps/rejected_prompt": -1.1217412948608398, "loss": 2.2814, "nll_loss": 1.815495491027832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7318124175071716, "rewards/margins": 0.0009983479976654053, "rewards/rejected": -0.7328108549118042, "step": 110 }, { "epoch": 0.096, "grad_norm": 0.2009899005827714, "learning_rate": 4.987377542453251e-05, "log_odds_chosen": 0.0022429400123655796, "log_odds_ratio": -0.6920267939567566, "logits/chosen": -2.9447622299194336, "logits/chosen_prompt": -2.632648468017578, "logits/rejected": -2.9442973136901855, "logits/rejected_prompt": -2.6101832389831543, "logps/chosen": -2.0063014030456543, "logps/chosen_both": -1.991539716720581, "logps/chosen_prompt": -0.9827820658683777, "logps/rejected": -2.0082459449768066, "logps/rejected_both": -1.9994781017303467, "logps/rejected_prompt": -1.0614566802978516, "loss": 2.2719, "nll_loss": 1.99040949344635, "rewards/accuracies": 1.0, "rewards/chosen": -0.8025206327438354, "rewards/margins": 0.0007776618003845215, "rewards/rejected": -0.8032983541488647, "step": 120 }, { "epoch": 0.104, "grad_norm": 0.18861397575558203, "learning_rate": 4.985188309840012e-05, "log_odds_chosen": 0.001361916190944612, "log_odds_ratio": -0.692466676235199, "logits/chosen": -2.95689058303833, "logits/chosen_prompt": -2.6187005043029785, "logits/rejected": -2.95717191696167, "logits/rejected_prompt": -2.592301607131958, "logps/chosen": -2.0394482612609863, "logps/chosen_both": -2.02314829826355, "logps/chosen_prompt": -0.9008905291557312, "logps/rejected": -2.040587902069092, "logps/rejected_both": -2.0329113006591797, "logps/rejected_prompt": -1.0704509019851685, "loss": 2.2882, "nll_loss": 2.023050546646118, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8157793283462524, "rewards/margins": 0.00045590996160171926, "rewards/rejected": -0.8162351846694946, "step": 130 }, { "epoch": 0.112, "grad_norm": 0.2030737765327122, "learning_rate": 4.982824658154589e-05, "log_odds_chosen": 0.0003186427056789398, "log_odds_ratio": -0.6929879188537598, "logits/chosen": -2.934846878051758, "logits/chosen_prompt": -2.6593239307403564, "logits/rejected": -2.9346649646759033, "logits/rejected_prompt": -2.637718677520752, "logps/chosen": -2.066263437271118, "logps/chosen_both": -2.0494155883789062, "logps/chosen_prompt": -0.9298864603042603, "logps/rejected": -2.066551685333252, "logps/rejected_both": -2.0526323318481445, "logps/rejected_prompt": -1.0461074113845825, "loss": 2.2784, "nll_loss": 2.048583984375, "rewards/accuracies": 0.5, "rewards/chosen": -0.8265053629875183, "rewards/margins": 0.0001151919350377284, "rewards/rejected": -0.8266205787658691, "step": 140 }, { "epoch": 0.12, "grad_norm": 0.17845448491542337, "learning_rate": 4.980286753286195e-05, "log_odds_chosen": 0.0020511746406555176, "log_odds_ratio": -0.6921236515045166, "logits/chosen": -2.9423627853393555, "logits/chosen_prompt": -2.6544814109802246, "logits/rejected": -2.9413440227508545, "logits/rejected_prompt": -2.6495890617370605, "logps/chosen": -2.0567996501922607, "logps/chosen_both": -2.0376124382019043, "logps/chosen_prompt": -0.8456690907478333, "logps/rejected": -2.058603525161743, "logps/rejected_both": -2.0455679893493652, "logps/rejected_prompt": -1.0780448913574219, "loss": 2.2474, "nll_loss": 2.036198616027832, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.822719931602478, "rewards/margins": 0.0007214724901132286, "rewards/rejected": -0.8234413862228394, "step": 150 }, { "epoch": 0.128, "grad_norm": 0.18228364635340788, "learning_rate": 4.977574773353732e-05, "log_odds_chosen": 0.0005785167450085282, "log_odds_ratio": -0.6928580403327942, "logits/chosen": -2.906240940093994, "logits/chosen_prompt": -2.656862735748291, "logits/rejected": -2.906233072280884, "logits/rejected_prompt": -2.658569812774658, "logps/chosen": -1.8988163471221924, "logps/chosen_both": -1.8861125707626343, "logps/chosen_prompt": -0.9287108182907104, "logps/rejected": -1.8993009328842163, "logps/rejected_both": -1.890856146812439, "logps/rejected_prompt": -1.113793134689331, "loss": 2.2658, "nll_loss": 1.8859831094741821, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7595265507698059, "rewards/margins": 0.00019387007341720164, "rewards/rejected": -0.7597203850746155, "step": 160 }, { "epoch": 0.136, "grad_norm": 0.21059375256598528, "learning_rate": 4.9746889086932895e-05, "log_odds_chosen": 0.0012606128584593534, "log_odds_ratio": -0.6925175786018372, "logits/chosen": -2.9255146980285645, "logits/chosen_prompt": -2.681833505630493, "logits/rejected": -2.9241907596588135, "logits/rejected_prompt": -2.6375930309295654, "logps/chosen": -2.018401861190796, "logps/chosen_both": -2.0020346641540527, "logps/chosen_prompt": -0.8163633346557617, "logps/rejected": -2.0194990634918213, "logps/rejected_both": -2.0088753700256348, "logps/rejected_prompt": -1.024702787399292, "loss": 2.2545, "nll_loss": 2.0013086795806885, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8073607683181763, "rewards/margins": 0.00043891073437407613, "rewards/rejected": -0.8077996373176575, "step": 170 }, { "epoch": 0.144, "grad_norm": 0.2593749816883702, "learning_rate": 4.971629361844785e-05, "log_odds_chosen": 0.000588211405556649, "log_odds_ratio": -0.6928532123565674, "logits/chosen": -2.9365015029907227, "logits/chosen_prompt": -2.6852712631225586, "logits/rejected": -2.9362454414367676, "logits/rejected_prompt": -2.6527528762817383, "logps/chosen": -2.049866199493408, "logps/chosen_both": -2.03619122505188, "logps/chosen_prompt": -0.8910077214241028, "logps/rejected": -2.050372838973999, "logps/rejected_both": -2.0393173694610596, "logps/rejected_prompt": -1.0920004844665527, "loss": 2.2312, "nll_loss": 2.0342373847961426, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8199464678764343, "rewards/margins": 0.00020260215387679636, "rewards/rejected": -0.8201491236686707, "step": 180 }, { "epoch": 0.152, "grad_norm": 0.21239961737940086, "learning_rate": 4.968396347537751e-05, "log_odds_chosen": 0.0017036155331879854, "log_odds_ratio": -0.6922971606254578, "logits/chosen": -2.9285712242126465, "logits/chosen_prompt": -2.637676477432251, "logits/rejected": -2.9268641471862793, "logits/rejected_prompt": -2.601259231567383, "logps/chosen": -2.019813060760498, "logps/chosen_both": -2.003007173538208, "logps/chosen_prompt": -0.9411777257919312, "logps/rejected": -2.0213375091552734, "logps/rejected_both": -2.013278007507324, "logps/rejected_prompt": -1.0966544151306152, "loss": 2.2257, "nll_loss": 2.003007173538208, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8079251050949097, "rewards/margins": 0.0006098627927713096, "rewards/rejected": -0.8085349798202515, "step": 190 }, { "epoch": 0.16, "grad_norm": 0.2296631901191577, "learning_rate": 4.964990092676263e-05, "log_odds_chosen": 0.002268400741741061, "log_odds_ratio": -0.6920153498649597, "logits/chosen": -2.9518988132476807, "logits/chosen_prompt": -2.6878037452697754, "logits/rejected": -2.9512124061584473, "logits/rejected_prompt": -2.6565701961517334, "logps/chosen": -1.69021475315094, "logps/chosen_both": -1.6815983057022095, "logps/chosen_prompt": -0.8377019762992859, "logps/rejected": -1.6910902261734009, "logps/rejected_both": -1.686661958694458, "logps/rejected_prompt": -0.9836887121200562, "loss": 2.2189, "nll_loss": 1.6812556982040405, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.6760859489440918, "rewards/margins": 0.0003500869497656822, "rewards/rejected": -0.6764360666275024, "step": 200 }, { "epoch": 0.168, "grad_norm": 0.20512599393851222, "learning_rate": 4.9614108363230135e-05, "log_odds_chosen": 0.0021390921901911497, "log_odds_ratio": -0.6920791268348694, "logits/chosen": -2.9732565879821777, "logits/chosen_prompt": -2.6687545776367188, "logits/rejected": -2.9718270301818848, "logits/rejected_prompt": -2.6496801376342773, "logps/chosen": -2.0387587547302246, "logps/chosen_both": -2.017876148223877, "logps/chosen_prompt": -0.897871196269989, "logps/rejected": -2.040605068206787, "logps/rejected_both": -2.0265369415283203, "logps/rejected_prompt": -1.0972706079483032, "loss": 2.2179, "nll_loss": 2.0162312984466553, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8155035972595215, "rewards/margins": 0.000738424074370414, "rewards/rejected": -0.8162419199943542, "step": 210 }, { "epoch": 0.176, "grad_norm": 0.2127533742878833, "learning_rate": 4.9576588296825386e-05, "log_odds_chosen": 0.0020120560657233, "log_odds_ratio": -0.6921423077583313, "logits/chosen": -2.8992626667022705, "logits/chosen_prompt": -2.7236571311950684, "logits/rejected": -2.8986992835998535, "logits/rejected_prompt": -2.676098346710205, "logps/chosen": -2.0563912391662598, "logps/chosen_both": -2.036818027496338, "logps/chosen_prompt": -0.9310529828071594, "logps/rejected": -2.058103322982788, "logps/rejected_both": -2.0425784587860107, "logps/rejected_prompt": -1.0257813930511475, "loss": 2.273, "nll_loss": 2.03584885597229, "rewards/accuracies": 1.0, "rewards/chosen": -0.8225564956665039, "rewards/margins": 0.0006849050405435264, "rewards/rejected": -0.8232414126396179, "step": 220 }, { "epoch": 0.184, "grad_norm": 0.17056867832509964, "learning_rate": 4.953734336083583e-05, "log_odds_chosen": 0.0011583305895328522, "log_odds_ratio": -0.6925683617591858, "logits/chosen": -3.0050501823425293, "logits/chosen_prompt": -2.7037124633789062, "logits/rejected": -3.0038866996765137, "logits/rejected_prompt": -2.6890504360198975, "logps/chosen": -2.0860724449157715, "logps/chosen_both": -2.067084550857544, "logps/chosen_prompt": -0.8457021713256836, "logps/rejected": -2.087078332901001, "logps/rejected_both": -2.0733180046081543, "logps/rejected_prompt": -1.0261476039886475, "loss": 2.2779, "nll_loss": 2.065519094467163, "rewards/accuracies": 1.0, "rewards/chosen": -0.8344290852546692, "rewards/margins": 0.00040218234062194824, "rewards/rejected": -0.8348312377929688, "step": 230 }, { "epoch": 0.192, "grad_norm": 0.2058632754394824, "learning_rate": 4.949637630960617e-05, "log_odds_chosen": 0.0013900771737098694, "log_odds_ratio": -0.6924527883529663, "logits/chosen": -2.966139316558838, "logits/chosen_prompt": -2.7504935264587402, "logits/rejected": -2.965026378631592, "logits/rejected_prompt": -2.7268807888031006, "logps/chosen": -1.945728063583374, "logps/chosen_both": -1.9301140308380127, "logps/chosen_prompt": -0.9403144717216492, "logps/rejected": -1.946915626525879, "logps/rejected_both": -1.936022162437439, "logps/rejected_prompt": -1.0291379690170288, "loss": 2.2775, "nll_loss": 1.9295330047607422, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7782912254333496, "rewards/margins": 0.000475037086289376, "rewards/rejected": -0.7787662744522095, "step": 240 }, { "epoch": 0.2, "grad_norm": 0.18030355585658703, "learning_rate": 4.9453690018345144e-05, "log_odds_chosen": 0.0017323314677923918, "log_odds_ratio": -0.6922817826271057, "logits/chosen": -2.9892709255218506, "logits/chosen_prompt": -2.7419209480285645, "logits/rejected": -2.9878451824188232, "logits/rejected_prompt": -2.706714391708374, "logps/chosen": -2.0075595378875732, "logps/chosen_both": -1.9899797439575195, "logps/chosen_prompt": -0.8903474807739258, "logps/rejected": -2.0090558528900146, "logps/rejected_both": -1.998038649559021, "logps/rejected_prompt": -1.0070338249206543, "loss": 2.2079, "nll_loss": 1.9889189004898071, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8030239343643188, "rewards/margins": 0.0005984127637930214, "rewards/rejected": -0.8036222457885742, "step": 250 }, { "epoch": 0.208, "grad_norm": 0.18677326033959232, "learning_rate": 4.940928748292363e-05, "log_odds_chosen": 0.0003323271812405437, "log_odds_ratio": -0.6929812431335449, "logits/chosen": -2.8448781967163086, "logits/chosen_prompt": -2.6570119857788086, "logits/rejected": -2.844160795211792, "logits/rejected_prompt": -2.6436538696289062, "logps/chosen": -2.090553045272827, "logps/chosen_both": -2.077347993850708, "logps/chosen_prompt": -0.8073711395263672, "logps/rejected": -2.090845823287964, "logps/rejected_both": -2.077338695526123, "logps/rejected_prompt": -0.9910534024238586, "loss": 2.2579, "nll_loss": 2.0748660564422607, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8362210988998413, "rewards/margins": 0.00011717081360984594, "rewards/rejected": -0.8363384008407593, "step": 260 }, { "epoch": 0.216, "grad_norm": 0.19524819903076443, "learning_rate": 4.9363171819664434e-05, "log_odds_chosen": 0.001574930502101779, "log_odds_ratio": -0.6923605799674988, "logits/chosen": -2.9072844982147217, "logits/chosen_prompt": -2.6988863945007324, "logits/rejected": -2.9070873260498047, "logits/rejected_prompt": -2.6662864685058594, "logps/chosen": -1.8586593866348267, "logps/chosen_both": -1.847161889076233, "logps/chosen_prompt": -0.8614280819892883, "logps/rejected": -1.8599656820297241, "logps/rejected_both": -1.8520950078964233, "logps/rejected_prompt": -1.0004897117614746, "loss": 2.2122, "nll_loss": 1.8460156917572021, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7434637546539307, "rewards/margins": 0.0005225300556048751, "rewards/rejected": -0.7439862489700317, "step": 270 }, { "epoch": 0.224, "grad_norm": 0.17891708421025293, "learning_rate": 4.9315346265123594e-05, "log_odds_chosen": 0.0014710575342178345, "log_odds_ratio": -0.6924123764038086, "logits/chosen": -2.893035888671875, "logits/chosen_prompt": -2.6818959712982178, "logits/rejected": -2.8925375938415527, "logits/rejected_prompt": -2.6510303020477295, "logps/chosen": -1.959538221359253, "logps/chosen_both": -1.9448583126068115, "logps/chosen_prompt": -0.8354212641716003, "logps/rejected": -1.9608103036880493, "logps/rejected_both": -1.9502098560333252, "logps/rejected_prompt": -0.9869192838668823, "loss": 2.2903, "nll_loss": 1.94313645362854, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7838152647018433, "rewards/margins": 0.0005089103942736983, "rewards/rejected": -0.7843241691589355, "step": 280 }, { "epoch": 0.232, "grad_norm": 0.21094148380709188, "learning_rate": 4.9265814175863186e-05, "log_odds_chosen": 0.0009952529799193144, "log_odds_ratio": -0.6926498413085938, "logits/chosen": -2.9005274772644043, "logits/chosen_prompt": -2.71238374710083, "logits/rejected": -2.8991751670837402, "logits/rejected_prompt": -2.6699583530426025, "logps/chosen": -2.1492276191711426, "logps/chosen_both": -2.1339974403381348, "logps/chosen_prompt": -0.9373821020126343, "logps/rejected": -2.1501176357269287, "logps/rejected_both": -2.1395199298858643, "logps/rejected_prompt": -1.100056529045105, "loss": 2.2923, "nll_loss": 2.1338019371032715, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8596910238265991, "rewards/margins": 0.00035610198392532766, "rewards/rejected": -0.8600472211837769, "step": 290 }, { "epoch": 0.24, "grad_norm": 0.19361551914630554, "learning_rate": 4.9214579028215776e-05, "log_odds_chosen": 0.0016762830782681704, "log_odds_ratio": -0.6923099160194397, "logits/chosen": -2.9360158443450928, "logits/chosen_prompt": -2.7480220794677734, "logits/rejected": -2.9349968433380127, "logits/rejected_prompt": -2.733687400817871, "logps/chosen": -1.8898597955703735, "logps/chosen_both": -1.874415636062622, "logps/chosen_prompt": -0.8352281451225281, "logps/rejected": -1.8912776708602905, "logps/rejected_both": -1.8779733180999756, "logps/rejected_prompt": -0.9313365817070007, "loss": 2.2525, "nll_loss": 1.8733183145523071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7559438943862915, "rewards/margins": 0.0005672037368640304, "rewards/rejected": -0.7565110921859741, "step": 300 }, { "epoch": 0.248, "grad_norm": 0.19645084360565487, "learning_rate": 4.916164441804044e-05, "log_odds_chosen": 0.0019232749473303556, "log_odds_ratio": -0.692186176776886, "logits/chosen": -2.9699971675872803, "logits/chosen_prompt": -2.7393062114715576, "logits/rejected": -2.9690558910369873, "logits/rejected_prompt": -2.7017319202423096, "logps/chosen": -1.9972589015960693, "logps/chosen_both": -1.981871247291565, "logps/chosen_prompt": -0.8229547739028931, "logps/rejected": -1.9988943338394165, "logps/rejected_both": -1.9911056756973267, "logps/rejected_prompt": -0.9741779565811157, "loss": 2.2527, "nll_loss": 1.981127381324768, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7989035844802856, "rewards/margins": 0.000654196715913713, "rewards/rejected": -0.7995578050613403, "step": 310 }, { "epoch": 0.256, "grad_norm": 0.17696578224649318, "learning_rate": 4.910701406047037e-05, "log_odds_chosen": 0.0012397505342960358, "log_odds_ratio": -0.6925276517868042, "logits/chosen": -2.9160306453704834, "logits/chosen_prompt": -2.7327325344085693, "logits/rejected": -2.915261745452881, "logits/rejected_prompt": -2.701322078704834, "logps/chosen": -1.9081172943115234, "logps/chosen_both": -1.892844557762146, "logps/chosen_prompt": -0.8174566030502319, "logps/rejected": -1.9091819524765015, "logps/rejected_both": -1.9010097980499268, "logps/rejected_prompt": -1.0786253213882446, "loss": 2.2602, "nll_loss": 1.8927319049835205, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7632468938827515, "rewards/margins": 0.00042594075785018504, "rewards/rejected": -0.7636728286743164, "step": 320 }, { "epoch": 0.264, "grad_norm": 0.17292787330822676, "learning_rate": 4.905069178965215e-05, "log_odds_chosen": 0.0019163743127137423, "log_odds_ratio": -0.692189633846283, "logits/chosen": -2.9151923656463623, "logits/chosen_prompt": -2.7165563106536865, "logits/rejected": -2.914482593536377, "logits/rejected_prompt": -2.6829206943511963, "logps/chosen": -1.8700447082519531, "logps/chosen_both": -1.8556480407714844, "logps/chosen_prompt": -0.8194649815559387, "logps/rejected": -1.8716179132461548, "logps/rejected_both": -1.864458680152893, "logps/rejected_prompt": -1.1078553199768066, "loss": 2.1808, "nll_loss": 1.8551757335662842, "rewards/accuracies": 1.0, "rewards/chosen": -0.7480179071426392, "rewards/margins": 0.0006292253965511918, "rewards/rejected": -0.7486470937728882, "step": 330 }, { "epoch": 0.272, "grad_norm": 0.19147435771992855, "learning_rate": 4.899268155847667e-05, "log_odds_chosen": 0.002677363809198141, "log_odds_ratio": -0.6918100118637085, "logits/chosen": -3.017524242401123, "logits/chosen_prompt": -2.756082534790039, "logits/rejected": -3.016745090484619, "logits/rejected_prompt": -2.7283802032470703, "logps/chosen": -1.8907134532928467, "logps/chosen_both": -1.8744417428970337, "logps/chosen_prompt": -0.8424029350280762, "logps/rejected": -1.8929758071899414, "logps/rejected_both": -1.8843475580215454, "logps/rejected_prompt": -1.0425379276275635, "loss": 2.225, "nll_loss": 1.8739697933197021, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7562853693962097, "rewards/margins": 0.000904941582120955, "rewards/rejected": -0.7571902275085449, "step": 340 }, { "epoch": 0.28, "grad_norm": 0.19050905162566348, "learning_rate": 4.893298743830168e-05, "log_odds_chosen": 0.0014245070051401854, "log_odds_ratio": -0.6924355626106262, "logits/chosen": -2.881587028503418, "logits/chosen_prompt": -2.7358975410461426, "logits/rejected": -2.8803658485412598, "logits/rejected_prompt": -2.693080186843872, "logps/chosen": -2.135007381439209, "logps/chosen_both": -2.115304470062256, "logps/chosen_prompt": -0.8588684797286987, "logps/rejected": -2.1362690925598145, "logps/rejected_both": -2.1251254081726074, "logps/rejected_prompt": -1.0595465898513794, "loss": 2.3085, "nll_loss": 2.1149659156799316, "rewards/accuracies": 1.0, "rewards/chosen": -0.8540030717849731, "rewards/margins": 0.0005046069854870439, "rewards/rejected": -0.8545076251029968, "step": 350 }, { "epoch": 0.288, "grad_norm": 0.2409286506380079, "learning_rate": 4.887161361866608e-05, "log_odds_chosen": 0.0026388473343104124, "log_odds_ratio": -0.6918294429779053, "logits/chosen": -2.983471632003784, "logits/chosen_prompt": -2.755098819732666, "logits/rejected": -2.982506513595581, "logits/rejected_prompt": -2.7400355339050293, "logps/chosen": -1.9234100580215454, "logps/chosen_both": -1.904706597328186, "logps/chosen_prompt": -0.8400828242301941, "logps/rejected": -1.9256340265274048, "logps/rejected_both": -1.912940263748169, "logps/rejected_prompt": -0.9321552515029907, "loss": 2.2324, "nll_loss": 1.9039466381072998, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.769364058971405, "rewards/margins": 0.0008895128848962486, "rewards/rejected": -0.7702535390853882, "step": 360 }, { "epoch": 0.296, "grad_norm": 0.16083280812237927, "learning_rate": 4.880856440699582e-05, "log_odds_chosen": 0.0021248466800898314, "log_odds_ratio": -0.6920855641365051, "logits/chosen": -2.9351096153259277, "logits/chosen_prompt": -2.723745107650757, "logits/rejected": -2.93329119682312, "logits/rejected_prompt": -2.689175844192505, "logps/chosen": -2.005812644958496, "logps/chosen_both": -1.9874347448349, "logps/chosen_prompt": -0.8169828653335571, "logps/rejected": -2.0076451301574707, "logps/rejected_both": -1.9974247217178345, "logps/rejected_prompt": -0.9817326664924622, "loss": 2.2565, "nll_loss": 1.9868465662002563, "rewards/accuracies": 1.0, "rewards/chosen": -0.8023250699043274, "rewards/margins": 0.0007329642539843917, "rewards/rejected": -0.8030580282211304, "step": 370 }, { "epoch": 0.304, "grad_norm": 0.22470013003589273, "learning_rate": 4.874384422830167e-05, "log_odds_chosen": 0.0011979244882240891, "log_odds_ratio": -0.6925488710403442, "logits/chosen": -2.9063477516174316, "logits/chosen_prompt": -2.607713222503662, "logits/rejected": -2.905827760696411, "logits/rejected_prompt": -2.5853092670440674, "logps/chosen": -1.9979126453399658, "logps/chosen_both": -1.982242226600647, "logps/chosen_prompt": -0.8234804272651672, "logps/rejected": -1.9988930225372314, "logps/rejected_both": -1.9891548156738281, "logps/rejected_prompt": -0.9966527223587036, "loss": 2.266, "nll_loss": 1.9814211130142212, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7991650104522705, "rewards/margins": 0.0003922194300685078, "rewards/rejected": -0.7995571494102478, "step": 380 }, { "epoch": 0.312, "grad_norm": 0.16501384834156196, "learning_rate": 4.867745762486861e-05, "log_odds_chosen": 0.0010735094547271729, "log_odds_ratio": -0.6926108598709106, "logits/chosen": -2.9659483432769775, "logits/chosen_prompt": -2.684511661529541, "logits/rejected": -2.9646358489990234, "logits/rejected_prompt": -2.6466262340545654, "logps/chosen": -1.8777449131011963, "logps/chosen_both": -1.8621854782104492, "logps/chosen_prompt": -0.8326584100723267, "logps/rejected": -1.8786296844482422, "logps/rejected_both": -1.8694502115249634, "logps/rejected_prompt": -1.119554042816162, "loss": 2.2551, "nll_loss": 1.8609716892242432, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7510979771614075, "rewards/margins": 0.0003538370074238628, "rewards/rejected": -0.751451849937439, "step": 390 }, { "epoch": 0.32, "grad_norm": 0.18496197696993874, "learning_rate": 4.860940925593703e-05, "log_odds_chosen": 0.0022099569905549288, "log_odds_ratio": -0.6920434832572937, "logits/chosen": -2.8903660774230957, "logits/chosen_prompt": -2.6781816482543945, "logits/rejected": -2.890045166015625, "logits/rejected_prompt": -2.6534364223480225, "logps/chosen": -1.969386339187622, "logps/chosen_both": -1.954185128211975, "logps/chosen_prompt": -0.7636314630508423, "logps/rejected": -1.9712820053100586, "logps/rejected_both": -1.9598472118377686, "logps/rejected_prompt": -0.9155877232551575, "loss": 2.243, "nll_loss": 1.9532957077026367, "rewards/accuracies": 1.0, "rewards/chosen": -0.7877545356750488, "rewards/margins": 0.0007582366233691573, "rewards/rejected": -0.7885128259658813, "step": 400 }, { "epoch": 0.328, "grad_norm": 0.22859080108494093, "learning_rate": 4.8539703897375755e-05, "log_odds_chosen": 0.004624041263014078, "log_odds_ratio": -0.690842866897583, "logits/chosen": -2.9258294105529785, "logits/chosen_prompt": -2.6813464164733887, "logits/rejected": -2.9250378608703613, "logits/rejected_prompt": -2.6571106910705566, "logps/chosen": -2.0521552562713623, "logps/chosen_both": -2.034921646118164, "logps/chosen_prompt": -0.8797234296798706, "logps/rejected": -2.056114673614502, "logps/rejected_both": -2.044158935546875, "logps/rejected_prompt": -0.9540025591850281, "loss": 2.2663, "nll_loss": 2.0334911346435547, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8208619952201843, "rewards/margins": 0.0015838384861126542, "rewards/rejected": -0.8224459886550903, "step": 410 }, { "epoch": 0.336, "grad_norm": 0.2070567132691218, "learning_rate": 4.846834644134686e-05, "log_odds_chosen": 0.001986052840948105, "log_odds_ratio": -0.6921548843383789, "logits/chosen": -2.9888834953308105, "logits/chosen_prompt": -2.6887311935424805, "logits/rejected": -2.989170789718628, "logits/rejected_prompt": -2.694418430328369, "logps/chosen": -1.9955952167510986, "logps/chosen_both": -1.9792373180389404, "logps/chosen_prompt": -0.8381233215332031, "logps/rejected": -1.997323751449585, "logps/rejected_both": -1.9859631061553955, "logps/rejected_prompt": -0.9913262128829956, "loss": 2.2321, "nll_loss": 1.9785674810409546, "rewards/accuracies": 1.0, "rewards/chosen": -0.7982381582260132, "rewards/margins": 0.0006913721445016563, "rewards/rejected": -0.7989295721054077, "step": 420 }, { "epoch": 0.344, "grad_norm": 0.19605531509972363, "learning_rate": 4.839534189596228e-05, "log_odds_chosen": 0.0027246386744081974, "log_odds_ratio": -0.6917861104011536, "logits/chosen": -2.912360429763794, "logits/chosen_prompt": -2.653672218322754, "logits/rejected": -2.910978317260742, "logits/rejected_prompt": -2.627488613128662, "logps/chosen": -2.060957908630371, "logps/chosen_both": -2.043726921081543, "logps/chosen_prompt": -0.7695341110229492, "logps/rejected": -2.0633223056793213, "logps/rejected_both": -2.051257371902466, "logps/rejected_prompt": -1.0156570672988892, "loss": 2.2675, "nll_loss": 2.042490005493164, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8243831396102905, "rewards/margins": 0.0009458243730477989, "rewards/rejected": -0.8253289461135864, "step": 430 }, { "epoch": 0.352, "grad_norm": 0.44833865318290117, "learning_rate": 4.832069538493237e-05, "log_odds_chosen": 0.04500371962785721, "log_odds_ratio": -0.6715863943099976, "logits/chosen": -2.9302279949188232, "logits/chosen_prompt": -2.6701042652130127, "logits/rejected": -2.9281227588653564, "logits/rejected_prompt": -2.666865587234497, "logps/chosen": -1.9099162817001343, "logps/chosen_both": -1.898306131362915, "logps/chosen_prompt": -0.702593982219696, "logps/rejected": -1.948999047279358, "logps/rejected_both": -1.9378995895385742, "logps/rejected_prompt": -0.971504807472229, "loss": 2.2392, "nll_loss": 1.895094633102417, "rewards/accuracies": 1.0, "rewards/chosen": -0.76396644115448, "rewards/margins": 0.015633201226592064, "rewards/rejected": -0.7795997262001038, "step": 440 }, { "epoch": 0.36, "grad_norm": 0.21226948111262156, "learning_rate": 4.8244412147206284e-05, "log_odds_chosen": 2.9653515815734863, "log_odds_ratio": -0.40015140175819397, "logits/chosen": -2.9068620204925537, "logits/chosen_prompt": -2.6536412239074707, "logits/rejected": -2.1202731132507324, "logits/rejected_prompt": -2.6555583477020264, "logps/chosen": -2.0414326190948486, "logps/chosen_both": -2.0248727798461914, "logps/chosen_prompt": -0.8300280570983887, "logps/rejected": -4.945545196533203, "logps/rejected_both": -4.884528160095215, "logps/rejected_prompt": -0.9442939758300781, "loss": 2.1853, "nll_loss": 2.0240979194641113, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8165730237960815, "rewards/margins": 1.1616451740264893, "rewards/rejected": -1.9782178401947021, "step": 450 }, { "epoch": 0.368, "grad_norm": 0.1736633646525648, "learning_rate": 4.81664975366043e-05, "log_odds_chosen": 7.59240198135376, "log_odds_ratio": -0.1370885670185089, "logits/chosen": -2.9020304679870605, "logits/chosen_prompt": -2.6753904819488525, "logits/rejected": -0.7233905792236328, "logits/rejected_prompt": -2.637943983078003, "logps/chosen": -1.8611255884170532, "logps/chosen_both": -1.8469617366790771, "logps/chosen_prompt": -0.8501307368278503, "logps/rejected": -9.215188026428223, "logps/rejected_both": -9.101489067077637, "logps/rejected_prompt": -1.2299854755401611, "loss": 2.0244, "nll_loss": 1.8459827899932861, "rewards/accuracies": 1.0, "rewards/chosen": -0.7444502115249634, "rewards/margins": 2.9416251182556152, "rewards/rejected": -3.686075210571289, "step": 460 }, { "epoch": 0.376, "grad_norm": 0.17769599500435684, "learning_rate": 4.808695702144206e-05, "log_odds_chosen": 5.727511882781982, "log_odds_ratio": -0.2772656977176666, "logits/chosen": -2.879725694656372, "logits/chosen_prompt": -2.642578125, "logits/rejected": -1.0399138927459717, "logits/rejected_prompt": -2.6099534034729004, "logps/chosen": -2.0047779083251953, "logps/chosen_both": -1.9910427331924438, "logps/chosen_prompt": -0.8587312698364258, "logps/rejected": -7.64484167098999, "logps/rejected_both": -7.5631890296936035, "logps/rejected_prompt": -1.0231356620788574, "loss": 2.0507, "nll_loss": 1.990276575088501, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8019111752510071, "rewards/margins": 2.2560253143310547, "rewards/rejected": -3.057936429977417, "step": 470 }, { "epoch": 0.384, "grad_norm": 0.18584682979949957, "learning_rate": 4.800579618414676e-05, "log_odds_chosen": 4.071249961853027, "log_odds_ratio": -0.34571754932403564, "logits/chosen": -2.903729200363159, "logits/chosen_prompt": -2.7958900928497314, "logits/rejected": -3.239121198654175, "logits/rejected_prompt": -2.7663371562957764, "logps/chosen": -1.9373371601104736, "logps/chosen_both": -1.921233892440796, "logps/chosen_prompt": -0.9925417900085449, "logps/rejected": -5.936069488525391, "logps/rejected_both": -5.837677955627441, "logps/rejected_prompt": -1.1928670406341553, "loss": 2.4809, "nll_loss": 1.919942855834961, "rewards/accuracies": 1.0, "rewards/chosen": -0.7749348282814026, "rewards/margins": 1.5994927883148193, "rewards/rejected": -2.3744280338287354, "step": 480 }, { "epoch": 0.392, "grad_norm": 0.1641543403493392, "learning_rate": 4.7923020720865414e-05, "log_odds_chosen": 3.001093626022339, "log_odds_ratio": -0.484192430973053, "logits/chosen": -2.983025312423706, "logits/chosen_prompt": -3.0399768352508545, "logits/rejected": -4.017498970031738, "logits/rejected_prompt": -3.0394999980926514, "logps/chosen": -2.209317922592163, "logps/chosen_both": -2.1894264221191406, "logps/chosen_prompt": -0.8747655153274536, "logps/rejected": -5.173645496368408, "logps/rejected_both": -5.1185221672058105, "logps/rejected_prompt": -1.2934271097183228, "loss": 2.1178, "nll_loss": 2.188310384750366, "rewards/accuracies": 1.0, "rewards/chosen": -0.8837271928787231, "rewards/margins": 1.185731053352356, "rewards/rejected": -2.069458484649658, "step": 490 }, { "epoch": 0.4, "grad_norm": 4.569078846846734, "learning_rate": 4.783863644106502e-05, "log_odds_chosen": 6.397196292877197, "log_odds_ratio": -0.20790621638298035, "logits/chosen": -2.8709733486175537, "logits/chosen_prompt": -2.905733585357666, "logits/rejected": -4.449090480804443, "logits/rejected_prompt": -2.8762049674987793, "logps/chosen": -1.861519455909729, "logps/chosen_both": -1.8485714197158813, "logps/chosen_prompt": -0.7894952893257141, "logps/rejected": -8.093868255615234, "logps/rejected_both": -7.9878997802734375, "logps/rejected_prompt": -1.098191499710083, "loss": 2.2466, "nll_loss": 1.847815752029419, "rewards/accuracies": 1.0, "rewards/chosen": -0.7446077466011047, "rewards/margins": 2.4929394721984863, "rewards/rejected": -3.2375473976135254, "step": 500 }, { "epoch": 0.408, "grad_norm": 26.906300876077555, "learning_rate": 4.775264926712489e-05, "log_odds_chosen": 5.443802833557129, "log_odds_ratio": -0.13954684138298035, "logits/chosen": -2.9360134601593018, "logits/chosen_prompt": -2.6900744438171387, "logits/rejected": -3.0484580993652344, "logits/rejected_prompt": -2.612032890319824, "logps/chosen": -1.974119782447815, "logps/chosen_both": -1.958168625831604, "logps/chosen_prompt": -0.8577529788017273, "logps/rejected": -7.293883323669434, "logps/rejected_both": -7.204199314117432, "logps/rejected_prompt": -1.3446273803710938, "loss": 2.518, "nll_loss": 1.9573103189468384, "rewards/accuracies": 1.0, "rewards/chosen": -0.7896479368209839, "rewards/margins": 2.1279053688049316, "rewards/rejected": -2.917553424835205, "step": 510 }, { "epoch": 0.416, "grad_norm": 1.0236738821403857, "learning_rate": 4.7665065233920945e-05, "log_odds_chosen": 4.726571559906006, "log_odds_ratio": -0.14057810604572296, "logits/chosen": -2.9554474353790283, "logits/chosen_prompt": -3.076146364212036, "logits/rejected": -3.131758689880371, "logits/rejected_prompt": -3.045212507247925, "logps/chosen": -1.9218995571136475, "logps/chosen_both": -1.910244345664978, "logps/chosen_prompt": -0.8790926933288574, "logps/rejected": -6.504288673400879, "logps/rejected_both": -6.445836544036865, "logps/rejected_prompt": -1.288549542427063, "loss": 2.0423, "nll_loss": 1.909478783607483, "rewards/accuracies": 1.0, "rewards/chosen": -0.7687598466873169, "rewards/margins": 1.8329557180404663, "rewards/rejected": -2.601715564727783, "step": 520 }, { "epoch": 0.424, "grad_norm": 0.6288533211783596, "learning_rate": 4.7575890488402185e-05, "log_odds_chosen": 4.645321846008301, "log_odds_ratio": -0.14102457463741302, "logits/chosen": -2.9634203910827637, "logits/chosen_prompt": -3.0218586921691895, "logits/rejected": -3.2898871898651123, "logits/rejected_prompt": -3.0139455795288086, "logps/chosen": -1.9550220966339111, "logps/chosen_both": -1.9388656616210938, "logps/chosen_prompt": -0.826554000377655, "logps/rejected": -6.471889495849609, "logps/rejected_both": -6.390293121337891, "logps/rejected_prompt": -1.0643904209136963, "loss": 2.2513, "nll_loss": 1.9378074407577515, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7820087671279907, "rewards/margins": 1.8067471981048584, "rewards/rejected": -2.5887560844421387, "step": 530 }, { "epoch": 0.432, "grad_norm": 2.277583713971035, "learning_rate": 4.7485131289159276e-05, "log_odds_chosen": 4.095762252807617, "log_odds_ratio": -0.15678586065769196, "logits/chosen": -2.9781079292297363, "logits/chosen_prompt": -3.05256986618042, "logits/rejected": -2.9668664932250977, "logits/rejected_prompt": -3.041161060333252, "logps/chosen": -1.9822967052459717, "logps/chosen_both": -1.9686206579208374, "logps/chosen_prompt": -0.9377325177192688, "logps/rejected": -5.9602532386779785, "logps/rejected_both": -5.897341728210449, "logps/rejected_prompt": -1.051451563835144, "loss": 2.0657, "nll_loss": 1.9684457778930664, "rewards/accuracies": 1.0, "rewards/chosen": -0.7929186820983887, "rewards/margins": 1.5911824703216553, "rewards/rejected": -2.384101390838623, "step": 540 }, { "epoch": 0.44, "grad_norm": 0.17707808472563716, "learning_rate": 4.7392794005985326e-05, "log_odds_chosen": 4.996828556060791, "log_odds_ratio": -0.1402866542339325, "logits/chosen": -2.9852428436279297, "logits/chosen_prompt": -3.1000924110412598, "logits/rejected": -3.4309897422790527, "logits/rejected_prompt": -3.088724374771118, "logps/chosen": -1.9283807277679443, "logps/chosen_both": -1.913000464439392, "logps/chosen_prompt": -0.7973994612693787, "logps/rejected": -6.7942705154418945, "logps/rejected_both": -6.711949348449707, "logps/rejected_prompt": -1.098016619682312, "loss": 2.2189, "nll_loss": 1.9121148586273193, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7713521718978882, "rewards/margins": 1.946356177330017, "rewards/rejected": -2.7177083492279053, "step": 550 }, { "epoch": 0.448, "grad_norm": 5.519018494250257, "learning_rate": 4.7298885119428773e-05, "log_odds_chosen": 5.843784332275391, "log_odds_ratio": -0.07069602608680725, "logits/chosen": -3.0550990104675293, "logits/chosen_prompt": -3.058029890060425, "logits/rejected": -3.9521071910858154, "logits/rejected_prompt": -3.025411367416382, "logps/chosen": -1.8835957050323486, "logps/chosen_both": -1.8681533336639404, "logps/chosen_prompt": -0.8553426861763, "logps/rejected": -7.572214603424072, "logps/rejected_both": -7.47025203704834, "logps/rejected_prompt": -1.0323774814605713, "loss": 2.077, "nll_loss": 1.8675563335418701, "rewards/accuracies": 1.0, "rewards/chosen": -0.7534382939338684, "rewards/margins": 2.275447368621826, "rewards/rejected": -3.02888560295105, "step": 560 }, { "epoch": 0.456, "grad_norm": 0.6103366310438396, "learning_rate": 4.720341122033862e-05, "log_odds_chosen": 5.190781593322754, "log_odds_ratio": -0.4892934262752533, "logits/chosen": -2.9757232666015625, "logits/chosen_prompt": -3.0236659049987793, "logits/rejected": -3.8188633918762207, "logits/rejected_prompt": -3.0117480754852295, "logps/chosen": -2.410020351409912, "logps/chosen_both": -2.387420415878296, "logps/chosen_prompt": -0.8877968788146973, "logps/rejected": -7.459628105163574, "logps/rejected_both": -7.362242698669434, "logps/rejected_prompt": -1.1302134990692139, "loss": 2.4112, "nll_loss": 2.3871912956237793, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9640080332756042, "rewards/margins": 2.019843339920044, "rewards/rejected": -2.983851432800293, "step": 570 }, { "epoch": 0.464, "grad_norm": 0.151774002914212, "learning_rate": 4.710637900940181e-05, "log_odds_chosen": 3.729964017868042, "log_odds_ratio": -0.2660212516784668, "logits/chosen": -2.9713380336761475, "logits/chosen_prompt": -2.968736410140991, "logits/rejected": -3.0788886547088623, "logits/rejected_prompt": -2.944664478302002, "logps/chosen": -1.842739462852478, "logps/chosen_both": -1.829923391342163, "logps/chosen_prompt": -0.7877852320671082, "logps/rejected": -5.440505504608154, "logps/rejected_both": -5.388018608093262, "logps/rejected_prompt": -1.0643196105957031, "loss": 2.2685, "nll_loss": 1.8282448053359985, "rewards/accuracies": 1.0, "rewards/chosen": -0.7370957732200623, "rewards/margins": 1.4391063451766968, "rewards/rejected": -2.1762022972106934, "step": 580 }, { "epoch": 0.472, "grad_norm": 0.19312538023716122, "learning_rate": 4.7007795296673006e-05, "log_odds_chosen": 3.5488052368164062, "log_odds_ratio": -0.27949827909469604, "logits/chosen": -2.9776198863983154, "logits/chosen_prompt": -3.0068747997283936, "logits/rejected": -3.2581207752227783, "logits/rejected_prompt": -2.980543613433838, "logps/chosen": -1.9455007314682007, "logps/chosen_both": -1.929386854171753, "logps/chosen_prompt": -0.7683624625205994, "logps/rejected": -5.4047675132751465, "logps/rejected_both": -5.334201812744141, "logps/rejected_prompt": -1.0063989162445068, "loss": 2.0098, "nll_loss": 1.927821159362793, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7782004475593567, "rewards/margins": 1.3837066888809204, "rewards/rejected": -2.161907196044922, "step": 590 }, { "epoch": 0.48, "grad_norm": 0.6876461909667617, "learning_rate": 4.690766700109659e-05, "log_odds_chosen": 3.753337860107422, "log_odds_ratio": -0.21310639381408691, "logits/chosen": -2.983619213104248, "logits/chosen_prompt": -3.056485652923584, "logits/rejected": -3.4968714714050293, "logits/rejected_prompt": -3.052788496017456, "logps/chosen": -2.029822587966919, "logps/chosen_both": -2.0120925903320312, "logps/chosen_prompt": -0.8819751739501953, "logps/rejected": -5.680521488189697, "logps/rejected_both": -5.621560096740723, "logps/rejected_prompt": -1.1547878980636597, "loss": 2.2063, "nll_loss": 2.011672258377075, "rewards/accuracies": 1.0, "rewards/chosen": -0.8119290471076965, "rewards/margins": 1.4602794647216797, "rewards/rejected": -2.2722086906433105, "step": 600 }, { "epoch": 0.488, "grad_norm": 0.19286504559147355, "learning_rate": 4.68060011500211e-05, "log_odds_chosen": 4.486660957336426, "log_odds_ratio": -0.16551145911216736, "logits/chosen": -2.9143826961517334, "logits/chosen_prompt": -3.077587366104126, "logits/rejected": -3.641350507736206, "logits/rejected_prompt": -3.062753677368164, "logps/chosen": -1.9688940048217773, "logps/chosen_both": -1.954045295715332, "logps/chosen_prompt": -0.6965051293373108, "logps/rejected": -6.356810569763184, "logps/rejected_both": -6.294190406799316, "logps/rejected_prompt": -0.9163694381713867, "loss": 2.0169, "nll_loss": 1.953741431236267, "rewards/accuracies": 1.0, "rewards/chosen": -0.7875575423240662, "rewards/margins": 1.7551662921905518, "rewards/rejected": -2.542724132537842, "step": 610 }, { "epoch": 0.496, "grad_norm": 0.19049752930142771, "learning_rate": 4.670280487870598e-05, "log_odds_chosen": 4.947572708129883, "log_odds_ratio": -0.14103658497333527, "logits/chosen": -2.8884735107421875, "logits/chosen_prompt": -3.0340023040771484, "logits/rejected": -3.598095655441284, "logits/rejected_prompt": -3.0135154724121094, "logps/chosen": -2.0803651809692383, "logps/chosen_both": -2.065659284591675, "logps/chosen_prompt": -0.7768818140029907, "logps/rejected": -6.917575836181641, "logps/rejected_both": -6.847512245178223, "logps/rejected_prompt": -1.0173327922821045, "loss": 2.4222, "nll_loss": 2.0645294189453125, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.832146167755127, "rewards/margins": 1.9348840713500977, "rewards/rejected": -2.7670302391052246, "step": 620 }, { "epoch": 0.504, "grad_norm": 0.17010508801078386, "learning_rate": 4.659808542982088e-05, "log_odds_chosen": 4.44757604598999, "log_odds_ratio": -0.07313639670610428, "logits/chosen": -2.8788280487060547, "logits/chosen_prompt": -2.848573923110962, "logits/rejected": -2.6464812755584717, "logits/rejected_prompt": -2.814408540725708, "logps/chosen": -2.0289366245269775, "logps/chosen_both": -2.014009952545166, "logps/chosen_prompt": -0.7678987979888916, "logps/rejected": -6.3392744064331055, "logps/rejected_both": -6.2660441398620605, "logps/rejected_prompt": -1.0411919355392456, "loss": 2.0605, "nll_loss": 2.0118680000305176, "rewards/accuracies": 1.0, "rewards/chosen": -0.8115746378898621, "rewards/margins": 1.724135160446167, "rewards/rejected": -2.535709857940674, "step": 630 }, { "epoch": 0.512, "grad_norm": 102.20159023426744, "learning_rate": 4.649185015293728e-05, "log_odds_chosen": 5.305100440979004, "log_odds_ratio": -0.02886788547039032, "logits/chosen": -2.934922456741333, "logits/chosen_prompt": -2.8038196563720703, "logits/rejected": -2.483616828918457, "logits/rejected_prompt": -2.801661491394043, "logps/chosen": -1.7393245697021484, "logps/chosen_both": -1.728514313697815, "logps/chosen_prompt": -0.882293701171875, "logps/rejected": -6.811369895935059, "logps/rejected_both": -6.727609157562256, "logps/rejected_prompt": -1.0623975992202759, "loss": 2.1612, "nll_loss": 1.7267690896987915, "rewards/accuracies": 1.0, "rewards/chosen": -0.6957297921180725, "rewards/margins": 2.028818130493164, "rewards/rejected": -2.724547863006592, "step": 640 }, { "epoch": 0.52, "grad_norm": 2.577120716963003, "learning_rate": 4.638410650401267e-05, "log_odds_chosen": 5.029098033905029, "log_odds_ratio": -0.0729464739561081, "logits/chosen": -2.946472644805908, "logits/chosen_prompt": -2.7987747192382812, "logits/rejected": -2.31748628616333, "logits/rejected_prompt": -2.7790069580078125, "logps/chosen": -1.9928621053695679, "logps/chosen_both": -1.97336745262146, "logps/chosen_prompt": -0.8152757883071899, "logps/rejected": -6.8893632888793945, "logps/rejected_both": -6.791792392730713, "logps/rejected_prompt": -1.0174424648284912, "loss": 2.0913, "nll_loss": 1.9712880849838257, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7971449494361877, "rewards/margins": 1.9586002826690674, "rewards/rejected": -2.7557451725006104, "step": 650 }, { "epoch": 0.528, "grad_norm": 0.15356571620190568, "learning_rate": 4.6274862044867304e-05, "log_odds_chosen": 4.515711307525635, "log_odds_ratio": -0.14140725135803223, "logits/chosen": -2.93347430229187, "logits/chosen_prompt": -2.790188789367676, "logits/rejected": -2.197619915008545, "logits/rejected_prompt": -2.7709336280822754, "logps/chosen": -1.9486901760101318, "logps/chosen_both": -1.936274766921997, "logps/chosen_prompt": -0.9808751940727234, "logps/rejected": -6.346037864685059, "logps/rejected_both": -6.276151180267334, "logps/rejected_prompt": -1.2042269706726074, "loss": 2.0583, "nll_loss": 1.9354143142700195, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7794761657714844, "rewards/margins": 1.7589390277862549, "rewards/rejected": -2.5384154319763184, "step": 660 }, { "epoch": 0.536, "grad_norm": 0.18548636024672094, "learning_rate": 4.616412444265345e-05, "log_odds_chosen": 5.104066371917725, "log_odds_ratio": -0.0724453255534172, "logits/chosen": -2.9771525859832764, "logits/chosen_prompt": -2.8243517875671387, "logits/rejected": -2.083482265472412, "logits/rejected_prompt": -2.8059630393981934, "logps/chosen": -2.0861048698425293, "logps/chosen_both": -2.068869113922119, "logps/chosen_prompt": -0.8699228167533875, "logps/rejected": -7.067320823669434, "logps/rejected_both": -6.978930473327637, "logps/rejected_prompt": -1.0220625400543213, "loss": 2.1363, "nll_loss": 2.0682852268218994, "rewards/accuracies": 1.0, "rewards/chosen": -0.8344419598579407, "rewards/margins": 1.9924862384796143, "rewards/rejected": -2.8269283771514893, "step": 670 }, { "epoch": 0.544, "grad_norm": 59.01092604551995, "learning_rate": 4.605190146931731e-05, "log_odds_chosen": 4.40061092376709, "log_odds_ratio": -0.1419232189655304, "logits/chosen": -2.9263124465942383, "logits/chosen_prompt": -2.8417701721191406, "logits/rejected": -2.351675510406494, "logits/rejected_prompt": -2.8414313793182373, "logps/chosen": -2.124084711074829, "logps/chosen_both": -2.102914571762085, "logps/chosen_prompt": -0.8957809209823608, "logps/rejected": -6.422041893005371, "logps/rejected_both": -6.32672643661499, "logps/rejected_prompt": -1.0718226432800293, "loss": 2.1268, "nll_loss": 2.1024184226989746, "rewards/accuracies": 1.0, "rewards/chosen": -0.8496338725090027, "rewards/margins": 1.7191829681396484, "rewards/rejected": -2.568816661834717, "step": 680 }, { "epoch": 0.552, "grad_norm": 0.1936209207703668, "learning_rate": 4.593820100105355e-05, "log_odds_chosen": 4.4033403396606445, "log_odds_ratio": -0.1418362557888031, "logits/chosen": -2.947152614593506, "logits/chosen_prompt": -2.8191583156585693, "logits/rejected": -2.3703582286834717, "logits/rejected_prompt": -2.8038182258605957, "logps/chosen": -1.993703842163086, "logps/chosen_both": -1.9738051891326904, "logps/chosen_prompt": -0.8131387829780579, "logps/rejected": -6.278976917266846, "logps/rejected_both": -6.194762229919434, "logps/rejected_prompt": -0.9806526303291321, "loss": 2.0429, "nll_loss": 1.9733550548553467, "rewards/accuracies": 1.0, "rewards/chosen": -0.7974814772605896, "rewards/margins": 1.7141094207763672, "rewards/rejected": -2.5115909576416016, "step": 690 }, { "epoch": 0.56, "grad_norm": 0.21779512193360956, "learning_rate": 4.5823031017752485e-05, "log_odds_chosen": 4.373869895935059, "log_odds_ratio": -0.1618097722530365, "logits/chosen": -2.9762911796569824, "logits/chosen_prompt": -2.787757396697998, "logits/rejected": -2.3213400840759277, "logits/rejected_prompt": -2.7804551124572754, "logps/chosen": -1.8093370199203491, "logps/chosen_both": -1.7962630987167358, "logps/chosen_prompt": -0.7294620871543884, "logps/rejected": -6.035723686218262, "logps/rejected_both": -5.961843490600586, "logps/rejected_prompt": -0.9543176889419556, "loss": 2.0382, "nll_loss": 1.7948728799819946, "rewards/accuracies": 1.0, "rewards/chosen": -0.7237349152565002, "rewards/margins": 1.6905548572540283, "rewards/rejected": -2.414289712905884, "step": 700 }, { "epoch": 0.568, "grad_norm": 1.9769361000953782, "learning_rate": 4.5706399602440106e-05, "log_odds_chosen": 4.656636714935303, "log_odds_ratio": -0.1408310979604721, "logits/chosen": -2.916656255722046, "logits/chosen_prompt": -2.787416458129883, "logits/rejected": -2.190491199493408, "logits/rejected_prompt": -2.754542589187622, "logps/chosen": -2.000397205352783, "logps/chosen_both": -1.983769416809082, "logps/chosen_prompt": -0.7894454002380371, "logps/rejected": -6.537571907043457, "logps/rejected_both": -6.459201812744141, "logps/rejected_prompt": -1.0599520206451416, "loss": 2.098, "nll_loss": 1.9831438064575195, "rewards/accuracies": 1.0, "rewards/chosen": -0.8001587986946106, "rewards/margins": 1.8148695230484009, "rewards/rejected": -2.6150283813476562, "step": 710 }, { "epoch": 0.576, "grad_norm": 0.18745727904701265, "learning_rate": 4.558831494071069e-05, "log_odds_chosen": 4.969104290008545, "log_odds_ratio": -0.14006975293159485, "logits/chosen": -2.9004273414611816, "logits/chosen_prompt": -2.7481789588928223, "logits/rejected": -1.9203866720199585, "logits/rejected_prompt": -2.7317967414855957, "logps/chosen": -2.000072479248047, "logps/chosen_both": -1.9829126596450806, "logps/chosen_prompt": -0.9659306406974792, "logps/rejected": -6.8479132652282715, "logps/rejected_both": -6.743927955627441, "logps/rejected_prompt": -1.1112347841262817, "loss": 2.0041, "nll_loss": 1.982696533203125, "rewards/accuracies": 1.0, "rewards/chosen": -0.800028920173645, "rewards/margins": 1.9391365051269531, "rewards/rejected": -2.7391655445098877, "step": 720 }, { "epoch": 0.584, "grad_norm": 44.61607145258881, "learning_rate": 4.5468785320152365e-05, "log_odds_chosen": 4.449766635894775, "log_odds_ratio": -0.20899026095867157, "logits/chosen": -3.0241429805755615, "logits/chosen_prompt": -2.746372699737549, "logits/rejected": -2.07698917388916, "logits/rejected_prompt": -2.746025562286377, "logps/chosen": -1.9495675563812256, "logps/chosen_both": -1.9276573657989502, "logps/chosen_prompt": -0.8301995992660522, "logps/rejected": -6.287846565246582, "logps/rejected_both": -6.172031402587891, "logps/rejected_prompt": -0.9652963876724243, "loss": 2.1169, "nll_loss": 1.9262176752090454, "rewards/accuracies": 1.0, "rewards/chosen": -0.7798271179199219, "rewards/margins": 1.735311508178711, "rewards/rejected": -2.515138626098633, "step": 730 }, { "epoch": 0.592, "grad_norm": 0.39108071766635244, "learning_rate": 4.534781912976546e-05, "log_odds_chosen": 3.2947051525115967, "log_odds_ratio": -0.2812163829803467, "logits/chosen": -2.989047050476074, "logits/chosen_prompt": -2.7699084281921387, "logits/rejected": -2.4307093620300293, "logits/rejected_prompt": -2.756155014038086, "logps/chosen": -1.9651190042495728, "logps/chosen_both": -1.9502513408660889, "logps/chosen_prompt": -0.7651479840278625, "logps/rejected": -5.176846981048584, "logps/rejected_both": -5.1231608390808105, "logps/rejected_prompt": -0.8976105451583862, "loss": 2.0946, "nll_loss": 1.949180245399475, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7860475778579712, "rewards/margins": 1.2846912145614624, "rewards/rejected": -2.070739269256592, "step": 740 }, { "epoch": 0.6, "grad_norm": 4.489644128912903, "learning_rate": 4.522542485937369e-05, "log_odds_chosen": 4.886274337768555, "log_odds_ratio": -0.14077258110046387, "logits/chosen": -2.948451519012451, "logits/chosen_prompt": -2.7478134632110596, "logits/rejected": -2.1101903915405273, "logits/rejected_prompt": -2.7366366386413574, "logps/chosen": -1.992583990097046, "logps/chosen_both": -1.9766371250152588, "logps/chosen_prompt": -0.8634021878242493, "logps/rejected": -6.756987571716309, "logps/rejected_both": -6.673755645751953, "logps/rejected_prompt": -1.0165636539459229, "loss": 2.1241, "nll_loss": 1.9759677648544312, "rewards/accuracies": 1.0, "rewards/chosen": -0.7970336675643921, "rewards/margins": 1.90576171875, "rewards/rejected": -2.7027952671051025, "step": 750 }, { "epoch": 0.608, "grad_norm": 0.4533909423122121, "learning_rate": 4.510161109902837e-05, "log_odds_chosen": 3.120637893676758, "log_odds_ratio": -0.6285208463668823, "logits/chosen": -2.909808397293091, "logits/chosen_prompt": -2.8316149711608887, "logits/rejected": -2.377187490463257, "logits/rejected_prompt": -2.823117971420288, "logps/chosen": -2.327125072479248, "logps/chosen_both": -2.3096871376037598, "logps/chosen_prompt": -0.868097186088562, "logps/rejected": -5.366008281707764, "logps/rejected_both": -5.30277681350708, "logps/rejected_prompt": -1.0501350164413452, "loss": 2.1836, "nll_loss": 2.3085296154022217, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9308500289916992, "rewards/margins": 1.2155535221099854, "rewards/rejected": -2.1464035511016846, "step": 760 }, { "epoch": 0.616, "grad_norm": 0.19141971158001736, "learning_rate": 4.4976386538405495e-05, "log_odds_chosen": 2.943345546722412, "log_odds_ratio": -0.2832263708114624, "logits/chosen": -2.926583766937256, "logits/chosen_prompt": -2.8340327739715576, "logits/rejected": -2.5858168601989746, "logits/rejected_prompt": -2.8149476051330566, "logps/chosen": -2.0653610229492188, "logps/chosen_both": -2.0445759296417236, "logps/chosen_prompt": -0.8157526254653931, "logps/rejected": -4.919131278991699, "logps/rejected_both": -4.849064350128174, "logps/rejected_prompt": -1.005324125289917, "loss": 2.0024, "nll_loss": 2.0445759296417236, "rewards/accuracies": 1.0, "rewards/chosen": -0.8261443972587585, "rewards/margins": 1.1415081024169922, "rewards/rejected": -1.9676525592803955, "step": 770 }, { "epoch": 0.624, "grad_norm": 0.299820230370255, "learning_rate": 4.484975996619589e-05, "log_odds_chosen": 4.539975166320801, "log_odds_ratio": -0.11812126636505127, "logits/chosen": -2.87815523147583, "logits/chosen_prompt": -2.8412280082702637, "logits/rejected": -2.3637688159942627, "logits/rejected_prompt": -2.8588156700134277, "logps/chosen": -2.4759485721588135, "logps/chosen_both": -2.454190731048584, "logps/chosen_prompt": -0.7899399995803833, "logps/rejected": -6.8973388671875, "logps/rejected_both": -6.819916725158691, "logps/rejected_prompt": -1.066646695137024, "loss": 2.3702, "nll_loss": 2.454133987426758, "rewards/accuracies": 1.0, "rewards/chosen": -0.9903793334960938, "rewards/margins": 1.7685562372207642, "rewards/rejected": -2.7589354515075684, "step": 780 }, { "epoch": 0.632, "grad_norm": 5.192675922080671, "learning_rate": 4.4721740269488355e-05, "log_odds_chosen": 2.496995210647583, "log_odds_ratio": -0.32391008734703064, "logits/chosen": -2.966625213623047, "logits/chosen_prompt": -2.795879602432251, "logits/rejected": -2.514392137527466, "logits/rejected_prompt": -2.783583164215088, "logps/chosen": -2.563605546951294, "logps/chosen_both": -2.541128635406494, "logps/chosen_prompt": -0.9771214723587036, "logps/rejected": -4.989082336425781, "logps/rejected_both": -4.936980724334717, "logps/rejected_prompt": -1.0889393091201782, "loss": 2.1847, "nll_loss": 2.5405211448669434, "rewards/accuracies": 1.0, "rewards/chosen": -1.025442123413086, "rewards/margins": 0.9701908230781555, "rewards/rejected": -1.9956328868865967, "step": 790 }, { "epoch": 0.64, "grad_norm": 0.4695325524437554, "learning_rate": 4.4592336433146e-05, "log_odds_chosen": 5.124607563018799, "log_odds_ratio": -0.018428776413202286, "logits/chosen": -3.051105260848999, "logits/chosen_prompt": -2.8179726600646973, "logits/rejected": -1.909102201461792, "logits/rejected_prompt": -2.7916340827941895, "logps/chosen": -1.8969109058380127, "logps/chosen_both": -1.8779878616333008, "logps/chosen_prompt": -0.8452935218811035, "logps/rejected": -6.845399379730225, "logps/rejected_both": -6.747313022613525, "logps/rejected_prompt": -0.9934666752815247, "loss": 2.0368, "nll_loss": 1.8772528171539307, "rewards/accuracies": 1.0, "rewards/chosen": -0.758764386177063, "rewards/margins": 1.9793955087661743, "rewards/rejected": -2.7381598949432373, "step": 800 }, { "epoch": 0.648, "grad_norm": 0.21280813340257887, "learning_rate": 4.4461557539175594e-05, "log_odds_chosen": 5.451117515563965, "log_odds_ratio": -0.07145892083644867, "logits/chosen": -2.9378345012664795, "logits/chosen_prompt": -2.762908458709717, "logits/rejected": -1.6283600330352783, "logits/rejected_prompt": -2.7498764991760254, "logps/chosen": -2.0257043838500977, "logps/chosen_both": -2.008737087249756, "logps/chosen_prompt": -0.8673852682113647, "logps/rejected": -7.346819877624512, "logps/rejected_both": -7.247427940368652, "logps/rejected_prompt": -1.0632621049880981, "loss": 2.0447, "nll_loss": 2.0078537464141846, "rewards/accuracies": 1.0, "rewards/chosen": -0.8102817535400391, "rewards/margins": 2.128446340560913, "rewards/rejected": -2.938728094100952, "step": 810 }, { "epoch": 0.656, "grad_norm": 0.209653515397789, "learning_rate": 4.432941276609018e-05, "log_odds_chosen": 5.421745777130127, "log_odds_ratio": -0.07243818789720535, "logits/chosen": -2.9660727977752686, "logits/chosen_prompt": -2.805607318878174, "logits/rejected": -1.6398050785064697, "logits/rejected_prompt": -2.7811026573181152, "logps/chosen": -2.0751829147338867, "logps/chosen_both": -2.0558664798736572, "logps/chosen_prompt": -0.7402461767196655, "logps/rejected": -7.376537322998047, "logps/rejected_both": -7.285178184509277, "logps/rejected_prompt": -0.9955169558525085, "loss": 2.1673, "nll_loss": 2.05536150932312, "rewards/accuracies": 1.0, "rewards/chosen": -0.8300731778144836, "rewards/margins": 2.1205410957336426, "rewards/rejected": -2.9506144523620605, "step": 820 }, { "epoch": 0.664, "grad_norm": 0.2932004663372407, "learning_rate": 4.4195911388264946e-05, "log_odds_chosen": 3.337216854095459, "log_odds_ratio": -0.28040507435798645, "logits/chosen": -3.0083236694335938, "logits/chosen_prompt": -2.7438673973083496, "logits/rejected": -2.2188708782196045, "logits/rejected_prompt": -2.710932970046997, "logps/chosen": -1.7532163858413696, "logps/chosen_both": -1.7392990589141846, "logps/chosen_prompt": -0.881622314453125, "logps/rejected": -4.988051891326904, "logps/rejected_both": -4.921896934509277, "logps/rejected_prompt": -0.8814730644226074, "loss": 2.0387, "nll_loss": 1.7385940551757812, "rewards/accuracies": 1.0, "rewards/chosen": -0.7012865543365479, "rewards/margins": 1.2939343452453613, "rewards/rejected": -1.9952208995819092, "step": 830 }, { "epoch": 0.672, "grad_norm": 2.9403489436512475, "learning_rate": 4.40610627752862e-05, "log_odds_chosen": 5.995909690856934, "log_odds_ratio": -0.07048363983631134, "logits/chosen": -2.951843738555908, "logits/chosen_prompt": -2.657824993133545, "logits/rejected": -1.3483891487121582, "logits/rejected_prompt": -2.6459240913391113, "logps/chosen": -2.0297625064849854, "logps/chosen_both": -2.011107921600342, "logps/chosen_prompt": -0.8041833639144897, "logps/rejected": -7.886776924133301, "logps/rejected_both": -7.784094333648682, "logps/rejected_prompt": -0.9874393343925476, "loss": 2.0868, "nll_loss": 2.0107545852661133, "rewards/accuracies": 1.0, "rewards/chosen": -0.8119049072265625, "rewards/margins": 2.342806100845337, "rewards/rejected": -3.1547107696533203, "step": 840 }, { "epoch": 0.68, "grad_norm": 0.1951986041864062, "learning_rate": 4.3924876391293915e-05, "log_odds_chosen": 5.405202865600586, "log_odds_ratio": -0.4933692514896393, "logits/chosen": -2.8229470252990723, "logits/chosen_prompt": -2.70353102684021, "logits/rejected": -1.516230821609497, "logits/rejected_prompt": -2.682372570037842, "logps/chosen": -2.4473724365234375, "logps/chosen_both": -2.4278030395507812, "logps/chosen_prompt": -0.8016360402107239, "logps/rejected": -7.731281280517578, "logps/rejected_both": -7.645183563232422, "logps/rejected_prompt": -0.9825652241706848, "loss": 2.2426, "nll_loss": 2.427164316177368, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9789490699768066, "rewards/margins": 2.113563299179077, "rewards/rejected": -3.092512369155884, "step": 850 }, { "epoch": 0.688, "grad_norm": 0.19812900890844543, "learning_rate": 4.3787361794317405e-05, "log_odds_chosen": 3.4184670448303223, "log_odds_ratio": -0.22132563591003418, "logits/chosen": -2.9762589931488037, "logits/chosen_prompt": -2.764681816101074, "logits/rejected": -2.4695773124694824, "logits/rejected_prompt": -2.739607095718384, "logps/chosen": -1.889784812927246, "logps/chosen_both": -1.8726049661636353, "logps/chosen_prompt": -0.8000418543815613, "logps/rejected": -5.191944122314453, "logps/rejected_both": -5.127084732055664, "logps/rejected_prompt": -0.973870575428009, "loss": 2.0017, "nll_loss": 1.8721071481704712, "rewards/accuracies": 1.0, "rewards/chosen": -0.7559138536453247, "rewards/margins": 1.3208638429641724, "rewards/rejected": -2.076777935028076, "step": 860 }, { "epoch": 0.696, "grad_norm": 1.1208289382374679, "learning_rate": 4.3648528635604556e-05, "log_odds_chosen": 4.736769199371338, "log_odds_ratio": -0.07410699129104614, "logits/chosen": -2.9047577381134033, "logits/chosen_prompt": -2.7688372135162354, "logits/rejected": -2.297377824783325, "logits/rejected_prompt": -2.7379658222198486, "logps/chosen": -2.166656017303467, "logps/chosen_both": -2.149369955062866, "logps/chosen_prompt": -0.7613478899002075, "logps/rejected": -6.790528774261475, "logps/rejected_both": -6.711920738220215, "logps/rejected_prompt": -0.9217512011528015, "loss": 2.19, "nll_loss": 2.1481828689575195, "rewards/accuracies": 1.0, "rewards/chosen": -0.8666625022888184, "rewards/margins": 1.8495492935180664, "rewards/rejected": -2.7162115573883057, "step": 870 }, { "epoch": 0.704, "grad_norm": 0.18802597714184358, "learning_rate": 4.350838665894446e-05, "log_odds_chosen": 3.573579788208008, "log_odds_ratio": -0.2119835913181305, "logits/chosen": -2.9564337730407715, "logits/chosen_prompt": -2.8878400325775146, "logits/rejected": -2.7999844551086426, "logits/rejected_prompt": -2.8850619792938232, "logps/chosen": -2.041067361831665, "logps/chosen_both": -2.0219027996063232, "logps/chosen_prompt": -0.7945634126663208, "logps/rejected": -5.52020788192749, "logps/rejected_both": -5.447958946228027, "logps/rejected_prompt": -0.9404302835464478, "loss": 2.1522, "nll_loss": 2.0212433338165283, "rewards/accuracies": 1.0, "rewards/chosen": -0.8164268732070923, "rewards/margins": 1.3916563987731934, "rewards/rejected": -2.208083152770996, "step": 880 }, { "epoch": 0.712, "grad_norm": 1.3417035590493764, "learning_rate": 4.336694569998354e-05, "log_odds_chosen": 4.419407367706299, "log_odds_ratio": -0.07842884957790375, "logits/chosen": -2.980591297149658, "logits/chosen_prompt": -2.9254255294799805, "logits/rejected": -2.7680697441101074, "logits/rejected_prompt": -2.905561923980713, "logps/chosen": -2.0169148445129395, "logps/chosen_both": -2.0003621578216553, "logps/chosen_prompt": -0.8039913177490234, "logps/rejected": -6.302676200866699, "logps/rejected_both": -6.233563423156738, "logps/rejected_prompt": -0.9547332525253296, "loss": 2.0996, "nll_loss": 2.000209331512451, "rewards/accuracies": 1.0, "rewards/chosen": -0.8067659139633179, "rewards/margins": 1.7143046855926514, "rewards/rejected": -2.5210704803466797, "step": 890 }, { "epoch": 0.72, "grad_norm": 0.17015695407576262, "learning_rate": 4.3224215685535294e-05, "log_odds_chosen": 3.736863613128662, "log_odds_ratio": -0.21099340915679932, "logits/chosen": -2.9480998516082764, "logits/chosen_prompt": -2.909301519393921, "logits/rejected": -2.5860133171081543, "logits/rejected_prompt": -2.8961730003356934, "logps/chosen": -1.99604070186615, "logps/chosen_both": -1.9824683666229248, "logps/chosen_prompt": -0.8537474870681763, "logps/rejected": -5.6191020011901855, "logps/rejected_both": -5.559712886810303, "logps/rejected_prompt": -1.0109044313430786, "loss": 2.0333, "nll_loss": 1.9815161228179932, "rewards/accuracies": 1.0, "rewards/chosen": -0.798416256904602, "rewards/margins": 1.449224591255188, "rewards/rejected": -2.247641086578369, "step": 900 }, { "epoch": 0.728, "grad_norm": 0.1938256131016386, "learning_rate": 4.3080206632883554e-05, "log_odds_chosen": 4.993983745574951, "log_odds_ratio": -0.07278299331665039, "logits/chosen": -2.9305057525634766, "logits/chosen_prompt": -2.8883767127990723, "logits/rejected": -2.744293212890625, "logits/rejected_prompt": -2.865830183029175, "logps/chosen": -1.9137989282608032, "logps/chosen_both": -1.897878646850586, "logps/chosen_prompt": -0.8952886462211609, "logps/rejected": -6.773948669433594, "logps/rejected_both": -6.680284023284912, "logps/rejected_prompt": -1.1111478805541992, "loss": 2.072, "nll_loss": 1.896592378616333, "rewards/accuracies": 1.0, "rewards/chosen": -0.7655196189880371, "rewards/margins": 1.9440600872039795, "rewards/rejected": -2.7095799446105957, "step": 910 }, { "epoch": 0.736, "grad_norm": 0.19422924079693882, "learning_rate": 4.293492864907947e-05, "log_odds_chosen": 4.982480049133301, "log_odds_ratio": -0.07303477078676224, "logits/chosen": -2.897078275680542, "logits/chosen_prompt": -2.8844199180603027, "logits/rejected": -2.5853612422943115, "logits/rejected_prompt": -2.896810531616211, "logps/chosen": -2.046506404876709, "logps/chosen_both": -2.027215003967285, "logps/chosen_prompt": -0.8521916270256042, "logps/rejected": -6.898811340332031, "logps/rejected_both": -6.797191619873047, "logps/rejected_prompt": -1.0783166885375977, "loss": 2.0343, "nll_loss": 2.025817394256592, "rewards/accuracies": 1.0, "rewards/chosen": -0.8186025619506836, "rewards/margins": 1.9409217834472656, "rewards/rejected": -2.7595245838165283, "step": 920 }, { "epoch": 0.744, "grad_norm": 0.17503233577716112, "learning_rate": 4.278839193023214e-05, "log_odds_chosen": 5.051764011383057, "log_odds_ratio": -0.07269078493118286, "logits/chosen": -2.968621015548706, "logits/chosen_prompt": -2.8850250244140625, "logits/rejected": -2.575244426727295, "logits/rejected_prompt": -2.879965305328369, "logps/chosen": -2.0476856231689453, "logps/chosen_both": -2.0287888050079346, "logps/chosen_prompt": -0.8320780992507935, "logps/rejected": -6.972892761230469, "logps/rejected_both": -6.875253200531006, "logps/rejected_prompt": -0.9857944250106812, "loss": 2.4164, "nll_loss": 2.027635335922241, "rewards/accuracies": 1.0, "rewards/chosen": -0.8190741539001465, "rewards/margins": 1.9700825214385986, "rewards/rejected": -2.7891571521759033, "step": 930 }, { "epoch": 0.752, "grad_norm": 47.44652080135077, "learning_rate": 4.264060676079302e-05, "log_odds_chosen": 3.4615960121154785, "log_odds_ratio": -0.25266528129577637, "logits/chosen": -2.9501328468322754, "logits/chosen_prompt": -2.8721659183502197, "logits/rejected": -3.1557369232177734, "logits/rejected_prompt": -2.854639768600464, "logps/chosen": -2.153719425201416, "logps/chosen_both": -2.135387897491455, "logps/chosen_prompt": -0.9698511958122253, "logps/rejected": -5.52289342880249, "logps/rejected_both": -5.454329490661621, "logps/rejected_prompt": -1.0520834922790527, "loss": 2.1268, "nll_loss": 2.1349105834960938, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8614877462387085, "rewards/margins": 1.3476698398590088, "rewards/rejected": -2.2091574668884277, "step": 940 }, { "epoch": 0.76, "grad_norm": 0.19086614631182744, "learning_rate": 4.249158351283414e-05, "log_odds_chosen": 4.672451496124268, "log_odds_ratio": -0.14073383808135986, "logits/chosen": -3.003997325897217, "logits/chosen_prompt": -2.9195713996887207, "logits/rejected": -3.2987685203552246, "logits/rejected_prompt": -2.9031708240509033, "logps/chosen": -2.006805896759033, "logps/chosen_both": -1.9856882095336914, "logps/chosen_prompt": -0.8608209490776062, "logps/rejected": -6.554454803466797, "logps/rejected_both": -6.446510314941406, "logps/rejected_prompt": -1.0303418636322021, "loss": 2.0534, "nll_loss": 1.9856884479522705, "rewards/accuracies": 1.0, "rewards/chosen": -0.8027224540710449, "rewards/margins": 1.8190593719482422, "rewards/rejected": -2.621781826019287, "step": 950 }, { "epoch": 0.768, "grad_norm": 0.16090899053372315, "learning_rate": 4.234133264532012e-05, "log_odds_chosen": 6.077364444732666, "log_odds_ratio": -0.004217286594212055, "logits/chosen": -2.842454433441162, "logits/chosen_prompt": -2.8957276344299316, "logits/rejected": -3.5180137157440186, "logits/rejected_prompt": -2.9135992527008057, "logps/chosen": -1.9932161569595337, "logps/chosen_both": -1.9756605625152588, "logps/chosen_prompt": -0.8626230359077454, "logps/rejected": -7.9156999588012695, "logps/rejected_both": -7.813823699951172, "logps/rejected_prompt": -1.0395594835281372, "loss": 2.0091, "nll_loss": 1.975542664527893, "rewards/accuracies": 1.0, "rewards/chosen": -0.7972863912582397, "rewards/margins": 2.3689935207366943, "rewards/rejected": -3.1662800312042236, "step": 960 }, { "epoch": 0.776, "grad_norm": 0.4432866833057449, "learning_rate": 4.218986470337419e-05, "log_odds_chosen": 5.5125412940979, "log_odds_ratio": -0.07154224812984467, "logits/chosen": -2.9377503395080566, "logits/chosen_prompt": -2.926082134246826, "logits/rejected": -3.535740375518799, "logits/rejected_prompt": -2.9182417392730713, "logps/chosen": -1.919931411743164, "logps/chosen_both": -1.9039018154144287, "logps/chosen_prompt": -0.7944774627685547, "logps/rejected": -7.288356781005859, "logps/rejected_both": -7.193412780761719, "logps/rejected_prompt": -0.9629098773002625, "loss": 2.3092, "nll_loss": 1.9036260843276978, "rewards/accuracies": 1.0, "rewards/chosen": -0.7679725289344788, "rewards/margins": 2.147369861602783, "rewards/rejected": -2.9153425693511963, "step": 970 }, { "epoch": 0.784, "grad_norm": 0.19680727751711977, "learning_rate": 4.2037190317538e-05, "log_odds_chosen": 4.595906734466553, "log_odds_ratio": -0.07939890027046204, "logits/chosen": -2.9524266719818115, "logits/chosen_prompt": -2.790818691253662, "logits/rejected": -2.9070940017700195, "logits/rejected_prompt": -2.781165599822998, "logps/chosen": -1.9940401315689087, "logps/chosen_both": -1.978316068649292, "logps/chosen_prompt": -0.7690817713737488, "logps/rejected": -6.455039024353027, "logps/rejected_both": -6.385528087615967, "logps/rejected_prompt": -0.9404104948043823, "loss": 2.0872, "nll_loss": 1.9778735637664795, "rewards/accuracies": 1.0, "rewards/chosen": -0.7976160049438477, "rewards/margins": 1.7843996286392212, "rewards/rejected": -2.5820157527923584, "step": 980 }, { "epoch": 0.792, "grad_norm": 0.1584132780664858, "learning_rate": 4.188332020302561e-05, "log_odds_chosen": 4.230597496032715, "log_odds_ratio": -0.14310847222805023, "logits/chosen": -2.956609010696411, "logits/chosen_prompt": -2.8512063026428223, "logits/rejected": -2.678597927093506, "logits/rejected_prompt": -2.8333568572998047, "logps/chosen": -1.8776973485946655, "logps/chosen_both": -1.8625962734222412, "logps/chosen_prompt": -0.8090478777885437, "logps/rejected": -5.976474761962891, "logps/rejected_both": -5.902680397033691, "logps/rejected_prompt": -0.9692068099975586, "loss": 1.9999, "nll_loss": 1.861577033996582, "rewards/accuracies": 1.0, "rewards/chosen": -0.7510789632797241, "rewards/margins": 1.6395108699798584, "rewards/rejected": -2.390589952468872, "step": 990 }, { "epoch": 0.8, "grad_norm": 0.18982243368973564, "learning_rate": 4.172826515897146e-05, "log_odds_chosen": 4.3918375968933105, "log_odds_ratio": -0.14247746765613556, "logits/chosen": -2.9714953899383545, "logits/chosen_prompt": -2.824305772781372, "logits/rejected": -2.6518845558166504, "logits/rejected_prompt": -2.8202338218688965, "logps/chosen": -1.8688671588897705, "logps/chosen_both": -1.8508541584014893, "logps/chosen_prompt": -0.9176328778266907, "logps/rejected": -6.10614538192749, "logps/rejected_both": -6.007752418518066, "logps/rejected_prompt": -1.0590510368347168, "loss": 2.0857, "nll_loss": 1.8497679233551025, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.747546911239624, "rewards/margins": 1.694911241531372, "rewards/rejected": -2.442458391189575, "step": 1000 }, { "epoch": 0.808, "grad_norm": 0.15641654006436478, "learning_rate": 4.157203606767238e-05, "log_odds_chosen": 4.2656779289245605, "log_odds_ratio": -0.14230065047740936, "logits/chosen": -2.9932308197021484, "logits/chosen_prompt": -2.830867290496826, "logits/rejected": -2.6234424114227295, "logits/rejected_prompt": -2.8216352462768555, "logps/chosen": -2.024932384490967, "logps/chosen_both": -2.0056064128875732, "logps/chosen_prompt": -0.7936287522315979, "logps/rejected": -6.178097724914551, "logps/rejected_both": -6.095284938812256, "logps/rejected_prompt": -0.9350797533988953, "loss": 2.037, "nll_loss": 2.0045900344848633, "rewards/accuracies": 1.0, "rewards/chosen": -0.809972882270813, "rewards/margins": 1.6612660884857178, "rewards/rejected": -2.4712390899658203, "step": 1010 }, { "epoch": 0.816, "grad_norm": 0.20983648268469735, "learning_rate": 4.1414643893823914e-05, "log_odds_chosen": 4.862036228179932, "log_odds_ratio": -0.07260783016681671, "logits/chosen": -2.9284424781799316, "logits/chosen_prompt": -2.8569953441619873, "logits/rejected": -2.5351157188415527, "logits/rejected_prompt": -2.8426971435546875, "logps/chosen": -2.1229608058929443, "logps/chosen_both": -2.10365629196167, "logps/chosen_prompt": -0.8154341578483582, "logps/rejected": -6.869643211364746, "logps/rejected_both": -6.7744574546813965, "logps/rejected_prompt": -0.9435701370239258, "loss": 2.1102, "nll_loss": 2.1023664474487305, "rewards/accuracies": 1.0, "rewards/chosen": -0.8491843342781067, "rewards/margins": 1.8986728191375732, "rewards/rejected": -2.7478575706481934, "step": 1020 }, { "epoch": 0.824, "grad_norm": 0.16714535237522857, "learning_rate": 4.125609968375072e-05, "log_odds_chosen": 5.137936115264893, "log_odds_ratio": -0.0722423866391182, "logits/chosen": -2.917429208755493, "logits/chosen_prompt": -2.805572509765625, "logits/rejected": -2.4986531734466553, "logits/rejected_prompt": -2.7935025691986084, "logps/chosen": -1.898790717124939, "logps/chosen_both": -1.88314688205719, "logps/chosen_prompt": -0.8224050402641296, "logps/rejected": -6.880563259124756, "logps/rejected_both": -6.7928266525268555, "logps/rejected_prompt": -0.9875515699386597, "loss": 2.0572, "nll_loss": 1.8828372955322266, "rewards/accuracies": 1.0, "rewards/chosen": -0.7595163583755493, "rewards/margins": 1.9927089214324951, "rewards/rejected": -2.752225637435913, "step": 1030 }, { "epoch": 0.832, "grad_norm": 0.17116655114302515, "learning_rate": 4.109641456463135e-05, "log_odds_chosen": 4.716578006744385, "log_odds_ratio": -0.05661363527178764, "logits/chosen": -2.9051055908203125, "logits/chosen_prompt": -2.861964702606201, "logits/rejected": -2.489297866821289, "logits/rejected_prompt": -2.8317601680755615, "logps/chosen": -2.72660493850708, "logps/chosen_both": -2.6989545822143555, "logps/chosen_prompt": -0.786345899105072, "logps/rejected": -7.32622766494751, "logps/rejected_both": -7.235006809234619, "logps/rejected_prompt": -0.9496296048164368, "loss": 2.0544, "nll_loss": 2.698387622833252, "rewards/accuracies": 1.0, "rewards/chosen": -1.0906422138214111, "rewards/margins": 1.8398488759994507, "rewards/rejected": -2.9304909706115723, "step": 1040 }, { "epoch": 0.84, "grad_norm": 2.0421569101702004, "learning_rate": 4.093559974371725e-05, "log_odds_chosen": 4.683531284332275, "log_odds_ratio": -0.14838626980781555, "logits/chosen": -2.983940601348877, "logits/chosen_prompt": -2.8726494312286377, "logits/rejected": -2.683384418487549, "logits/rejected_prompt": -2.844991683959961, "logps/chosen": -1.7734657526016235, "logps/chosen_both": -1.762310266494751, "logps/chosen_prompt": -0.8980112075805664, "logps/rejected": -6.07004976272583, "logps/rejected_both": -5.987616062164307, "logps/rejected_prompt": -1.1182132959365845, "loss": 2.145, "nll_loss": 1.7613089084625244, "rewards/accuracies": 1.0, "rewards/chosen": -0.7093862891197205, "rewards/margins": 1.7186336517333984, "rewards/rejected": -2.4280200004577637, "step": 1050 }, { "epoch": 0.848, "grad_norm": 0.1891364752116159, "learning_rate": 4.077366650754624e-05, "log_odds_chosen": 4.3087382316589355, "log_odds_ratio": -0.1364879608154297, "logits/chosen": -2.9432783126831055, "logits/chosen_prompt": -2.815147638320923, "logits/rejected": -2.721280097961426, "logits/rejected_prompt": -2.818236827850342, "logps/chosen": -1.8882700204849243, "logps/chosen_both": -1.8756290674209595, "logps/chosen_prompt": -0.8526128530502319, "logps/rejected": -6.065881729125977, "logps/rejected_both": -6.0042314529418945, "logps/rejected_prompt": -0.9744648933410645, "loss": 2.1355, "nll_loss": 1.8748886585235596, "rewards/accuracies": 1.0, "rewards/chosen": -0.7553080320358276, "rewards/margins": 1.6710445880889893, "rewards/rejected": -2.4263527393341064, "step": 1060 }, { "epoch": 0.856, "grad_norm": 33.963511456298086, "learning_rate": 4.0610626221150394e-05, "log_odds_chosen": 4.251172065734863, "log_odds_ratio": -0.09040095657110214, "logits/chosen": -2.9414284229278564, "logits/chosen_prompt": -2.8389973640441895, "logits/rejected": -2.8033430576324463, "logits/rejected_prompt": -2.82332706451416, "logps/chosen": -1.9342035055160522, "logps/chosen_both": -1.9176651239395142, "logps/chosen_prompt": -0.8298524022102356, "logps/rejected": -6.048348903656006, "logps/rejected_both": -5.979620933532715, "logps/rejected_prompt": -0.9826586842536926, "loss": 2.0673, "nll_loss": 1.9169620275497437, "rewards/accuracies": 1.0, "rewards/chosen": -0.7736814618110657, "rewards/margins": 1.645658254623413, "rewards/rejected": -2.419339656829834, "step": 1070 }, { "epoch": 0.864, "grad_norm": 5.339928107993312, "learning_rate": 4.044649032725836e-05, "log_odds_chosen": 4.668586730957031, "log_odds_ratio": -0.04072408378124237, "logits/chosen": -2.9805121421813965, "logits/chosen_prompt": -2.858212947845459, "logits/rejected": -2.779395580291748, "logits/rejected_prompt": -2.8353207111358643, "logps/chosen": -2.4372153282165527, "logps/chosen_both": -2.4168477058410645, "logps/chosen_prompt": -0.7482016086578369, "logps/rejected": -6.966684818267822, "logps/rejected_both": -6.886708736419678, "logps/rejected_prompt": -0.9111725687980652, "loss": 2.1177, "nll_loss": 2.4160780906677246, "rewards/accuracies": 1.0, "rewards/chosen": -0.9748862981796265, "rewards/margins": 1.8117873668670654, "rewards/rejected": -2.7866737842559814, "step": 1080 }, { "epoch": 0.872, "grad_norm": 0.2060230046824354, "learning_rate": 4.028127034549229e-05, "log_odds_chosen": 2.597301483154297, "log_odds_ratio": -0.6685577630996704, "logits/chosen": -2.9436233043670654, "logits/chosen_prompt": -2.8545641899108887, "logits/rejected": -2.8262507915496826, "logits/rejected_prompt": -2.8353445529937744, "logps/chosen": -2.3411784172058105, "logps/chosen_both": -2.3227829933166504, "logps/chosen_prompt": -0.7935237884521484, "logps/rejected": -4.853774070739746, "logps/rejected_both": -4.805240154266357, "logps/rejected_prompt": -0.958962082862854, "loss": 2.139, "nll_loss": 2.3222460746765137, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9364713430404663, "rewards/margins": 1.0050380229949951, "rewards/rejected": -1.941509485244751, "step": 1090 }, { "epoch": 0.88, "grad_norm": 0.17774111122195055, "learning_rate": 4.011497787155938e-05, "log_odds_chosen": 4.53702449798584, "log_odds_ratio": -0.02008737251162529, "logits/chosen": -2.898667335510254, "logits/chosen_prompt": -2.8412561416625977, "logits/rejected": -2.799050807952881, "logits/rejected_prompt": -2.819329023361206, "logps/chosen": -2.120091438293457, "logps/chosen_both": -2.0994343757629395, "logps/chosen_prompt": -0.7898808717727661, "logps/rejected": -6.5274176597595215, "logps/rejected_both": -6.440402030944824, "logps/rejected_prompt": -1.0125057697296143, "loss": 2.0681, "nll_loss": 2.0985283851623535, "rewards/accuracies": 1.0, "rewards/chosen": -0.8480366468429565, "rewards/margins": 1.7629306316375732, "rewards/rejected": -2.6109673976898193, "step": 1100 }, { "epoch": 0.888, "grad_norm": 0.5492835402833951, "learning_rate": 3.9947624576437975e-05, "log_odds_chosen": 3.65099835395813, "log_odds_ratio": -0.21185067296028137, "logits/chosen": -2.8890416622161865, "logits/chosen_prompt": -2.8260998725891113, "logits/rejected": -2.8036818504333496, "logits/rejected_prompt": -2.8174471855163574, "logps/chosen": -2.0846400260925293, "logps/chosen_both": -2.065948247909546, "logps/chosen_prompt": -0.8428912162780762, "logps/rejected": -5.634668350219727, "logps/rejected_both": -5.555979251861572, "logps/rejected_prompt": -1.0157763957977295, "loss": 2.128, "nll_loss": 2.065037488937378, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8338559865951538, "rewards/margins": 1.420011281967163, "rewards/rejected": -2.2538673877716064, "step": 1110 }, { "epoch": 0.896, "grad_norm": 0.2391375753226414, "learning_rate": 3.977922220555855e-05, "log_odds_chosen": 4.121129989624023, "log_odds_ratio": -0.2298469990491867, "logits/chosen": -2.969383955001831, "logits/chosen_prompt": -2.841618061065674, "logits/rejected": -2.8132920265197754, "logits/rejected_prompt": -2.8176777362823486, "logps/chosen": -2.3696742057800293, "logps/chosen_both": -2.350247621536255, "logps/chosen_prompt": -0.8721768260002136, "logps/rejected": -6.348196029663086, "logps/rejected_both": -6.277990818023682, "logps/rejected_prompt": -1.0750401020050049, "loss": 2.1621, "nll_loss": 2.3494279384613037, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9478696584701538, "rewards/margins": 1.591408610343933, "rewards/rejected": -2.539278268814087, "step": 1120 }, { "epoch": 0.904, "grad_norm": 1.0869471605926033, "learning_rate": 3.960978257797931e-05, "log_odds_chosen": 3.306716203689575, "log_odds_ratio": -0.17165422439575195, "logits/chosen": -2.901864767074585, "logits/chosen_prompt": -2.8563239574432373, "logits/rejected": -2.815932273864746, "logits/rejected_prompt": -2.829672336578369, "logps/chosen": -2.3288769721984863, "logps/chosen_both": -2.307668447494507, "logps/chosen_prompt": -0.8160017132759094, "logps/rejected": -5.531130790710449, "logps/rejected_both": -5.466065406799316, "logps/rejected_prompt": -0.9807281494140625, "loss": 2.0755, "nll_loss": 2.3062796592712402, "rewards/accuracies": 1.0, "rewards/chosen": -0.9315508604049683, "rewards/margins": 1.280901551246643, "rewards/rejected": -2.2124524116516113, "step": 1130 }, { "epoch": 0.912, "grad_norm": 0.21229431870443033, "learning_rate": 3.943931758555669e-05, "log_odds_chosen": 4.015295505523682, "log_odds_ratio": -0.14405557513237, "logits/chosen": -2.9465222358703613, "logits/chosen_prompt": -2.830146074295044, "logits/rejected": -2.7873902320861816, "logits/rejected_prompt": -2.8030102252960205, "logps/chosen": -1.9876712560653687, "logps/chosen_both": -1.9711806774139404, "logps/chosen_prompt": -0.8330597877502441, "logps/rejected": -5.87436580657959, "logps/rejected_both": -5.79966402053833, "logps/rejected_prompt": -1.0102033615112305, "loss": 1.9833, "nll_loss": 1.9705440998077393, "rewards/accuracies": 1.0, "rewards/chosen": -0.7950685620307922, "rewards/margins": 1.554678201675415, "rewards/rejected": -2.3497467041015625, "step": 1140 }, { "epoch": 0.92, "grad_norm": 0.18607892338713655, "learning_rate": 3.92678391921108e-05, "log_odds_chosen": 4.167088985443115, "log_odds_ratio": -0.081887386739254, "logits/chosen": -2.9688785076141357, "logits/chosen_prompt": -2.8491876125335693, "logits/rejected": -2.8233845233917236, "logits/rejected_prompt": -2.836411237716675, "logps/chosen": -2.0486931800842285, "logps/chosen_both": -2.0284764766693115, "logps/chosen_prompt": -0.8191589117050171, "logps/rejected": -6.082810878753662, "logps/rejected_both": -5.993044853210449, "logps/rejected_prompt": -0.957076907157898, "loss": 2.086, "nll_loss": 2.0268213748931885, "rewards/accuracies": 1.0, "rewards/chosen": -0.8194772601127625, "rewards/margins": 1.613647222518921, "rewards/rejected": -2.433124303817749, "step": 1150 }, { "epoch": 0.928, "grad_norm": 0.21278740734057763, "learning_rate": 3.909535943258567e-05, "log_odds_chosen": 4.548261642456055, "log_odds_ratio": -0.07581990212202072, "logits/chosen": -3.092094898223877, "logits/chosen_prompt": -2.8779349327087402, "logits/rejected": -2.840526580810547, "logits/rejected_prompt": -2.8706183433532715, "logps/chosen": -1.943817138671875, "logps/chosen_both": -1.9261138439178467, "logps/chosen_prompt": -0.8740865588188171, "logps/rejected": -6.346927642822266, "logps/rejected_both": -6.251557350158691, "logps/rejected_prompt": -1.028618574142456, "loss": 2.0516, "nll_loss": 1.9256139993667603, "rewards/accuracies": 1.0, "rewards/chosen": -0.77752685546875, "rewards/margins": 1.7612441778182983, "rewards/rejected": -2.538771152496338, "step": 1160 }, { "epoch": 0.936, "grad_norm": 2.074191616812015, "learning_rate": 3.8921890412204705e-05, "log_odds_chosen": 3.9714667797088623, "log_odds_ratio": -0.10122326761484146, "logits/chosen": -2.9742226600646973, "logits/chosen_prompt": -2.8603179454803467, "logits/rejected": -2.8532581329345703, "logits/rejected_prompt": -2.833484172821045, "logps/chosen": -2.3508994579315186, "logps/chosen_both": -2.333052158355713, "logps/chosen_prompt": -0.8015215992927551, "logps/rejected": -6.174811363220215, "logps/rejected_both": -6.111483573913574, "logps/rejected_prompt": -1.0183693170547485, "loss": 2.2824, "nll_loss": 2.3322701454162598, "rewards/accuracies": 1.0, "rewards/chosen": -0.9403597712516785, "rewards/margins": 1.5295648574829102, "rewards/rejected": -2.4699246883392334, "step": 1170 }, { "epoch": 0.944, "grad_norm": 0.2875489978173768, "learning_rate": 3.8747444305621e-05, "log_odds_chosen": 4.248479843139648, "log_odds_ratio": -0.08145709335803986, "logits/chosen": -2.950727939605713, "logits/chosen_prompt": -2.822025775909424, "logits/rejected": -2.663987398147583, "logits/rejected_prompt": -2.8115882873535156, "logps/chosen": -1.9704688787460327, "logps/chosen_both": -1.9537798166275024, "logps/chosen_prompt": -0.8284621238708496, "logps/rejected": -6.081311225891113, "logps/rejected_both": -6.007387161254883, "logps/rejected_prompt": -1.0018432140350342, "loss": 1.9987, "nll_loss": 1.9535901546478271, "rewards/accuracies": 1.0, "rewards/chosen": -0.7881874442100525, "rewards/margins": 1.6443370580673218, "rewards/rejected": -2.4325246810913086, "step": 1180 }, { "epoch": 0.952, "grad_norm": 6.520768567954707, "learning_rate": 3.8572033356062943e-05, "log_odds_chosen": 3.6630382537841797, "log_odds_ratio": -0.1266271471977234, "logits/chosen": -2.9928297996520996, "logits/chosen_prompt": -2.8252012729644775, "logits/rejected": -2.722259521484375, "logits/rejected_prompt": -2.7941107749938965, "logps/chosen": -2.0680882930755615, "logps/chosen_both": -2.0539040565490723, "logps/chosen_prompt": -0.7603567838668823, "logps/rejected": -5.370635032653809, "logps/rejected_both": -5.302577018737793, "logps/rejected_prompt": -1.007256031036377, "loss": 2.1861, "nll_loss": 2.052879810333252, "rewards/accuracies": 1.0, "rewards/chosen": -0.8272353410720825, "rewards/margins": 1.3210185766220093, "rewards/rejected": -2.148253917694092, "step": 1190 }, { "epoch": 0.96, "grad_norm": 2.68559023802143, "learning_rate": 3.8395669874474915e-05, "log_odds_chosen": 4.359891414642334, "log_odds_ratio": -0.015468957833945751, "logits/chosen": -2.91310453414917, "logits/chosen_prompt": -2.7794852256774902, "logits/rejected": -2.6371960639953613, "logits/rejected_prompt": -2.7625763416290283, "logps/chosen": -1.8540757894515991, "logps/chosen_both": -1.839600920677185, "logps/chosen_prompt": -0.8248388171195984, "logps/rejected": -6.038485527038574, "logps/rejected_both": -5.962553977966309, "logps/rejected_prompt": -0.9856597185134888, "loss": 2.0673, "nll_loss": 1.8394546508789062, "rewards/accuracies": 1.0, "rewards/chosen": -0.7416303753852844, "rewards/margins": 1.6737639904022217, "rewards/rejected": -2.4153940677642822, "step": 1200 }, { "epoch": 0.968, "grad_norm": 0.185073881578095, "learning_rate": 3.821836623865329e-05, "log_odds_chosen": 4.161174297332764, "log_odds_ratio": -0.07971666753292084, "logits/chosen": -2.903371572494507, "logits/chosen_prompt": -2.778414487838745, "logits/rejected": -2.5587830543518066, "logits/rejected_prompt": -2.762293815612793, "logps/chosen": -2.1283013820648193, "logps/chosen_both": -2.1046059131622314, "logps/chosen_prompt": -0.7429525852203369, "logps/rejected": -6.169132232666016, "logps/rejected_both": -6.081439018249512, "logps/rejected_prompt": -0.9049463272094727, "loss": 2.2118, "nll_loss": 2.104139566421509, "rewards/accuracies": 1.0, "rewards/chosen": -0.8513206243515015, "rewards/margins": 1.6163326501846313, "rewards/rejected": -2.467653274536133, "step": 1210 }, { "epoch": 0.976, "grad_norm": 0.19264797772361533, "learning_rate": 3.80401348923777e-05, "log_odds_chosen": 4.120739936828613, "log_odds_ratio": -0.14354461431503296, "logits/chosen": -2.9424567222595215, "logits/chosen_prompt": -2.7921371459960938, "logits/rejected": -2.5477294921875, "logits/rejected_prompt": -2.7542147636413574, "logps/chosen": -1.913551688194275, "logps/chosen_both": -1.8978935480117798, "logps/chosen_prompt": -0.8339295387268066, "logps/rejected": -5.9061384201049805, "logps/rejected_both": -5.837408542633057, "logps/rejected_prompt": -0.9619489908218384, "loss": 2.0995, "nll_loss": 1.8977426290512085, "rewards/accuracies": 1.0, "rewards/chosen": -0.7654207348823547, "rewards/margins": 1.5970344543457031, "rewards/rejected": -2.362455129623413, "step": 1220 }, { "epoch": 0.984, "grad_norm": 9.292901066306287, "learning_rate": 3.786098834453766e-05, "log_odds_chosen": 3.505579710006714, "log_odds_ratio": -0.15101362764835358, "logits/chosen": -2.910395622253418, "logits/chosen_prompt": -2.8129782676696777, "logits/rejected": -2.574031352996826, "logits/rejected_prompt": -2.782696008682251, "logps/chosen": -2.1372461318969727, "logps/chosen_both": -2.112764835357666, "logps/chosen_prompt": -0.8219666481018066, "logps/rejected": -5.543887138366699, "logps/rejected_both": -5.4572343826293945, "logps/rejected_prompt": -0.9813167452812195, "loss": 2.0645, "nll_loss": 2.111912488937378, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8548984527587891, "rewards/margins": 1.3626563549041748, "rewards/rejected": -2.217555046081543, "step": 1230 }, { "epoch": 0.992, "grad_norm": 1.6964209385464728, "learning_rate": 3.7680939168254733e-05, "log_odds_chosen": 3.888018846511841, "log_odds_ratio": -0.1449870765209198, "logits/chosen": -2.9042837619781494, "logits/chosen_prompt": -2.823965549468994, "logits/rejected": -2.4834845066070557, "logits/rejected_prompt": -2.7938156127929688, "logps/chosen": -2.0088438987731934, "logps/chosen_both": -1.9936256408691406, "logps/chosen_prompt": -0.7543269395828247, "logps/rejected": -5.783638000488281, "logps/rejected_both": -5.7258687019348145, "logps/rejected_prompt": -0.9668887257575989, "loss": 2.038, "nll_loss": 1.992997169494629, "rewards/accuracies": 1.0, "rewards/chosen": -0.8035375475883484, "rewards/margins": 1.5099177360534668, "rewards/rejected": -2.31345534324646, "step": 1240 }, { "epoch": 1.0, "grad_norm": 2.478887419876043, "learning_rate": 3.7500000000000003e-05, "log_odds_chosen": 5.275210380554199, "log_odds_ratio": -0.006285688374191523, "logits/chosen": -2.9461379051208496, "logits/chosen_prompt": -2.7684402465820312, "logits/rejected": -2.312152147293091, "logits/rejected_prompt": -2.7450311183929443, "logps/chosen": -1.8539674282073975, "logps/chosen_both": -1.839685082435608, "logps/chosen_prompt": -0.8559527397155762, "logps/rejected": -6.958900451660156, "logps/rejected_both": -6.868790626525879, "logps/rejected_prompt": -1.0536139011383057, "loss": 2.2404, "nll_loss": 1.8390467166900635, "rewards/accuracies": 1.0, "rewards/chosen": -0.7415870428085327, "rewards/margins": 2.04197359085083, "rewards/rejected": -2.783560276031494, "step": 1250 }, { "epoch": 1.008, "grad_norm": 0.17878604739151382, "learning_rate": 3.731818353870729e-05, "log_odds_chosen": 4.191466331481934, "log_odds_ratio": -0.09246650338172913, "logits/chosen": -2.957552433013916, "logits/chosen_prompt": -2.771613359451294, "logits/rejected": -2.3375356197357178, "logits/rejected_prompt": -2.7522428035736084, "logps/chosen": -1.989243745803833, "logps/chosen_both": -1.9734690189361572, "logps/chosen_prompt": -0.8279644250869751, "logps/rejected": -6.043200969696045, "logps/rejected_both": -5.973423480987549, "logps/rejected_prompt": -1.0317699909210205, "loss": 2.0389, "nll_loss": 1.9726651906967163, "rewards/accuracies": 1.0, "rewards/chosen": -0.7956975102424622, "rewards/margins": 1.6215832233428955, "rewards/rejected": -2.417280673980713, "step": 1260 }, { "epoch": 1.016, "grad_norm": 23.252626998417625, "learning_rate": 3.713550254488185e-05, "log_odds_chosen": 3.7449231147766113, "log_odds_ratio": -0.16642269492149353, "logits/chosen": -2.8947479724884033, "logits/chosen_prompt": -2.7788119316101074, "logits/rejected": -2.3416316509246826, "logits/rejected_prompt": -2.760896921157837, "logps/chosen": -2.020059585571289, "logps/chosen_both": -2.0054023265838623, "logps/chosen_prompt": -0.8935413360595703, "logps/rejected": -5.6518659591674805, "logps/rejected_both": -5.590303897857666, "logps/rejected_prompt": -1.0056589841842651, "loss": 2.0643, "nll_loss": 2.0046825408935547, "rewards/accuracies": 1.0, "rewards/chosen": -0.8080238103866577, "rewards/margins": 1.452722430229187, "rewards/rejected": -2.2607462406158447, "step": 1270 }, { "epoch": 1.024, "grad_norm": 0.1852421213956056, "learning_rate": 3.695196983970481e-05, "log_odds_chosen": 5.502694129943848, "log_odds_ratio": -0.07146742194890976, "logits/chosen": -2.9081971645355225, "logits/chosen_prompt": -2.745790719985962, "logits/rejected": -2.0626957416534424, "logits/rejected_prompt": -2.7173855304718018, "logps/chosen": -1.7873703241348267, "logps/chosen_both": -1.7739589214324951, "logps/chosen_prompt": -0.8900352716445923, "logps/rejected": -7.1119537353515625, "logps/rejected_both": -7.017317295074463, "logps/rejected_prompt": -1.0950191020965576, "loss": 2.0059, "nll_loss": 1.7733700275421143, "rewards/accuracies": 1.0, "rewards/chosen": -0.7149480581283569, "rewards/margins": 2.129833698272705, "rewards/rejected": -2.8447818756103516, "step": 1280 }, { "epoch": 1.032, "grad_norm": 0.1901267311863244, "learning_rate": 3.6767598304133324e-05, "log_odds_chosen": 4.644869804382324, "log_odds_ratio": -0.14166082441806793, "logits/chosen": -2.9974873065948486, "logits/chosen_prompt": -2.7224061489105225, "logits/rejected": -2.2138378620147705, "logits/rejected_prompt": -2.6832873821258545, "logps/chosen": -1.9028959274291992, "logps/chosen_both": -1.8842157125473022, "logps/chosen_prompt": -0.8141298294067383, "logps/rejected": -6.421015739440918, "logps/rejected_both": -6.323419094085693, "logps/rejected_prompt": -0.979651153087616, "loss": 1.9806, "nll_loss": 1.8838021755218506, "rewards/accuracies": 1.0, "rewards/chosen": -0.7611583471298218, "rewards/margins": 1.8072481155395508, "rewards/rejected": -2.568406581878662, "step": 1290 }, { "epoch": 1.04, "grad_norm": 0.1720032056568101, "learning_rate": 3.6582400877996546e-05, "log_odds_chosen": 5.198369026184082, "log_odds_ratio": -0.07235782593488693, "logits/chosen": -2.8921890258789062, "logits/chosen_prompt": -2.7482800483703613, "logits/rejected": -1.9527368545532227, "logits/rejected_prompt": -2.7276439666748047, "logps/chosen": -2.0934653282165527, "logps/chosen_both": -2.076221227645874, "logps/chosen_prompt": -0.8200351595878601, "logps/rejected": -7.170855522155762, "logps/rejected_both": -7.079026699066162, "logps/rejected_prompt": -0.9832828640937805, "loss": 2.0527, "nll_loss": 2.075456380844116, "rewards/accuracies": 1.0, "rewards/chosen": -0.8373861312866211, "rewards/margins": 2.0309560298919678, "rewards/rejected": -2.868342161178589, "step": 1300 }, { "epoch": 1.048, "grad_norm": 0.18338089227039325, "learning_rate": 3.639639055908751e-05, "log_odds_chosen": 5.48695707321167, "log_odds_ratio": -0.07169006019830704, "logits/chosen": -2.874192953109741, "logits/chosen_prompt": -2.733611583709717, "logits/rejected": -1.8326069116592407, "logits/rejected_prompt": -2.6982951164245605, "logps/chosen": -2.0102884769439697, "logps/chosen_both": -1.9914735555648804, "logps/chosen_prompt": -0.8337292671203613, "logps/rejected": -7.363123416900635, "logps/rejected_both": -7.263747215270996, "logps/rejected_prompt": -0.9874321818351746, "loss": 1.9824, "nll_loss": 1.9909473657608032, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8041152954101562, "rewards/margins": 2.141134023666382, "rewards/rejected": -2.945249319076538, "step": 1310 }, { "epoch": 1.056, "grad_norm": 0.1837356662895363, "learning_rate": 3.6209580402250815e-05, "log_odds_chosen": 5.6873369216918945, "log_odds_ratio": -0.07120365649461746, "logits/chosen": -2.9526381492614746, "logits/chosen_prompt": -2.7081189155578613, "logits/rejected": -1.8793054819107056, "logits/rejected_prompt": -2.6829447746276855, "logps/chosen": -1.9104582071304321, "logps/chosen_both": -1.8940789699554443, "logps/chosen_prompt": -0.8755657076835632, "logps/rejected": -7.447749137878418, "logps/rejected_both": -7.334907531738281, "logps/rejected_prompt": -1.0553802251815796, "loss": 2.1442, "nll_loss": 1.8928571939468384, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7641832828521729, "rewards/margins": 2.214916706085205, "rewards/rejected": -2.979099750518799, "step": 1320 }, { "epoch": 1.064, "grad_norm": 0.3951461995742214, "learning_rate": 3.602198351846647e-05, "log_odds_chosen": 4.024718761444092, "log_odds_ratio": -0.5831412672996521, "logits/chosen": -2.981672525405884, "logits/chosen_prompt": -2.7551183700561523, "logits/rejected": -2.1212754249572754, "logits/rejected_prompt": -2.7351596355438232, "logps/chosen": -2.4395077228546143, "logps/chosen_both": -2.417250871658325, "logps/chosen_prompt": -0.8564618825912476, "logps/rejected": -6.365363597869873, "logps/rejected_both": -6.2751054763793945, "logps/rejected_prompt": -1.031884789466858, "loss": 2.2375, "nll_loss": 2.4155256748199463, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9758030772209167, "rewards/margins": 1.5703424215316772, "rewards/rejected": -2.5461456775665283, "step": 1330 }, { "epoch": 1.072, "grad_norm": 0.18983825409437058, "learning_rate": 3.5833613073929684e-05, "log_odds_chosen": 4.155622482299805, "log_odds_ratio": -0.14320290088653564, "logits/chosen": -3.005096673965454, "logits/chosen_prompt": -2.8319993019104004, "logits/rejected": -2.3421382904052734, "logits/rejected_prompt": -2.8086118698120117, "logps/chosen": -1.9423980712890625, "logps/chosen_both": -1.9247316122055054, "logps/chosen_prompt": -0.7214570045471191, "logps/rejected": -5.971634864807129, "logps/rejected_both": -5.893637657165527, "logps/rejected_prompt": -0.9021228551864624, "loss": 2.092, "nll_loss": 1.9240925312042236, "rewards/accuracies": 1.0, "rewards/chosen": -0.7769593000411987, "rewards/margins": 1.6116949319839478, "rewards/rejected": -2.3886542320251465, "step": 1340 }, { "epoch": 1.08, "grad_norm": 0.6083000414245734, "learning_rate": 3.564448228912682e-05, "log_odds_chosen": 4.163081169128418, "log_odds_ratio": -0.10094372928142548, "logits/chosen": -2.963536262512207, "logits/chosen_prompt": -2.846693515777588, "logits/rejected": -2.542693614959717, "logits/rejected_prompt": -2.819491386413574, "logps/chosen": -2.337949275970459, "logps/chosen_both": -2.3158886432647705, "logps/chosen_prompt": -0.845288872718811, "logps/rejected": -6.368934154510498, "logps/rejected_both": -6.2805986404418945, "logps/rejected_prompt": -1.0301436185836792, "loss": 2.0382, "nll_loss": 2.3151814937591553, "rewards/accuracies": 1.0, "rewards/chosen": -0.9351798295974731, "rewards/margins": 1.6123939752578735, "rewards/rejected": -2.5475735664367676, "step": 1350 }, { "epoch": 1.088, "grad_norm": 0.2154750785789702, "learning_rate": 3.545460443790753e-05, "log_odds_chosen": 5.453991889953613, "log_odds_ratio": -0.004712260328233242, "logits/chosen": -2.908536434173584, "logits/chosen_prompt": -2.868119716644287, "logits/rejected": -2.40228533744812, "logits/rejected_prompt": -2.843205451965332, "logps/chosen": -2.089245319366455, "logps/chosen_both": -2.072594165802002, "logps/chosen_prompt": -0.8769745826721191, "logps/rejected": -7.407778739929199, "logps/rejected_both": -7.316309928894043, "logps/rejected_prompt": -0.9720737338066101, "loss": 2.0088, "nll_loss": 2.071500778198242, "rewards/accuracies": 1.0, "rewards/chosen": -0.8356983065605164, "rewards/margins": 2.127413749694824, "rewards/rejected": -2.9631123542785645, "step": 1360 }, { "epoch": 1.096, "grad_norm": 0.1857265942387655, "learning_rate": 3.52639928465532e-05, "log_odds_chosen": 4.4336113929748535, "log_odds_ratio": -0.14170871675014496, "logits/chosen": -3.0002169609069824, "logits/chosen_prompt": -2.8658928871154785, "logits/rejected": -2.558640956878662, "logits/rejected_prompt": -2.843383550643921, "logps/chosen": -1.8998088836669922, "logps/chosen_both": -1.8837999105453491, "logps/chosen_prompt": -0.8331824541091919, "logps/rejected": -6.191910266876221, "logps/rejected_both": -6.1067986488342285, "logps/rejected_prompt": -0.9438120126724243, "loss": 2.0577, "nll_loss": 1.883371353149414, "rewards/accuracies": 1.0, "rewards/chosen": -0.7599235773086548, "rewards/margins": 1.7168405055999756, "rewards/rejected": -2.47676420211792, "step": 1370 }, { "epoch": 1.104, "grad_norm": 0.22400309241356314, "learning_rate": 3.507266089284157e-05, "log_odds_chosen": 5.497137069702148, "log_odds_ratio": -0.004467605613172054, "logits/chosen": -2.9908201694488525, "logits/chosen_prompt": -2.821722984313965, "logits/rejected": -2.416836977005005, "logits/rejected_prompt": -2.796220541000366, "logps/chosen": -1.8564481735229492, "logps/chosen_both": -1.8395103216171265, "logps/chosen_prompt": -0.8639839291572571, "logps/rejected": -7.180043697357178, "logps/rejected_both": -7.0766448974609375, "logps/rejected_prompt": -0.9959409832954407, "loss": 2.0609, "nll_loss": 1.8391234874725342, "rewards/accuracies": 1.0, "rewards/chosen": -0.7425792813301086, "rewards/margins": 2.1294379234313965, "rewards/rejected": -2.8720173835754395, "step": 1380 }, { "epoch": 1.112, "grad_norm": 0.19384408681406856, "learning_rate": 3.488062200510791e-05, "log_odds_chosen": 5.338822841644287, "log_odds_ratio": -0.00644069816917181, "logits/chosen": -2.959766387939453, "logits/chosen_prompt": -2.7905402183532715, "logits/rejected": -2.3757593631744385, "logits/rejected_prompt": -2.763526678085327, "logps/chosen": -1.9314730167388916, "logps/chosen_both": -1.9157222509384155, "logps/chosen_prompt": -0.8981779217720032, "logps/rejected": -7.111077785491943, "logps/rejected_both": -7.018582344055176, "logps/rejected_prompt": -0.9950772523880005, "loss": 1.9482, "nll_loss": 1.9154551029205322, "rewards/accuracies": 1.0, "rewards/chosen": -0.7725892066955566, "rewards/margins": 2.0718419551849365, "rewards/rejected": -2.8444314002990723, "step": 1390 }, { "epoch": 1.12, "grad_norm": 60.752749653266065, "learning_rate": 3.4687889661302576e-05, "log_odds_chosen": 4.680363655090332, "log_odds_ratio": -0.03717372566461563, "logits/chosen": -2.920323610305786, "logits/chosen_prompt": -2.8357200622558594, "logits/rejected": -2.4031760692596436, "logits/rejected_prompt": -2.802396535873413, "logps/chosen": -2.005197286605835, "logps/chosen_both": -1.9863475561141968, "logps/chosen_prompt": -0.7522888779640198, "logps/rejected": -6.545997619628906, "logps/rejected_both": -6.455955505371094, "logps/rejected_prompt": -0.965649425983429, "loss": 2.0466, "nll_loss": 1.985174536705017, "rewards/accuracies": 1.0, "rewards/chosen": -0.802078902721405, "rewards/margins": 1.8163198232650757, "rewards/rejected": -2.618398904800415, "step": 1400 }, { "epoch": 1.1280000000000001, "grad_norm": 0.5002021593337239, "learning_rate": 3.4494477388045035e-05, "log_odds_chosen": 4.483678340911865, "log_odds_ratio": -0.028768246993422508, "logits/chosen": -2.92014741897583, "logits/chosen_prompt": -2.8309707641601562, "logits/rejected": -2.486912250518799, "logits/rejected_prompt": -2.804452419281006, "logps/chosen": -2.067333459854126, "logps/chosen_both": -2.0484328269958496, "logps/chosen_prompt": -0.7646309733390808, "logps/rejected": -6.416478157043457, "logps/rejected_both": -6.335555076599121, "logps/rejected_prompt": -0.9275982975959778, "loss": 2.062, "nll_loss": 2.047743558883667, "rewards/accuracies": 1.0, "rewards/chosen": -0.8269332647323608, "rewards/margins": 1.7396576404571533, "rewards/rejected": -2.5665910243988037, "step": 1410 }, { "epoch": 1.1360000000000001, "grad_norm": 0.16513157762808564, "learning_rate": 3.430039875967454e-05, "log_odds_chosen": 4.668246746063232, "log_odds_ratio": -0.07646802067756653, "logits/chosen": -2.9350738525390625, "logits/chosen_prompt": -2.8208534717559814, "logits/rejected": -2.421509265899658, "logits/rejected_prompt": -2.783437490463257, "logps/chosen": -2.0800347328186035, "logps/chosen_both": -2.0644993782043457, "logps/chosen_prompt": -0.8468448519706726, "logps/rejected": -6.625657558441162, "logps/rejected_both": -6.545504570007324, "logps/rejected_prompt": -1.04305100440979, "loss": 2.0206, "nll_loss": 2.0629351139068604, "rewards/accuracies": 1.0, "rewards/chosen": -0.8320137858390808, "rewards/margins": 1.8182493448257446, "rewards/rejected": -2.6502633094787598, "step": 1420 }, { "epoch": 1.144, "grad_norm": 2.2295314469384206, "learning_rate": 3.410566739729746e-05, "log_odds_chosen": 5.851050853729248, "log_odds_ratio": -0.004526123404502869, "logits/chosen": -2.940370798110962, "logits/chosen_prompt": -2.7820496559143066, "logits/rejected": -2.2556514739990234, "logits/rejected_prompt": -2.7672178745269775, "logps/chosen": -1.8526496887207031, "logps/chosen_both": -1.8396713733673096, "logps/chosen_prompt": -0.8455888628959656, "logps/rejected": -7.520164489746094, "logps/rejected_both": -7.432145595550537, "logps/rejected_prompt": -1.002396821975708, "loss": 2.1827, "nll_loss": 1.8387296199798584, "rewards/accuracies": 1.0, "rewards/chosen": -0.7410598993301392, "rewards/margins": 2.267005681991577, "rewards/rejected": -3.008065700531006, "step": 1430 }, { "epoch": 1.152, "grad_norm": 4.408515203042964, "learning_rate": 3.3910296967831266e-05, "log_odds_chosen": 4.456727027893066, "log_odds_ratio": -0.14154654741287231, "logits/chosen": -2.9346349239349365, "logits/chosen_prompt": -2.7853639125823975, "logits/rejected": -2.2783145904541016, "logits/rejected_prompt": -2.7635715007781982, "logps/chosen": -1.9494521617889404, "logps/chosen_both": -1.9318408966064453, "logps/chosen_prompt": -0.9306742548942566, "logps/rejected": -6.29015588760376, "logps/rejected_both": -6.198000907897949, "logps/rejected_prompt": -1.0760185718536377, "loss": 2.1572, "nll_loss": 1.931610107421875, "rewards/accuracies": 1.0, "rewards/chosen": -0.7797808647155762, "rewards/margins": 1.7362816333770752, "rewards/rejected": -2.5160624980926514, "step": 1440 }, { "epoch": 1.16, "grad_norm": 0.3551432285571037, "learning_rate": 3.3714301183045385e-05, "log_odds_chosen": 5.155561447143555, "log_odds_ratio": -0.07224146276712418, "logits/chosen": -2.9873647689819336, "logits/chosen_prompt": -2.7700507640838623, "logits/rejected": -2.2287240028381348, "logits/rejected_prompt": -2.7513465881347656, "logps/chosen": -1.9037456512451172, "logps/chosen_both": -1.8827041387557983, "logps/chosen_prompt": -0.8036454319953918, "logps/rejected": -6.904747009277344, "logps/rejected_both": -6.79779052734375, "logps/rejected_prompt": -0.9606531858444214, "loss": 2.0135, "nll_loss": 1.8827041387557983, "rewards/accuracies": 1.0, "rewards/chosen": -0.7614982724189758, "rewards/margins": 2.0004005432128906, "rewards/rejected": -2.7618985176086426, "step": 1450 }, { "epoch": 1.168, "grad_norm": 0.23892058786604192, "learning_rate": 3.35176937985988e-05, "log_odds_chosen": 4.485732078552246, "log_odds_ratio": -0.14207962155342102, "logits/chosen": -2.945270538330078, "logits/chosen_prompt": -2.786912441253662, "logits/rejected": -2.270350217819214, "logits/rejected_prompt": -2.752725124359131, "logps/chosen": -2.024524211883545, "logps/chosen_both": -2.0046331882476807, "logps/chosen_prompt": -0.774206817150116, "logps/rejected": -6.382667064666748, "logps/rejected_both": -6.294032096862793, "logps/rejected_prompt": -0.9491628408432007, "loss": 2.0727, "nll_loss": 2.003938674926758, "rewards/accuracies": 1.0, "rewards/chosen": -0.809809684753418, "rewards/margins": 1.7432572841644287, "rewards/rejected": -2.5530669689178467, "step": 1460 }, { "epoch": 1.176, "grad_norm": 0.2032800647611215, "learning_rate": 3.332048861307467e-05, "log_odds_chosen": 4.051968097686768, "log_odds_ratio": -0.14674244821071625, "logits/chosen": -2.99367094039917, "logits/chosen_prompt": -2.802661657333374, "logits/rejected": -2.338299512863159, "logits/rejected_prompt": -2.7645983695983887, "logps/chosen": -1.9771573543548584, "logps/chosen_both": -1.9634653329849243, "logps/chosen_prompt": -0.8673089742660522, "logps/rejected": -5.909640789031982, "logps/rejected_both": -5.843233585357666, "logps/rejected_prompt": -0.918237030506134, "loss": 2.0442, "nll_loss": 1.9626314640045166, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.790863037109375, "rewards/margins": 1.572993516921997, "rewards/rejected": -2.363856554031372, "step": 1470 }, { "epoch": 1.184, "grad_norm": 0.6791006786877852, "learning_rate": 3.312269946701191e-05, "log_odds_chosen": 5.11738395690918, "log_odds_ratio": -0.08993680030107498, "logits/chosen": -2.986436605453491, "logits/chosen_prompt": -2.733582019805908, "logits/rejected": -2.186984062194824, "logits/rejected_prompt": -2.714433193206787, "logps/chosen": -1.95094895362854, "logps/chosen_both": -1.9355719089508057, "logps/chosen_prompt": -0.9025853276252747, "logps/rejected": -6.931356906890869, "logps/rejected_both": -6.842989444732666, "logps/rejected_prompt": -0.9505090713500977, "loss": 2.0225, "nll_loss": 1.935101866722107, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.780379593372345, "rewards/margins": 1.992163062095642, "rewards/rejected": -2.7725424766540527, "step": 1480 }, { "epoch": 1.192, "grad_norm": 12.7002740206941, "learning_rate": 3.29243402419338e-05, "log_odds_chosen": 4.771432399749756, "log_odds_ratio": -0.2453218698501587, "logits/chosen": -2.9012749195098877, "logits/chosen_prompt": -2.791215419769287, "logits/rejected": -2.076328754425049, "logits/rejected_prompt": -2.7599706649780273, "logps/chosen": -2.869783878326416, "logps/chosen_both": -2.8310511112213135, "logps/chosen_prompt": -0.8819573521614075, "logps/rejected": -7.530523777008057, "logps/rejected_both": -7.408067226409912, "logps/rejected_prompt": -1.0235049724578857, "loss": 2.0981, "nll_loss": 2.8310508728027344, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1479135751724243, "rewards/margins": 1.8642956018447876, "rewards/rejected": -3.012209415435791, "step": 1490 }, { "epoch": 1.2, "grad_norm": 0.19981467699086264, "learning_rate": 3.272542485937369e-05, "log_odds_chosen": 5.507603645324707, "log_odds_ratio": -0.020156098529696465, "logits/chosen": -2.9788875579833984, "logits/chosen_prompt": -2.7711877822875977, "logits/rejected": -2.0624115467071533, "logits/rejected_prompt": -2.744807720184326, "logps/chosen": -2.279694080352783, "logps/chosen_both": -2.2537825107574463, "logps/chosen_prompt": -0.8054102659225464, "logps/rejected": -7.658332824707031, "logps/rejected_both": -7.547041416168213, "logps/rejected_prompt": -1.0083348751068115, "loss": 2.1891, "nll_loss": 2.2532057762145996, "rewards/accuracies": 1.0, "rewards/chosen": -0.9118776321411133, "rewards/margins": 2.1514554023742676, "rewards/rejected": -3.063333034515381, "step": 1500 }, { "epoch": 1.208, "grad_norm": 0.2071781414340563, "learning_rate": 3.2525967279898015e-05, "log_odds_chosen": 3.779675006866455, "log_odds_ratio": -0.2771868109703064, "logits/chosen": -2.9284844398498535, "logits/chosen_prompt": -2.73115873336792, "logits/rejected": -2.319711446762085, "logits/rejected_prompt": -2.7305550575256348, "logps/chosen": -2.069701910018921, "logps/chosen_both": -2.0511586666107178, "logps/chosen_prompt": -0.8410334587097168, "logps/rejected": -5.765010356903076, "logps/rejected_both": -5.696343421936035, "logps/rejected_prompt": -1.032503366470337, "loss": 2.0199, "nll_loss": 2.050447940826416, "rewards/accuracies": 1.0, "rewards/chosen": -0.8278807401657104, "rewards/margins": 1.4781235456466675, "rewards/rejected": -2.306004762649536, "step": 1510 }, { "epoch": 1.216, "grad_norm": 0.18615530258539528, "learning_rate": 3.2325981502126433e-05, "log_odds_chosen": 4.861352443695068, "log_odds_ratio": -0.14049410820007324, "logits/chosen": -2.913702964782715, "logits/chosen_prompt": -2.647313117980957, "logits/rejected": -2.130164623260498, "logits/rejected_prompt": -2.638327121734619, "logps/chosen": -1.9652678966522217, "logps/chosen_both": -1.948897361755371, "logps/chosen_prompt": -0.8634968996047974, "logps/rejected": -6.705462455749512, "logps/rejected_both": -6.624319553375244, "logps/rejected_prompt": -1.035592794418335, "loss": 2.042, "nll_loss": 1.948264718055725, "rewards/accuracies": 1.0, "rewards/chosen": -0.7861071825027466, "rewards/margins": 1.896078109741211, "rewards/rejected": -2.682184934616089, "step": 1520 }, { "epoch": 1.224, "grad_norm": 0.3295494652465448, "learning_rate": 3.21254815617494e-05, "log_odds_chosen": 5.780041694641113, "log_odds_ratio": -0.004303447902202606, "logits/chosen": -2.9733996391296387, "logits/chosen_prompt": -2.7753407955169678, "logits/rejected": -2.149972438812256, "logits/rejected_prompt": -2.7639622688293457, "logps/chosen": -1.992742896080017, "logps/chosen_both": -1.975515604019165, "logps/chosen_prompt": -0.8223434686660767, "logps/rejected": -7.623780727386475, "logps/rejected_both": -7.520285606384277, "logps/rejected_prompt": -0.9390355348587036, "loss": 2.0442, "nll_loss": 1.974700689315796, "rewards/accuracies": 1.0, "rewards/chosen": -0.7970971465110779, "rewards/margins": 2.252415180206299, "rewards/rejected": -3.0495123863220215, "step": 1530 }, { "epoch": 1.232, "grad_norm": 0.19966280929549698, "learning_rate": 3.192448153054306e-05, "log_odds_chosen": 3.838728427886963, "log_odds_ratio": -0.14647504687309265, "logits/chosen": -2.9667465686798096, "logits/chosen_prompt": -2.8230855464935303, "logits/rejected": -2.5847840309143066, "logits/rejected_prompt": -2.822601795196533, "logps/chosen": -2.122664451599121, "logps/chosen_both": -2.0995185375213623, "logps/chosen_prompt": -0.9422351717948914, "logps/rejected": -5.859042644500732, "logps/rejected_both": -5.767674446105957, "logps/rejected_prompt": -1.1390842199325562, "loss": 2.095, "nll_loss": 2.0988729000091553, "rewards/accuracies": 1.0, "rewards/chosen": -0.8490656614303589, "rewards/margins": 1.494551420211792, "rewards/rejected": -2.3436172008514404, "step": 1540 }, { "epoch": 1.24, "grad_norm": 0.21556247694383007, "learning_rate": 3.172299551538164e-05, "log_odds_chosen": 4.561056137084961, "log_odds_ratio": -0.07612424343824387, "logits/chosen": -2.8919880390167236, "logits/chosen_prompt": -2.808797836303711, "logits/rejected": -2.5644102096557617, "logits/rejected_prompt": -2.802969455718994, "logps/chosen": -1.9356054067611694, "logps/chosen_both": -1.9162133932113647, "logps/chosen_prompt": -0.7942633032798767, "logps/rejected": -6.3366522789001465, "logps/rejected_both": -6.251999855041504, "logps/rejected_prompt": -0.9459937810897827, "loss": 2.1408, "nll_loss": 1.915776252746582, "rewards/accuracies": 1.0, "rewards/chosen": -0.7742422223091125, "rewards/margins": 1.7604186534881592, "rewards/rejected": -2.534660816192627, "step": 1550 }, { "epoch": 1.248, "grad_norm": 0.19312969446080464, "learning_rate": 3.152103765724743e-05, "log_odds_chosen": 3.9786903858184814, "log_odds_ratio": -0.10893861204385757, "logits/chosen": -3.0307998657226562, "logits/chosen_prompt": -2.7775232791900635, "logits/rejected": -2.6540513038635254, "logits/rejected_prompt": -2.7630362510681152, "logps/chosen": -1.9151197671890259, "logps/chosen_both": -1.8977829217910767, "logps/chosen_prompt": -0.8471347689628601, "logps/rejected": -5.757778644561768, "logps/rejected_both": -5.6885457038879395, "logps/rejected_prompt": -1.02475106716156, "loss": 1.9805, "nll_loss": 1.8967196941375732, "rewards/accuracies": 1.0, "rewards/chosen": -0.7660478949546814, "rewards/margins": 1.537063717842102, "rewards/rejected": -2.3031115531921387, "step": 1560 }, { "epoch": 1.256, "grad_norm": 0.19646035725215968, "learning_rate": 3.1318622130238236e-05, "log_odds_chosen": 4.679540157318115, "log_odds_ratio": -0.07853083312511444, "logits/chosen": -2.9802026748657227, "logits/chosen_prompt": -2.761209011077881, "logits/rejected": -2.5600242614746094, "logits/rejected_prompt": -2.7424654960632324, "logps/chosen": -1.7784169912338257, "logps/chosen_both": -1.7646305561065674, "logps/chosen_prompt": -0.7139529585838318, "logps/rejected": -6.263562202453613, "logps/rejected_both": -6.190931797027588, "logps/rejected_prompt": -0.9958028793334961, "loss": 1.9559, "nll_loss": 1.76325261592865, "rewards/accuracies": 1.0, "rewards/chosen": -0.7113668322563171, "rewards/margins": 1.794058084487915, "rewards/rejected": -2.505424976348877, "step": 1570 }, { "epoch": 1.264, "grad_norm": 0.3866885307142738, "learning_rate": 3.111576314057268e-05, "log_odds_chosen": 3.801389694213867, "log_odds_ratio": -0.20994290709495544, "logits/chosen": -2.9368879795074463, "logits/chosen_prompt": -2.7586987018585205, "logits/rejected": -2.599658966064453, "logits/rejected_prompt": -2.743234157562256, "logps/chosen": -1.9905316829681396, "logps/chosen_both": -1.9739116430282593, "logps/chosen_prompt": -0.779675304889679, "logps/rejected": -5.689120292663574, "logps/rejected_both": -5.620154857635498, "logps/rejected_prompt": -1.0595715045928955, "loss": 2.0955, "nll_loss": 1.9729188680648804, "rewards/accuracies": 1.0, "rewards/chosen": -0.7962126731872559, "rewards/margins": 1.4794353246688843, "rewards/rejected": -2.275648355484009, "step": 1580 }, { "epoch": 1.272, "grad_norm": 0.2325643788869979, "learning_rate": 3.091247492559312e-05, "log_odds_chosen": 4.095303058624268, "log_odds_ratio": -0.1479816436767578, "logits/chosen": -2.9735686779022217, "logits/chosen_prompt": -2.8000283241271973, "logits/rejected": -2.558763027191162, "logits/rejected_prompt": -2.7583069801330566, "logps/chosen": -1.8590002059936523, "logps/chosen_both": -1.8417994976043701, "logps/chosen_prompt": -0.7681006193161011, "logps/rejected": -5.769632816314697, "logps/rejected_both": -5.685044288635254, "logps/rejected_prompt": -0.97776859998703, "loss": 2.1087, "nll_loss": 1.8410179615020752, "rewards/accuracies": 1.0, "rewards/chosen": -0.743600070476532, "rewards/margins": 1.5642529726028442, "rewards/rejected": -2.3078532218933105, "step": 1590 }, { "epoch": 1.28, "grad_norm": 0.19381282768297478, "learning_rate": 3.0708771752766394e-05, "log_odds_chosen": 4.324513912200928, "log_odds_ratio": -0.0676613599061966, "logits/chosen": -2.9503540992736816, "logits/chosen_prompt": -2.7982351779937744, "logits/rejected": -2.5562634468078613, "logits/rejected_prompt": -2.7812817096710205, "logps/chosen": -1.9172391891479492, "logps/chosen_both": -1.9022390842437744, "logps/chosen_prompt": -0.7488449811935425, "logps/rejected": -6.095970630645752, "logps/rejected_both": -6.031794548034668, "logps/rejected_prompt": -0.9277693033218384, "loss": 1.9931, "nll_loss": 1.9018001556396484, "rewards/accuracies": 1.0, "rewards/chosen": -0.7668957114219666, "rewards/margins": 1.671492338180542, "rewards/rejected": -2.4383881092071533, "step": 1600 }, { "epoch": 1.288, "grad_norm": 0.23915750516620793, "learning_rate": 3.050466791868254e-05, "log_odds_chosen": 5.146353721618652, "log_odds_ratio": -0.07113925367593765, "logits/chosen": -3.0021820068359375, "logits/chosen_prompt": -2.7346436977386475, "logits/rejected": -2.400503635406494, "logits/rejected_prompt": -2.715362071990967, "logps/chosen": -1.8657314777374268, "logps/chosen_both": -1.847728967666626, "logps/chosen_prompt": -0.8974820375442505, "logps/rejected": -6.850257873535156, "logps/rejected_both": -6.73916482925415, "logps/rejected_prompt": -0.9878479838371277, "loss": 2.0166, "nll_loss": 1.8474719524383545, "rewards/accuracies": 1.0, "rewards/chosen": -0.7462925910949707, "rewards/margins": 1.9938108921051025, "rewards/rejected": -2.7401034832000732, "step": 1610 }, { "epoch": 1.296, "grad_norm": 0.25793388819398966, "learning_rate": 3.0300177748051373e-05, "log_odds_chosen": 5.57846212387085, "log_odds_ratio": -0.0040098619647324085, "logits/chosen": -2.921875476837158, "logits/chosen_prompt": -2.7485337257385254, "logits/rejected": -2.2575137615203857, "logits/rejected_prompt": -2.729705333709717, "logps/chosen": -2.0379016399383545, "logps/chosen_both": -2.023336410522461, "logps/chosen_prompt": -0.8523913621902466, "logps/rejected": -7.4703474044799805, "logps/rejected_both": -7.384527683258057, "logps/rejected_prompt": -1.0912959575653076, "loss": 2.1288, "nll_loss": 2.021984100341797, "rewards/accuracies": 1.0, "rewards/chosen": -0.8151607513427734, "rewards/margins": 2.172978639602661, "rewards/rejected": -2.9881393909454346, "step": 1620 }, { "epoch": 1.304, "grad_norm": 0.22406539118846014, "learning_rate": 3.0095315592697126e-05, "log_odds_chosen": 4.797575950622559, "log_odds_ratio": -0.07414670288562775, "logits/chosen": -2.9373860359191895, "logits/chosen_prompt": -2.7567806243896484, "logits/rejected": -2.339370012283325, "logits/rejected_prompt": -2.738049030303955, "logps/chosen": -2.040771961212158, "logps/chosen_both": -2.022752523422241, "logps/chosen_prompt": -0.8437407612800598, "logps/rejected": -6.715930938720703, "logps/rejected_both": -6.622492790222168, "logps/rejected_prompt": -1.1104066371917725, "loss": 2.0022, "nll_loss": 2.021770715713501, "rewards/accuracies": 1.0, "rewards/chosen": -0.8163086771965027, "rewards/margins": 1.8700635433197021, "rewards/rejected": -2.6863722801208496, "step": 1630 }, { "epoch": 1.312, "grad_norm": 0.19146540891141792, "learning_rate": 2.9890095830551207e-05, "log_odds_chosen": 5.205162525177002, "log_odds_ratio": -0.015068802051246166, "logits/chosen": -2.9850218296051025, "logits/chosen_prompt": -2.7482991218566895, "logits/rejected": -2.2866098880767822, "logits/rejected_prompt": -2.7363736629486084, "logps/chosen": -1.9450336694717407, "logps/chosen_both": -1.9250189065933228, "logps/chosen_prompt": -0.8316828012466431, "logps/rejected": -6.989903450012207, "logps/rejected_both": -6.88253927230835, "logps/rejected_prompt": -0.9859585762023926, "loss": 2.088, "nll_loss": 1.924430251121521, "rewards/accuracies": 1.0, "rewards/chosen": -0.7780135273933411, "rewards/margins": 2.0179476737976074, "rewards/rejected": -2.7959611415863037, "step": 1640 }, { "epoch": 1.32, "grad_norm": 0.22495066893496063, "learning_rate": 2.9684532864643122e-05, "log_odds_chosen": 5.308048725128174, "log_odds_ratio": -0.00845087319612503, "logits/chosen": -2.9742932319641113, "logits/chosen_prompt": -2.7849392890930176, "logits/rejected": -2.2982254028320312, "logits/rejected_prompt": -2.7615458965301514, "logps/chosen": -1.9874608516693115, "logps/chosen_both": -1.9658311605453491, "logps/chosen_prompt": -0.7408405542373657, "logps/rejected": -7.14414119720459, "logps/rejected_both": -7.038477897644043, "logps/rejected_prompt": -1.045243501663208, "loss": 2.0386, "nll_loss": 1.9651544094085693, "rewards/accuracies": 1.0, "rewards/chosen": -0.7949844002723694, "rewards/margins": 2.0626721382141113, "rewards/rejected": -2.857656478881836, "step": 1650 }, { "epoch": 1.328, "grad_norm": 0.2286734318135687, "learning_rate": 2.9478641122089562e-05, "log_odds_chosen": 4.840089797973633, "log_odds_ratio": -0.07564349472522736, "logits/chosen": -3.008890151977539, "logits/chosen_prompt": -2.8013384342193604, "logits/rejected": -2.394143581390381, "logits/rejected_prompt": -2.77929425239563, "logps/chosen": -1.9756405353546143, "logps/chosen_both": -1.9581083059310913, "logps/chosen_prompt": -0.7473115921020508, "logps/rejected": -6.674158573150635, "logps/rejected_both": -6.598573207855225, "logps/rejected_prompt": -0.996438205242157, "loss": 2.0632, "nll_loss": 1.9576594829559326, "rewards/accuracies": 1.0, "rewards/chosen": -0.790256142616272, "rewards/margins": 1.8794071674346924, "rewards/rejected": -2.669663429260254, "step": 1660 }, { "epoch": 1.336, "grad_norm": 1.6039791025981895, "learning_rate": 2.9272435053081922e-05, "log_odds_chosen": 4.911754131317139, "log_odds_ratio": -0.08321253210306168, "logits/chosen": -2.912379741668701, "logits/chosen_prompt": -2.7961792945861816, "logits/rejected": -2.264275312423706, "logits/rejected_prompt": -2.7643306255340576, "logps/chosen": -1.951281189918518, "logps/chosen_both": -1.9351087808609009, "logps/chosen_prompt": -0.7827764749526978, "logps/rejected": -6.725755214691162, "logps/rejected_both": -6.646947383880615, "logps/rejected_prompt": -1.0157705545425415, "loss": 2.1063, "nll_loss": 1.9346641302108765, "rewards/accuracies": 1.0, "rewards/chosen": -0.7805125713348389, "rewards/margins": 1.9097894430160522, "rewards/rejected": -2.6903018951416016, "step": 1670 }, { "epoch": 1.3439999999999999, "grad_norm": 0.3698076131375805, "learning_rate": 2.9065929129872094e-05, "log_odds_chosen": 4.74294376373291, "log_odds_ratio": -0.08516435325145721, "logits/chosen": -2.9431169033050537, "logits/chosen_prompt": -2.7804017066955566, "logits/rejected": -2.2715518474578857, "logits/rejected_prompt": -2.7543439865112305, "logps/chosen": -2.047203779220581, "logps/chosen_both": -2.028724193572998, "logps/chosen_prompt": -0.8540178537368774, "logps/rejected": -6.660338401794434, "logps/rejected_both": -6.572705268859863, "logps/rejected_prompt": -1.0315988063812256, "loss": 2.1122, "nll_loss": 2.0279080867767334, "rewards/accuracies": 1.0, "rewards/chosen": -0.818881630897522, "rewards/margins": 1.8452539443969727, "rewards/rejected": -2.664135456085205, "step": 1680 }, { "epoch": 1.3519999999999999, "grad_norm": 14.756635490233291, "learning_rate": 2.8859137845756784e-05, "log_odds_chosen": 5.338567733764648, "log_odds_ratio": -0.07245531678199768, "logits/chosen": -3.0019686222076416, "logits/chosen_prompt": -2.7564592361450195, "logits/rejected": -2.10023832321167, "logits/rejected_prompt": -2.75854754447937, "logps/chosen": -1.801944375038147, "logps/chosen_both": -1.7874317169189453, "logps/chosen_prompt": -0.7828146815299988, "logps/rejected": -6.980807304382324, "logps/rejected_both": -6.885933876037598, "logps/rejected_prompt": -1.0353758335113525, "loss": 1.9922, "nll_loss": 1.7853384017944336, "rewards/accuracies": 1.0, "rewards/chosen": -0.720777690410614, "rewards/margins": 2.071545124053955, "rewards/rejected": -2.792322874069214, "step": 1690 }, { "epoch": 1.3599999999999999, "grad_norm": 1.0205003901521117, "learning_rate": 2.8652075714060295e-05, "log_odds_chosen": 4.316029071807861, "log_odds_ratio": -0.18554985523223877, "logits/chosen": -2.9789249897003174, "logits/chosen_prompt": -2.7761483192443848, "logits/rejected": -2.230045795440674, "logits/rejected_prompt": -2.7322373390197754, "logps/chosen": -1.9758758544921875, "logps/chosen_both": -1.958141565322876, "logps/chosen_prompt": -0.839580237865448, "logps/rejected": -6.175426006317139, "logps/rejected_both": -6.096805572509766, "logps/rejected_prompt": -1.002239465713501, "loss": 2.0489, "nll_loss": 1.957658052444458, "rewards/accuracies": 1.0, "rewards/chosen": -0.7903503179550171, "rewards/margins": 1.6798200607299805, "rewards/rejected": -2.470170497894287, "step": 1700 }, { "epoch": 1.3679999999999999, "grad_norm": 0.5093034024599485, "learning_rate": 2.844475726711595e-05, "log_odds_chosen": 5.062729835510254, "log_odds_ratio": -0.05383139103651047, "logits/chosen": -2.9323840141296387, "logits/chosen_prompt": -2.757789134979248, "logits/rejected": -2.114853620529175, "logits/rejected_prompt": -2.740206003189087, "logps/chosen": -1.9980299472808838, "logps/chosen_both": -1.9810377359390259, "logps/chosen_prompt": -0.8025790452957153, "logps/rejected": -6.92165994644165, "logps/rejected_both": -6.837998867034912, "logps/rejected_prompt": -1.0708694458007812, "loss": 2.02, "nll_loss": 1.980063796043396, "rewards/accuracies": 1.0, "rewards/chosen": -0.7992119789123535, "rewards/margins": 1.9694522619247437, "rewards/rejected": -2.7686638832092285, "step": 1710 }, { "epoch": 1.376, "grad_norm": 0.1922091365417996, "learning_rate": 2.8237197055246172e-05, "log_odds_chosen": 5.407708644866943, "log_odds_ratio": -0.07208568602800369, "logits/chosen": -2.930446147918701, "logits/chosen_prompt": -2.7493677139282227, "logits/rejected": -1.8252556324005127, "logits/rejected_prompt": -2.716831684112549, "logps/chosen": -1.99956476688385, "logps/chosen_both": -1.9826438426971436, "logps/chosen_prompt": -0.8026520609855652, "logps/rejected": -7.266847133636475, "logps/rejected_both": -7.16598653793335, "logps/rejected_prompt": -0.9821138381958008, "loss": 2.0055, "nll_loss": 1.9819648265838623, "rewards/accuracies": 1.0, "rewards/chosen": -0.79982590675354, "rewards/margins": 2.1069130897521973, "rewards/rejected": -2.9067392349243164, "step": 1720 }, { "epoch": 1.384, "grad_norm": 0.19884693939871143, "learning_rate": 2.8029409645741267e-05, "log_odds_chosen": 5.655479907989502, "log_odds_ratio": -0.07094166427850723, "logits/chosen": -2.9133386611938477, "logits/chosen_prompt": -2.7181575298309326, "logits/rejected": -1.8967100381851196, "logits/rejected_prompt": -2.7026288509368896, "logps/chosen": -2.0701959133148193, "logps/chosen_both": -2.0524401664733887, "logps/chosen_prompt": -0.8565284609794617, "logps/rejected": -7.606234550476074, "logps/rejected_both": -7.5077009201049805, "logps/rejected_prompt": -1.0423924922943115, "loss": 2.1485, "nll_loss": 2.0521743297576904, "rewards/accuracies": 1.0, "rewards/chosen": -0.8280783891677856, "rewards/margins": 2.2144155502319336, "rewards/rejected": -3.042494058609009, "step": 1730 }, { "epoch": 1.392, "grad_norm": 0.22986043369921255, "learning_rate": 2.782140962183704e-05, "log_odds_chosen": 6.107487678527832, "log_odds_ratio": -0.0026633774396032095, "logits/chosen": -2.98026442527771, "logits/chosen_prompt": -2.780827522277832, "logits/rejected": -1.9798576831817627, "logits/rejected_prompt": -2.7703700065612793, "logps/chosen": -1.9474899768829346, "logps/chosen_both": -1.9275726079940796, "logps/chosen_prompt": -0.7816404700279236, "logps/rejected": -7.895272731781006, "logps/rejected_both": -7.769126892089844, "logps/rejected_prompt": -0.9758648872375488, "loss": 1.9516, "nll_loss": 1.925616979598999, "rewards/accuracies": 1.0, "rewards/chosen": -0.778995931148529, "rewards/margins": 2.3791134357452393, "rewards/rejected": -3.158109188079834, "step": 1740 }, { "epoch": 1.4, "grad_norm": 1.3967778423182213, "learning_rate": 2.761321158169134e-05, "log_odds_chosen": 5.588977336883545, "log_odds_ratio": -0.07164627313613892, "logits/chosen": -2.942800998687744, "logits/chosen_prompt": -2.765923023223877, "logits/rejected": -2.1541590690612793, "logits/rejected_prompt": -2.7391622066497803, "logps/chosen": -1.8856910467147827, "logps/chosen_both": -1.8705289363861084, "logps/chosen_prompt": -0.7254279851913452, "logps/rejected": -7.315940856933594, "logps/rejected_both": -7.2315239906311035, "logps/rejected_prompt": -0.9249277114868164, "loss": 2.037, "nll_loss": 1.8701813220977783, "rewards/accuracies": 1.0, "rewards/chosen": -0.7542763948440552, "rewards/margins": 2.172100305557251, "rewards/rejected": -2.9263763427734375, "step": 1750 }, { "epoch": 1.408, "grad_norm": 0.19174060756423858, "learning_rate": 2.7404830137359444e-05, "log_odds_chosen": 5.684497356414795, "log_odds_ratio": -0.03275999799370766, "logits/chosen": -2.958325147628784, "logits/chosen_prompt": -2.728274345397949, "logits/rejected": -2.046318531036377, "logits/rejected_prompt": -2.6898844242095947, "logps/chosen": -2.253990411758423, "logps/chosen_both": -2.2328062057495117, "logps/chosen_prompt": -0.8659110069274902, "logps/rejected": -7.784188747406006, "logps/rejected_both": -7.674757480621338, "logps/rejected_prompt": -1.1274776458740234, "loss": 2.1275, "nll_loss": 2.2321293354034424, "rewards/accuracies": 1.0, "rewards/chosen": -0.901596188545227, "rewards/margins": 2.212078809738159, "rewards/rejected": -3.1136748790740967, "step": 1760 }, { "epoch": 1.416, "grad_norm": 0.1908777514352998, "learning_rate": 2.7196279913768584e-05, "log_odds_chosen": 5.167336940765381, "log_odds_ratio": -0.07482357323169708, "logits/chosen": -2.9330124855041504, "logits/chosen_prompt": -2.7444446086883545, "logits/rejected": -2.105210065841675, "logits/rejected_prompt": -2.721642255783081, "logps/chosen": -2.0776610374450684, "logps/chosen_both": -2.0597071647644043, "logps/chosen_prompt": -0.8555063009262085, "logps/rejected": -7.124932765960693, "logps/rejected_both": -7.030417442321777, "logps/rejected_prompt": -1.0413535833358765, "loss": 1.9978, "nll_loss": 2.058987617492676, "rewards/accuracies": 1.0, "rewards/chosen": -0.8310644030570984, "rewards/margins": 2.018908739089966, "rewards/rejected": -2.84997296333313, "step": 1770 }, { "epoch": 1.424, "grad_norm": 0.17855815500184188, "learning_rate": 2.6987575547691497e-05, "log_odds_chosen": 4.549686908721924, "log_odds_ratio": -0.20390887558460236, "logits/chosen": -2.9623754024505615, "logits/chosen_prompt": -2.74225115776062, "logits/rejected": -2.1663219928741455, "logits/rejected_prompt": -2.7345423698425293, "logps/chosen": -1.9926074743270874, "logps/chosen_both": -1.9742103815078735, "logps/chosen_prompt": -0.7784561514854431, "logps/rejected": -6.431072235107422, "logps/rejected_both": -6.3410797119140625, "logps/rejected_prompt": -0.9243408441543579, "loss": 2.0508, "nll_loss": 1.973905324935913, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7970430850982666, "rewards/margins": 1.775386095046997, "rewards/rejected": -2.5724291801452637, "step": 1780 }, { "epoch": 1.432, "grad_norm": 0.41995614329947717, "learning_rate": 2.6778731686719178e-05, "log_odds_chosen": 6.473885536193848, "log_odds_ratio": -0.0018433562945574522, "logits/chosen": -2.952514410018921, "logits/chosen_prompt": -2.7027528285980225, "logits/rejected": -1.8595733642578125, "logits/rejected_prompt": -2.6798789501190186, "logps/chosen": -1.9392732381820679, "logps/chosen_both": -1.9248685836791992, "logps/chosen_prompt": -0.931847095489502, "logps/rejected": -8.25381088256836, "logps/rejected_both": -8.140459060668945, "logps/rejected_prompt": -1.0698789358139038, "loss": 1.9874, "nll_loss": 1.923288106918335, "rewards/accuracies": 1.0, "rewards/chosen": -0.7757093906402588, "rewards/margins": 2.5258147716522217, "rewards/rejected": -3.3015239238739014, "step": 1790 }, { "epoch": 1.44, "grad_norm": 0.7745820877648287, "learning_rate": 2.656976298823284e-05, "log_odds_chosen": 3.4408886432647705, "log_odds_ratio": -0.27857550978660583, "logits/chosen": -2.878281831741333, "logits/chosen_prompt": -2.734473705291748, "logits/rejected": -2.3365187644958496, "logits/rejected_prompt": -2.7160048484802246, "logps/chosen": -2.0569214820861816, "logps/chosen_both": -2.0396482944488525, "logps/chosen_prompt": -0.6810625791549683, "logps/rejected": -5.414828300476074, "logps/rejected_both": -5.35118293762207, "logps/rejected_prompt": -0.8160842061042786, "loss": 2.0419, "nll_loss": 2.038651943206787, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.82276850938797, "rewards/margins": 1.3431627750396729, "rewards/rejected": -2.165931224822998, "step": 1800 }, { "epoch": 1.448, "grad_norm": 0.19675956388988333, "learning_rate": 2.636068411837523e-05, "log_odds_chosen": 3.9148197174072266, "log_odds_ratio": -0.23557178676128387, "logits/chosen": -3.045487642288208, "logits/chosen_prompt": -2.759061574935913, "logits/rejected": -2.4077014923095703, "logits/rejected_prompt": -2.7576231956481934, "logps/chosen": -1.8861596584320068, "logps/chosen_both": -1.8683302402496338, "logps/chosen_prompt": -0.9071288108825684, "logps/rejected": -5.683122158050537, "logps/rejected_both": -5.601851940155029, "logps/rejected_prompt": -1.0434454679489136, "loss": 2.0263, "nll_loss": 1.8671506643295288, "rewards/accuracies": 1.0, "rewards/chosen": -0.7544639110565186, "rewards/margins": 1.5187851190567017, "rewards/rejected": -2.2732491493225098, "step": 1810 }, { "epoch": 1.456, "grad_norm": 6.189918533614061, "learning_rate": 2.615150975102131e-05, "log_odds_chosen": 6.713578701019287, "log_odds_ratio": -0.0015258995117619634, "logits/chosen": -3.0059263706207275, "logits/chosen_prompt": -2.7889323234558105, "logits/rejected": -1.839082956314087, "logits/rejected_prompt": -2.7647995948791504, "logps/chosen": -2.004807233810425, "logps/chosen_both": -1.98598313331604, "logps/chosen_prompt": -0.7677423357963562, "logps/rejected": -8.555073738098145, "logps/rejected_both": -8.430871963500977, "logps/rejected_prompt": -1.011725664138794, "loss": 2.0302, "nll_loss": 1.9841728210449219, "rewards/accuracies": 1.0, "rewards/chosen": -0.8019229769706726, "rewards/margins": 2.6201066970825195, "rewards/rejected": -3.422029495239258, "step": 1820 }, { "epoch": 1.464, "grad_norm": 0.21797873657619965, "learning_rate": 2.594225456674837e-05, "log_odds_chosen": 5.328610420227051, "log_odds_ratio": -0.0812341570854187, "logits/chosen": -2.979506731033325, "logits/chosen_prompt": -2.792584180831909, "logits/rejected": -2.08947491645813, "logits/rejected_prompt": -2.781327962875366, "logps/chosen": -1.9279931783676147, "logps/chosen_both": -1.9127006530761719, "logps/chosen_prompt": -0.7814801335334778, "logps/rejected": -7.120486259460449, "logps/rejected_both": -7.026519775390625, "logps/rejected_prompt": -0.9352282285690308, "loss": 2.0587, "nll_loss": 1.9114625453948975, "rewards/accuracies": 1.0, "rewards/chosen": -0.7711972594261169, "rewards/margins": 2.0769975185394287, "rewards/rejected": -2.8481948375701904, "step": 1830 }, { "epoch": 1.472, "grad_norm": 0.1871801141599041, "learning_rate": 2.5732933251805713e-05, "log_odds_chosen": 5.583043575286865, "log_odds_ratio": -0.13880962133407593, "logits/chosen": -2.9580206871032715, "logits/chosen_prompt": -2.7731950283050537, "logits/rejected": -2.012089490890503, "logits/rejected_prompt": -2.75722336769104, "logps/chosen": -1.855268120765686, "logps/chosen_both": -1.8423293828964233, "logps/chosen_prompt": -0.8601115942001343, "logps/rejected": -7.305128574371338, "logps/rejected_both": -7.2211809158325195, "logps/rejected_prompt": -1.0132110118865967, "loss": 1.9359, "nll_loss": 1.8416475057601929, "rewards/accuracies": 1.0, "rewards/chosen": -0.7421072125434875, "rewards/margins": 2.1799445152282715, "rewards/rejected": -2.922051191329956, "step": 1840 }, { "epoch": 1.48, "grad_norm": 0.22592416955066014, "learning_rate": 2.5523560497083926e-05, "log_odds_chosen": 5.949292182922363, "log_odds_ratio": -0.07134632766246796, "logits/chosen": -2.956613779067993, "logits/chosen_prompt": -2.722937822341919, "logits/rejected": -1.9237785339355469, "logits/rejected_prompt": -2.704369068145752, "logps/chosen": -1.9562047719955444, "logps/chosen_both": -1.9380409717559814, "logps/chosen_prompt": -0.7973084449768066, "logps/rejected": -7.771543979644775, "logps/rejected_both": -7.661837577819824, "logps/rejected_prompt": -0.9722532033920288, "loss": 1.9892, "nll_loss": 1.9374074935913086, "rewards/accuracies": 1.0, "rewards/chosen": -0.7824817895889282, "rewards/margins": 2.3261356353759766, "rewards/rejected": -3.1086175441741943, "step": 1850 }, { "epoch": 1.488, "grad_norm": 0.19952883102568983, "learning_rate": 2.531415099708382e-05, "log_odds_chosen": 5.468968868255615, "log_odds_ratio": -0.13928017020225525, "logits/chosen": -2.901470184326172, "logits/chosen_prompt": -2.7253496646881104, "logits/rejected": -1.9635553359985352, "logits/rejected_prompt": -2.721364736557007, "logps/chosen": -2.024766683578491, "logps/chosen_both": -2.0091967582702637, "logps/chosen_prompt": -0.8794494867324829, "logps/rejected": -7.388121604919434, "logps/rejected_both": -7.304760932922363, "logps/rejected_prompt": -1.0697910785675049, "loss": 2.1409, "nll_loss": 2.0086288452148438, "rewards/accuracies": 1.0, "rewards/chosen": -0.8099067807197571, "rewards/margins": 2.1453423500061035, "rewards/rejected": -2.955249071121216, "step": 1860 }, { "epoch": 1.496, "grad_norm": 0.20218369179299622, "learning_rate": 2.51047194488851e-05, "log_odds_chosen": 5.442208766937256, "log_odds_ratio": -0.14097937941551208, "logits/chosen": -2.9763107299804688, "logits/chosen_prompt": -2.7768394947052, "logits/rejected": -2.108531951904297, "logits/rejected_prompt": -2.7451493740081787, "logps/chosen": -1.79744553565979, "logps/chosen_both": -1.7835102081298828, "logps/chosen_prompt": -0.7872709631919861, "logps/rejected": -7.031289577484131, "logps/rejected_both": -6.934246063232422, "logps/rejected_prompt": -0.9216675758361816, "loss": 2.1195, "nll_loss": 1.7827249765396118, "rewards/accuracies": 1.0, "rewards/chosen": -0.718978226184845, "rewards/margins": 2.0935378074645996, "rewards/rejected": -2.8125159740448, "step": 1870 }, { "epoch": 1.504, "grad_norm": 0.9652790170177806, "learning_rate": 2.4895280551114907e-05, "log_odds_chosen": 5.730778694152832, "log_odds_ratio": -0.07072736322879791, "logits/chosen": -2.950146198272705, "logits/chosen_prompt": -2.7803640365600586, "logits/rejected": -1.9521598815917969, "logits/rejected_prompt": -2.764260768890381, "logps/chosen": -2.0558481216430664, "logps/chosen_both": -2.0352180004119873, "logps/chosen_prompt": -0.8978110551834106, "logps/rejected": -7.663902282714844, "logps/rejected_both": -7.545947074890137, "logps/rejected_prompt": -1.037939429283142, "loss": 2.049, "nll_loss": 2.0345263481140137, "rewards/accuracies": 1.0, "rewards/chosen": -0.8223392367362976, "rewards/margins": 2.2432212829589844, "rewards/rejected": -3.0655605792999268, "step": 1880 }, { "epoch": 1.512, "grad_norm": 0.1890875725333666, "learning_rate": 2.4685849002916183e-05, "log_odds_chosen": 6.257909297943115, "log_odds_ratio": -0.00222708098590374, "logits/chosen": -2.9233384132385254, "logits/chosen_prompt": -2.7774055004119873, "logits/rejected": -1.9378130435943604, "logits/rejected_prompt": -2.751840114593506, "logps/chosen": -1.9843826293945312, "logps/chosen_both": -1.9667317867279053, "logps/chosen_prompt": -0.6825822591781616, "logps/rejected": -8.092279434204102, "logps/rejected_both": -7.992387294769287, "logps/rejected_prompt": -0.9652584195137024, "loss": 1.9485, "nll_loss": 1.965959906578064, "rewards/accuracies": 1.0, "rewards/chosen": -0.7937530279159546, "rewards/margins": 2.4431586265563965, "rewards/rejected": -3.2369117736816406, "step": 1890 }, { "epoch": 1.52, "grad_norm": 0.2373809038859539, "learning_rate": 2.447643950291608e-05, "log_odds_chosen": 6.489705562591553, "log_odds_ratio": -0.0016050601843744516, "logits/chosen": -2.9970052242279053, "logits/chosen_prompt": -2.755345106124878, "logits/rejected": -1.9105993509292603, "logits/rejected_prompt": -2.7229576110839844, "logps/chosen": -1.8970317840576172, "logps/chosen_both": -1.8811533451080322, "logps/chosen_prompt": -0.7929924726486206, "logps/rejected": -8.21942138671875, "logps/rejected_both": -8.108181953430176, "logps/rejected_prompt": -0.9921186566352844, "loss": 1.954, "nll_loss": 1.8801666498184204, "rewards/accuracies": 1.0, "rewards/chosen": -0.7588127851486206, "rewards/margins": 2.5289556980133057, "rewards/rejected": -3.287768602371216, "step": 1900 }, { "epoch": 1.528, "grad_norm": 0.1741002343821723, "learning_rate": 2.4267066748194296e-05, "log_odds_chosen": 5.774570941925049, "log_odds_ratio": -0.07103729248046875, "logits/chosen": -2.886838436126709, "logits/chosen_prompt": -2.7209315299987793, "logits/rejected": -2.010939836502075, "logits/rejected_prompt": -2.7094690799713135, "logps/chosen": -2.068047523498535, "logps/chosen_both": -2.051417350769043, "logps/chosen_prompt": -0.7632136940956116, "logps/rejected": -7.714223384857178, "logps/rejected_both": -7.628198146820068, "logps/rejected_prompt": -0.9632788896560669, "loss": 2.0981, "nll_loss": 2.051051139831543, "rewards/accuracies": 1.0, "rewards/chosen": -0.8272191286087036, "rewards/margins": 2.258470296859741, "rewards/rejected": -3.085689067840576, "step": 1910 }, { "epoch": 1.536, "grad_norm": 0.18057749289339498, "learning_rate": 2.4057745433251635e-05, "log_odds_chosen": 6.403738498687744, "log_odds_ratio": -0.0018427784088999033, "logits/chosen": -2.9575610160827637, "logits/chosen_prompt": -2.7303547859191895, "logits/rejected": -1.862630844116211, "logits/rejected_prompt": -2.71962833404541, "logps/chosen": -2.0046885013580322, "logps/chosen_both": -1.9884449243545532, "logps/chosen_prompt": -0.763080894947052, "logps/rejected": -8.254236221313477, "logps/rejected_both": -8.159029960632324, "logps/rejected_prompt": -1.045041799545288, "loss": 2.0516, "nll_loss": 1.9879404306411743, "rewards/accuracies": 1.0, "rewards/chosen": -0.8018752932548523, "rewards/margins": 2.49981951713562, "rewards/rejected": -3.301694869995117, "step": 1920 }, { "epoch": 1.544, "grad_norm": 0.20142735097076295, "learning_rate": 2.384849024897869e-05, "log_odds_chosen": 5.733250617980957, "log_odds_ratio": -0.004482199437916279, "logits/chosen": -2.9741549491882324, "logits/chosen_prompt": -2.7055163383483887, "logits/rejected": -2.124002456665039, "logits/rejected_prompt": -2.688239812850952, "logps/chosen": -1.9430478811264038, "logps/chosen_both": -1.926995038986206, "logps/chosen_prompt": -0.7834355235099792, "logps/rejected": -7.518483638763428, "logps/rejected_both": -7.4232635498046875, "logps/rejected_prompt": -1.0878071784973145, "loss": 2.1323, "nll_loss": 1.9260002374649048, "rewards/accuracies": 1.0, "rewards/chosen": -0.7772191166877747, "rewards/margins": 2.2301743030548096, "rewards/rejected": -3.0073933601379395, "step": 1930 }, { "epoch": 1.552, "grad_norm": 40.90864961224279, "learning_rate": 2.3639315881624777e-05, "log_odds_chosen": 5.306234836578369, "log_odds_ratio": -0.00918310321867466, "logits/chosen": -2.9237542152404785, "logits/chosen_prompt": -2.7105278968811035, "logits/rejected": -2.2239270210266113, "logits/rejected_prompt": -2.686476469039917, "logps/chosen": -1.9409538507461548, "logps/chosen_both": -1.9275703430175781, "logps/chosen_prompt": -0.8563373684883118, "logps/rejected": -7.0894670486450195, "logps/rejected_both": -7.007052421569824, "logps/rejected_prompt": -0.9907125234603882, "loss": 1.9112, "nll_loss": 1.926429033279419, "rewards/accuracies": 1.0, "rewards/chosen": -0.7763815522193909, "rewards/margins": 2.059405565261841, "rewards/rejected": -2.835787296295166, "step": 1940 }, { "epoch": 1.56, "grad_norm": 0.21885482692879285, "learning_rate": 2.3430237011767167e-05, "log_odds_chosen": 5.6596198081970215, "log_odds_ratio": -0.023314189165830612, "logits/chosen": -2.9358747005462646, "logits/chosen_prompt": -2.727999687194824, "logits/rejected": -2.0308213233947754, "logits/rejected_prompt": -2.686753749847412, "logps/chosen": -1.9377899169921875, "logps/chosen_both": -1.922545075416565, "logps/chosen_prompt": -0.8713130950927734, "logps/rejected": -7.442534446716309, "logps/rejected_both": -7.339343070983887, "logps/rejected_prompt": -1.057796835899353, "loss": 2.0015, "nll_loss": 1.9221293926239014, "rewards/accuracies": 1.0, "rewards/chosen": -0.7751160264015198, "rewards/margins": 2.2018978595733643, "rewards/rejected": -2.9770140647888184, "step": 1950 }, { "epoch": 1.568, "grad_norm": 0.5403488938261225, "learning_rate": 2.3221268313280838e-05, "log_odds_chosen": 5.778319358825684, "log_odds_ratio": -0.07066681236028671, "logits/chosen": -2.954177141189575, "logits/chosen_prompt": -2.678536891937256, "logits/rejected": -1.9524621963500977, "logits/rejected_prompt": -2.6848878860473633, "logps/chosen": -1.9211227893829346, "logps/chosen_both": -1.902917504310608, "logps/chosen_prompt": -0.8868004083633423, "logps/rejected": -7.527622222900391, "logps/rejected_both": -7.4302239418029785, "logps/rejected_prompt": -1.1353758573532104, "loss": 2.0128, "nll_loss": 1.9021003246307373, "rewards/accuracies": 1.0, "rewards/chosen": -0.768449068069458, "rewards/margins": 2.2425997257232666, "rewards/rejected": -3.0110487937927246, "step": 1960 }, { "epoch": 1.576, "grad_norm": 6.334469044302015, "learning_rate": 2.301242445230851e-05, "log_odds_chosen": 4.549070358276367, "log_odds_ratio": -0.10954795777797699, "logits/chosen": -2.9302010536193848, "logits/chosen_prompt": -2.6880440711975098, "logits/rejected": -2.190250873565674, "logits/rejected_prompt": -2.6803054809570312, "logps/chosen": -2.2468152046203613, "logps/chosen_both": -2.227410316467285, "logps/chosen_prompt": -0.7418851852416992, "logps/rejected": -6.677786827087402, "logps/rejected_both": -6.601284980773926, "logps/rejected_prompt": -0.9388518333435059, "loss": 2.1059, "nll_loss": 2.226693630218506, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8987261056900024, "rewards/margins": 1.7723888158798218, "rewards/rejected": -2.671114683151245, "step": 1970 }, { "epoch": 1.584, "grad_norm": 0.21099709481066398, "learning_rate": 2.280372008623142e-05, "log_odds_chosen": 4.277853488922119, "log_odds_ratio": -0.18287745118141174, "logits/chosen": -2.989633321762085, "logits/chosen_prompt": -2.6874613761901855, "logits/rejected": -2.2610902786254883, "logits/rejected_prompt": -2.664952516555786, "logps/chosen": -1.912766695022583, "logps/chosen_both": -1.8961530923843384, "logps/chosen_prompt": -0.7984111905097961, "logps/rejected": -6.0515875816345215, "logps/rejected_both": -5.97214412689209, "logps/rejected_prompt": -1.0341233015060425, "loss": 2.0542, "nll_loss": 1.894964575767517, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7651066780090332, "rewards/margins": 1.6555284261703491, "rewards/rejected": -2.4206349849700928, "step": 1980 }, { "epoch": 1.592, "grad_norm": 0.23272826174313574, "learning_rate": 2.2595169862640568e-05, "log_odds_chosen": 6.768258094787598, "log_odds_ratio": -0.001374961924739182, "logits/chosen": -2.973562240600586, "logits/chosen_prompt": -2.686769962310791, "logits/rejected": -1.666338562965393, "logits/rejected_prompt": -2.683814764022827, "logps/chosen": -1.9322917461395264, "logps/chosen_both": -1.9166603088378906, "logps/chosen_prompt": -0.8024829626083374, "logps/rejected": -8.528871536254883, "logps/rejected_both": -8.413396835327148, "logps/rejected_prompt": -1.0380266904830933, "loss": 2.0648, "nll_loss": 1.9158977270126343, "rewards/accuracies": 1.0, "rewards/chosen": -0.7729167342185974, "rewards/margins": 2.63863205909729, "rewards/rejected": -3.4115490913391113, "step": 1990 }, { "epoch": 1.6, "grad_norm": 0.20086785213912312, "learning_rate": 2.238678841830867e-05, "log_odds_chosen": 6.327115058898926, "log_odds_ratio": -0.004725167062133551, "logits/chosen": -2.966679573059082, "logits/chosen_prompt": -2.6999356746673584, "logits/rejected": -1.8506364822387695, "logits/rejected_prompt": -2.6866955757141113, "logps/chosen": -1.8783817291259766, "logps/chosen_both": -1.8609817028045654, "logps/chosen_prompt": -0.7905829548835754, "logps/rejected": -8.029566764831543, "logps/rejected_both": -7.908673286437988, "logps/rejected_prompt": -1.0723146200180054, "loss": 1.9398, "nll_loss": 1.860142469406128, "rewards/accuracies": 1.0, "rewards/chosen": -0.7513527274131775, "rewards/margins": 2.4604744911193848, "rewards/rejected": -3.211826801300049, "step": 2000 }, { "epoch": 1.608, "grad_norm": 3.6061661350197456, "learning_rate": 2.217859037816296e-05, "log_odds_chosen": 4.772618770599365, "log_odds_ratio": -0.14787371456623077, "logits/chosen": -2.9939560890197754, "logits/chosen_prompt": -2.712306499481201, "logits/rejected": -2.120854139328003, "logits/rejected_prompt": -2.699389934539795, "logps/chosen": -2.0005993843078613, "logps/chosen_both": -1.9795938730239868, "logps/chosen_prompt": -0.7556421160697937, "logps/rejected": -6.654515743255615, "logps/rejected_both": -6.551595211029053, "logps/rejected_prompt": -0.9516459703445435, "loss": 1.9737, "nll_loss": 1.9790796041488647, "rewards/accuracies": 1.0, "rewards/chosen": -0.8002398610115051, "rewards/margins": 1.8615667819976807, "rewards/rejected": -2.661806344985962, "step": 2010 }, { "epoch": 1.616, "grad_norm": 0.3283349921571691, "learning_rate": 2.1970590354258745e-05, "log_odds_chosen": 6.253961086273193, "log_odds_ratio": -0.07067908346652985, "logits/chosen": -2.9472057819366455, "logits/chosen_prompt": -2.6802945137023926, "logits/rejected": -1.744763731956482, "logits/rejected_prompt": -2.6687159538269043, "logps/chosen": -1.955038070678711, "logps/chosen_both": -1.9407745599746704, "logps/chosen_prompt": -1.00258469581604, "logps/rejected": -8.05742073059082, "logps/rejected_both": -7.938286781311035, "logps/rejected_prompt": -1.1584670543670654, "loss": 2.0349, "nll_loss": 1.940118432044983, "rewards/accuracies": 1.0, "rewards/chosen": -0.7820152044296265, "rewards/margins": 2.440953254699707, "rewards/rejected": -3.222968339920044, "step": 2020 }, { "epoch": 1.624, "grad_norm": 1.8747875530540283, "learning_rate": 2.176280294475383e-05, "log_odds_chosen": 6.281460762023926, "log_odds_ratio": -0.03783145174384117, "logits/chosen": -3.011366844177246, "logits/chosen_prompt": -2.6553094387054443, "logits/rejected": -1.8144845962524414, "logits/rejected_prompt": -2.649622678756714, "logps/chosen": -1.9069626331329346, "logps/chosen_both": -1.8888943195343018, "logps/chosen_prompt": -0.7433997988700867, "logps/rejected": -8.008193969726562, "logps/rejected_both": -7.898676872253418, "logps/rejected_prompt": -0.9908720254898071, "loss": 1.9971, "nll_loss": 1.8877136707305908, "rewards/accuracies": 1.0, "rewards/chosen": -0.762785017490387, "rewards/margins": 2.440492630004883, "rewards/rejected": -3.203277587890625, "step": 2030 }, { "epoch": 1.6320000000000001, "grad_norm": 2.9635896306517915, "learning_rate": 2.155524273288405e-05, "log_odds_chosen": 4.7696404457092285, "log_odds_ratio": -0.2104126662015915, "logits/chosen": -2.9527573585510254, "logits/chosen_prompt": -2.6921048164367676, "logits/rejected": -2.0738635063171387, "logits/rejected_prompt": -2.67110538482666, "logps/chosen": -1.996506690979004, "logps/chosen_both": -1.9748737812042236, "logps/chosen_prompt": -0.7325566411018372, "logps/rejected": -6.6651411056518555, "logps/rejected_both": -6.573362827301025, "logps/rejected_prompt": -0.9392368197441101, "loss": 1.9348, "nll_loss": 1.9730939865112305, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7986027002334595, "rewards/margins": 1.8674538135528564, "rewards/rejected": -2.6660561561584473, "step": 2040 }, { "epoch": 1.6400000000000001, "grad_norm": 0.19964912068774665, "learning_rate": 2.1347924285939714e-05, "log_odds_chosen": 6.8775224685668945, "log_odds_ratio": -0.008257986977696419, "logits/chosen": -2.917914867401123, "logits/chosen_prompt": -2.6802151203155518, "logits/rejected": -1.6495475769042969, "logits/rejected_prompt": -2.661830186843872, "logps/chosen": -2.0301578044891357, "logps/chosen_both": -2.007798910140991, "logps/chosen_prompt": -0.8403179049491882, "logps/rejected": -8.763223648071289, "logps/rejected_both": -8.611532211303711, "logps/rejected_prompt": -1.09980046749115, "loss": 2.1549, "nll_loss": 2.006844997406006, "rewards/accuracies": 1.0, "rewards/chosen": -0.8120630979537964, "rewards/margins": 2.6932263374328613, "rewards/rejected": -3.5052895545959473, "step": 2050 }, { "epoch": 1.6480000000000001, "grad_norm": 0.17460562158440138, "learning_rate": 2.114086215424322e-05, "log_odds_chosen": 6.110722064971924, "log_odds_ratio": -0.023483365774154663, "logits/chosen": -2.909790515899658, "logits/chosen_prompt": -2.6986935138702393, "logits/rejected": -1.758716344833374, "logits/rejected_prompt": -2.6658692359924316, "logps/chosen": -2.3056933879852295, "logps/chosen_both": -2.285371780395508, "logps/chosen_prompt": -0.833857536315918, "logps/rejected": -8.272215843200684, "logps/rejected_both": -8.151971817016602, "logps/rejected_prompt": -1.0774855613708496, "loss": 2.0557, "nll_loss": 2.284456968307495, "rewards/accuracies": 1.0, "rewards/chosen": -0.9222772717475891, "rewards/margins": 2.3866093158721924, "rewards/rejected": -3.308886766433716, "step": 2060 }, { "epoch": 1.6560000000000001, "grad_norm": 0.19823340696579927, "learning_rate": 2.0934070870127912e-05, "log_odds_chosen": 5.7514495849609375, "log_odds_ratio": -0.13919630646705627, "logits/chosen": -2.9313971996307373, "logits/chosen_prompt": -2.690089225769043, "logits/rejected": -1.7628095149993896, "logits/rejected_prompt": -2.6867289543151855, "logps/chosen": -2.0054726600646973, "logps/chosen_both": -1.9867470264434814, "logps/chosen_prompt": -0.730907678604126, "logps/rejected": -7.626162528991699, "logps/rejected_both": -7.516133785247803, "logps/rejected_prompt": -0.9458767771720886, "loss": 2.0384, "nll_loss": 1.9859052896499634, "rewards/accuracies": 1.0, "rewards/chosen": -0.8021891713142395, "rewards/margins": 2.2482759952545166, "rewards/rejected": -3.0504648685455322, "step": 2070 }, { "epoch": 1.6640000000000001, "grad_norm": 0.19392027541652682, "learning_rate": 2.0727564946918087e-05, "log_odds_chosen": 7.237205505371094, "log_odds_ratio": -0.001250033383257687, "logits/chosen": -2.934305429458618, "logits/chosen_prompt": -2.7029290199279785, "logits/rejected": -1.5330889225006104, "logits/rejected_prompt": -2.6817727088928223, "logps/chosen": -2.0364651679992676, "logps/chosen_both": -2.015903949737549, "logps/chosen_prompt": -0.8590591549873352, "logps/rejected": -9.122060775756836, "logps/rejected_both": -8.987492561340332, "logps/rejected_prompt": -1.0628540515899658, "loss": 1.9994, "nll_loss": 2.0151782035827637, "rewards/accuracies": 1.0, "rewards/chosen": -0.8145861625671387, "rewards/margins": 2.834237575531006, "rewards/rejected": -3.6488234996795654, "step": 2080 }, { "epoch": 1.6720000000000002, "grad_norm": 0.2855392155807927, "learning_rate": 2.0521358877910444e-05, "log_odds_chosen": 6.342986583709717, "log_odds_ratio": -0.07219625264406204, "logits/chosen": -2.9752235412597656, "logits/chosen_prompt": -2.7005088329315186, "logits/rejected": -1.7442362308502197, "logits/rejected_prompt": -2.693645477294922, "logps/chosen": -1.990447759628296, "logps/chosen_both": -1.970177412033081, "logps/chosen_prompt": -0.7856583595275879, "logps/rejected": -8.199989318847656, "logps/rejected_both": -8.072303771972656, "logps/rejected_prompt": -0.9411813020706177, "loss": 2.021, "nll_loss": 1.9698638916015625, "rewards/accuracies": 1.0, "rewards/chosen": -0.7961790561676025, "rewards/margins": 2.483816623687744, "rewards/rejected": -3.2799954414367676, "step": 2090 }, { "epoch": 1.6800000000000002, "grad_norm": 0.36170871833517027, "learning_rate": 2.031546713535688e-05, "log_odds_chosen": 5.634890079498291, "log_odds_ratio": -0.1395900696516037, "logits/chosen": -2.93391752243042, "logits/chosen_prompt": -2.718055248260498, "logits/rejected": -1.7808215618133545, "logits/rejected_prompt": -2.6867878437042236, "logps/chosen": -2.3721437454223633, "logps/chosen_both": -2.3435354232788086, "logps/chosen_prompt": -0.7950377464294434, "logps/rejected": -7.916224479675293, "logps/rejected_both": -7.782776832580566, "logps/rejected_prompt": -0.9661157727241516, "loss": 2.1271, "nll_loss": 2.341766595840454, "rewards/accuracies": 1.0, "rewards/chosen": -0.9488574862480164, "rewards/margins": 2.21763277053833, "rewards/rejected": -3.166490077972412, "step": 2100 }, { "epoch": 1.688, "grad_norm": 0.3052641697772741, "learning_rate": 2.01099041694488e-05, "log_odds_chosen": 5.173205375671387, "log_odds_ratio": -0.2093629539012909, "logits/chosen": -2.913505792617798, "logits/chosen_prompt": -2.695497512817383, "logits/rejected": -1.9728949069976807, "logits/rejected_prompt": -2.681952476501465, "logps/chosen": -1.9676679372787476, "logps/chosen_both": -1.9531806707382202, "logps/chosen_prompt": -0.8127241134643555, "logps/rejected": -7.031458377838135, "logps/rejected_both": -6.950935363769531, "logps/rejected_prompt": -0.9248498678207397, "loss": 2.0659, "nll_loss": 1.9526466131210327, "rewards/accuracies": 1.0, "rewards/chosen": -0.787067174911499, "rewards/margins": 2.0255160331726074, "rewards/rejected": -2.8125832080841064, "step": 2110 }, { "epoch": 1.696, "grad_norm": 0.1918548604852694, "learning_rate": 1.9904684407302883e-05, "log_odds_chosen": 7.995016574859619, "log_odds_ratio": -0.00040107182576321065, "logits/chosen": -3.0051703453063965, "logits/chosen_prompt": -2.7128148078918457, "logits/rejected": -1.3667514324188232, "logits/rejected_prompt": -2.695828676223755, "logps/chosen": -1.9211137294769287, "logps/chosen_both": -1.9036529064178467, "logps/chosen_prompt": -0.8414414525032043, "logps/rejected": -9.738038063049316, "logps/rejected_both": -9.58409309387207, "logps/rejected_prompt": -0.957872748374939, "loss": 1.9882, "nll_loss": 1.9027389287948608, "rewards/accuracies": 1.0, "rewards/chosen": -0.7684455513954163, "rewards/margins": 3.126769781112671, "rewards/rejected": -3.8952155113220215, "step": 2120 }, { "epoch": 1.704, "grad_norm": 25.341642829209718, "learning_rate": 1.969982225194864e-05, "log_odds_chosen": 6.443746089935303, "log_odds_ratio": -0.13866354525089264, "logits/chosen": -2.8991589546203613, "logits/chosen_prompt": -2.704436779022217, "logits/rejected": -1.6840307712554932, "logits/rejected_prompt": -2.696018695831299, "logps/chosen": -1.971212387084961, "logps/chosen_both": -1.9563363790512085, "logps/chosen_prompt": -0.7664562463760376, "logps/rejected": -8.291219711303711, "logps/rejected_both": -8.195323944091797, "logps/rejected_prompt": -0.8870849609375, "loss": 2.0512, "nll_loss": 1.955370545387268, "rewards/accuracies": 1.0, "rewards/chosen": -0.7884851098060608, "rewards/margins": 2.5280027389526367, "rewards/rejected": -3.3164875507354736, "step": 2130 }, { "epoch": 1.712, "grad_norm": 0.20382071740750204, "learning_rate": 1.9495332081317464e-05, "log_odds_chosen": 6.890301704406738, "log_odds_ratio": -0.009469824843108654, "logits/chosen": -2.8794448375701904, "logits/chosen_prompt": -2.694141387939453, "logits/rejected": -1.638772964477539, "logits/rejected_prompt": -2.6982343196868896, "logps/chosen": -2.006687641143799, "logps/chosen_both": -1.9925482273101807, "logps/chosen_prompt": -0.8075912594795227, "logps/rejected": -8.752016067504883, "logps/rejected_both": -8.659661293029785, "logps/rejected_prompt": -1.0454128980636597, "loss": 1.9488, "nll_loss": 1.9920895099639893, "rewards/accuracies": 1.0, "rewards/chosen": -0.8026750683784485, "rewards/margins": 2.698131561279297, "rewards/rejected": -3.5008063316345215, "step": 2140 }, { "epoch": 1.72, "grad_norm": 3.232652124328266, "learning_rate": 1.9291228247233605e-05, "log_odds_chosen": 6.535033226013184, "log_odds_ratio": -0.0724484771490097, "logits/chosen": -2.8941891193389893, "logits/chosen_prompt": -2.70381498336792, "logits/rejected": -1.799768090248108, "logits/rejected_prompt": -2.6814205646514893, "logps/chosen": -1.9803783893585205, "logps/chosen_both": -1.9626888036727905, "logps/chosen_prompt": -0.8645817041397095, "logps/rejected": -8.38414192199707, "logps/rejected_both": -8.267631530761719, "logps/rejected_prompt": -0.9822869300842285, "loss": 1.9512, "nll_loss": 1.9625753164291382, "rewards/accuracies": 1.0, "rewards/chosen": -0.7921513319015503, "rewards/margins": 2.5615053176879883, "rewards/rejected": -3.353656768798828, "step": 2150 }, { "epoch": 1.728, "grad_norm": 0.5121046736628673, "learning_rate": 1.908752507440689e-05, "log_odds_chosen": 6.229867458343506, "log_odds_ratio": -0.0752544105052948, "logits/chosen": -2.935990571975708, "logits/chosen_prompt": -2.68332576751709, "logits/rejected": -1.7542794942855835, "logits/rejected_prompt": -2.6715810298919678, "logps/chosen": -2.238250732421875, "logps/chosen_both": -2.217163562774658, "logps/chosen_prompt": -0.7275692820549011, "logps/rejected": -8.351387023925781, "logps/rejected_both": -8.241617202758789, "logps/rejected_prompt": -0.9444383382797241, "loss": 2.1639, "nll_loss": 2.2166025638580322, "rewards/accuracies": 1.0, "rewards/chosen": -0.8953002691268921, "rewards/margins": 2.445254325866699, "rewards/rejected": -3.340554714202881, "step": 2160 }, { "epoch": 1.736, "grad_norm": 0.18380447787382737, "learning_rate": 1.888423685942732e-05, "log_odds_chosen": 7.403123378753662, "log_odds_ratio": -0.0035772870760411024, "logits/chosen": -2.9258389472961426, "logits/chosen_prompt": -2.7035067081451416, "logits/rejected": -1.6778090000152588, "logits/rejected_prompt": -2.682382106781006, "logps/chosen": -1.8578765392303467, "logps/chosen_both": -1.8427069187164307, "logps/chosen_prompt": -0.832676887512207, "logps/rejected": -9.08339786529541, "logps/rejected_both": -8.959403038024902, "logps/rejected_prompt": -1.1029479503631592, "loss": 1.9654, "nll_loss": 1.8422781229019165, "rewards/accuracies": 1.0, "rewards/chosen": -0.7431506514549255, "rewards/margins": 2.8902084827423096, "rewards/rejected": -3.63335919380188, "step": 2170 }, { "epoch": 1.744, "grad_norm": 15.42646908452697, "learning_rate": 1.868137786976177e-05, "log_odds_chosen": 6.83737325668335, "log_odds_ratio": -0.09123753756284714, "logits/chosen": -2.9604616165161133, "logits/chosen_prompt": -2.6771702766418457, "logits/rejected": -1.7559928894042969, "logits/rejected_prompt": -2.6906254291534424, "logps/chosen": -1.9559208154678345, "logps/chosen_both": -1.9405914545059204, "logps/chosen_prompt": -0.7949713468551636, "logps/rejected": -8.641664505004883, "logps/rejected_both": -8.521966934204102, "logps/rejected_prompt": -0.9677802324295044, "loss": 2.0939, "nll_loss": 1.938951849937439, "rewards/accuracies": 1.0, "rewards/chosen": -0.7823683619499207, "rewards/margins": 2.6742970943450928, "rewards/rejected": -3.4566657543182373, "step": 2180 }, { "epoch": 1.752, "grad_norm": 12.062069037613009, "learning_rate": 1.8478962342752583e-05, "log_odds_chosen": 6.820882320404053, "log_odds_ratio": -0.07564956694841385, "logits/chosen": -2.904177665710449, "logits/chosen_prompt": -2.666506052017212, "logits/rejected": -1.7927961349487305, "logits/rejected_prompt": -2.67189884185791, "logps/chosen": -2.0425262451171875, "logps/chosen_both": -2.0270590782165527, "logps/chosen_prompt": -0.8014975786209106, "logps/rejected": -8.73670768737793, "logps/rejected_both": -8.633912086486816, "logps/rejected_prompt": -1.0191423892974854, "loss": 2.0463, "nll_loss": 2.0263657569885254, "rewards/accuracies": 1.0, "rewards/chosen": -0.8170105218887329, "rewards/margins": 2.6776726245880127, "rewards/rejected": -3.4946835041046143, "step": 2190 }, { "epoch": 1.76, "grad_norm": 1.1920813557914467, "learning_rate": 1.827700448461836e-05, "log_odds_chosen": 7.279504299163818, "log_odds_ratio": -0.13858437538146973, "logits/chosen": -3.018719434738159, "logits/chosen_prompt": -2.687682628631592, "logits/rejected": -1.6826099157333374, "logits/rejected_prompt": -2.678703784942627, "logps/chosen": -1.860093355178833, "logps/chosen_both": -1.8447208404541016, "logps/chosen_prompt": -0.8991209268569946, "logps/rejected": -9.011571884155273, "logps/rejected_both": -8.870678901672363, "logps/rejected_prompt": -1.096939206123352, "loss": 2.012, "nll_loss": 1.84355890750885, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7440372705459595, "rewards/margins": 2.8605916500091553, "rewards/rejected": -3.6046290397644043, "step": 2200 }, { "epoch": 1.768, "grad_norm": 0.19530589950798477, "learning_rate": 1.807551846945694e-05, "log_odds_chosen": 8.2916898727417, "log_odds_ratio": -0.06947987526655197, "logits/chosen": -2.939237117767334, "logits/chosen_prompt": -2.6988303661346436, "logits/rejected": -1.6200687885284424, "logits/rejected_prompt": -2.68789005279541, "logps/chosen": -1.9331436157226562, "logps/chosen_both": -1.916733741760254, "logps/chosen_prompt": -0.7277871370315552, "logps/rejected": -10.084833145141602, "logps/rejected_both": -9.953168869018555, "logps/rejected_prompt": -1.032865285873413, "loss": 1.9735, "nll_loss": 1.916029691696167, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7732575535774231, "rewards/margins": 3.260676145553589, "rewards/rejected": -4.033933639526367, "step": 2210 }, { "epoch": 1.776, "grad_norm": 15.17903488212651, "learning_rate": 1.7874518438250597e-05, "log_odds_chosen": 9.437470436096191, "log_odds_ratio": -0.00649250065907836, "logits/chosen": -2.9586923122406006, "logits/chosen_prompt": -2.700380802154541, "logits/rejected": -1.6204473972320557, "logits/rejected_prompt": -2.668332576751709, "logps/chosen": -2.0388143062591553, "logps/chosen_both": -2.017122268676758, "logps/chosen_prompt": -0.7435789108276367, "logps/rejected": -11.334449768066406, "logps/rejected_both": -11.154394149780273, "logps/rejected_prompt": -0.9411799311637878, "loss": 2.1772, "nll_loss": 2.0165975093841553, "rewards/accuracies": 1.0, "rewards/chosen": -0.8155257105827332, "rewards/margins": 3.718254566192627, "rewards/rejected": -4.533780097961426, "step": 2220 }, { "epoch": 1.784, "grad_norm": 0.28398933589113434, "learning_rate": 1.767401849787357e-05, "log_odds_chosen": 6.384799957275391, "log_odds_ratio": -0.07637131214141846, "logits/chosen": -2.9650635719299316, "logits/chosen_prompt": -2.6936004161834717, "logits/rejected": -1.797628402709961, "logits/rejected_prompt": -2.690913438796997, "logps/chosen": -1.8709478378295898, "logps/chosen_both": -1.856300950050354, "logps/chosen_prompt": -0.8806565403938293, "logps/rejected": -8.102632522583008, "logps/rejected_both": -7.991517543792725, "logps/rejected_prompt": -1.0237706899642944, "loss": 2.1791, "nll_loss": 1.8553836345672607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.748379111289978, "rewards/margins": 2.492673635482788, "rewards/rejected": -3.2410526275634766, "step": 2230 }, { "epoch": 1.792, "grad_norm": 7.344829678329039, "learning_rate": 1.747403272010199e-05, "log_odds_chosen": 4.503691673278809, "log_odds_ratio": -0.44330325722694397, "logits/chosen": -2.9304556846618652, "logits/chosen_prompt": -2.7112066745758057, "logits/rejected": -2.020601749420166, "logits/rejected_prompt": -2.6991848945617676, "logps/chosen": -2.2137069702148438, "logps/chosen_both": -2.192910671234131, "logps/chosen_prompt": -0.7757335305213928, "logps/rejected": -6.606595039367676, "logps/rejected_both": -6.522683143615723, "logps/rejected_prompt": -1.0225099325180054, "loss": 2.0432, "nll_loss": 2.1926403045654297, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.885482668876648, "rewards/margins": 1.7571556568145752, "rewards/rejected": -2.6426382064819336, "step": 2240 }, { "epoch": 1.8, "grad_norm": 0.2830736721750178, "learning_rate": 1.7274575140626318e-05, "log_odds_chosen": 6.729086399078369, "log_odds_ratio": -0.002848730655387044, "logits/chosen": -2.9603111743927, "logits/chosen_prompt": -2.712522268295288, "logits/rejected": -1.471806287765503, "logits/rejected_prompt": -2.711698055267334, "logps/chosen": -1.9502222537994385, "logps/chosen_both": -1.932050347328186, "logps/chosen_prompt": -0.7791944146156311, "logps/rejected": -8.519399642944336, "logps/rejected_both": -8.40225887298584, "logps/rejected_prompt": -0.9917134046554565, "loss": 2.004, "nll_loss": 1.9314903020858765, "rewards/accuracies": 1.0, "rewards/chosen": -0.7800888419151306, "rewards/margins": 2.627671241760254, "rewards/rejected": -3.4077601432800293, "step": 2250 }, { "epoch": 1.808, "grad_norm": 1.3701495350067383, "learning_rate": 1.7075659758066208e-05, "log_odds_chosen": 4.735475063323975, "log_odds_ratio": -0.14837773144245148, "logits/chosen": -2.9037442207336426, "logits/chosen_prompt": -2.6904830932617188, "logits/rejected": -1.9993311166763306, "logits/rejected_prompt": -2.672048807144165, "logps/chosen": -2.0128910541534424, "logps/chosen_both": -1.9941341876983643, "logps/chosen_prompt": -0.7718429565429688, "logps/rejected": -6.631512641906738, "logps/rejected_both": -6.536102294921875, "logps/rejected_prompt": -0.9579516649246216, "loss": 2.0311, "nll_loss": 1.9931504726409912, "rewards/accuracies": 1.0, "rewards/chosen": -0.805156409740448, "rewards/margins": 1.8474489450454712, "rewards/rejected": -2.6526052951812744, "step": 2260 }, { "epoch": 1.8159999999999998, "grad_norm": 21.366222606488684, "learning_rate": 1.6877300532988094e-05, "log_odds_chosen": 7.610182762145996, "log_odds_ratio": -0.0006168467225506902, "logits/chosen": -2.9680118560791016, "logits/chosen_prompt": -2.664792776107788, "logits/rejected": -1.2061169147491455, "logits/rejected_prompt": -2.642937183380127, "logps/chosen": -2.109647512435913, "logps/chosen_both": -2.0934646129608154, "logps/chosen_prompt": -0.9366092681884766, "logps/rejected": -9.573705673217773, "logps/rejected_both": -9.448970794677734, "logps/rejected_prompt": -1.088648796081543, "loss": 2.0712, "nll_loss": 2.0929782390594482, "rewards/accuracies": 1.0, "rewards/chosen": -0.843859076499939, "rewards/margins": 2.9856228828430176, "rewards/rejected": -3.829482316970825, "step": 2270 }, { "epoch": 1.8239999999999998, "grad_norm": 16.630798038144235, "learning_rate": 1.6679511386925337e-05, "log_odds_chosen": 7.555551052093506, "log_odds_ratio": -0.0009092552354559302, "logits/chosen": -2.9446640014648438, "logits/chosen_prompt": -2.703678607940674, "logits/rejected": -1.251961588859558, "logits/rejected_prompt": -2.686135768890381, "logps/chosen": -1.9308589696884155, "logps/chosen_both": -1.9166322946548462, "logps/chosen_prompt": -0.7264224290847778, "logps/rejected": -9.319347381591797, "logps/rejected_both": -9.213998794555664, "logps/rejected_prompt": -0.9491874575614929, "loss": 1.9893, "nll_loss": 1.915757179260254, "rewards/accuracies": 1.0, "rewards/chosen": -0.7723435759544373, "rewards/margins": 2.955395221710205, "rewards/rejected": -3.727738857269287, "step": 2280 }, { "epoch": 1.8319999999999999, "grad_norm": 12.241247239412013, "learning_rate": 1.648230620140121e-05, "log_odds_chosen": 5.702427864074707, "log_odds_ratio": -0.08441531658172607, "logits/chosen": -2.9145102500915527, "logits/chosen_prompt": -2.7137434482574463, "logits/rejected": -1.6203930377960205, "logits/rejected_prompt": -2.7078521251678467, "logps/chosen": -2.2361724376678467, "logps/chosen_both": -2.2188587188720703, "logps/chosen_prompt": -0.8718380928039551, "logps/rejected": -7.825617790222168, "logps/rejected_both": -7.731575012207031, "logps/rejected_prompt": -0.9629694223403931, "loss": 2.0784, "nll_loss": 2.2178969383239746, "rewards/accuracies": 1.0, "rewards/chosen": -0.8944689631462097, "rewards/margins": 2.2357778549194336, "rewards/rejected": -3.130246877670288, "step": 2290 }, { "epoch": 1.8399999999999999, "grad_norm": 0.2611980916177983, "learning_rate": 1.6285698816954624e-05, "log_odds_chosen": 5.886144638061523, "log_odds_ratio": -0.14016158878803253, "logits/chosen": -2.961277484893799, "logits/chosen_prompt": -2.7103641033172607, "logits/rejected": -1.6664111614227295, "logits/rejected_prompt": -2.7068681716918945, "logps/chosen": -1.891758918762207, "logps/chosen_both": -1.877873182296753, "logps/chosen_prompt": -0.8406246304512024, "logps/rejected": -7.652543067932129, "logps/rejected_both": -7.56333065032959, "logps/rejected_prompt": -0.9318545460700989, "loss": 1.9727, "nll_loss": 1.877637267112732, "rewards/accuracies": 1.0, "rewards/chosen": -0.7567036151885986, "rewards/margins": 2.3043136596679688, "rewards/rejected": -3.0610175132751465, "step": 2300 }, { "epoch": 1.8479999999999999, "grad_norm": 0.17316872141044676, "learning_rate": 1.6089703032168733e-05, "log_odds_chosen": 6.335439205169678, "log_odds_ratio": -0.007680490612983704, "logits/chosen": -2.9618372917175293, "logits/chosen_prompt": -2.6908061504364014, "logits/rejected": -1.7726625204086304, "logits/rejected_prompt": -2.684845447540283, "logps/chosen": -2.021721839904785, "logps/chosen_both": -2.0070888996124268, "logps/chosen_prompt": -0.8626869916915894, "logps/rejected": -8.212113380432129, "logps/rejected_both": -8.121031761169434, "logps/rejected_prompt": -1.1338939666748047, "loss": 2.086, "nll_loss": 2.0067009925842285, "rewards/accuracies": 1.0, "rewards/chosen": -0.808688759803772, "rewards/margins": 2.4761569499969482, "rewards/rejected": -3.2848453521728516, "step": 2310 }, { "epoch": 1.8559999999999999, "grad_norm": 0.21046741293754637, "learning_rate": 1.5894332602702545e-05, "log_odds_chosen": 5.3062238693237305, "log_odds_ratio": -0.09238220006227493, "logits/chosen": -2.863762378692627, "logits/chosen_prompt": -2.698549747467041, "logits/rejected": -1.7465136051177979, "logits/rejected_prompt": -2.68521785736084, "logps/chosen": -2.2504518032073975, "logps/chosen_both": -2.2304165363311768, "logps/chosen_prompt": -0.8663703203201294, "logps/rejected": -7.458860874176025, "logps/rejected_both": -7.366589546203613, "logps/rejected_prompt": -1.0120290517807007, "loss": 2.0757, "nll_loss": 2.2292349338531494, "rewards/accuracies": 1.0, "rewards/chosen": -0.9001806974411011, "rewards/margins": 2.0833640098571777, "rewards/rejected": -2.9835448265075684, "step": 2320 }, { "epoch": 1.8639999999999999, "grad_norm": 0.2325223892090008, "learning_rate": 1.5699601240325474e-05, "log_odds_chosen": 5.675802230834961, "log_odds_ratio": -0.14025500416755676, "logits/chosen": -2.9541871547698975, "logits/chosen_prompt": -2.739253520965576, "logits/rejected": -1.7137792110443115, "logits/rejected_prompt": -2.7213757038116455, "logps/chosen": -2.011998176574707, "logps/chosen_both": -1.9911746978759766, "logps/chosen_prompt": -0.7685104012489319, "logps/rejected": -7.561570167541504, "logps/rejected_both": -7.440642356872559, "logps/rejected_prompt": -0.9734441041946411, "loss": 2.1005, "nll_loss": 1.9904701709747314, "rewards/accuracies": 1.0, "rewards/chosen": -0.8047992587089539, "rewards/margins": 2.2198290824890137, "rewards/rejected": -3.024627923965454, "step": 2330 }, { "epoch": 1.8719999999999999, "grad_norm": 0.22047561828057208, "learning_rate": 1.5505522611954975e-05, "log_odds_chosen": 5.360434532165527, "log_odds_ratio": -0.015295952558517456, "logits/chosen": -2.899050235748291, "logits/chosen_prompt": -2.718276262283325, "logits/rejected": -2.08345365524292, "logits/rejected_prompt": -2.6998016834259033, "logps/chosen": -1.8844950199127197, "logps/chosen_both": -1.8703863620758057, "logps/chosen_prompt": -0.851974606513977, "logps/rejected": -7.060413360595703, "logps/rejected_both": -6.973315238952637, "logps/rejected_prompt": -1.0805187225341797, "loss": 2.1013, "nll_loss": 1.8690898418426514, "rewards/accuracies": 1.0, "rewards/chosen": -0.7537980675697327, "rewards/margins": 2.0703673362731934, "rewards/rejected": -2.8241655826568604, "step": 2340 }, { "epoch": 1.88, "grad_norm": 1.1925590095899927, "learning_rate": 1.5312110338697426e-05, "log_odds_chosen": 4.792149066925049, "log_odds_ratio": -0.1287117898464203, "logits/chosen": -2.9038636684417725, "logits/chosen_prompt": -2.692437171936035, "logits/rejected": -1.9894816875457764, "logits/rejected_prompt": -2.6797823905944824, "logps/chosen": -1.9725837707519531, "logps/chosen_both": -1.9566154479980469, "logps/chosen_prompt": -0.7425985932350159, "logps/rejected": -6.638279914855957, "logps/rejected_both": -6.564992427825928, "logps/rejected_prompt": -0.9972286224365234, "loss": 1.9786, "nll_loss": 1.9555227756500244, "rewards/accuracies": 1.0, "rewards/chosen": -0.7890334725379944, "rewards/margins": 1.8662786483764648, "rewards/rejected": -2.6553120613098145, "step": 2350 }, { "epoch": 1.888, "grad_norm": 0.2631097802741203, "learning_rate": 1.5119377994892094e-05, "log_odds_chosen": 7.000193119049072, "log_odds_ratio": -0.0028563719242811203, "logits/chosen": -3.0186381340026855, "logits/chosen_prompt": -2.723498821258545, "logits/rejected": -1.5227829217910767, "logits/rejected_prompt": -2.7204127311706543, "logps/chosen": -1.8698396682739258, "logps/chosen_both": -1.850454330444336, "logps/chosen_prompt": -0.7684019207954407, "logps/rejected": -8.695045471191406, "logps/rejected_both": -8.554825782775879, "logps/rejected_prompt": -1.0279042720794678, "loss": 2.0014, "nll_loss": 1.8499305248260498, "rewards/accuracies": 1.0, "rewards/chosen": -0.7479358315467834, "rewards/margins": 2.7300820350646973, "rewards/rejected": -3.478017807006836, "step": 2360 }, { "epoch": 1.896, "grad_norm": 0.20426857310467877, "learning_rate": 1.4927339107158437e-05, "log_odds_chosen": 8.02978515625, "log_odds_ratio": -0.0003904960467480123, "logits/chosen": -2.951490879058838, "logits/chosen_prompt": -2.708991289138794, "logits/rejected": -1.2117061614990234, "logits/rejected_prompt": -2.6999001502990723, "logps/chosen": -1.9645278453826904, "logps/chosen_both": -1.9457321166992188, "logps/chosen_prompt": -0.761443018913269, "logps/rejected": -9.840217590332031, "logps/rejected_both": -9.700372695922852, "logps/rejected_prompt": -0.9850748181343079, "loss": 1.9906, "nll_loss": 1.9449169635772705, "rewards/accuracies": 1.0, "rewards/chosen": -0.7858111262321472, "rewards/margins": 3.1502761840820312, "rewards/rejected": -3.936087131500244, "step": 2370 }, { "epoch": 1.904, "grad_norm": 8.58950626485984, "learning_rate": 1.4736007153446801e-05, "log_odds_chosen": 8.620465278625488, "log_odds_ratio": -0.00021180181647650898, "logits/chosen": -2.903035879135132, "logits/chosen_prompt": -2.735071897506714, "logits/rejected": -1.012452483177185, "logits/rejected_prompt": -2.7112841606140137, "logps/chosen": -2.025474786758423, "logps/chosen_both": -2.007967472076416, "logps/chosen_prompt": -0.8391423225402832, "logps/rejected": -10.502188682556152, "logps/rejected_both": -10.356060028076172, "logps/rejected_prompt": -0.9537385106086731, "loss": 2.3368, "nll_loss": 2.0072412490844727, "rewards/accuracies": 1.0, "rewards/chosen": -0.8101899027824402, "rewards/margins": 3.39068603515625, "rewards/rejected": -4.200875282287598, "step": 2380 }, { "epoch": 1.912, "grad_norm": 0.19583726689690906, "learning_rate": 1.4545395562092468e-05, "log_odds_chosen": 6.079274654388428, "log_odds_ratio": -0.4031279981136322, "logits/chosen": -2.844682455062866, "logits/chosen_prompt": -2.8039345741271973, "logits/rejected": -1.3123562335968018, "logits/rejected_prompt": -2.7909157276153564, "logps/chosen": -3.1939139366149902, "logps/chosen_both": -3.164135217666626, "logps/chosen_prompt": -0.8311759233474731, "logps/rejected": -9.186834335327148, "logps/rejected_both": -9.063508033752441, "logps/rejected_prompt": -1.087949275970459, "loss": 2.1303, "nll_loss": 3.163341999053955, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.277565598487854, "rewards/margins": 2.3971686363220215, "rewards/rejected": -3.674734592437744, "step": 2390 }, { "epoch": 1.92, "grad_norm": 4.277812021967688, "learning_rate": 1.4355517710873184e-05, "log_odds_chosen": 6.059283256530762, "log_odds_ratio": -0.09234263747930527, "logits/chosen": -3.0424129962921143, "logits/chosen_prompt": -2.83634614944458, "logits/rejected": -1.6069847345352173, "logits/rejected_prompt": -2.818171739578247, "logps/chosen": -1.861696481704712, "logps/chosen_both": -1.8472903966903687, "logps/chosen_prompt": -0.783744752407074, "logps/rejected": -7.765946865081787, "logps/rejected_both": -7.661751747131348, "logps/rejected_prompt": -1.0380439758300781, "loss": 2.0007, "nll_loss": 1.846143126487732, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7446784973144531, "rewards/margins": 2.3617005348205566, "rewards/rejected": -3.1063787937164307, "step": 2400 }, { "epoch": 1.928, "grad_norm": 0.1934446201158471, "learning_rate": 1.4166386926070322e-05, "log_odds_chosen": 7.342792510986328, "log_odds_ratio": -0.005115572828799486, "logits/chosen": -2.9572060108184814, "logits/chosen_prompt": -2.7633354663848877, "logits/rejected": -1.3063112497329712, "logits/rejected_prompt": -2.7578670978546143, "logps/chosen": -1.9242970943450928, "logps/chosen_both": -1.9093306064605713, "logps/chosen_prompt": -0.8123539686203003, "logps/rejected": -9.096908569335938, "logps/rejected_both": -8.975044250488281, "logps/rejected_prompt": -1.0593974590301514, "loss": 1.9902, "nll_loss": 1.9083023071289062, "rewards/accuracies": 1.0, "rewards/chosen": -0.7697189450263977, "rewards/margins": 2.869044780731201, "rewards/rejected": -3.638763904571533, "step": 2410 }, { "epoch": 1.936, "grad_norm": 0.216837041093156, "learning_rate": 1.397801648153354e-05, "log_odds_chosen": 6.378230094909668, "log_odds_ratio": -0.07421709597110748, "logits/chosen": -3.0056633949279785, "logits/chosen_prompt": -2.768573045730591, "logits/rejected": -1.5620958805084229, "logits/rejected_prompt": -2.7487571239471436, "logps/chosen": -1.9807904958724976, "logps/chosen_both": -1.9623302221298218, "logps/chosen_prompt": -0.8482378125190735, "logps/rejected": -8.202530860900879, "logps/rejected_both": -8.077143669128418, "logps/rejected_prompt": -1.0352851152420044, "loss": 1.9778, "nll_loss": 1.9611247777938843, "rewards/accuracies": 1.0, "rewards/chosen": -0.7923161387443542, "rewards/margins": 2.488696575164795, "rewards/rejected": -3.2810122966766357, "step": 2420 }, { "epoch": 1.944, "grad_norm": 0.21162368892876318, "learning_rate": 1.3790419597749199e-05, "log_odds_chosen": 5.369621753692627, "log_odds_ratio": -0.20802097022533417, "logits/chosen": -2.925058126449585, "logits/chosen_prompt": -2.727915048599243, "logits/rejected": -1.7108662128448486, "logits/rejected_prompt": -2.729671001434326, "logps/chosen": -2.030609607696533, "logps/chosen_both": -2.013143301010132, "logps/chosen_prompt": -0.7951982021331787, "logps/rejected": -7.307798862457275, "logps/rejected_both": -7.217469692230225, "logps/rejected_prompt": -0.9677292108535767, "loss": 2.0275, "nll_loss": 2.0122172832489014, "rewards/accuracies": 1.0, "rewards/chosen": -0.8122437596321106, "rewards/margins": 2.1108758449554443, "rewards/rejected": -2.92311954498291, "step": 2430 }, { "epoch": 1.952, "grad_norm": 0.1882777054319625, "learning_rate": 1.3603609440912507e-05, "log_odds_chosen": 7.206502437591553, "log_odds_ratio": -0.06993956863880157, "logits/chosen": -2.9723217487335205, "logits/chosen_prompt": -2.7605624198913574, "logits/rejected": -1.3072056770324707, "logits/rejected_prompt": -2.7452828884124756, "logps/chosen": -2.0292842388153076, "logps/chosen_both": -2.0142998695373535, "logps/chosen_prompt": -0.8006251454353333, "logps/rejected": -9.103940963745117, "logps/rejected_both": -8.999374389648438, "logps/rejected_prompt": -0.9301830530166626, "loss": 1.9849, "nll_loss": 2.0136048793792725, "rewards/accuracies": 1.0, "rewards/chosen": -0.8117138147354126, "rewards/margins": 2.829862356185913, "rewards/rejected": -3.6415767669677734, "step": 2440 }, { "epoch": 1.96, "grad_norm": 6.854544334281628, "learning_rate": 1.3417599122003464e-05, "log_odds_chosen": 5.873773574829102, "log_odds_ratio": -0.09982452541589737, "logits/chosen": -2.8911209106445312, "logits/chosen_prompt": -2.751624584197998, "logits/rejected": -1.530667781829834, "logits/rejected_prompt": -2.731210947036743, "logps/chosen": -2.39859938621521, "logps/chosen_both": -2.3739638328552246, "logps/chosen_prompt": -0.818207859992981, "logps/rejected": -8.159021377563477, "logps/rejected_both": -8.031126976013184, "logps/rejected_prompt": -0.9556495547294617, "loss": 2.0669, "nll_loss": 2.3730950355529785, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9594398736953735, "rewards/margins": 2.304168701171875, "rewards/rejected": -3.263608455657959, "step": 2450 }, { "epoch": 1.968, "grad_norm": 0.20527915967987695, "learning_rate": 1.3232401695866687e-05, "log_odds_chosen": 6.752752780914307, "log_odds_ratio": -0.093865767121315, "logits/chosen": -3.0047717094421387, "logits/chosen_prompt": -2.7637996673583984, "logits/rejected": -1.454332947731018, "logits/rejected_prompt": -2.7430145740509033, "logps/chosen": -1.9008424282073975, "logps/chosen_both": -1.8828001022338867, "logps/chosen_prompt": -0.877086341381073, "logps/rejected": -8.502935409545898, "logps/rejected_both": -8.374523162841797, "logps/rejected_prompt": -1.0814439058303833, "loss": 2.0633, "nll_loss": 1.8823230266571045, "rewards/accuracies": 1.0, "rewards/chosen": -0.7603369951248169, "rewards/margins": 2.6408374309539795, "rewards/rejected": -3.401175022125244, "step": 2460 }, { "epoch": 1.976, "grad_norm": 6.470506820654642, "learning_rate": 1.3048030160295196e-05, "log_odds_chosen": 6.849400520324707, "log_odds_ratio": -0.07237619161605835, "logits/chosen": -2.963409662246704, "logits/chosen_prompt": -2.758953094482422, "logits/rejected": -1.3645999431610107, "logits/rejected_prompt": -2.7408089637756348, "logps/chosen": -2.0132029056549072, "logps/chosen_both": -1.9941928386688232, "logps/chosen_prompt": -0.7748836874961853, "logps/rejected": -8.71554183959961, "logps/rejected_both": -8.596506118774414, "logps/rejected_prompt": -0.936238169670105, "loss": 1.9731, "nll_loss": 1.9934498071670532, "rewards/accuracies": 1.0, "rewards/chosen": -0.8052810430526733, "rewards/margins": 2.6809353828430176, "rewards/rejected": -3.4862167835235596, "step": 2470 }, { "epoch": 1.984, "grad_norm": 0.18875404411296617, "learning_rate": 1.2864497455118152e-05, "log_odds_chosen": 5.949180603027344, "log_odds_ratio": -0.20756885409355164, "logits/chosen": -2.90920352935791, "logits/chosen_prompt": -2.731333017349243, "logits/rejected": -1.5196672677993774, "logits/rejected_prompt": -2.7116055488586426, "logps/chosen": -2.0656113624572754, "logps/chosen_both": -2.0481104850769043, "logps/chosen_prompt": -0.7715897560119629, "logps/rejected": -7.913069725036621, "logps/rejected_both": -7.810868263244629, "logps/rejected_prompt": -1.0343679189682007, "loss": 2.0494, "nll_loss": 2.0465188026428223, "rewards/accuracies": 1.0, "rewards/chosen": -0.8262445330619812, "rewards/margins": 2.3389835357666016, "rewards/rejected": -3.1652283668518066, "step": 2480 }, { "epoch": 1.992, "grad_norm": 0.19965333207670072, "learning_rate": 1.2681816461292715e-05, "log_odds_chosen": 6.9041619300842285, "log_odds_ratio": -0.07076757401227951, "logits/chosen": -2.9241271018981934, "logits/chosen_prompt": -2.7164487838745117, "logits/rejected": -1.2974779605865479, "logits/rejected_prompt": -2.7119083404541016, "logps/chosen": -2.164299249649048, "logps/chosen_both": -2.1463229656219482, "logps/chosen_prompt": -0.8179939389228821, "logps/rejected": -8.948786735534668, "logps/rejected_both": -8.831026077270508, "logps/rejected_prompt": -1.014527678489685, "loss": 2.069, "nll_loss": 2.1452174186706543, "rewards/accuracies": 1.0, "rewards/chosen": -0.8657197952270508, "rewards/margins": 2.713794469833374, "rewards/rejected": -3.579514265060425, "step": 2490 }, { "epoch": 2.0, "grad_norm": 7.401684464890164, "learning_rate": 1.2500000000000006e-05, "log_odds_chosen": 7.955414772033691, "log_odds_ratio": -0.004814439453184605, "logits/chosen": -2.977412700653076, "logits/chosen_prompt": -2.712825298309326, "logits/rejected": -1.1496913433074951, "logits/rejected_prompt": -2.6892926692962646, "logps/chosen": -1.8996845483779907, "logps/chosen_both": -1.8827598094940186, "logps/chosen_prompt": -0.8927472233772278, "logps/rejected": -9.687314987182617, "logps/rejected_both": -9.540821075439453, "logps/rejected_prompt": -1.00954270362854, "loss": 2.0789, "nll_loss": 1.882759690284729, "rewards/accuracies": 1.0, "rewards/chosen": -0.7598739266395569, "rewards/margins": 3.1150519847869873, "rewards/rejected": -3.8749260902404785, "step": 2500 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }