diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6033 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 2500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.008, + "grad_norm": 0.4211425877451838, + "learning_rate": 4.999912270696202e-05, + "log_odds_chosen": -0.0004254445375408977, + "log_odds_ratio": -0.6933605670928955, + "logits/chosen": -2.876610279083252, + "logits/chosen_prompt": -2.844738245010376, + "logits/rejected": -2.8758692741394043, + "logits/rejected_prompt": -2.8239073753356934, + "logps/chosen": -1.9094527959823608, + "logps/chosen_both": -1.9286587238311768, + "logps/chosen_prompt": -3.189321756362915, + "logps/rejected": -1.9090824127197266, + "logps/rejected_both": -1.9364073276519775, + "logps/rejected_prompt": -3.4751086235046387, + "loss": 2.325, + "nll_loss": 1.928330421447754, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.7637811899185181, + "rewards/margins": -0.00014820098294876516, + "rewards/rejected": -0.7636328935623169, + "step": 10 + }, + { + "epoch": 0.016, + "grad_norm": 0.19485166995413405, + "learning_rate": 4.9996490889419514e-05, + "log_odds_chosen": 0.0011974871158599854, + "log_odds_ratio": -0.6925489902496338, + "logits/chosen": -2.9591917991638184, + "logits/chosen_prompt": -2.8109309673309326, + "logits/rejected": -2.9579415321350098, + "logits/rejected_prompt": -2.789308547973633, + "logps/chosen": -2.084634304046631, + "logps/chosen_both": -2.0863680839538574, + "logps/chosen_prompt": -2.1795780658721924, + "logps/rejected": -2.0856688022613525, + "logps/rejected_both": -2.0941364765167236, + "logps/rejected_prompt": -2.347795009613037, + "loss": 2.2922, + "nll_loss": 2.08614182472229, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8338537216186523, + "rewards/margins": 0.0004138052463531494, + "rewards/rejected": -0.8342674970626831, + "step": 20 + }, + { + "epoch": 0.024, + "grad_norm": 0.16144893961648712, + "learning_rate": 4.99921047320825e-05, + "log_odds_chosen": 0.003194092307239771, + "log_odds_ratio": -0.6915546655654907, + "logits/chosen": -2.9421558380126953, + "logits/chosen_prompt": -2.7285828590393066, + "logits/rejected": -2.939770221710205, + "logits/rejected_prompt": -2.70296311378479, + "logps/chosen": -2.0509393215179443, + "logps/chosen_both": -2.0457570552825928, + "logps/chosen_prompt": -1.5747671127319336, + "logps/rejected": -2.0534369945526123, + "logps/rejected_both": -2.0497002601623535, + "logps/rejected_prompt": -1.6531193256378174, + "loss": 2.2795, + "nll_loss": 2.04412841796875, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8203758001327515, + "rewards/margins": 0.0009990095859393477, + "rewards/rejected": -0.821374773979187, + "step": 30 + }, + { + "epoch": 0.032, + "grad_norm": 0.16210904759452727, + "learning_rate": 4.9985964542786614e-05, + "log_odds_chosen": 0.0012136728037148714, + "log_odds_ratio": -0.6925405859947205, + "logits/chosen": -2.92653226852417, + "logits/chosen_prompt": -2.7136194705963135, + "logits/rejected": -2.925443172454834, + "logits/rejected_prompt": -2.700766086578369, + "logps/chosen": -2.0835628509521484, + "logps/chosen_both": -2.070845365524292, + "logps/chosen_prompt": -1.1743593215942383, + "logps/rejected": -2.084618330001831, + "logps/rejected_both": -2.076547384262085, + "logps/rejected_prompt": -1.2668603658676147, + "loss": 2.2852, + "nll_loss": 2.070385694503784, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8334251642227173, + "rewards/margins": 0.00042223333730362356, + "rewards/rejected": -0.8338474035263062, + "step": 40 + }, + { + "epoch": 0.04, + "grad_norm": 0.1829717877342827, + "learning_rate": 4.997807075247146e-05, + "log_odds_chosen": 0.000906852656044066, + "log_odds_ratio": -0.6926941871643066, + "logits/chosen": -2.8913445472717285, + "logits/chosen_prompt": -2.6892333030700684, + "logits/rejected": -2.8896098136901855, + "logits/rejected_prompt": -2.6766159534454346, + "logps/chosen": -2.009531259536743, + "logps/chosen_both": -1.9982995986938477, + "logps/chosen_prompt": -1.053348422050476, + "logps/rejected": -2.0103189945220947, + "logps/rejected_both": -2.0013835430145264, + "logps/rejected_prompt": -1.2616751194000244, + "loss": 2.2716, + "nll_loss": 1.996681571006775, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8038125038146973, + "rewards/margins": 0.0003150761185679585, + "rewards/rejected": -0.80412757396698, + "step": 50 + }, + { + "epoch": 0.048, + "grad_norm": 0.1926273569998765, + "learning_rate": 4.996842391515044e-05, + "log_odds_chosen": 0.0007017262396402657, + "log_odds_ratio": -0.6927965879440308, + "logits/chosen": -2.9328999519348145, + "logits/chosen_prompt": -2.684788227081299, + "logits/rejected": -2.93101167678833, + "logits/rejected_prompt": -2.659271240234375, + "logps/chosen": -1.9513660669326782, + "logps/chosen_both": -1.93800950050354, + "logps/chosen_prompt": -0.95411616563797, + "logps/rejected": -1.9519250392913818, + "logps/rejected_both": -1.9419523477554321, + "logps/rejected_prompt": -1.0883800983428955, + "loss": 2.2492, + "nll_loss": 1.9371274709701538, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.7805464863777161, + "rewards/margins": 0.00022354423708748072, + "rewards/rejected": -0.7807700634002686, + "step": 60 + }, + { + "epoch": 0.056, + "grad_norm": 0.1815660976282933, + "learning_rate": 4.9957024707871806e-05, + "log_odds_chosen": 0.0007978074136190116, + "log_odds_ratio": -0.6927486062049866, + "logits/chosen": -3.0125765800476074, + "logits/chosen_prompt": -2.6774511337280273, + "logits/rejected": -3.0124025344848633, + "logits/rejected_prompt": -2.6662356853485107, + "logps/chosen": -2.0494558811187744, + "logps/chosen_both": -2.0350148677825928, + "logps/chosen_prompt": -0.9741342663764954, + "logps/rejected": -2.050143003463745, + "logps/rejected_both": -2.042119264602661, + "logps/rejected_prompt": -1.1199967861175537, + "loss": 2.2682, + "nll_loss": 2.0335299968719482, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8197824358940125, + "rewards/margins": 0.00027483105077408254, + "rewards/rejected": -0.820057213306427, + "step": 70 + }, + { + "epoch": 0.064, + "grad_norm": 0.18993029983534432, + "learning_rate": 4.994387393067117e-05, + "log_odds_chosen": 0.0014978877734392881, + "log_odds_ratio": -0.6923991441726685, + "logits/chosen": -2.9860825538635254, + "logits/chosen_prompt": -2.6699416637420654, + "logits/rejected": -2.9854748249053955, + "logits/rejected_prompt": -2.6453309059143066, + "logps/chosen": -2.025066614151001, + "logps/chosen_both": -2.0116593837738037, + "logps/chosen_prompt": -1.0876951217651367, + "logps/rejected": -2.0263991355895996, + "logps/rejected_both": -2.0129716396331787, + "logps/rejected_prompt": -1.1680071353912354, + "loss": 2.2805, + "nll_loss": 2.008460283279419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8100266456604004, + "rewards/margins": 0.0005330622079782188, + "rewards/rejected": -0.8105596303939819, + "step": 80 + }, + { + "epoch": 0.072, + "grad_norm": 0.19392806669970095, + "learning_rate": 4.992897250651535e-05, + "log_odds_chosen": 0.0007344387704506516, + "log_odds_ratio": -0.6927801370620728, + "logits/chosen": -2.998304605484009, + "logits/chosen_prompt": -2.7530579566955566, + "logits/rejected": -2.9966633319854736, + "logits/rejected_prompt": -2.726839542388916, + "logps/chosen": -1.9492180347442627, + "logps/chosen_both": -1.9305731058120728, + "logps/chosen_prompt": -0.871951699256897, + "logps/rejected": -1.9498412609100342, + "logps/rejected_both": -1.9371519088745117, + "logps/rejected_prompt": -1.0174219608306885, + "loss": 2.2152, + "nll_loss": 1.929351806640625, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.7796871662139893, + "rewards/margins": 0.000249391800025478, + "rewards/rejected": -0.779936671257019, + "step": 90 + }, + { + "epoch": 0.08, + "grad_norm": 0.18477584362829488, + "learning_rate": 4.991232148123761e-05, + "log_odds_chosen": 0.0013153791660442948, + "log_odds_ratio": -0.6924898624420166, + "logits/chosen": -2.959036350250244, + "logits/chosen_prompt": -2.6582894325256348, + "logits/rejected": -2.959897518157959, + "logits/rejected_prompt": -2.656588077545166, + "logps/chosen": -1.980985403060913, + "logps/chosen_both": -1.965191125869751, + "logps/chosen_prompt": -0.8711269497871399, + "logps/rejected": -1.9821256399154663, + "logps/rejected_both": -1.9721254110336304, + "logps/rejected_prompt": -0.9294773936271667, + "loss": 2.2517, + "nll_loss": 1.964665412902832, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7923941612243652, + "rewards/margins": 0.00045606493949890137, + "rewards/rejected": -0.7928503155708313, + "step": 100 + }, + { + "epoch": 0.088, + "grad_norm": 0.19924379886100949, + "learning_rate": 4.9893922023464236e-05, + "log_odds_chosen": 0.002966083586215973, + "log_odds_ratio": -0.6916661858558655, + "logits/chosen": -3.0152981281280518, + "logits/chosen_prompt": -2.685716152191162, + "logits/rejected": -3.0145790576934814, + "logits/rejected_prompt": -2.6468653678894043, + "logps/chosen": -1.8295310735702515, + "logps/chosen_both": -1.8159011602401733, + "logps/chosen_prompt": -1.0153570175170898, + "logps/rejected": -1.8320270776748657, + "logps/rejected_both": -1.8261594772338867, + "logps/rejected_prompt": -1.1217412948608398, + "loss": 2.2814, + "nll_loss": 1.815495491027832, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7318124175071716, + "rewards/margins": 0.0009983479976654053, + "rewards/rejected": -0.7328108549118042, + "step": 110 + }, + { + "epoch": 0.096, + "grad_norm": 0.2009899005827714, + "learning_rate": 4.987377542453251e-05, + "log_odds_chosen": 0.0022429400123655796, + "log_odds_ratio": -0.6920267939567566, + "logits/chosen": -2.9447622299194336, + "logits/chosen_prompt": -2.632648468017578, + "logits/rejected": -2.9442973136901855, + "logits/rejected_prompt": -2.6101832389831543, + "logps/chosen": -2.0063014030456543, + "logps/chosen_both": -1.991539716720581, + "logps/chosen_prompt": -0.9827820658683777, + "logps/rejected": -2.0082459449768066, + "logps/rejected_both": -1.9994781017303467, + "logps/rejected_prompt": -1.0614566802978516, + "loss": 2.2719, + "nll_loss": 1.99040949344635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8025206327438354, + "rewards/margins": 0.0007776618003845215, + "rewards/rejected": -0.8032983541488647, + "step": 120 + }, + { + "epoch": 0.104, + "grad_norm": 0.18861397575558203, + "learning_rate": 4.985188309840012e-05, + "log_odds_chosen": 0.001361916190944612, + "log_odds_ratio": -0.692466676235199, + "logits/chosen": -2.95689058303833, + "logits/chosen_prompt": -2.6187005043029785, + "logits/rejected": -2.95717191696167, + "logits/rejected_prompt": -2.592301607131958, + "logps/chosen": -2.0394482612609863, + "logps/chosen_both": -2.02314829826355, + "logps/chosen_prompt": -0.9008905291557312, + "logps/rejected": -2.040587902069092, + "logps/rejected_both": -2.0329113006591797, + "logps/rejected_prompt": -1.0704509019851685, + "loss": 2.2882, + "nll_loss": 2.023050546646118, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8157793283462524, + "rewards/margins": 0.00045590996160171926, + "rewards/rejected": -0.8162351846694946, + "step": 130 + }, + { + "epoch": 0.112, + "grad_norm": 0.2030737765327122, + "learning_rate": 4.982824658154589e-05, + "log_odds_chosen": 0.0003186427056789398, + "log_odds_ratio": -0.6929879188537598, + "logits/chosen": -2.934846878051758, + "logits/chosen_prompt": -2.6593239307403564, + "logits/rejected": -2.9346649646759033, + "logits/rejected_prompt": -2.637718677520752, + "logps/chosen": -2.066263437271118, + "logps/chosen_both": -2.0494155883789062, + "logps/chosen_prompt": -0.9298864603042603, + "logps/rejected": -2.066551685333252, + "logps/rejected_both": -2.0526323318481445, + "logps/rejected_prompt": -1.0461074113845825, + "loss": 2.2784, + "nll_loss": 2.048583984375, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.8265053629875183, + "rewards/margins": 0.0001151919350377284, + "rewards/rejected": -0.8266205787658691, + "step": 140 + }, + { + "epoch": 0.12, + "grad_norm": 0.17845448491542337, + "learning_rate": 4.980286753286195e-05, + "log_odds_chosen": 0.0020511746406555176, + "log_odds_ratio": -0.6921236515045166, + "logits/chosen": -2.9423627853393555, + "logits/chosen_prompt": -2.6544814109802246, + "logits/rejected": -2.9413440227508545, + "logits/rejected_prompt": -2.6495890617370605, + "logps/chosen": -2.0567996501922607, + "logps/chosen_both": -2.0376124382019043, + "logps/chosen_prompt": -0.8456690907478333, + "logps/rejected": -2.058603525161743, + "logps/rejected_both": -2.0455679893493652, + "logps/rejected_prompt": -1.0780448913574219, + "loss": 2.2474, + "nll_loss": 2.036198616027832, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.822719931602478, + "rewards/margins": 0.0007214724901132286, + "rewards/rejected": -0.8234413862228394, + "step": 150 + }, + { + "epoch": 0.128, + "grad_norm": 0.18228364635340788, + "learning_rate": 4.977574773353732e-05, + "log_odds_chosen": 0.0005785167450085282, + "log_odds_ratio": -0.6928580403327942, + "logits/chosen": -2.906240940093994, + "logits/chosen_prompt": -2.656862735748291, + "logits/rejected": -2.906233072280884, + "logits/rejected_prompt": -2.658569812774658, + "logps/chosen": -1.8988163471221924, + "logps/chosen_both": -1.8861125707626343, + "logps/chosen_prompt": -0.9287108182907104, + "logps/rejected": -1.8993009328842163, + "logps/rejected_both": -1.890856146812439, + "logps/rejected_prompt": -1.113793134689331, + "loss": 2.2658, + "nll_loss": 1.8859831094741821, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.7595265507698059, + "rewards/margins": 0.00019387007341720164, + "rewards/rejected": -0.7597203850746155, + "step": 160 + }, + { + "epoch": 0.136, + "grad_norm": 0.21059375256598528, + "learning_rate": 4.9746889086932895e-05, + "log_odds_chosen": 0.0012606128584593534, + "log_odds_ratio": -0.6925175786018372, + "logits/chosen": -2.9255146980285645, + "logits/chosen_prompt": -2.681833505630493, + "logits/rejected": -2.9241907596588135, + "logits/rejected_prompt": -2.6375930309295654, + "logps/chosen": -2.018401861190796, + "logps/chosen_both": -2.0020346641540527, + "logps/chosen_prompt": -0.8163633346557617, + "logps/rejected": -2.0194990634918213, + "logps/rejected_both": -2.0088753700256348, + "logps/rejected_prompt": -1.024702787399292, + "loss": 2.2545, + "nll_loss": 2.0013086795806885, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8073607683181763, + "rewards/margins": 0.00043891073437407613, + "rewards/rejected": -0.8077996373176575, + "step": 170 + }, + { + "epoch": 0.144, + "grad_norm": 0.2593749816883702, + "learning_rate": 4.971629361844785e-05, + "log_odds_chosen": 0.000588211405556649, + "log_odds_ratio": -0.6928532123565674, + "logits/chosen": -2.9365015029907227, + "logits/chosen_prompt": -2.6852712631225586, + "logits/rejected": -2.9362454414367676, + "logits/rejected_prompt": -2.6527528762817383, + "logps/chosen": -2.049866199493408, + "logps/chosen_both": -2.03619122505188, + "logps/chosen_prompt": -0.8910077214241028, + "logps/rejected": -2.050372838973999, + "logps/rejected_both": -2.0393173694610596, + "logps/rejected_prompt": -1.0920004844665527, + "loss": 2.2312, + "nll_loss": 2.0342373847961426, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8199464678764343, + "rewards/margins": 0.00020260215387679636, + "rewards/rejected": -0.8201491236686707, + "step": 180 + }, + { + "epoch": 0.152, + "grad_norm": 0.21239961737940086, + "learning_rate": 4.968396347537751e-05, + "log_odds_chosen": 0.0017036155331879854, + "log_odds_ratio": -0.6922971606254578, + "logits/chosen": -2.9285712242126465, + "logits/chosen_prompt": -2.637676477432251, + "logits/rejected": -2.9268641471862793, + "logits/rejected_prompt": -2.601259231567383, + "logps/chosen": -2.019813060760498, + "logps/chosen_both": -2.003007173538208, + "logps/chosen_prompt": -0.9411777257919312, + "logps/rejected": -2.0213375091552734, + "logps/rejected_both": -2.013278007507324, + "logps/rejected_prompt": -1.0966544151306152, + "loss": 2.2257, + "nll_loss": 2.003007173538208, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.8079251050949097, + "rewards/margins": 0.0006098627927713096, + "rewards/rejected": -0.8085349798202515, + "step": 190 + }, + { + "epoch": 0.16, + "grad_norm": 0.2296631901191577, + "learning_rate": 4.964990092676263e-05, + "log_odds_chosen": 0.002268400741741061, + "log_odds_ratio": -0.6920153498649597, + "logits/chosen": -2.9518988132476807, + "logits/chosen_prompt": -2.6878037452697754, + "logits/rejected": -2.9512124061584473, + "logits/rejected_prompt": -2.6565701961517334, + "logps/chosen": -1.69021475315094, + "logps/chosen_both": -1.6815983057022095, + "logps/chosen_prompt": -0.8377019762992859, + "logps/rejected": -1.6910902261734009, + "logps/rejected_both": -1.686661958694458, + "logps/rejected_prompt": -0.9836887121200562, + "loss": 2.2189, + "nll_loss": 1.6812556982040405, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.6760859489440918, + "rewards/margins": 0.0003500869497656822, + "rewards/rejected": -0.6764360666275024, + "step": 200 + }, + { + "epoch": 0.168, + "grad_norm": 0.20512599393851222, + "learning_rate": 4.9614108363230135e-05, + "log_odds_chosen": 0.0021390921901911497, + "log_odds_ratio": -0.6920791268348694, + "logits/chosen": -2.9732565879821777, + "logits/chosen_prompt": -2.6687545776367188, + "logits/rejected": -2.9718270301818848, + "logits/rejected_prompt": -2.6496801376342773, + "logps/chosen": -2.0387587547302246, + "logps/chosen_both": -2.017876148223877, + "logps/chosen_prompt": -0.897871196269989, + "logps/rejected": -2.040605068206787, + "logps/rejected_both": -2.0265369415283203, + "logps/rejected_prompt": -1.0972706079483032, + "loss": 2.2179, + "nll_loss": 2.0162312984466553, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8155035972595215, + "rewards/margins": 0.000738424074370414, + "rewards/rejected": -0.8162419199943542, + "step": 210 + }, + { + "epoch": 0.176, + "grad_norm": 0.2127533742878833, + "learning_rate": 4.9576588296825386e-05, + "log_odds_chosen": 0.0020120560657233, + "log_odds_ratio": -0.6921423077583313, + "logits/chosen": -2.8992626667022705, + "logits/chosen_prompt": -2.7236571311950684, + "logits/rejected": -2.8986992835998535, + "logits/rejected_prompt": -2.676098346710205, + "logps/chosen": -2.0563912391662598, + "logps/chosen_both": -2.036818027496338, + "logps/chosen_prompt": -0.9310529828071594, + "logps/rejected": -2.058103322982788, + "logps/rejected_both": -2.0425784587860107, + "logps/rejected_prompt": -1.0257813930511475, + "loss": 2.273, + "nll_loss": 2.03584885597229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8225564956665039, + "rewards/margins": 0.0006849050405435264, + "rewards/rejected": -0.8232414126396179, + "step": 220 + }, + { + "epoch": 0.184, + "grad_norm": 0.17056867832509964, + "learning_rate": 4.953734336083583e-05, + "log_odds_chosen": 0.0011583305895328522, + "log_odds_ratio": -0.6925683617591858, + "logits/chosen": -3.0050501823425293, + "logits/chosen_prompt": -2.7037124633789062, + "logits/rejected": -3.0038866996765137, + "logits/rejected_prompt": -2.6890504360198975, + "logps/chosen": -2.0860724449157715, + "logps/chosen_both": -2.067084550857544, + "logps/chosen_prompt": -0.8457021713256836, + "logps/rejected": -2.087078332901001, + "logps/rejected_both": -2.0733180046081543, + "logps/rejected_prompt": -1.0261476039886475, + "loss": 2.2779, + "nll_loss": 2.065519094467163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8344290852546692, + "rewards/margins": 0.00040218234062194824, + "rewards/rejected": -0.8348312377929688, + "step": 230 + }, + { + "epoch": 0.192, + "grad_norm": 0.2058632754394824, + "learning_rate": 4.949637630960617e-05, + "log_odds_chosen": 0.0013900771737098694, + "log_odds_ratio": -0.6924527883529663, + "logits/chosen": -2.966139316558838, + "logits/chosen_prompt": -2.7504935264587402, + "logits/rejected": -2.965026378631592, + "logits/rejected_prompt": -2.7268807888031006, + "logps/chosen": -1.945728063583374, + "logps/chosen_both": -1.9301140308380127, + "logps/chosen_prompt": -0.9403144717216492, + "logps/rejected": -1.946915626525879, + "logps/rejected_both": -1.936022162437439, + "logps/rejected_prompt": -1.0291379690170288, + "loss": 2.2775, + "nll_loss": 1.9295330047607422, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.7782912254333496, + "rewards/margins": 0.000475037086289376, + "rewards/rejected": -0.7787662744522095, + "step": 240 + }, + { + "epoch": 0.2, + "grad_norm": 0.18030355585658703, + "learning_rate": 4.9453690018345144e-05, + "log_odds_chosen": 0.0017323314677923918, + "log_odds_ratio": -0.6922817826271057, + "logits/chosen": -2.9892709255218506, + "logits/chosen_prompt": -2.7419209480285645, + "logits/rejected": -2.9878451824188232, + "logits/rejected_prompt": -2.706714391708374, + "logps/chosen": -2.0075595378875732, + "logps/chosen_both": -1.9899797439575195, + "logps/chosen_prompt": -0.8903474807739258, + "logps/rejected": -2.0090558528900146, + "logps/rejected_both": -1.998038649559021, + "logps/rejected_prompt": -1.0070338249206543, + "loss": 2.2079, + "nll_loss": 1.9889189004898071, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8030239343643188, + "rewards/margins": 0.0005984127637930214, + "rewards/rejected": -0.8036222457885742, + "step": 250 + }, + { + "epoch": 0.208, + "grad_norm": 0.18677326033959232, + "learning_rate": 4.940928748292363e-05, + "log_odds_chosen": 0.0003323271812405437, + "log_odds_ratio": -0.6929812431335449, + "logits/chosen": -2.8448781967163086, + "logits/chosen_prompt": -2.6570119857788086, + "logits/rejected": -2.844160795211792, + "logits/rejected_prompt": -2.6436538696289062, + "logps/chosen": -2.090553045272827, + "logps/chosen_both": -2.077347993850708, + "logps/chosen_prompt": -0.8073711395263672, + "logps/rejected": -2.090845823287964, + "logps/rejected_both": -2.077338695526123, + "logps/rejected_prompt": -0.9910534024238586, + "loss": 2.2579, + "nll_loss": 2.0748660564422607, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8362210988998413, + "rewards/margins": 0.00011717081360984594, + "rewards/rejected": -0.8363384008407593, + "step": 260 + }, + { + "epoch": 0.216, + "grad_norm": 0.19524819903076443, + "learning_rate": 4.9363171819664434e-05, + "log_odds_chosen": 0.001574930502101779, + "log_odds_ratio": -0.6923605799674988, + "logits/chosen": -2.9072844982147217, + "logits/chosen_prompt": -2.6988863945007324, + "logits/rejected": -2.9070873260498047, + "logits/rejected_prompt": -2.6662864685058594, + "logps/chosen": -1.8586593866348267, + "logps/chosen_both": -1.847161889076233, + "logps/chosen_prompt": -0.8614280819892883, + "logps/rejected": -1.8599656820297241, + "logps/rejected_both": -1.8520950078964233, + "logps/rejected_prompt": -1.0004897117614746, + "loss": 2.2122, + "nll_loss": 1.8460156917572021, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.7434637546539307, + "rewards/margins": 0.0005225300556048751, + "rewards/rejected": -0.7439862489700317, + "step": 270 + }, + { + "epoch": 0.224, + "grad_norm": 0.17891708421025293, + "learning_rate": 4.9315346265123594e-05, + "log_odds_chosen": 0.0014710575342178345, + "log_odds_ratio": -0.6924123764038086, + "logits/chosen": -2.893035888671875, + "logits/chosen_prompt": -2.6818959712982178, + "logits/rejected": -2.8925375938415527, + "logits/rejected_prompt": -2.6510303020477295, + "logps/chosen": -1.959538221359253, + "logps/chosen_both": -1.9448583126068115, + "logps/chosen_prompt": -0.8354212641716003, + "logps/rejected": -1.9608103036880493, + "logps/rejected_both": -1.9502098560333252, + "logps/rejected_prompt": -0.9869192838668823, + "loss": 2.2903, + "nll_loss": 1.94313645362854, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7838152647018433, + "rewards/margins": 0.0005089103942736983, + "rewards/rejected": -0.7843241691589355, + "step": 280 + }, + { + "epoch": 0.232, + "grad_norm": 0.21094148380709188, + "learning_rate": 4.9265814175863186e-05, + "log_odds_chosen": 0.0009952529799193144, + "log_odds_ratio": -0.6926498413085938, + "logits/chosen": -2.9005274772644043, + "logits/chosen_prompt": -2.71238374710083, + "logits/rejected": -2.8991751670837402, + "logits/rejected_prompt": -2.6699583530426025, + "logps/chosen": -2.1492276191711426, + "logps/chosen_both": -2.1339974403381348, + "logps/chosen_prompt": -0.9373821020126343, + "logps/rejected": -2.1501176357269287, + "logps/rejected_both": -2.1395199298858643, + "logps/rejected_prompt": -1.100056529045105, + "loss": 2.2923, + "nll_loss": 2.1338019371032715, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8596910238265991, + "rewards/margins": 0.00035610198392532766, + "rewards/rejected": -0.8600472211837769, + "step": 290 + }, + { + "epoch": 0.24, + "grad_norm": 0.19361551914630554, + "learning_rate": 4.9214579028215776e-05, + "log_odds_chosen": 0.0016762830782681704, + "log_odds_ratio": -0.6923099160194397, + "logits/chosen": -2.9360158443450928, + "logits/chosen_prompt": -2.7480220794677734, + "logits/rejected": -2.9349968433380127, + "logits/rejected_prompt": -2.733687400817871, + "logps/chosen": -1.8898597955703735, + "logps/chosen_both": -1.874415636062622, + "logps/chosen_prompt": -0.8352281451225281, + "logps/rejected": -1.8912776708602905, + "logps/rejected_both": -1.8779733180999756, + "logps/rejected_prompt": -0.9313365817070007, + "loss": 2.2525, + "nll_loss": 1.8733183145523071, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.7559438943862915, + "rewards/margins": 0.0005672037368640304, + "rewards/rejected": -0.7565110921859741, + "step": 300 + }, + { + "epoch": 0.248, + "grad_norm": 0.19645084360565487, + "learning_rate": 4.916164441804044e-05, + "log_odds_chosen": 0.0019232749473303556, + "log_odds_ratio": -0.692186176776886, + "logits/chosen": -2.9699971675872803, + "logits/chosen_prompt": -2.7393062114715576, + "logits/rejected": -2.9690558910369873, + "logits/rejected_prompt": -2.7017319202423096, + "logps/chosen": -1.9972589015960693, + "logps/chosen_both": -1.981871247291565, + "logps/chosen_prompt": -0.8229547739028931, + "logps/rejected": -1.9988943338394165, + "logps/rejected_both": -1.9911056756973267, + "logps/rejected_prompt": -0.9741779565811157, + "loss": 2.2527, + "nll_loss": 1.981127381324768, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7989035844802856, + "rewards/margins": 0.000654196715913713, + "rewards/rejected": -0.7995578050613403, + "step": 310 + }, + { + "epoch": 0.256, + "grad_norm": 0.17696578224649318, + "learning_rate": 4.910701406047037e-05, + "log_odds_chosen": 0.0012397505342960358, + "log_odds_ratio": -0.6925276517868042, + "logits/chosen": -2.9160306453704834, + "logits/chosen_prompt": -2.7327325344085693, + "logits/rejected": -2.915261745452881, + "logits/rejected_prompt": -2.701322078704834, + "logps/chosen": -1.9081172943115234, + "logps/chosen_both": -1.892844557762146, + "logps/chosen_prompt": -0.8174566030502319, + "logps/rejected": -1.9091819524765015, + "logps/rejected_both": -1.9010097980499268, + "logps/rejected_prompt": -1.0786253213882446, + "loss": 2.2602, + "nll_loss": 1.8927319049835205, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7632468938827515, + "rewards/margins": 0.00042594075785018504, + "rewards/rejected": -0.7636728286743164, + "step": 320 + }, + { + "epoch": 0.264, + "grad_norm": 0.17292787330822676, + "learning_rate": 4.905069178965215e-05, + "log_odds_chosen": 0.0019163743127137423, + "log_odds_ratio": -0.692189633846283, + "logits/chosen": -2.9151923656463623, + "logits/chosen_prompt": -2.7165563106536865, + "logits/rejected": -2.914482593536377, + "logits/rejected_prompt": -2.6829206943511963, + "logps/chosen": -1.8700447082519531, + "logps/chosen_both": -1.8556480407714844, + "logps/chosen_prompt": -0.8194649815559387, + "logps/rejected": -1.8716179132461548, + "logps/rejected_both": -1.864458680152893, + "logps/rejected_prompt": -1.1078553199768066, + "loss": 2.1808, + "nll_loss": 1.8551757335662842, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7480179071426392, + "rewards/margins": 0.0006292253965511918, + "rewards/rejected": -0.7486470937728882, + "step": 330 + }, + { + "epoch": 0.272, + "grad_norm": 0.19147435771992855, + "learning_rate": 4.899268155847667e-05, + "log_odds_chosen": 0.002677363809198141, + "log_odds_ratio": -0.6918100118637085, + "logits/chosen": -3.017524242401123, + "logits/chosen_prompt": -2.756082534790039, + "logits/rejected": -3.016745090484619, + "logits/rejected_prompt": -2.7283802032470703, + "logps/chosen": -1.8907134532928467, + "logps/chosen_both": -1.8744417428970337, + "logps/chosen_prompt": -0.8424029350280762, + "logps/rejected": -1.8929758071899414, + "logps/rejected_both": -1.8843475580215454, + "logps/rejected_prompt": -1.0425379276275635, + "loss": 2.225, + "nll_loss": 1.8739697933197021, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7562853693962097, + "rewards/margins": 0.000904941582120955, + "rewards/rejected": -0.7571902275085449, + "step": 340 + }, + { + "epoch": 0.28, + "grad_norm": 0.19050905162566348, + "learning_rate": 4.893298743830168e-05, + "log_odds_chosen": 0.0014245070051401854, + "log_odds_ratio": -0.6924355626106262, + "logits/chosen": -2.881587028503418, + "logits/chosen_prompt": -2.7358975410461426, + "logits/rejected": -2.8803658485412598, + "logits/rejected_prompt": -2.693080186843872, + "logps/chosen": -2.135007381439209, + "logps/chosen_both": -2.115304470062256, + "logps/chosen_prompt": -0.8588684797286987, + "logps/rejected": -2.1362690925598145, + "logps/rejected_both": -2.1251254081726074, + "logps/rejected_prompt": -1.0595465898513794, + "loss": 2.3085, + "nll_loss": 2.1149659156799316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8540030717849731, + "rewards/margins": 0.0005046069854870439, + "rewards/rejected": -0.8545076251029968, + "step": 350 + }, + { + "epoch": 0.288, + "grad_norm": 0.2409286506380079, + "learning_rate": 4.887161361866608e-05, + "log_odds_chosen": 0.0026388473343104124, + "log_odds_ratio": -0.6918294429779053, + "logits/chosen": -2.983471632003784, + "logits/chosen_prompt": -2.755098819732666, + "logits/rejected": -2.982506513595581, + "logits/rejected_prompt": -2.7400355339050293, + "logps/chosen": -1.9234100580215454, + "logps/chosen_both": -1.904706597328186, + "logps/chosen_prompt": -0.8400828242301941, + "logps/rejected": -1.9256340265274048, + "logps/rejected_both": -1.912940263748169, + "logps/rejected_prompt": -0.9321552515029907, + "loss": 2.2324, + "nll_loss": 1.9039466381072998, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.769364058971405, + "rewards/margins": 0.0008895128848962486, + "rewards/rejected": -0.7702535390853882, + "step": 360 + }, + { + "epoch": 0.296, + "grad_norm": 0.16083280812237927, + "learning_rate": 4.880856440699582e-05, + "log_odds_chosen": 0.0021248466800898314, + "log_odds_ratio": -0.6920855641365051, + "logits/chosen": -2.9351096153259277, + "logits/chosen_prompt": -2.723745107650757, + "logits/rejected": -2.93329119682312, + "logits/rejected_prompt": -2.689175844192505, + "logps/chosen": -2.005812644958496, + "logps/chosen_both": -1.9874347448349, + "logps/chosen_prompt": -0.8169828653335571, + "logps/rejected": -2.0076451301574707, + "logps/rejected_both": -1.9974247217178345, + "logps/rejected_prompt": -0.9817326664924622, + "loss": 2.2565, + "nll_loss": 1.9868465662002563, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8023250699043274, + "rewards/margins": 0.0007329642539843917, + "rewards/rejected": -0.8030580282211304, + "step": 370 + }, + { + "epoch": 0.304, + "grad_norm": 0.22470013003589273, + "learning_rate": 4.874384422830167e-05, + "log_odds_chosen": 0.0011979244882240891, + "log_odds_ratio": -0.6925488710403442, + "logits/chosen": -2.9063477516174316, + "logits/chosen_prompt": -2.607713222503662, + "logits/rejected": -2.905827760696411, + "logits/rejected_prompt": -2.5853092670440674, + "logps/chosen": -1.9979126453399658, + "logps/chosen_both": -1.982242226600647, + "logps/chosen_prompt": -0.8234804272651672, + "logps/rejected": -1.9988930225372314, + "logps/rejected_both": -1.9891548156738281, + "logps/rejected_prompt": -0.9966527223587036, + "loss": 2.266, + "nll_loss": 1.9814211130142212, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.7991650104522705, + "rewards/margins": 0.0003922194300685078, + "rewards/rejected": -0.7995571494102478, + "step": 380 + }, + { + "epoch": 0.312, + "grad_norm": 0.16501384834156196, + "learning_rate": 4.867745762486861e-05, + "log_odds_chosen": 0.0010735094547271729, + "log_odds_ratio": -0.6926108598709106, + "logits/chosen": -2.9659483432769775, + "logits/chosen_prompt": -2.684511661529541, + "logits/rejected": -2.9646358489990234, + "logits/rejected_prompt": -2.6466262340545654, + "logps/chosen": -1.8777449131011963, + "logps/chosen_both": -1.8621854782104492, + "logps/chosen_prompt": -0.8326584100723267, + "logps/rejected": -1.8786296844482422, + "logps/rejected_both": -1.8694502115249634, + "logps/rejected_prompt": -1.119554042816162, + "loss": 2.2551, + "nll_loss": 1.8609716892242432, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.7510979771614075, + "rewards/margins": 0.0003538370074238628, + "rewards/rejected": -0.751451849937439, + "step": 390 + }, + { + "epoch": 0.32, + "grad_norm": 0.18496197696993874, + "learning_rate": 4.860940925593703e-05, + "log_odds_chosen": 0.0022099569905549288, + "log_odds_ratio": -0.6920434832572937, + "logits/chosen": -2.8903660774230957, + "logits/chosen_prompt": -2.6781816482543945, + "logits/rejected": -2.890045166015625, + "logits/rejected_prompt": -2.6534364223480225, + "logps/chosen": -1.969386339187622, + "logps/chosen_both": -1.954185128211975, + "logps/chosen_prompt": -0.7636314630508423, + "logps/rejected": -1.9712820053100586, + "logps/rejected_both": -1.9598472118377686, + "logps/rejected_prompt": -0.9155877232551575, + "loss": 2.243, + "nll_loss": 1.9532957077026367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7877545356750488, + "rewards/margins": 0.0007582366233691573, + "rewards/rejected": -0.7885128259658813, + "step": 400 + }, + { + "epoch": 0.328, + "grad_norm": 0.22859080108494093, + "learning_rate": 4.8539703897375755e-05, + "log_odds_chosen": 0.004624041263014078, + "log_odds_ratio": -0.690842866897583, + "logits/chosen": -2.9258294105529785, + "logits/chosen_prompt": -2.6813464164733887, + "logits/rejected": -2.9250378608703613, + "logits/rejected_prompt": -2.6571106910705566, + "logps/chosen": -2.0521552562713623, + "logps/chosen_both": -2.034921646118164, + "logps/chosen_prompt": -0.8797234296798706, + "logps/rejected": -2.056114673614502, + "logps/rejected_both": -2.044158935546875, + "logps/rejected_prompt": -0.9540025591850281, + "loss": 2.2663, + "nll_loss": 2.0334911346435547, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8208619952201843, + "rewards/margins": 0.0015838384861126542, + "rewards/rejected": -0.8224459886550903, + "step": 410 + }, + { + "epoch": 0.336, + "grad_norm": 0.2070567132691218, + "learning_rate": 4.846834644134686e-05, + "log_odds_chosen": 0.001986052840948105, + "log_odds_ratio": -0.6921548843383789, + "logits/chosen": -2.9888834953308105, + "logits/chosen_prompt": -2.6887311935424805, + "logits/rejected": -2.989170789718628, + "logits/rejected_prompt": -2.694418430328369, + "logps/chosen": -1.9955952167510986, + "logps/chosen_both": -1.9792373180389404, + "logps/chosen_prompt": -0.8381233215332031, + "logps/rejected": -1.997323751449585, + "logps/rejected_both": -1.9859631061553955, + "logps/rejected_prompt": -0.9913262128829956, + "loss": 2.2321, + "nll_loss": 1.9785674810409546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7982381582260132, + "rewards/margins": 0.0006913721445016563, + "rewards/rejected": -0.7989295721054077, + "step": 420 + }, + { + "epoch": 0.344, + "grad_norm": 0.19605531509972363, + "learning_rate": 4.839534189596228e-05, + "log_odds_chosen": 0.0027246386744081974, + "log_odds_ratio": -0.6917861104011536, + "logits/chosen": -2.912360429763794, + "logits/chosen_prompt": -2.653672218322754, + "logits/rejected": -2.910978317260742, + "logits/rejected_prompt": -2.627488613128662, + "logps/chosen": -2.060957908630371, + "logps/chosen_both": -2.043726921081543, + "logps/chosen_prompt": -0.7695341110229492, + "logps/rejected": -2.0633223056793213, + "logps/rejected_both": -2.051257371902466, + "logps/rejected_prompt": -1.0156570672988892, + "loss": 2.2675, + "nll_loss": 2.042490005493164, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8243831396102905, + "rewards/margins": 0.0009458243730477989, + "rewards/rejected": -0.8253289461135864, + "step": 430 + }, + { + "epoch": 0.352, + "grad_norm": 0.44833865318290117, + "learning_rate": 4.832069538493237e-05, + "log_odds_chosen": 0.04500371962785721, + "log_odds_ratio": -0.6715863943099976, + "logits/chosen": -2.9302279949188232, + "logits/chosen_prompt": -2.6701042652130127, + "logits/rejected": -2.9281227588653564, + "logits/rejected_prompt": -2.666865587234497, + "logps/chosen": -1.9099162817001343, + "logps/chosen_both": -1.898306131362915, + "logps/chosen_prompt": -0.702593982219696, + "logps/rejected": -1.948999047279358, + "logps/rejected_both": -1.9378995895385742, + "logps/rejected_prompt": -0.971504807472229, + "loss": 2.2392, + "nll_loss": 1.895094633102417, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.76396644115448, + "rewards/margins": 0.015633201226592064, + "rewards/rejected": -0.7795997262001038, + "step": 440 + }, + { + "epoch": 0.36, + "grad_norm": 0.21226948111262156, + "learning_rate": 4.8244412147206284e-05, + "log_odds_chosen": 2.9653515815734863, + "log_odds_ratio": -0.40015140175819397, + "logits/chosen": -2.9068620204925537, + "logits/chosen_prompt": -2.6536412239074707, + "logits/rejected": -2.1202731132507324, + "logits/rejected_prompt": -2.6555583477020264, + "logps/chosen": -2.0414326190948486, + "logps/chosen_both": -2.0248727798461914, + "logps/chosen_prompt": -0.8300280570983887, + "logps/rejected": -4.945545196533203, + "logps/rejected_both": -4.884528160095215, + "logps/rejected_prompt": -0.9442939758300781, + "loss": 2.1853, + "nll_loss": 2.0240979194641113, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8165730237960815, + "rewards/margins": 1.1616451740264893, + "rewards/rejected": -1.9782178401947021, + "step": 450 + }, + { + "epoch": 0.368, + "grad_norm": 0.1736633646525648, + "learning_rate": 4.81664975366043e-05, + "log_odds_chosen": 7.59240198135376, + "log_odds_ratio": -0.1370885670185089, + "logits/chosen": -2.9020304679870605, + "logits/chosen_prompt": -2.6753904819488525, + "logits/rejected": -0.7233905792236328, + "logits/rejected_prompt": -2.637943983078003, + "logps/chosen": -1.8611255884170532, + "logps/chosen_both": -1.8469617366790771, + "logps/chosen_prompt": -0.8501307368278503, + "logps/rejected": -9.215188026428223, + "logps/rejected_both": -9.101489067077637, + "logps/rejected_prompt": -1.2299854755401611, + "loss": 2.0244, + "nll_loss": 1.8459827899932861, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7444502115249634, + "rewards/margins": 2.9416251182556152, + "rewards/rejected": -3.686075210571289, + "step": 460 + }, + { + "epoch": 0.376, + "grad_norm": 0.17769599500435684, + "learning_rate": 4.808695702144206e-05, + "log_odds_chosen": 5.727511882781982, + "log_odds_ratio": -0.2772656977176666, + "logits/chosen": -2.879725694656372, + "logits/chosen_prompt": -2.642578125, + "logits/rejected": -1.0399138927459717, + "logits/rejected_prompt": -2.6099534034729004, + "logps/chosen": -2.0047779083251953, + "logps/chosen_both": -1.9910427331924438, + "logps/chosen_prompt": -0.8587312698364258, + "logps/rejected": -7.64484167098999, + "logps/rejected_both": -7.5631890296936035, + "logps/rejected_prompt": -1.0231356620788574, + "loss": 2.0507, + "nll_loss": 1.990276575088501, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8019111752510071, + "rewards/margins": 2.2560253143310547, + "rewards/rejected": -3.057936429977417, + "step": 470 + }, + { + "epoch": 0.384, + "grad_norm": 0.18584682979949957, + "learning_rate": 4.800579618414676e-05, + "log_odds_chosen": 4.071249961853027, + "log_odds_ratio": -0.34571754932403564, + "logits/chosen": -2.903729200363159, + "logits/chosen_prompt": -2.7958900928497314, + "logits/rejected": -3.239121198654175, + "logits/rejected_prompt": -2.7663371562957764, + "logps/chosen": -1.9373371601104736, + "logps/chosen_both": -1.921233892440796, + "logps/chosen_prompt": -0.9925417900085449, + "logps/rejected": -5.936069488525391, + "logps/rejected_both": -5.837677955627441, + "logps/rejected_prompt": -1.1928670406341553, + "loss": 2.4809, + "nll_loss": 1.919942855834961, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7749348282814026, + "rewards/margins": 1.5994927883148193, + "rewards/rejected": -2.3744280338287354, + "step": 480 + }, + { + "epoch": 0.392, + "grad_norm": 0.1641543403493392, + "learning_rate": 4.7923020720865414e-05, + "log_odds_chosen": 3.001093626022339, + "log_odds_ratio": -0.484192430973053, + "logits/chosen": -2.983025312423706, + "logits/chosen_prompt": -3.0399768352508545, + "logits/rejected": -4.017498970031738, + "logits/rejected_prompt": -3.0394999980926514, + "logps/chosen": -2.209317922592163, + "logps/chosen_both": -2.1894264221191406, + "logps/chosen_prompt": -0.8747655153274536, + "logps/rejected": -5.173645496368408, + "logps/rejected_both": -5.1185221672058105, + "logps/rejected_prompt": -1.2934271097183228, + "loss": 2.1178, + "nll_loss": 2.188310384750366, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8837271928787231, + "rewards/margins": 1.185731053352356, + "rewards/rejected": -2.069458484649658, + "step": 490 + }, + { + "epoch": 0.4, + "grad_norm": 4.569078846846734, + "learning_rate": 4.783863644106502e-05, + "log_odds_chosen": 6.397196292877197, + "log_odds_ratio": -0.20790621638298035, + "logits/chosen": -2.8709733486175537, + "logits/chosen_prompt": -2.905733585357666, + "logits/rejected": -4.449090480804443, + "logits/rejected_prompt": -2.8762049674987793, + "logps/chosen": -1.861519455909729, + "logps/chosen_both": -1.8485714197158813, + "logps/chosen_prompt": -0.7894952893257141, + "logps/rejected": -8.093868255615234, + "logps/rejected_both": -7.9878997802734375, + "logps/rejected_prompt": -1.098191499710083, + "loss": 2.2466, + "nll_loss": 1.847815752029419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7446077466011047, + "rewards/margins": 2.4929394721984863, + "rewards/rejected": -3.2375473976135254, + "step": 500 + }, + { + "epoch": 0.408, + "grad_norm": 26.906300876077555, + "learning_rate": 4.775264926712489e-05, + "log_odds_chosen": 5.443802833557129, + "log_odds_ratio": -0.13954684138298035, + "logits/chosen": -2.9360134601593018, + "logits/chosen_prompt": -2.6900744438171387, + "logits/rejected": -3.0484580993652344, + "logits/rejected_prompt": -2.612032890319824, + "logps/chosen": -1.974119782447815, + "logps/chosen_both": -1.958168625831604, + "logps/chosen_prompt": -0.8577529788017273, + "logps/rejected": -7.293883323669434, + "logps/rejected_both": -7.204199314117432, + "logps/rejected_prompt": -1.3446273803710938, + "loss": 2.518, + "nll_loss": 1.9573103189468384, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7896479368209839, + "rewards/margins": 2.1279053688049316, + "rewards/rejected": -2.917553424835205, + "step": 510 + }, + { + "epoch": 0.416, + "grad_norm": 1.0236738821403857, + "learning_rate": 4.7665065233920945e-05, + "log_odds_chosen": 4.726571559906006, + "log_odds_ratio": -0.14057810604572296, + "logits/chosen": -2.9554474353790283, + "logits/chosen_prompt": -3.076146364212036, + "logits/rejected": -3.131758689880371, + "logits/rejected_prompt": -3.045212507247925, + "logps/chosen": -1.9218995571136475, + "logps/chosen_both": -1.910244345664978, + "logps/chosen_prompt": -0.8790926933288574, + "logps/rejected": -6.504288673400879, + "logps/rejected_both": -6.445836544036865, + "logps/rejected_prompt": -1.288549542427063, + "loss": 2.0423, + "nll_loss": 1.909478783607483, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7687598466873169, + "rewards/margins": 1.8329557180404663, + "rewards/rejected": -2.601715564727783, + "step": 520 + }, + { + "epoch": 0.424, + "grad_norm": 0.6288533211783596, + "learning_rate": 4.7575890488402185e-05, + "log_odds_chosen": 4.645321846008301, + "log_odds_ratio": -0.14102457463741302, + "logits/chosen": -2.9634203910827637, + "logits/chosen_prompt": -3.0218586921691895, + "logits/rejected": -3.2898871898651123, + "logits/rejected_prompt": -3.0139455795288086, + "logps/chosen": -1.9550220966339111, + "logps/chosen_both": -1.9388656616210938, + "logps/chosen_prompt": -0.826554000377655, + "logps/rejected": -6.471889495849609, + "logps/rejected_both": -6.390293121337891, + "logps/rejected_prompt": -1.0643904209136963, + "loss": 2.2513, + "nll_loss": 1.9378074407577515, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7820087671279907, + "rewards/margins": 1.8067471981048584, + "rewards/rejected": -2.5887560844421387, + "step": 530 + }, + { + "epoch": 0.432, + "grad_norm": 2.277583713971035, + "learning_rate": 4.7485131289159276e-05, + "log_odds_chosen": 4.095762252807617, + "log_odds_ratio": -0.15678586065769196, + "logits/chosen": -2.9781079292297363, + "logits/chosen_prompt": -3.05256986618042, + "logits/rejected": -2.9668664932250977, + "logits/rejected_prompt": -3.041161060333252, + "logps/chosen": -1.9822967052459717, + "logps/chosen_both": -1.9686206579208374, + "logps/chosen_prompt": -0.9377325177192688, + "logps/rejected": -5.9602532386779785, + "logps/rejected_both": -5.897341728210449, + "logps/rejected_prompt": -1.051451563835144, + "loss": 2.0657, + "nll_loss": 1.9684457778930664, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7929186820983887, + "rewards/margins": 1.5911824703216553, + "rewards/rejected": -2.384101390838623, + "step": 540 + }, + { + "epoch": 0.44, + "grad_norm": 0.17707808472563716, + "learning_rate": 4.7392794005985326e-05, + "log_odds_chosen": 4.996828556060791, + "log_odds_ratio": -0.1402866542339325, + "logits/chosen": -2.9852428436279297, + "logits/chosen_prompt": -3.1000924110412598, + "logits/rejected": -3.4309897422790527, + "logits/rejected_prompt": -3.088724374771118, + "logps/chosen": -1.9283807277679443, + "logps/chosen_both": -1.913000464439392, + "logps/chosen_prompt": -0.7973994612693787, + "logps/rejected": -6.7942705154418945, + "logps/rejected_both": -6.711949348449707, + "logps/rejected_prompt": -1.098016619682312, + "loss": 2.2189, + "nll_loss": 1.9121148586273193, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.7713521718978882, + "rewards/margins": 1.946356177330017, + "rewards/rejected": -2.7177083492279053, + "step": 550 + }, + { + "epoch": 0.448, + "grad_norm": 5.519018494250257, + "learning_rate": 4.7298885119428773e-05, + "log_odds_chosen": 5.843784332275391, + "log_odds_ratio": -0.07069602608680725, + "logits/chosen": -3.0550990104675293, + "logits/chosen_prompt": -3.058029890060425, + "logits/rejected": -3.9521071910858154, + "logits/rejected_prompt": -3.025411367416382, + "logps/chosen": -1.8835957050323486, + "logps/chosen_both": -1.8681533336639404, + "logps/chosen_prompt": -0.8553426861763, + "logps/rejected": -7.572214603424072, + "logps/rejected_both": -7.47025203704834, + "logps/rejected_prompt": -1.0323774814605713, + "loss": 2.077, + "nll_loss": 1.8675563335418701, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7534382939338684, + "rewards/margins": 2.275447368621826, + "rewards/rejected": -3.02888560295105, + "step": 560 + }, + { + "epoch": 0.456, + "grad_norm": 0.6103366310438396, + "learning_rate": 4.720341122033862e-05, + "log_odds_chosen": 5.190781593322754, + "log_odds_ratio": -0.4892934262752533, + "logits/chosen": -2.9757232666015625, + "logits/chosen_prompt": -3.0236659049987793, + "logits/rejected": -3.8188633918762207, + "logits/rejected_prompt": -3.0117480754852295, + "logps/chosen": -2.410020351409912, + "logps/chosen_both": -2.387420415878296, + "logps/chosen_prompt": -0.8877968788146973, + "logps/rejected": -7.459628105163574, + "logps/rejected_both": -7.362242698669434, + "logps/rejected_prompt": -1.1302134990692139, + "loss": 2.4112, + "nll_loss": 2.3871912956237793, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.9640080332756042, + "rewards/margins": 2.019843339920044, + "rewards/rejected": -2.983851432800293, + "step": 570 + }, + { + "epoch": 0.464, + "grad_norm": 0.151774002914212, + "learning_rate": 4.710637900940181e-05, + "log_odds_chosen": 3.729964017868042, + "log_odds_ratio": -0.2660212516784668, + "logits/chosen": -2.9713380336761475, + "logits/chosen_prompt": -2.968736410140991, + "logits/rejected": -3.0788886547088623, + "logits/rejected_prompt": -2.944664478302002, + "logps/chosen": -1.842739462852478, + "logps/chosen_both": -1.829923391342163, + "logps/chosen_prompt": -0.7877852320671082, + "logps/rejected": -5.440505504608154, + "logps/rejected_both": -5.388018608093262, + "logps/rejected_prompt": -1.0643196105957031, + "loss": 2.2685, + "nll_loss": 1.8282448053359985, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7370957732200623, + "rewards/margins": 1.4391063451766968, + "rewards/rejected": -2.1762022972106934, + "step": 580 + }, + { + "epoch": 0.472, + "grad_norm": 0.19312538023716122, + "learning_rate": 4.7007795296673006e-05, + "log_odds_chosen": 3.5488052368164062, + "log_odds_ratio": -0.27949827909469604, + "logits/chosen": -2.9776198863983154, + "logits/chosen_prompt": -3.0068747997283936, + "logits/rejected": -3.2581207752227783, + "logits/rejected_prompt": -2.980543613433838, + "logps/chosen": -1.9455007314682007, + "logps/chosen_both": -1.929386854171753, + "logps/chosen_prompt": -0.7683624625205994, + "logps/rejected": -5.4047675132751465, + "logps/rejected_both": -5.334201812744141, + "logps/rejected_prompt": -1.0063989162445068, + "loss": 2.0098, + "nll_loss": 1.927821159362793, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7782004475593567, + "rewards/margins": 1.3837066888809204, + "rewards/rejected": -2.161907196044922, + "step": 590 + }, + { + "epoch": 0.48, + "grad_norm": 0.6876461909667617, + "learning_rate": 4.690766700109659e-05, + "log_odds_chosen": 3.753337860107422, + "log_odds_ratio": -0.21310639381408691, + "logits/chosen": -2.983619213104248, + "logits/chosen_prompt": -3.056485652923584, + "logits/rejected": -3.4968714714050293, + "logits/rejected_prompt": -3.052788496017456, + "logps/chosen": -2.029822587966919, + "logps/chosen_both": -2.0120925903320312, + "logps/chosen_prompt": -0.8819751739501953, + "logps/rejected": -5.680521488189697, + "logps/rejected_both": -5.621560096740723, + "logps/rejected_prompt": -1.1547878980636597, + "loss": 2.2063, + "nll_loss": 2.011672258377075, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8119290471076965, + "rewards/margins": 1.4602794647216797, + "rewards/rejected": -2.2722086906433105, + "step": 600 + }, + { + "epoch": 0.488, + "grad_norm": 0.19286504559147355, + "learning_rate": 4.68060011500211e-05, + "log_odds_chosen": 4.486660957336426, + "log_odds_ratio": -0.16551145911216736, + "logits/chosen": -2.9143826961517334, + "logits/chosen_prompt": -3.077587366104126, + "logits/rejected": -3.641350507736206, + "logits/rejected_prompt": -3.062753677368164, + "logps/chosen": -1.9688940048217773, + "logps/chosen_both": -1.954045295715332, + "logps/chosen_prompt": -0.6965051293373108, + "logps/rejected": -6.356810569763184, + "logps/rejected_both": -6.294190406799316, + "logps/rejected_prompt": -0.9163694381713867, + "loss": 2.0169, + "nll_loss": 1.953741431236267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7875575423240662, + "rewards/margins": 1.7551662921905518, + "rewards/rejected": -2.542724132537842, + "step": 610 + }, + { + "epoch": 0.496, + "grad_norm": 0.19049752930142771, + "learning_rate": 4.670280487870598e-05, + "log_odds_chosen": 4.947572708129883, + "log_odds_ratio": -0.14103658497333527, + "logits/chosen": -2.8884735107421875, + "logits/chosen_prompt": -3.0340023040771484, + "logits/rejected": -3.598095655441284, + "logits/rejected_prompt": -3.0135154724121094, + "logps/chosen": -2.0803651809692383, + "logps/chosen_both": -2.065659284591675, + "logps/chosen_prompt": -0.7768818140029907, + "logps/rejected": -6.917575836181641, + "logps/rejected_both": -6.847512245178223, + "logps/rejected_prompt": -1.0173327922821045, + "loss": 2.4222, + "nll_loss": 2.0645294189453125, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.832146167755127, + "rewards/margins": 1.9348840713500977, + "rewards/rejected": -2.7670302391052246, + "step": 620 + }, + { + "epoch": 0.504, + "grad_norm": 0.17010508801078386, + "learning_rate": 4.659808542982088e-05, + "log_odds_chosen": 4.44757604598999, + "log_odds_ratio": -0.07313639670610428, + "logits/chosen": -2.8788280487060547, + "logits/chosen_prompt": -2.848573923110962, + "logits/rejected": -2.6464812755584717, + "logits/rejected_prompt": -2.814408540725708, + "logps/chosen": -2.0289366245269775, + "logps/chosen_both": -2.014009952545166, + "logps/chosen_prompt": -0.7678987979888916, + "logps/rejected": -6.3392744064331055, + "logps/rejected_both": -6.2660441398620605, + "logps/rejected_prompt": -1.0411919355392456, + "loss": 2.0605, + "nll_loss": 2.0118680000305176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8115746378898621, + "rewards/margins": 1.724135160446167, + "rewards/rejected": -2.535709857940674, + "step": 630 + }, + { + "epoch": 0.512, + "grad_norm": 102.20159023426744, + "learning_rate": 4.649185015293728e-05, + "log_odds_chosen": 5.305100440979004, + "log_odds_ratio": -0.02886788547039032, + "logits/chosen": -2.934922456741333, + "logits/chosen_prompt": -2.8038196563720703, + "logits/rejected": -2.483616828918457, + "logits/rejected_prompt": -2.801661491394043, + "logps/chosen": -1.7393245697021484, + "logps/chosen_both": -1.728514313697815, + "logps/chosen_prompt": -0.882293701171875, + "logps/rejected": -6.811369895935059, + "logps/rejected_both": -6.727609157562256, + "logps/rejected_prompt": -1.0623975992202759, + "loss": 2.1612, + "nll_loss": 1.7267690896987915, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6957297921180725, + "rewards/margins": 2.028818130493164, + "rewards/rejected": -2.724547863006592, + "step": 640 + }, + { + "epoch": 0.52, + "grad_norm": 2.577120716963003, + "learning_rate": 4.638410650401267e-05, + "log_odds_chosen": 5.029098033905029, + "log_odds_ratio": -0.0729464739561081, + "logits/chosen": -2.946472644805908, + "logits/chosen_prompt": -2.7987747192382812, + "logits/rejected": -2.31748628616333, + "logits/rejected_prompt": -2.7790069580078125, + "logps/chosen": -1.9928621053695679, + "logps/chosen_both": -1.97336745262146, + "logps/chosen_prompt": -0.8152757883071899, + "logps/rejected": -6.8893632888793945, + "logps/rejected_both": -6.791792392730713, + "logps/rejected_prompt": -1.0174424648284912, + "loss": 2.0913, + "nll_loss": 1.9712880849838257, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7971449494361877, + "rewards/margins": 1.9586002826690674, + "rewards/rejected": -2.7557451725006104, + "step": 650 + }, + { + "epoch": 0.528, + "grad_norm": 0.15356571620190568, + "learning_rate": 4.6274862044867304e-05, + "log_odds_chosen": 4.515711307525635, + "log_odds_ratio": -0.14140725135803223, + "logits/chosen": -2.93347430229187, + "logits/chosen_prompt": -2.790188789367676, + "logits/rejected": -2.197619915008545, + "logits/rejected_prompt": -2.7709336280822754, + "logps/chosen": -1.9486901760101318, + "logps/chosen_both": -1.936274766921997, + "logps/chosen_prompt": -0.9808751940727234, + "logps/rejected": -6.346037864685059, + "logps/rejected_both": -6.276151180267334, + "logps/rejected_prompt": -1.2042269706726074, + "loss": 2.0583, + "nll_loss": 1.9354143142700195, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7794761657714844, + "rewards/margins": 1.7589390277862549, + "rewards/rejected": -2.5384154319763184, + "step": 660 + }, + { + "epoch": 0.536, + "grad_norm": 0.18548636024672094, + "learning_rate": 4.616412444265345e-05, + "log_odds_chosen": 5.104066371917725, + "log_odds_ratio": -0.0724453255534172, + "logits/chosen": -2.9771525859832764, + "logits/chosen_prompt": -2.8243517875671387, + "logits/rejected": -2.083482265472412, + "logits/rejected_prompt": -2.8059630393981934, + "logps/chosen": -2.0861048698425293, + "logps/chosen_both": -2.068869113922119, + "logps/chosen_prompt": -0.8699228167533875, + "logps/rejected": -7.067320823669434, + "logps/rejected_both": -6.978930473327637, + "logps/rejected_prompt": -1.0220625400543213, + "loss": 2.1363, + "nll_loss": 2.0682852268218994, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8344419598579407, + "rewards/margins": 1.9924862384796143, + "rewards/rejected": -2.8269283771514893, + "step": 670 + }, + { + "epoch": 0.544, + "grad_norm": 59.01092604551995, + "learning_rate": 4.605190146931731e-05, + "log_odds_chosen": 4.40061092376709, + "log_odds_ratio": -0.1419232189655304, + "logits/chosen": -2.9263124465942383, + "logits/chosen_prompt": -2.8417701721191406, + "logits/rejected": -2.351675510406494, + "logits/rejected_prompt": -2.8414313793182373, + "logps/chosen": -2.124084711074829, + "logps/chosen_both": -2.102914571762085, + "logps/chosen_prompt": -0.8957809209823608, + "logps/rejected": -6.422041893005371, + "logps/rejected_both": -6.32672643661499, + "logps/rejected_prompt": -1.0718226432800293, + "loss": 2.1268, + "nll_loss": 2.1024184226989746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8496338725090027, + "rewards/margins": 1.7191829681396484, + "rewards/rejected": -2.568816661834717, + "step": 680 + }, + { + "epoch": 0.552, + "grad_norm": 0.1936209207703668, + "learning_rate": 4.593820100105355e-05, + "log_odds_chosen": 4.4033403396606445, + "log_odds_ratio": -0.1418362557888031, + "logits/chosen": -2.947152614593506, + "logits/chosen_prompt": -2.8191583156585693, + "logits/rejected": -2.3703582286834717, + "logits/rejected_prompt": -2.8038182258605957, + "logps/chosen": -1.993703842163086, + "logps/chosen_both": -1.9738051891326904, + "logps/chosen_prompt": -0.8131387829780579, + "logps/rejected": -6.278976917266846, + "logps/rejected_both": -6.194762229919434, + "logps/rejected_prompt": -0.9806526303291321, + "loss": 2.0429, + "nll_loss": 1.9733550548553467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7974814772605896, + "rewards/margins": 1.7141094207763672, + "rewards/rejected": -2.5115909576416016, + "step": 690 + }, + { + "epoch": 0.56, + "grad_norm": 0.21779512193360956, + "learning_rate": 4.5823031017752485e-05, + "log_odds_chosen": 4.373869895935059, + "log_odds_ratio": -0.1618097722530365, + "logits/chosen": -2.9762911796569824, + "logits/chosen_prompt": -2.787757396697998, + "logits/rejected": -2.3213400840759277, + "logits/rejected_prompt": -2.7804551124572754, + "logps/chosen": -1.8093370199203491, + "logps/chosen_both": -1.7962630987167358, + "logps/chosen_prompt": -0.7294620871543884, + "logps/rejected": -6.035723686218262, + "logps/rejected_both": -5.961843490600586, + "logps/rejected_prompt": -0.9543176889419556, + "loss": 2.0382, + "nll_loss": 1.7948728799819946, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7237349152565002, + "rewards/margins": 1.6905548572540283, + "rewards/rejected": -2.414289712905884, + "step": 700 + }, + { + "epoch": 0.568, + "grad_norm": 1.9769361000953782, + "learning_rate": 4.5706399602440106e-05, + "log_odds_chosen": 4.656636714935303, + "log_odds_ratio": -0.1408310979604721, + "logits/chosen": -2.916656255722046, + "logits/chosen_prompt": -2.787416458129883, + "logits/rejected": -2.190491199493408, + "logits/rejected_prompt": -2.754542589187622, + "logps/chosen": -2.000397205352783, + "logps/chosen_both": -1.983769416809082, + "logps/chosen_prompt": -0.7894454002380371, + "logps/rejected": -6.537571907043457, + "logps/rejected_both": -6.459201812744141, + "logps/rejected_prompt": -1.0599520206451416, + "loss": 2.098, + "nll_loss": 1.9831438064575195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8001587986946106, + "rewards/margins": 1.8148695230484009, + "rewards/rejected": -2.6150283813476562, + "step": 710 + }, + { + "epoch": 0.576, + "grad_norm": 0.18745727904701265, + "learning_rate": 4.558831494071069e-05, + "log_odds_chosen": 4.969104290008545, + "log_odds_ratio": -0.14006975293159485, + "logits/chosen": -2.9004273414611816, + "logits/chosen_prompt": -2.7481789588928223, + "logits/rejected": -1.9203866720199585, + "logits/rejected_prompt": -2.7317967414855957, + "logps/chosen": -2.000072479248047, + "logps/chosen_both": -1.9829126596450806, + "logps/chosen_prompt": -0.9659306406974792, + "logps/rejected": -6.8479132652282715, + "logps/rejected_both": -6.743927955627441, + "logps/rejected_prompt": -1.1112347841262817, + "loss": 2.0041, + "nll_loss": 1.982696533203125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.800028920173645, + "rewards/margins": 1.9391365051269531, + "rewards/rejected": -2.7391655445098877, + "step": 720 + }, + { + "epoch": 0.584, + "grad_norm": 44.61607145258881, + "learning_rate": 4.5468785320152365e-05, + "log_odds_chosen": 4.449766635894775, + "log_odds_ratio": -0.20899026095867157, + "logits/chosen": -3.0241429805755615, + "logits/chosen_prompt": -2.746372699737549, + "logits/rejected": -2.07698917388916, + "logits/rejected_prompt": -2.746025562286377, + "logps/chosen": -1.9495675563812256, + "logps/chosen_both": -1.9276573657989502, + "logps/chosen_prompt": -0.8301995992660522, + "logps/rejected": -6.287846565246582, + "logps/rejected_both": -6.172031402587891, + "logps/rejected_prompt": -0.9652963876724243, + "loss": 2.1169, + "nll_loss": 1.9262176752090454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7798271179199219, + "rewards/margins": 1.735311508178711, + "rewards/rejected": -2.515138626098633, + "step": 730 + }, + { + "epoch": 0.592, + "grad_norm": 0.39108071766635244, + "learning_rate": 4.534781912976546e-05, + "log_odds_chosen": 3.2947051525115967, + "log_odds_ratio": -0.2812163829803467, + "logits/chosen": -2.989047050476074, + "logits/chosen_prompt": -2.7699084281921387, + "logits/rejected": -2.4307093620300293, + "logits/rejected_prompt": -2.756155014038086, + "logps/chosen": -1.9651190042495728, + "logps/chosen_both": -1.9502513408660889, + "logps/chosen_prompt": -0.7651479840278625, + "logps/rejected": -5.176846981048584, + "logps/rejected_both": -5.1231608390808105, + "logps/rejected_prompt": -0.8976105451583862, + "loss": 2.0946, + "nll_loss": 1.949180245399475, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7860475778579712, + "rewards/margins": 1.2846912145614624, + "rewards/rejected": -2.070739269256592, + "step": 740 + }, + { + "epoch": 0.6, + "grad_norm": 4.489644128912903, + "learning_rate": 4.522542485937369e-05, + "log_odds_chosen": 4.886274337768555, + "log_odds_ratio": -0.14077258110046387, + "logits/chosen": -2.948451519012451, + "logits/chosen_prompt": -2.7478134632110596, + "logits/rejected": -2.1101903915405273, + "logits/rejected_prompt": -2.7366366386413574, + "logps/chosen": -1.992583990097046, + "logps/chosen_both": -1.9766371250152588, + "logps/chosen_prompt": -0.8634021878242493, + "logps/rejected": -6.756987571716309, + "logps/rejected_both": -6.673755645751953, + "logps/rejected_prompt": -1.0165636539459229, + "loss": 2.1241, + "nll_loss": 1.9759677648544312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7970336675643921, + "rewards/margins": 1.90576171875, + "rewards/rejected": -2.7027952671051025, + "step": 750 + }, + { + "epoch": 0.608, + "grad_norm": 0.4533909423122121, + "learning_rate": 4.510161109902837e-05, + "log_odds_chosen": 3.120637893676758, + "log_odds_ratio": -0.6285208463668823, + "logits/chosen": -2.909808397293091, + "logits/chosen_prompt": -2.8316149711608887, + "logits/rejected": -2.377187490463257, + "logits/rejected_prompt": -2.823117971420288, + "logps/chosen": -2.327125072479248, + "logps/chosen_both": -2.3096871376037598, + "logps/chosen_prompt": -0.868097186088562, + "logps/rejected": -5.366008281707764, + "logps/rejected_both": -5.30277681350708, + "logps/rejected_prompt": -1.0501350164413452, + "loss": 2.1836, + "nll_loss": 2.3085296154022217, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.9308500289916992, + "rewards/margins": 1.2155535221099854, + "rewards/rejected": -2.1464035511016846, + "step": 760 + }, + { + "epoch": 0.616, + "grad_norm": 0.19141971158001736, + "learning_rate": 4.4976386538405495e-05, + "log_odds_chosen": 2.943345546722412, + "log_odds_ratio": -0.2832263708114624, + "logits/chosen": -2.926583766937256, + "logits/chosen_prompt": -2.8340327739715576, + "logits/rejected": -2.5858168601989746, + "logits/rejected_prompt": -2.8149476051330566, + "logps/chosen": -2.0653610229492188, + "logps/chosen_both": -2.0445759296417236, + "logps/chosen_prompt": -0.8157526254653931, + "logps/rejected": -4.919131278991699, + "logps/rejected_both": -4.849064350128174, + "logps/rejected_prompt": -1.005324125289917, + "loss": 2.0024, + "nll_loss": 2.0445759296417236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8261443972587585, + "rewards/margins": 1.1415081024169922, + "rewards/rejected": -1.9676525592803955, + "step": 770 + }, + { + "epoch": 0.624, + "grad_norm": 0.299820230370255, + "learning_rate": 4.484975996619589e-05, + "log_odds_chosen": 4.539975166320801, + "log_odds_ratio": -0.11812126636505127, + "logits/chosen": -2.87815523147583, + "logits/chosen_prompt": -2.8412280082702637, + "logits/rejected": -2.3637688159942627, + "logits/rejected_prompt": -2.8588156700134277, + "logps/chosen": -2.4759485721588135, + "logps/chosen_both": -2.454190731048584, + "logps/chosen_prompt": -0.7899399995803833, + "logps/rejected": -6.8973388671875, + "logps/rejected_both": -6.819916725158691, + "logps/rejected_prompt": -1.066646695137024, + "loss": 2.3702, + "nll_loss": 2.454133987426758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9903793334960938, + "rewards/margins": 1.7685562372207642, + "rewards/rejected": -2.7589354515075684, + "step": 780 + }, + { + "epoch": 0.632, + "grad_norm": 5.192675922080671, + "learning_rate": 4.4721740269488355e-05, + "log_odds_chosen": 2.496995210647583, + "log_odds_ratio": -0.32391008734703064, + "logits/chosen": -2.966625213623047, + "logits/chosen_prompt": -2.795879602432251, + "logits/rejected": -2.514392137527466, + "logits/rejected_prompt": -2.783583164215088, + "logps/chosen": -2.563605546951294, + "logps/chosen_both": -2.541128635406494, + "logps/chosen_prompt": -0.9771214723587036, + "logps/rejected": -4.989082336425781, + "logps/rejected_both": -4.936980724334717, + "logps/rejected_prompt": -1.0889393091201782, + "loss": 2.1847, + "nll_loss": 2.5405211448669434, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.025442123413086, + "rewards/margins": 0.9701908230781555, + "rewards/rejected": -1.9956328868865967, + "step": 790 + }, + { + "epoch": 0.64, + "grad_norm": 0.4695325524437554, + "learning_rate": 4.4592336433146e-05, + "log_odds_chosen": 5.124607563018799, + "log_odds_ratio": -0.018428776413202286, + "logits/chosen": -3.051105260848999, + "logits/chosen_prompt": -2.8179726600646973, + "logits/rejected": -1.909102201461792, + "logits/rejected_prompt": -2.7916340827941895, + "logps/chosen": -1.8969109058380127, + "logps/chosen_both": -1.8779878616333008, + "logps/chosen_prompt": -0.8452935218811035, + "logps/rejected": -6.845399379730225, + "logps/rejected_both": -6.747313022613525, + "logps/rejected_prompt": -0.9934666752815247, + "loss": 2.0368, + "nll_loss": 1.8772528171539307, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.758764386177063, + "rewards/margins": 1.9793955087661743, + "rewards/rejected": -2.7381598949432373, + "step": 800 + }, + { + "epoch": 0.648, + "grad_norm": 0.21280813340257887, + "learning_rate": 4.4461557539175594e-05, + "log_odds_chosen": 5.451117515563965, + "log_odds_ratio": -0.07145892083644867, + "logits/chosen": -2.9378345012664795, + "logits/chosen_prompt": -2.762908458709717, + "logits/rejected": -1.6283600330352783, + "logits/rejected_prompt": -2.7498764991760254, + "logps/chosen": -2.0257043838500977, + "logps/chosen_both": -2.008737087249756, + "logps/chosen_prompt": -0.8673852682113647, + "logps/rejected": -7.346819877624512, + "logps/rejected_both": -7.247427940368652, + "logps/rejected_prompt": -1.0632621049880981, + "loss": 2.0447, + "nll_loss": 2.0078537464141846, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8102817535400391, + "rewards/margins": 2.128446340560913, + "rewards/rejected": -2.938728094100952, + "step": 810 + }, + { + "epoch": 0.656, + "grad_norm": 0.209653515397789, + "learning_rate": 4.432941276609018e-05, + "log_odds_chosen": 5.421745777130127, + "log_odds_ratio": -0.07243818789720535, + "logits/chosen": -2.9660727977752686, + "logits/chosen_prompt": -2.805607318878174, + "logits/rejected": -1.6398050785064697, + "logits/rejected_prompt": -2.7811026573181152, + "logps/chosen": -2.0751829147338867, + "logps/chosen_both": -2.0558664798736572, + "logps/chosen_prompt": -0.7402461767196655, + "logps/rejected": -7.376537322998047, + "logps/rejected_both": -7.285178184509277, + "logps/rejected_prompt": -0.9955169558525085, + "loss": 2.1673, + "nll_loss": 2.05536150932312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8300731778144836, + "rewards/margins": 2.1205410957336426, + "rewards/rejected": -2.9506144523620605, + "step": 820 + }, + { + "epoch": 0.664, + "grad_norm": 0.2932004663372407, + "learning_rate": 4.4195911388264946e-05, + "log_odds_chosen": 3.337216854095459, + "log_odds_ratio": -0.28040507435798645, + "logits/chosen": -3.0083236694335938, + "logits/chosen_prompt": -2.7438673973083496, + "logits/rejected": -2.2188708782196045, + "logits/rejected_prompt": -2.710932970046997, + "logps/chosen": -1.7532163858413696, + "logps/chosen_both": -1.7392990589141846, + "logps/chosen_prompt": -0.881622314453125, + "logps/rejected": -4.988051891326904, + "logps/rejected_both": -4.921896934509277, + "logps/rejected_prompt": -0.8814730644226074, + "loss": 2.0387, + "nll_loss": 1.7385940551757812, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7012865543365479, + "rewards/margins": 1.2939343452453613, + "rewards/rejected": -1.9952208995819092, + "step": 830 + }, + { + "epoch": 0.672, + "grad_norm": 2.9403489436512475, + "learning_rate": 4.40610627752862e-05, + "log_odds_chosen": 5.995909690856934, + "log_odds_ratio": -0.07048363983631134, + "logits/chosen": -2.951843738555908, + "logits/chosen_prompt": -2.657824993133545, + "logits/rejected": -1.3483891487121582, + "logits/rejected_prompt": -2.6459240913391113, + "logps/chosen": -2.0297625064849854, + "logps/chosen_both": -2.011107921600342, + "logps/chosen_prompt": -0.8041833639144897, + "logps/rejected": -7.886776924133301, + "logps/rejected_both": -7.784094333648682, + "logps/rejected_prompt": -0.9874393343925476, + "loss": 2.0868, + "nll_loss": 2.0107545852661133, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8119049072265625, + "rewards/margins": 2.342806100845337, + "rewards/rejected": -3.1547107696533203, + "step": 840 + }, + { + "epoch": 0.68, + "grad_norm": 0.1951986041864062, + "learning_rate": 4.3924876391293915e-05, + "log_odds_chosen": 5.405202865600586, + "log_odds_ratio": -0.4933692514896393, + "logits/chosen": -2.8229470252990723, + "logits/chosen_prompt": -2.70353102684021, + "logits/rejected": -1.516230821609497, + "logits/rejected_prompt": -2.682372570037842, + "logps/chosen": -2.4473724365234375, + "logps/chosen_both": -2.4278030395507812, + "logps/chosen_prompt": -0.8016360402107239, + "logps/rejected": -7.731281280517578, + "logps/rejected_both": -7.645183563232422, + "logps/rejected_prompt": -0.9825652241706848, + "loss": 2.2426, + "nll_loss": 2.427164316177368, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.9789490699768066, + "rewards/margins": 2.113563299179077, + "rewards/rejected": -3.092512369155884, + "step": 850 + }, + { + "epoch": 0.688, + "grad_norm": 0.19812900890844543, + "learning_rate": 4.3787361794317405e-05, + "log_odds_chosen": 3.4184670448303223, + "log_odds_ratio": -0.22132563591003418, + "logits/chosen": -2.9762589931488037, + "logits/chosen_prompt": -2.764681816101074, + "logits/rejected": -2.4695773124694824, + "logits/rejected_prompt": -2.739607095718384, + "logps/chosen": -1.889784812927246, + "logps/chosen_both": -1.8726049661636353, + "logps/chosen_prompt": -0.8000418543815613, + "logps/rejected": -5.191944122314453, + "logps/rejected_both": -5.127084732055664, + "logps/rejected_prompt": -0.973870575428009, + "loss": 2.0017, + "nll_loss": 1.8721071481704712, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7559138536453247, + "rewards/margins": 1.3208638429641724, + "rewards/rejected": -2.076777935028076, + "step": 860 + }, + { + "epoch": 0.696, + "grad_norm": 1.1208289382374679, + "learning_rate": 4.3648528635604556e-05, + "log_odds_chosen": 4.736769199371338, + "log_odds_ratio": -0.07410699129104614, + "logits/chosen": -2.9047577381134033, + "logits/chosen_prompt": -2.7688372135162354, + "logits/rejected": -2.297377824783325, + "logits/rejected_prompt": -2.7379658222198486, + "logps/chosen": -2.166656017303467, + "logps/chosen_both": -2.149369955062866, + "logps/chosen_prompt": -0.7613478899002075, + "logps/rejected": -6.790528774261475, + "logps/rejected_both": -6.711920738220215, + "logps/rejected_prompt": -0.9217512011528015, + "loss": 2.19, + "nll_loss": 2.1481828689575195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8666625022888184, + "rewards/margins": 1.8495492935180664, + "rewards/rejected": -2.7162115573883057, + "step": 870 + }, + { + "epoch": 0.704, + "grad_norm": 0.18802597714184358, + "learning_rate": 4.350838665894446e-05, + "log_odds_chosen": 3.573579788208008, + "log_odds_ratio": -0.2119835913181305, + "logits/chosen": -2.9564337730407715, + "logits/chosen_prompt": -2.8878400325775146, + "logits/rejected": -2.7999844551086426, + "logits/rejected_prompt": -2.8850619792938232, + "logps/chosen": -2.041067361831665, + "logps/chosen_both": -2.0219027996063232, + "logps/chosen_prompt": -0.7945634126663208, + "logps/rejected": -5.52020788192749, + "logps/rejected_both": -5.447958946228027, + "logps/rejected_prompt": -0.9404302835464478, + "loss": 2.1522, + "nll_loss": 2.0212433338165283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8164268732070923, + "rewards/margins": 1.3916563987731934, + "rewards/rejected": -2.208083152770996, + "step": 880 + }, + { + "epoch": 0.712, + "grad_norm": 1.3417035590493764, + "learning_rate": 4.336694569998354e-05, + "log_odds_chosen": 4.419407367706299, + "log_odds_ratio": -0.07842884957790375, + "logits/chosen": -2.980591297149658, + "logits/chosen_prompt": -2.9254255294799805, + "logits/rejected": -2.7680697441101074, + "logits/rejected_prompt": -2.905561923980713, + "logps/chosen": -2.0169148445129395, + "logps/chosen_both": -2.0003621578216553, + "logps/chosen_prompt": -0.8039913177490234, + "logps/rejected": -6.302676200866699, + "logps/rejected_both": -6.233563423156738, + "logps/rejected_prompt": -0.9547332525253296, + "loss": 2.0996, + "nll_loss": 2.000209331512451, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8067659139633179, + "rewards/margins": 1.7143046855926514, + "rewards/rejected": -2.5210704803466797, + "step": 890 + }, + { + "epoch": 0.72, + "grad_norm": 0.17015695407576262, + "learning_rate": 4.3224215685535294e-05, + "log_odds_chosen": 3.736863613128662, + "log_odds_ratio": -0.21099340915679932, + "logits/chosen": -2.9480998516082764, + "logits/chosen_prompt": -2.909301519393921, + "logits/rejected": -2.5860133171081543, + "logits/rejected_prompt": -2.8961730003356934, + "logps/chosen": -1.99604070186615, + "logps/chosen_both": -1.9824683666229248, + "logps/chosen_prompt": -0.8537474870681763, + "logps/rejected": -5.6191020011901855, + "logps/rejected_both": -5.559712886810303, + "logps/rejected_prompt": -1.0109044313430786, + "loss": 2.0333, + "nll_loss": 1.9815161228179932, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.798416256904602, + "rewards/margins": 1.449224591255188, + "rewards/rejected": -2.247641086578369, + "step": 900 + }, + { + "epoch": 0.728, + "grad_norm": 0.1938256131016386, + "learning_rate": 4.3080206632883554e-05, + "log_odds_chosen": 4.993983745574951, + "log_odds_ratio": -0.07278299331665039, + "logits/chosen": -2.9305057525634766, + "logits/chosen_prompt": -2.8883767127990723, + "logits/rejected": -2.744293212890625, + "logits/rejected_prompt": -2.865830183029175, + "logps/chosen": -1.9137989282608032, + "logps/chosen_both": -1.897878646850586, + "logps/chosen_prompt": -0.8952886462211609, + "logps/rejected": -6.773948669433594, + "logps/rejected_both": -6.680284023284912, + "logps/rejected_prompt": -1.1111478805541992, + "loss": 2.072, + "nll_loss": 1.896592378616333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7655196189880371, + "rewards/margins": 1.9440600872039795, + "rewards/rejected": -2.7095799446105957, + "step": 910 + }, + { + "epoch": 0.736, + "grad_norm": 0.19422924079693882, + "learning_rate": 4.293492864907947e-05, + "log_odds_chosen": 4.982480049133301, + "log_odds_ratio": -0.07303477078676224, + "logits/chosen": -2.897078275680542, + "logits/chosen_prompt": -2.8844199180603027, + "logits/rejected": -2.5853612422943115, + "logits/rejected_prompt": -2.896810531616211, + "logps/chosen": -2.046506404876709, + "logps/chosen_both": -2.027215003967285, + "logps/chosen_prompt": -0.8521916270256042, + "logps/rejected": -6.898811340332031, + "logps/rejected_both": -6.797191619873047, + "logps/rejected_prompt": -1.0783166885375977, + "loss": 2.0343, + "nll_loss": 2.025817394256592, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8186025619506836, + "rewards/margins": 1.9409217834472656, + "rewards/rejected": -2.7595245838165283, + "step": 920 + }, + { + "epoch": 0.744, + "grad_norm": 0.17503233577716112, + "learning_rate": 4.278839193023214e-05, + "log_odds_chosen": 5.051764011383057, + "log_odds_ratio": -0.07269078493118286, + "logits/chosen": -2.968621015548706, + "logits/chosen_prompt": -2.8850250244140625, + "logits/rejected": -2.575244426727295, + "logits/rejected_prompt": -2.879965305328369, + "logps/chosen": -2.0476856231689453, + "logps/chosen_both": -2.0287888050079346, + "logps/chosen_prompt": -0.8320780992507935, + "logps/rejected": -6.972892761230469, + "logps/rejected_both": -6.875253200531006, + "logps/rejected_prompt": -0.9857944250106812, + "loss": 2.4164, + "nll_loss": 2.027635335922241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8190741539001465, + "rewards/margins": 1.9700825214385986, + "rewards/rejected": -2.7891571521759033, + "step": 930 + }, + { + "epoch": 0.752, + "grad_norm": 47.44652080135077, + "learning_rate": 4.264060676079302e-05, + "log_odds_chosen": 3.4615960121154785, + "log_odds_ratio": -0.25266528129577637, + "logits/chosen": -2.9501328468322754, + "logits/chosen_prompt": -2.8721659183502197, + "logits/rejected": -3.1557369232177734, + "logits/rejected_prompt": -2.854639768600464, + "logps/chosen": -2.153719425201416, + "logps/chosen_both": -2.135387897491455, + "logps/chosen_prompt": -0.9698511958122253, + "logps/rejected": -5.52289342880249, + "logps/rejected_both": -5.454329490661621, + "logps/rejected_prompt": -1.0520834922790527, + "loss": 2.1268, + "nll_loss": 2.1349105834960938, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8614877462387085, + "rewards/margins": 1.3476698398590088, + "rewards/rejected": -2.2091574668884277, + "step": 940 + }, + { + "epoch": 0.76, + "grad_norm": 0.19086614631182744, + "learning_rate": 4.249158351283414e-05, + "log_odds_chosen": 4.672451496124268, + "log_odds_ratio": -0.14073383808135986, + "logits/chosen": -3.003997325897217, + "logits/chosen_prompt": -2.9195713996887207, + "logits/rejected": -3.2987685203552246, + "logits/rejected_prompt": -2.9031708240509033, + "logps/chosen": -2.006805896759033, + "logps/chosen_both": -1.9856882095336914, + "logps/chosen_prompt": -0.8608209490776062, + "logps/rejected": -6.554454803466797, + "logps/rejected_both": -6.446510314941406, + "logps/rejected_prompt": -1.0303418636322021, + "loss": 2.0534, + "nll_loss": 1.9856884479522705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8027224540710449, + "rewards/margins": 1.8190593719482422, + "rewards/rejected": -2.621781826019287, + "step": 950 + }, + { + "epoch": 0.768, + "grad_norm": 0.16090899053372315, + "learning_rate": 4.234133264532012e-05, + "log_odds_chosen": 6.077364444732666, + "log_odds_ratio": -0.004217286594212055, + "logits/chosen": -2.842454433441162, + "logits/chosen_prompt": -2.8957276344299316, + "logits/rejected": -3.5180137157440186, + "logits/rejected_prompt": -2.9135992527008057, + "logps/chosen": -1.9932161569595337, + "logps/chosen_both": -1.9756605625152588, + "logps/chosen_prompt": -0.8626230359077454, + "logps/rejected": -7.9156999588012695, + "logps/rejected_both": -7.813823699951172, + "logps/rejected_prompt": -1.0395594835281372, + "loss": 2.0091, + "nll_loss": 1.975542664527893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7972863912582397, + "rewards/margins": 2.3689935207366943, + "rewards/rejected": -3.1662800312042236, + "step": 960 + }, + { + "epoch": 0.776, + "grad_norm": 0.4432866833057449, + "learning_rate": 4.218986470337419e-05, + "log_odds_chosen": 5.5125412940979, + "log_odds_ratio": -0.07154224812984467, + "logits/chosen": -2.9377503395080566, + "logits/chosen_prompt": -2.926082134246826, + "logits/rejected": -3.535740375518799, + "logits/rejected_prompt": -2.9182417392730713, + "logps/chosen": -1.919931411743164, + "logps/chosen_both": -1.9039018154144287, + "logps/chosen_prompt": -0.7944774627685547, + "logps/rejected": -7.288356781005859, + "logps/rejected_both": -7.193412780761719, + "logps/rejected_prompt": -0.9629098773002625, + "loss": 2.3092, + "nll_loss": 1.9036260843276978, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7679725289344788, + "rewards/margins": 2.147369861602783, + "rewards/rejected": -2.9153425693511963, + "step": 970 + }, + { + "epoch": 0.784, + "grad_norm": 0.19680727751711977, + "learning_rate": 4.2037190317538e-05, + "log_odds_chosen": 4.595906734466553, + "log_odds_ratio": -0.07939890027046204, + "logits/chosen": -2.9524266719818115, + "logits/chosen_prompt": -2.790818691253662, + "logits/rejected": -2.9070940017700195, + "logits/rejected_prompt": -2.781165599822998, + "logps/chosen": -1.9940401315689087, + "logps/chosen_both": -1.978316068649292, + "logps/chosen_prompt": -0.7690817713737488, + "logps/rejected": -6.455039024353027, + "logps/rejected_both": -6.385528087615967, + "logps/rejected_prompt": -0.9404104948043823, + "loss": 2.0872, + "nll_loss": 1.9778735637664795, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7976160049438477, + "rewards/margins": 1.7843996286392212, + "rewards/rejected": -2.5820157527923584, + "step": 980 + }, + { + "epoch": 0.792, + "grad_norm": 0.1584132780664858, + "learning_rate": 4.188332020302561e-05, + "log_odds_chosen": 4.230597496032715, + "log_odds_ratio": -0.14310847222805023, + "logits/chosen": -2.956609010696411, + "logits/chosen_prompt": -2.8512063026428223, + "logits/rejected": -2.678597927093506, + "logits/rejected_prompt": -2.8333568572998047, + "logps/chosen": -1.8776973485946655, + "logps/chosen_both": -1.8625962734222412, + "logps/chosen_prompt": -0.8090478777885437, + "logps/rejected": -5.976474761962891, + "logps/rejected_both": -5.902680397033691, + "logps/rejected_prompt": -0.9692068099975586, + "loss": 1.9999, + "nll_loss": 1.861577033996582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7510789632797241, + "rewards/margins": 1.6395108699798584, + "rewards/rejected": -2.390589952468872, + "step": 990 + }, + { + "epoch": 0.8, + "grad_norm": 0.18982243368973564, + "learning_rate": 4.172826515897146e-05, + "log_odds_chosen": 4.3918375968933105, + "log_odds_ratio": -0.14247746765613556, + "logits/chosen": -2.9714953899383545, + "logits/chosen_prompt": -2.824305772781372, + "logits/rejected": -2.6518845558166504, + "logits/rejected_prompt": -2.8202338218688965, + "logps/chosen": -1.8688671588897705, + "logps/chosen_both": -1.8508541584014893, + "logps/chosen_prompt": -0.9176328778266907, + "logps/rejected": -6.10614538192749, + "logps/rejected_both": -6.007752418518066, + "logps/rejected_prompt": -1.0590510368347168, + "loss": 2.0857, + "nll_loss": 1.8497679233551025, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.747546911239624, + "rewards/margins": 1.694911241531372, + "rewards/rejected": -2.442458391189575, + "step": 1000 + }, + { + "epoch": 0.808, + "grad_norm": 0.15641654006436478, + "learning_rate": 4.157203606767238e-05, + "log_odds_chosen": 4.2656779289245605, + "log_odds_ratio": -0.14230065047740936, + "logits/chosen": -2.9932308197021484, + "logits/chosen_prompt": -2.830867290496826, + "logits/rejected": -2.6234424114227295, + "logits/rejected_prompt": -2.8216352462768555, + "logps/chosen": -2.024932384490967, + "logps/chosen_both": -2.0056064128875732, + "logps/chosen_prompt": -0.7936287522315979, + "logps/rejected": -6.178097724914551, + "logps/rejected_both": -6.095284938812256, + "logps/rejected_prompt": -0.9350797533988953, + "loss": 2.037, + "nll_loss": 2.0045900344848633, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.809972882270813, + "rewards/margins": 1.6612660884857178, + "rewards/rejected": -2.4712390899658203, + "step": 1010 + }, + { + "epoch": 0.816, + "grad_norm": 0.20983648268469735, + "learning_rate": 4.1414643893823914e-05, + "log_odds_chosen": 4.862036228179932, + "log_odds_ratio": -0.07260783016681671, + "logits/chosen": -2.9284424781799316, + "logits/chosen_prompt": -2.8569953441619873, + "logits/rejected": -2.5351157188415527, + "logits/rejected_prompt": -2.8426971435546875, + "logps/chosen": -2.1229608058929443, + "logps/chosen_both": -2.10365629196167, + "logps/chosen_prompt": -0.8154341578483582, + "logps/rejected": -6.869643211364746, + "logps/rejected_both": -6.7744574546813965, + "logps/rejected_prompt": -0.9435701370239258, + "loss": 2.1102, + "nll_loss": 2.1023664474487305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8491843342781067, + "rewards/margins": 1.8986728191375732, + "rewards/rejected": -2.7478575706481934, + "step": 1020 + }, + { + "epoch": 0.824, + "grad_norm": 0.16714535237522857, + "learning_rate": 4.125609968375072e-05, + "log_odds_chosen": 5.137936115264893, + "log_odds_ratio": -0.0722423866391182, + "logits/chosen": -2.917429208755493, + "logits/chosen_prompt": -2.805572509765625, + "logits/rejected": -2.4986531734466553, + "logits/rejected_prompt": -2.7935025691986084, + "logps/chosen": -1.898790717124939, + "logps/chosen_both": -1.88314688205719, + "logps/chosen_prompt": -0.8224050402641296, + "logps/rejected": -6.880563259124756, + "logps/rejected_both": -6.7928266525268555, + "logps/rejected_prompt": -0.9875515699386597, + "loss": 2.0572, + "nll_loss": 1.8828372955322266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7595163583755493, + "rewards/margins": 1.9927089214324951, + "rewards/rejected": -2.752225637435913, + "step": 1030 + }, + { + "epoch": 0.832, + "grad_norm": 0.17116655114302515, + "learning_rate": 4.109641456463135e-05, + "log_odds_chosen": 4.716578006744385, + "log_odds_ratio": -0.05661363527178764, + "logits/chosen": -2.9051055908203125, + "logits/chosen_prompt": -2.861964702606201, + "logits/rejected": -2.489297866821289, + "logits/rejected_prompt": -2.8317601680755615, + "logps/chosen": -2.72660493850708, + "logps/chosen_both": -2.6989545822143555, + "logps/chosen_prompt": -0.786345899105072, + "logps/rejected": -7.32622766494751, + "logps/rejected_both": -7.235006809234619, + "logps/rejected_prompt": -0.9496296048164368, + "loss": 2.0544, + "nll_loss": 2.698387622833252, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0906422138214111, + "rewards/margins": 1.8398488759994507, + "rewards/rejected": -2.9304909706115723, + "step": 1040 + }, + { + "epoch": 0.84, + "grad_norm": 2.0421569101702004, + "learning_rate": 4.093559974371725e-05, + "log_odds_chosen": 4.683531284332275, + "log_odds_ratio": -0.14838626980781555, + "logits/chosen": -2.983940601348877, + "logits/chosen_prompt": -2.8726494312286377, + "logits/rejected": -2.683384418487549, + "logits/rejected_prompt": -2.844991683959961, + "logps/chosen": -1.7734657526016235, + "logps/chosen_both": -1.762310266494751, + "logps/chosen_prompt": -0.8980112075805664, + "logps/rejected": -6.07004976272583, + "logps/rejected_both": -5.987616062164307, + "logps/rejected_prompt": -1.1182132959365845, + "loss": 2.145, + "nll_loss": 1.7613089084625244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7093862891197205, + "rewards/margins": 1.7186336517333984, + "rewards/rejected": -2.4280200004577637, + "step": 1050 + }, + { + "epoch": 0.848, + "grad_norm": 0.1891364752116159, + "learning_rate": 4.077366650754624e-05, + "log_odds_chosen": 4.3087382316589355, + "log_odds_ratio": -0.1364879608154297, + "logits/chosen": -2.9432783126831055, + "logits/chosen_prompt": -2.815147638320923, + "logits/rejected": -2.721280097961426, + "logits/rejected_prompt": -2.818236827850342, + "logps/chosen": -1.8882700204849243, + "logps/chosen_both": -1.8756290674209595, + "logps/chosen_prompt": -0.8526128530502319, + "logps/rejected": -6.065881729125977, + "logps/rejected_both": -6.0042314529418945, + "logps/rejected_prompt": -0.9744648933410645, + "loss": 2.1355, + "nll_loss": 1.8748886585235596, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7553080320358276, + "rewards/margins": 1.6710445880889893, + "rewards/rejected": -2.4263527393341064, + "step": 1060 + }, + { + "epoch": 0.856, + "grad_norm": 33.963511456298086, + "learning_rate": 4.0610626221150394e-05, + "log_odds_chosen": 4.251172065734863, + "log_odds_ratio": -0.09040095657110214, + "logits/chosen": -2.9414284229278564, + "logits/chosen_prompt": -2.8389973640441895, + "logits/rejected": -2.8033430576324463, + "logits/rejected_prompt": -2.82332706451416, + "logps/chosen": -1.9342035055160522, + "logps/chosen_both": -1.9176651239395142, + "logps/chosen_prompt": -0.8298524022102356, + "logps/rejected": -6.048348903656006, + "logps/rejected_both": -5.979620933532715, + "logps/rejected_prompt": -0.9826586842536926, + "loss": 2.0673, + "nll_loss": 1.9169620275497437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7736814618110657, + "rewards/margins": 1.645658254623413, + "rewards/rejected": -2.419339656829834, + "step": 1070 + }, + { + "epoch": 0.864, + "grad_norm": 5.339928107993312, + "learning_rate": 4.044649032725836e-05, + "log_odds_chosen": 4.668586730957031, + "log_odds_ratio": -0.04072408378124237, + "logits/chosen": -2.9805121421813965, + "logits/chosen_prompt": -2.858212947845459, + "logits/rejected": -2.779395580291748, + "logits/rejected_prompt": -2.8353207111358643, + "logps/chosen": -2.4372153282165527, + "logps/chosen_both": -2.4168477058410645, + "logps/chosen_prompt": -0.7482016086578369, + "logps/rejected": -6.966684818267822, + "logps/rejected_both": -6.886708736419678, + "logps/rejected_prompt": -0.9111725687980652, + "loss": 2.1177, + "nll_loss": 2.4160780906677246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9748862981796265, + "rewards/margins": 1.8117873668670654, + "rewards/rejected": -2.7866737842559814, + "step": 1080 + }, + { + "epoch": 0.872, + "grad_norm": 0.2060230046824354, + "learning_rate": 4.028127034549229e-05, + "log_odds_chosen": 2.597301483154297, + "log_odds_ratio": -0.6685577630996704, + "logits/chosen": -2.9436233043670654, + "logits/chosen_prompt": -2.8545641899108887, + "logits/rejected": -2.8262507915496826, + "logits/rejected_prompt": -2.8353445529937744, + "logps/chosen": -2.3411784172058105, + "logps/chosen_both": -2.3227829933166504, + "logps/chosen_prompt": -0.7935237884521484, + "logps/rejected": -4.853774070739746, + "logps/rejected_both": -4.805240154266357, + "logps/rejected_prompt": -0.958962082862854, + "loss": 2.139, + "nll_loss": 2.3222460746765137, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.9364713430404663, + "rewards/margins": 1.0050380229949951, + "rewards/rejected": -1.941509485244751, + "step": 1090 + }, + { + "epoch": 0.88, + "grad_norm": 0.17774111122195055, + "learning_rate": 4.011497787155938e-05, + "log_odds_chosen": 4.53702449798584, + "log_odds_ratio": -0.02008737251162529, + "logits/chosen": -2.898667335510254, + "logits/chosen_prompt": -2.8412561416625977, + "logits/rejected": -2.799050807952881, + "logits/rejected_prompt": -2.819329023361206, + "logps/chosen": -2.120091438293457, + "logps/chosen_both": -2.0994343757629395, + "logps/chosen_prompt": -0.7898808717727661, + "logps/rejected": -6.5274176597595215, + "logps/rejected_both": -6.440402030944824, + "logps/rejected_prompt": -1.0125057697296143, + "loss": 2.0681, + "nll_loss": 2.0985283851623535, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8480366468429565, + "rewards/margins": 1.7629306316375732, + "rewards/rejected": -2.6109673976898193, + "step": 1100 + }, + { + "epoch": 0.888, + "grad_norm": 0.5492835402833951, + "learning_rate": 3.9947624576437975e-05, + "log_odds_chosen": 3.65099835395813, + "log_odds_ratio": -0.21185067296028137, + "logits/chosen": -2.8890416622161865, + "logits/chosen_prompt": -2.8260998725891113, + "logits/rejected": -2.8036818504333496, + "logits/rejected_prompt": -2.8174471855163574, + "logps/chosen": -2.0846400260925293, + "logps/chosen_both": -2.065948247909546, + "logps/chosen_prompt": -0.8428912162780762, + "logps/rejected": -5.634668350219727, + "logps/rejected_both": -5.555979251861572, + "logps/rejected_prompt": -1.0157763957977295, + "loss": 2.128, + "nll_loss": 2.065037488937378, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8338559865951538, + "rewards/margins": 1.420011281967163, + "rewards/rejected": -2.2538673877716064, + "step": 1110 + }, + { + "epoch": 0.896, + "grad_norm": 0.2391375753226414, + "learning_rate": 3.977922220555855e-05, + "log_odds_chosen": 4.121129989624023, + "log_odds_ratio": -0.2298469990491867, + "logits/chosen": -2.969383955001831, + "logits/chosen_prompt": -2.841618061065674, + "logits/rejected": -2.8132920265197754, + "logits/rejected_prompt": -2.8176777362823486, + "logps/chosen": -2.3696742057800293, + "logps/chosen_both": -2.350247621536255, + "logps/chosen_prompt": -0.8721768260002136, + "logps/rejected": -6.348196029663086, + "logps/rejected_both": -6.277990818023682, + "logps/rejected_prompt": -1.0750401020050049, + "loss": 2.1621, + "nll_loss": 2.3494279384613037, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.9478696584701538, + "rewards/margins": 1.591408610343933, + "rewards/rejected": -2.539278268814087, + "step": 1120 + }, + { + "epoch": 0.904, + "grad_norm": 1.0869471605926033, + "learning_rate": 3.960978257797931e-05, + "log_odds_chosen": 3.306716203689575, + "log_odds_ratio": -0.17165422439575195, + "logits/chosen": -2.901864767074585, + "logits/chosen_prompt": -2.8563239574432373, + "logits/rejected": -2.815932273864746, + "logits/rejected_prompt": -2.829672336578369, + "logps/chosen": -2.3288769721984863, + "logps/chosen_both": -2.307668447494507, + "logps/chosen_prompt": -0.8160017132759094, + "logps/rejected": -5.531130790710449, + "logps/rejected_both": -5.466065406799316, + "logps/rejected_prompt": -0.9807281494140625, + "loss": 2.0755, + "nll_loss": 2.3062796592712402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9315508604049683, + "rewards/margins": 1.280901551246643, + "rewards/rejected": -2.2124524116516113, + "step": 1130 + }, + { + "epoch": 0.912, + "grad_norm": 0.21229431870443033, + "learning_rate": 3.943931758555669e-05, + "log_odds_chosen": 4.015295505523682, + "log_odds_ratio": -0.14405557513237, + "logits/chosen": -2.9465222358703613, + "logits/chosen_prompt": -2.830146074295044, + "logits/rejected": -2.7873902320861816, + "logits/rejected_prompt": -2.8030102252960205, + "logps/chosen": -1.9876712560653687, + "logps/chosen_both": -1.9711806774139404, + "logps/chosen_prompt": -0.8330597877502441, + "logps/rejected": -5.87436580657959, + "logps/rejected_both": -5.79966402053833, + "logps/rejected_prompt": -1.0102033615112305, + "loss": 1.9833, + "nll_loss": 1.9705440998077393, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7950685620307922, + "rewards/margins": 1.554678201675415, + "rewards/rejected": -2.3497467041015625, + "step": 1140 + }, + { + "epoch": 0.92, + "grad_norm": 0.18607892338713655, + "learning_rate": 3.92678391921108e-05, + "log_odds_chosen": 4.167088985443115, + "log_odds_ratio": -0.081887386739254, + "logits/chosen": -2.9688785076141357, + "logits/chosen_prompt": -2.8491876125335693, + "logits/rejected": -2.8233845233917236, + "logits/rejected_prompt": -2.836411237716675, + "logps/chosen": -2.0486931800842285, + "logps/chosen_both": -2.0284764766693115, + "logps/chosen_prompt": -0.8191589117050171, + "logps/rejected": -6.082810878753662, + "logps/rejected_both": -5.993044853210449, + "logps/rejected_prompt": -0.957076907157898, + "loss": 2.086, + "nll_loss": 2.0268213748931885, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8194772601127625, + "rewards/margins": 1.613647222518921, + "rewards/rejected": -2.433124303817749, + "step": 1150 + }, + { + "epoch": 0.928, + "grad_norm": 0.21278740734057763, + "learning_rate": 3.909535943258567e-05, + "log_odds_chosen": 4.548261642456055, + "log_odds_ratio": -0.07581990212202072, + "logits/chosen": -3.092094898223877, + "logits/chosen_prompt": -2.8779349327087402, + "logits/rejected": -2.840526580810547, + "logits/rejected_prompt": -2.8706183433532715, + "logps/chosen": -1.943817138671875, + "logps/chosen_both": -1.9261138439178467, + "logps/chosen_prompt": -0.8740865588188171, + "logps/rejected": -6.346927642822266, + "logps/rejected_both": -6.251557350158691, + "logps/rejected_prompt": -1.028618574142456, + "loss": 2.0516, + "nll_loss": 1.9256139993667603, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.77752685546875, + "rewards/margins": 1.7612441778182983, + "rewards/rejected": -2.538771152496338, + "step": 1160 + }, + { + "epoch": 0.936, + "grad_norm": 2.074191616812015, + "learning_rate": 3.8921890412204705e-05, + "log_odds_chosen": 3.9714667797088623, + "log_odds_ratio": -0.10122326761484146, + "logits/chosen": -2.9742226600646973, + "logits/chosen_prompt": -2.8603179454803467, + "logits/rejected": -2.8532581329345703, + "logits/rejected_prompt": -2.833484172821045, + "logps/chosen": -2.3508994579315186, + "logps/chosen_both": -2.333052158355713, + "logps/chosen_prompt": -0.8015215992927551, + "logps/rejected": -6.174811363220215, + "logps/rejected_both": -6.111483573913574, + "logps/rejected_prompt": -1.0183693170547485, + "loss": 2.2824, + "nll_loss": 2.3322701454162598, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9403597712516785, + "rewards/margins": 1.5295648574829102, + "rewards/rejected": -2.4699246883392334, + "step": 1170 + }, + { + "epoch": 0.944, + "grad_norm": 0.2875489978173768, + "learning_rate": 3.8747444305621e-05, + "log_odds_chosen": 4.248479843139648, + "log_odds_ratio": -0.08145709335803986, + "logits/chosen": -2.950727939605713, + "logits/chosen_prompt": -2.822025775909424, + "logits/rejected": -2.663987398147583, + "logits/rejected_prompt": -2.8115882873535156, + "logps/chosen": -1.9704688787460327, + "logps/chosen_both": -1.9537798166275024, + "logps/chosen_prompt": -0.8284621238708496, + "logps/rejected": -6.081311225891113, + "logps/rejected_both": -6.007387161254883, + "logps/rejected_prompt": -1.0018432140350342, + "loss": 1.9987, + "nll_loss": 1.9535901546478271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7881874442100525, + "rewards/margins": 1.6443370580673218, + "rewards/rejected": -2.4325246810913086, + "step": 1180 + }, + { + "epoch": 0.952, + "grad_norm": 6.520768567954707, + "learning_rate": 3.8572033356062943e-05, + "log_odds_chosen": 3.6630382537841797, + "log_odds_ratio": -0.1266271471977234, + "logits/chosen": -2.9928297996520996, + "logits/chosen_prompt": -2.8252012729644775, + "logits/rejected": -2.722259521484375, + "logits/rejected_prompt": -2.7941107749938965, + "logps/chosen": -2.0680882930755615, + "logps/chosen_both": -2.0539040565490723, + "logps/chosen_prompt": -0.7603567838668823, + "logps/rejected": -5.370635032653809, + "logps/rejected_both": -5.302577018737793, + "logps/rejected_prompt": -1.007256031036377, + "loss": 2.1861, + "nll_loss": 2.052879810333252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8272353410720825, + "rewards/margins": 1.3210185766220093, + "rewards/rejected": -2.148253917694092, + "step": 1190 + }, + { + "epoch": 0.96, + "grad_norm": 2.68559023802143, + "learning_rate": 3.8395669874474915e-05, + "log_odds_chosen": 4.359891414642334, + "log_odds_ratio": -0.015468957833945751, + "logits/chosen": -2.91310453414917, + "logits/chosen_prompt": -2.7794852256774902, + "logits/rejected": -2.6371960639953613, + "logits/rejected_prompt": -2.7625763416290283, + "logps/chosen": -1.8540757894515991, + "logps/chosen_both": -1.839600920677185, + "logps/chosen_prompt": -0.8248388171195984, + "logps/rejected": -6.038485527038574, + "logps/rejected_both": -5.962553977966309, + "logps/rejected_prompt": -0.9856597185134888, + "loss": 2.0673, + "nll_loss": 1.8394546508789062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7416303753852844, + "rewards/margins": 1.6737639904022217, + "rewards/rejected": -2.4153940677642822, + "step": 1200 + }, + { + "epoch": 0.968, + "grad_norm": 0.185073881578095, + "learning_rate": 3.821836623865329e-05, + "log_odds_chosen": 4.161174297332764, + "log_odds_ratio": -0.07971666753292084, + "logits/chosen": -2.903371572494507, + "logits/chosen_prompt": -2.778414487838745, + "logits/rejected": -2.5587830543518066, + "logits/rejected_prompt": -2.762293815612793, + "logps/chosen": -2.1283013820648193, + "logps/chosen_both": -2.1046059131622314, + "logps/chosen_prompt": -0.7429525852203369, + "logps/rejected": -6.169132232666016, + "logps/rejected_both": -6.081439018249512, + "logps/rejected_prompt": -0.9049463272094727, + "loss": 2.2118, + "nll_loss": 2.104139566421509, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8513206243515015, + "rewards/margins": 1.6163326501846313, + "rewards/rejected": -2.467653274536133, + "step": 1210 + }, + { + "epoch": 0.976, + "grad_norm": 0.19264797772361533, + "learning_rate": 3.80401348923777e-05, + "log_odds_chosen": 4.120739936828613, + "log_odds_ratio": -0.14354461431503296, + "logits/chosen": -2.9424567222595215, + "logits/chosen_prompt": -2.7921371459960938, + "logits/rejected": -2.5477294921875, + "logits/rejected_prompt": -2.7542147636413574, + "logps/chosen": -1.913551688194275, + "logps/chosen_both": -1.8978935480117798, + "logps/chosen_prompt": -0.8339295387268066, + "logps/rejected": -5.9061384201049805, + "logps/rejected_both": -5.837408542633057, + "logps/rejected_prompt": -0.9619489908218384, + "loss": 2.0995, + "nll_loss": 1.8977426290512085, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7654207348823547, + "rewards/margins": 1.5970344543457031, + "rewards/rejected": -2.362455129623413, + "step": 1220 + }, + { + "epoch": 0.984, + "grad_norm": 9.292901066306287, + "learning_rate": 3.786098834453766e-05, + "log_odds_chosen": 3.505579710006714, + "log_odds_ratio": -0.15101362764835358, + "logits/chosen": -2.910395622253418, + "logits/chosen_prompt": -2.8129782676696777, + "logits/rejected": -2.574031352996826, + "logits/rejected_prompt": -2.782696008682251, + "logps/chosen": -2.1372461318969727, + "logps/chosen_both": -2.112764835357666, + "logps/chosen_prompt": -0.8219666481018066, + "logps/rejected": -5.543887138366699, + "logps/rejected_both": -5.4572343826293945, + "logps/rejected_prompt": -0.9813167452812195, + "loss": 2.0645, + "nll_loss": 2.111912488937378, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8548984527587891, + "rewards/margins": 1.3626563549041748, + "rewards/rejected": -2.217555046081543, + "step": 1230 + }, + { + "epoch": 0.992, + "grad_norm": 1.6964209385464728, + "learning_rate": 3.7680939168254733e-05, + "log_odds_chosen": 3.888018846511841, + "log_odds_ratio": -0.1449870765209198, + "logits/chosen": -2.9042837619781494, + "logits/chosen_prompt": -2.823965549468994, + "logits/rejected": -2.4834845066070557, + "logits/rejected_prompt": -2.7938156127929688, + "logps/chosen": -2.0088438987731934, + "logps/chosen_both": -1.9936256408691406, + "logps/chosen_prompt": -0.7543269395828247, + "logps/rejected": -5.783638000488281, + "logps/rejected_both": -5.7258687019348145, + "logps/rejected_prompt": -0.9668887257575989, + "loss": 2.038, + "nll_loss": 1.992997169494629, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8035375475883484, + "rewards/margins": 1.5099177360534668, + "rewards/rejected": -2.31345534324646, + "step": 1240 + }, + { + "epoch": 1.0, + "grad_norm": 2.478887419876043, + "learning_rate": 3.7500000000000003e-05, + "log_odds_chosen": 5.275210380554199, + "log_odds_ratio": -0.006285688374191523, + "logits/chosen": -2.9461379051208496, + "logits/chosen_prompt": -2.7684402465820312, + "logits/rejected": -2.312152147293091, + "logits/rejected_prompt": -2.7450311183929443, + "logps/chosen": -1.8539674282073975, + "logps/chosen_both": -1.839685082435608, + "logps/chosen_prompt": -0.8559527397155762, + "logps/rejected": -6.958900451660156, + "logps/rejected_both": -6.868790626525879, + "logps/rejected_prompt": -1.0536139011383057, + "loss": 2.2404, + "nll_loss": 1.8390467166900635, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7415870428085327, + "rewards/margins": 2.04197359085083, + "rewards/rejected": -2.783560276031494, + "step": 1250 + }, + { + "epoch": 1.008, + "grad_norm": 0.17878604739151382, + "learning_rate": 3.731818353870729e-05, + "log_odds_chosen": 4.191466331481934, + "log_odds_ratio": -0.09246650338172913, + "logits/chosen": -2.957552433013916, + "logits/chosen_prompt": -2.771613359451294, + "logits/rejected": -2.3375356197357178, + "logits/rejected_prompt": -2.7522428035736084, + "logps/chosen": -1.989243745803833, + "logps/chosen_both": -1.9734690189361572, + "logps/chosen_prompt": -0.8279644250869751, + "logps/rejected": -6.043200969696045, + "logps/rejected_both": -5.973423480987549, + "logps/rejected_prompt": -1.0317699909210205, + "loss": 2.0389, + "nll_loss": 1.9726651906967163, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7956975102424622, + "rewards/margins": 1.6215832233428955, + "rewards/rejected": -2.417280673980713, + "step": 1260 + }, + { + "epoch": 1.016, + "grad_norm": 23.252626998417625, + "learning_rate": 3.713550254488185e-05, + "log_odds_chosen": 3.7449231147766113, + "log_odds_ratio": -0.16642269492149353, + "logits/chosen": -2.8947479724884033, + "logits/chosen_prompt": -2.7788119316101074, + "logits/rejected": -2.3416316509246826, + "logits/rejected_prompt": -2.760896921157837, + "logps/chosen": -2.020059585571289, + "logps/chosen_both": -2.0054023265838623, + "logps/chosen_prompt": -0.8935413360595703, + "logps/rejected": -5.6518659591674805, + "logps/rejected_both": -5.590303897857666, + "logps/rejected_prompt": -1.0056589841842651, + "loss": 2.0643, + "nll_loss": 2.0046825408935547, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8080238103866577, + "rewards/margins": 1.452722430229187, + "rewards/rejected": -2.2607462406158447, + "step": 1270 + }, + { + "epoch": 1.024, + "grad_norm": 0.1852421213956056, + "learning_rate": 3.695196983970481e-05, + "log_odds_chosen": 5.502694129943848, + "log_odds_ratio": -0.07146742194890976, + "logits/chosen": -2.9081971645355225, + "logits/chosen_prompt": -2.745790719985962, + "logits/rejected": -2.0626957416534424, + "logits/rejected_prompt": -2.7173855304718018, + "logps/chosen": -1.7873703241348267, + "logps/chosen_both": -1.7739589214324951, + "logps/chosen_prompt": -0.8900352716445923, + "logps/rejected": -7.1119537353515625, + "logps/rejected_both": -7.017317295074463, + "logps/rejected_prompt": -1.0950191020965576, + "loss": 2.0059, + "nll_loss": 1.7733700275421143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7149480581283569, + "rewards/margins": 2.129833698272705, + "rewards/rejected": -2.8447818756103516, + "step": 1280 + }, + { + "epoch": 1.032, + "grad_norm": 0.1901267311863244, + "learning_rate": 3.6767598304133324e-05, + "log_odds_chosen": 4.644869804382324, + "log_odds_ratio": -0.14166082441806793, + "logits/chosen": -2.9974873065948486, + "logits/chosen_prompt": -2.7224061489105225, + "logits/rejected": -2.2138378620147705, + "logits/rejected_prompt": -2.6832873821258545, + "logps/chosen": -1.9028959274291992, + "logps/chosen_both": -1.8842157125473022, + "logps/chosen_prompt": -0.8141298294067383, + "logps/rejected": -6.421015739440918, + "logps/rejected_both": -6.323419094085693, + "logps/rejected_prompt": -0.979651153087616, + "loss": 1.9806, + "nll_loss": 1.8838021755218506, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7611583471298218, + "rewards/margins": 1.8072481155395508, + "rewards/rejected": -2.568406581878662, + "step": 1290 + }, + { + "epoch": 1.04, + "grad_norm": 0.1720032056568101, + "learning_rate": 3.6582400877996546e-05, + "log_odds_chosen": 5.198369026184082, + "log_odds_ratio": -0.07235782593488693, + "logits/chosen": -2.8921890258789062, + "logits/chosen_prompt": -2.7482800483703613, + "logits/rejected": -1.9527368545532227, + "logits/rejected_prompt": -2.7276439666748047, + "logps/chosen": -2.0934653282165527, + "logps/chosen_both": -2.076221227645874, + "logps/chosen_prompt": -0.8200351595878601, + "logps/rejected": -7.170855522155762, + "logps/rejected_both": -7.079026699066162, + "logps/rejected_prompt": -0.9832828640937805, + "loss": 2.0527, + "nll_loss": 2.075456380844116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8373861312866211, + "rewards/margins": 2.0309560298919678, + "rewards/rejected": -2.868342161178589, + "step": 1300 + }, + { + "epoch": 1.048, + "grad_norm": 0.18338089227039325, + "learning_rate": 3.639639055908751e-05, + "log_odds_chosen": 5.48695707321167, + "log_odds_ratio": -0.07169006019830704, + "logits/chosen": -2.874192953109741, + "logits/chosen_prompt": -2.733611583709717, + "logits/rejected": -1.8326069116592407, + "logits/rejected_prompt": -2.6982951164245605, + "logps/chosen": -2.0102884769439697, + "logps/chosen_both": -1.9914735555648804, + "logps/chosen_prompt": -0.8337292671203613, + "logps/rejected": -7.363123416900635, + "logps/rejected_both": -7.263747215270996, + "logps/rejected_prompt": -0.9874321818351746, + "loss": 1.9824, + "nll_loss": 1.9909473657608032, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8041152954101562, + "rewards/margins": 2.141134023666382, + "rewards/rejected": -2.945249319076538, + "step": 1310 + }, + { + "epoch": 1.056, + "grad_norm": 0.1837356662895363, + "learning_rate": 3.6209580402250815e-05, + "log_odds_chosen": 5.6873369216918945, + "log_odds_ratio": -0.07120365649461746, + "logits/chosen": -2.9526381492614746, + "logits/chosen_prompt": -2.7081189155578613, + "logits/rejected": -1.8793054819107056, + "logits/rejected_prompt": -2.6829447746276855, + "logps/chosen": -1.9104582071304321, + "logps/chosen_both": -1.8940789699554443, + "logps/chosen_prompt": -0.8755657076835632, + "logps/rejected": -7.447749137878418, + "logps/rejected_both": -7.334907531738281, + "logps/rejected_prompt": -1.0553802251815796, + "loss": 2.1442, + "nll_loss": 1.8928571939468384, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7641832828521729, + "rewards/margins": 2.214916706085205, + "rewards/rejected": -2.979099750518799, + "step": 1320 + }, + { + "epoch": 1.064, + "grad_norm": 0.3951461995742214, + "learning_rate": 3.602198351846647e-05, + "log_odds_chosen": 4.024718761444092, + "log_odds_ratio": -0.5831412672996521, + "logits/chosen": -2.981672525405884, + "logits/chosen_prompt": -2.7551183700561523, + "logits/rejected": -2.1212754249572754, + "logits/rejected_prompt": -2.7351596355438232, + "logps/chosen": -2.4395077228546143, + "logps/chosen_both": -2.417250871658325, + "logps/chosen_prompt": -0.8564618825912476, + "logps/rejected": -6.365363597869873, + "logps/rejected_both": -6.2751054763793945, + "logps/rejected_prompt": -1.031884789466858, + "loss": 2.2375, + "nll_loss": 2.4155256748199463, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.9758030772209167, + "rewards/margins": 1.5703424215316772, + "rewards/rejected": -2.5461456775665283, + "step": 1330 + }, + { + "epoch": 1.072, + "grad_norm": 0.18983825409437058, + "learning_rate": 3.5833613073929684e-05, + "log_odds_chosen": 4.155622482299805, + "log_odds_ratio": -0.14320290088653564, + "logits/chosen": -3.005096673965454, + "logits/chosen_prompt": -2.8319993019104004, + "logits/rejected": -2.3421382904052734, + "logits/rejected_prompt": -2.8086118698120117, + "logps/chosen": -1.9423980712890625, + "logps/chosen_both": -1.9247316122055054, + "logps/chosen_prompt": -0.7214570045471191, + "logps/rejected": -5.971634864807129, + "logps/rejected_both": -5.893637657165527, + "logps/rejected_prompt": -0.9021228551864624, + "loss": 2.092, + "nll_loss": 1.9240925312042236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7769593000411987, + "rewards/margins": 1.6116949319839478, + "rewards/rejected": -2.3886542320251465, + "step": 1340 + }, + { + "epoch": 1.08, + "grad_norm": 0.6083000414245734, + "learning_rate": 3.564448228912682e-05, + "log_odds_chosen": 4.163081169128418, + "log_odds_ratio": -0.10094372928142548, + "logits/chosen": -2.963536262512207, + "logits/chosen_prompt": -2.846693515777588, + "logits/rejected": -2.542693614959717, + "logits/rejected_prompt": -2.819491386413574, + "logps/chosen": -2.337949275970459, + "logps/chosen_both": -2.3158886432647705, + "logps/chosen_prompt": -0.845288872718811, + "logps/rejected": -6.368934154510498, + "logps/rejected_both": -6.2805986404418945, + "logps/rejected_prompt": -1.0301436185836792, + "loss": 2.0382, + "nll_loss": 2.3151814937591553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9351798295974731, + "rewards/margins": 1.6123939752578735, + "rewards/rejected": -2.5475735664367676, + "step": 1350 + }, + { + "epoch": 1.088, + "grad_norm": 0.2154750785789702, + "learning_rate": 3.545460443790753e-05, + "log_odds_chosen": 5.453991889953613, + "log_odds_ratio": -0.004712260328233242, + "logits/chosen": -2.908536434173584, + "logits/chosen_prompt": -2.868119716644287, + "logits/rejected": -2.40228533744812, + "logits/rejected_prompt": -2.843205451965332, + "logps/chosen": -2.089245319366455, + "logps/chosen_both": -2.072594165802002, + "logps/chosen_prompt": -0.8769745826721191, + "logps/rejected": -7.407778739929199, + "logps/rejected_both": -7.316309928894043, + "logps/rejected_prompt": -0.9720737338066101, + "loss": 2.0088, + "nll_loss": 2.071500778198242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8356983065605164, + "rewards/margins": 2.127413749694824, + "rewards/rejected": -2.9631123542785645, + "step": 1360 + }, + { + "epoch": 1.096, + "grad_norm": 0.1857265942387655, + "learning_rate": 3.52639928465532e-05, + "log_odds_chosen": 4.4336113929748535, + "log_odds_ratio": -0.14170871675014496, + "logits/chosen": -3.0002169609069824, + "logits/chosen_prompt": -2.8658928871154785, + "logits/rejected": -2.558640956878662, + "logits/rejected_prompt": -2.843383550643921, + "logps/chosen": -1.8998088836669922, + "logps/chosen_both": -1.8837999105453491, + "logps/chosen_prompt": -0.8331824541091919, + "logps/rejected": -6.191910266876221, + "logps/rejected_both": -6.1067986488342285, + "logps/rejected_prompt": -0.9438120126724243, + "loss": 2.0577, + "nll_loss": 1.883371353149414, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7599235773086548, + "rewards/margins": 1.7168405055999756, + "rewards/rejected": -2.47676420211792, + "step": 1370 + }, + { + "epoch": 1.104, + "grad_norm": 0.22400309241356314, + "learning_rate": 3.507266089284157e-05, + "log_odds_chosen": 5.497137069702148, + "log_odds_ratio": -0.004467605613172054, + "logits/chosen": -2.9908201694488525, + "logits/chosen_prompt": -2.821722984313965, + "logits/rejected": -2.416836977005005, + "logits/rejected_prompt": -2.796220541000366, + "logps/chosen": -1.8564481735229492, + "logps/chosen_both": -1.8395103216171265, + "logps/chosen_prompt": -0.8639839291572571, + "logps/rejected": -7.180043697357178, + "logps/rejected_both": -7.0766448974609375, + "logps/rejected_prompt": -0.9959409832954407, + "loss": 2.0609, + "nll_loss": 1.8391234874725342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7425792813301086, + "rewards/margins": 2.1294379234313965, + "rewards/rejected": -2.8720173835754395, + "step": 1380 + }, + { + "epoch": 1.112, + "grad_norm": 0.19384408681406856, + "learning_rate": 3.488062200510791e-05, + "log_odds_chosen": 5.338822841644287, + "log_odds_ratio": -0.00644069816917181, + "logits/chosen": -2.959766387939453, + "logits/chosen_prompt": -2.7905402183532715, + "logits/rejected": -2.3757593631744385, + "logits/rejected_prompt": -2.763526678085327, + "logps/chosen": -1.9314730167388916, + "logps/chosen_both": -1.9157222509384155, + "logps/chosen_prompt": -0.8981779217720032, + "logps/rejected": -7.111077785491943, + "logps/rejected_both": -7.018582344055176, + "logps/rejected_prompt": -0.9950772523880005, + "loss": 1.9482, + "nll_loss": 1.9154551029205322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7725892066955566, + "rewards/margins": 2.0718419551849365, + "rewards/rejected": -2.8444314002990723, + "step": 1390 + }, + { + "epoch": 1.12, + "grad_norm": 60.752749653266065, + "learning_rate": 3.4687889661302576e-05, + "log_odds_chosen": 4.680363655090332, + "log_odds_ratio": -0.03717372566461563, + "logits/chosen": -2.920323610305786, + "logits/chosen_prompt": -2.8357200622558594, + "logits/rejected": -2.4031760692596436, + "logits/rejected_prompt": -2.802396535873413, + "logps/chosen": -2.005197286605835, + "logps/chosen_both": -1.9863475561141968, + "logps/chosen_prompt": -0.7522888779640198, + "logps/rejected": -6.545997619628906, + "logps/rejected_both": -6.455955505371094, + "logps/rejected_prompt": -0.965649425983429, + "loss": 2.0466, + "nll_loss": 1.985174536705017, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.802078902721405, + "rewards/margins": 1.8163198232650757, + "rewards/rejected": -2.618398904800415, + "step": 1400 + }, + { + "epoch": 1.1280000000000001, + "grad_norm": 0.5002021593337239, + "learning_rate": 3.4494477388045035e-05, + "log_odds_chosen": 4.483678340911865, + "log_odds_ratio": -0.028768246993422508, + "logits/chosen": -2.92014741897583, + "logits/chosen_prompt": -2.8309707641601562, + "logits/rejected": -2.486912250518799, + "logits/rejected_prompt": -2.804452419281006, + "logps/chosen": -2.067333459854126, + "logps/chosen_both": -2.0484328269958496, + "logps/chosen_prompt": -0.7646309733390808, + "logps/rejected": -6.416478157043457, + "logps/rejected_both": -6.335555076599121, + "logps/rejected_prompt": -0.9275982975959778, + "loss": 2.062, + "nll_loss": 2.047743558883667, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8269332647323608, + "rewards/margins": 1.7396576404571533, + "rewards/rejected": -2.5665910243988037, + "step": 1410 + }, + { + "epoch": 1.1360000000000001, + "grad_norm": 0.16513157762808564, + "learning_rate": 3.430039875967454e-05, + "log_odds_chosen": 4.668246746063232, + "log_odds_ratio": -0.07646802067756653, + "logits/chosen": -2.9350738525390625, + "logits/chosen_prompt": -2.8208534717559814, + "logits/rejected": -2.421509265899658, + "logits/rejected_prompt": -2.783437490463257, + "logps/chosen": -2.0800347328186035, + "logps/chosen_both": -2.0644993782043457, + "logps/chosen_prompt": -0.8468448519706726, + "logps/rejected": -6.625657558441162, + "logps/rejected_both": -6.545504570007324, + "logps/rejected_prompt": -1.04305100440979, + "loss": 2.0206, + "nll_loss": 2.0629351139068604, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8320137858390808, + "rewards/margins": 1.8182493448257446, + "rewards/rejected": -2.6502633094787598, + "step": 1420 + }, + { + "epoch": 1.144, + "grad_norm": 2.2295314469384206, + "learning_rate": 3.410566739729746e-05, + "log_odds_chosen": 5.851050853729248, + "log_odds_ratio": -0.004526123404502869, + "logits/chosen": -2.940370798110962, + "logits/chosen_prompt": -2.7820496559143066, + "logits/rejected": -2.2556514739990234, + "logits/rejected_prompt": -2.7672178745269775, + "logps/chosen": -1.8526496887207031, + "logps/chosen_both": -1.8396713733673096, + "logps/chosen_prompt": -0.8455888628959656, + "logps/rejected": -7.520164489746094, + "logps/rejected_both": -7.432145595550537, + "logps/rejected_prompt": -1.002396821975708, + "loss": 2.1827, + "nll_loss": 1.8387296199798584, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7410598993301392, + "rewards/margins": 2.267005681991577, + "rewards/rejected": -3.008065700531006, + "step": 1430 + }, + { + "epoch": 1.152, + "grad_norm": 4.408515203042964, + "learning_rate": 3.3910296967831266e-05, + "log_odds_chosen": 4.456727027893066, + "log_odds_ratio": -0.14154654741287231, + "logits/chosen": -2.9346349239349365, + "logits/chosen_prompt": -2.7853639125823975, + "logits/rejected": -2.2783145904541016, + "logits/rejected_prompt": -2.7635715007781982, + "logps/chosen": -1.9494521617889404, + "logps/chosen_both": -1.9318408966064453, + "logps/chosen_prompt": -0.9306742548942566, + "logps/rejected": -6.29015588760376, + "logps/rejected_both": -6.198000907897949, + "logps/rejected_prompt": -1.0760185718536377, + "loss": 2.1572, + "nll_loss": 1.931610107421875, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7797808647155762, + "rewards/margins": 1.7362816333770752, + "rewards/rejected": -2.5160624980926514, + "step": 1440 + }, + { + "epoch": 1.16, + "grad_norm": 0.3551432285571037, + "learning_rate": 3.3714301183045385e-05, + "log_odds_chosen": 5.155561447143555, + "log_odds_ratio": -0.07224146276712418, + "logits/chosen": -2.9873647689819336, + "logits/chosen_prompt": -2.7700507640838623, + "logits/rejected": -2.2287240028381348, + "logits/rejected_prompt": -2.7513465881347656, + "logps/chosen": -1.9037456512451172, + "logps/chosen_both": -1.8827041387557983, + "logps/chosen_prompt": -0.8036454319953918, + "logps/rejected": -6.904747009277344, + "logps/rejected_both": -6.79779052734375, + "logps/rejected_prompt": -0.9606531858444214, + "loss": 2.0135, + "nll_loss": 1.8827041387557983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7614982724189758, + "rewards/margins": 2.0004005432128906, + "rewards/rejected": -2.7618985176086426, + "step": 1450 + }, + { + "epoch": 1.168, + "grad_norm": 0.23892058786604192, + "learning_rate": 3.35176937985988e-05, + "log_odds_chosen": 4.485732078552246, + "log_odds_ratio": -0.14207962155342102, + "logits/chosen": -2.945270538330078, + "logits/chosen_prompt": -2.786912441253662, + "logits/rejected": -2.270350217819214, + "logits/rejected_prompt": -2.752725124359131, + "logps/chosen": -2.024524211883545, + "logps/chosen_both": -2.0046331882476807, + "logps/chosen_prompt": -0.774206817150116, + "logps/rejected": -6.382667064666748, + "logps/rejected_both": -6.294032096862793, + "logps/rejected_prompt": -0.9491628408432007, + "loss": 2.0727, + "nll_loss": 2.003938674926758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.809809684753418, + "rewards/margins": 1.7432572841644287, + "rewards/rejected": -2.5530669689178467, + "step": 1460 + }, + { + "epoch": 1.176, + "grad_norm": 0.2032800647611215, + "learning_rate": 3.332048861307467e-05, + "log_odds_chosen": 4.051968097686768, + "log_odds_ratio": -0.14674244821071625, + "logits/chosen": -2.99367094039917, + "logits/chosen_prompt": -2.802661657333374, + "logits/rejected": -2.338299512863159, + "logits/rejected_prompt": -2.7645983695983887, + "logps/chosen": -1.9771573543548584, + "logps/chosen_both": -1.9634653329849243, + "logps/chosen_prompt": -0.8673089742660522, + "logps/rejected": -5.909640789031982, + "logps/rejected_both": -5.843233585357666, + "logps/rejected_prompt": -0.918237030506134, + "loss": 2.0442, + "nll_loss": 1.9626314640045166, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.790863037109375, + "rewards/margins": 1.572993516921997, + "rewards/rejected": -2.363856554031372, + "step": 1470 + }, + { + "epoch": 1.184, + "grad_norm": 0.6791006786877852, + "learning_rate": 3.312269946701191e-05, + "log_odds_chosen": 5.11738395690918, + "log_odds_ratio": -0.08993680030107498, + "logits/chosen": -2.986436605453491, + "logits/chosen_prompt": -2.733582019805908, + "logits/rejected": -2.186984062194824, + "logits/rejected_prompt": -2.714433193206787, + "logps/chosen": -1.95094895362854, + "logps/chosen_both": -1.9355719089508057, + "logps/chosen_prompt": -0.9025853276252747, + "logps/rejected": -6.931356906890869, + "logps/rejected_both": -6.842989444732666, + "logps/rejected_prompt": -0.9505090713500977, + "loss": 2.0225, + "nll_loss": 1.935101866722107, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.780379593372345, + "rewards/margins": 1.992163062095642, + "rewards/rejected": -2.7725424766540527, + "step": 1480 + }, + { + "epoch": 1.192, + "grad_norm": 12.7002740206941, + "learning_rate": 3.29243402419338e-05, + "log_odds_chosen": 4.771432399749756, + "log_odds_ratio": -0.2453218698501587, + "logits/chosen": -2.9012749195098877, + "logits/chosen_prompt": -2.791215419769287, + "logits/rejected": -2.076328754425049, + "logits/rejected_prompt": -2.7599706649780273, + "logps/chosen": -2.869783878326416, + "logps/chosen_both": -2.8310511112213135, + "logps/chosen_prompt": -0.8819573521614075, + "logps/rejected": -7.530523777008057, + "logps/rejected_both": -7.408067226409912, + "logps/rejected_prompt": -1.0235049724578857, + "loss": 2.0981, + "nll_loss": 2.8310508728027344, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.1479135751724243, + "rewards/margins": 1.8642956018447876, + "rewards/rejected": -3.012209415435791, + "step": 1490 + }, + { + "epoch": 1.2, + "grad_norm": 0.19981467699086264, + "learning_rate": 3.272542485937369e-05, + "log_odds_chosen": 5.507603645324707, + "log_odds_ratio": -0.020156098529696465, + "logits/chosen": -2.9788875579833984, + "logits/chosen_prompt": -2.7711877822875977, + "logits/rejected": -2.0624115467071533, + "logits/rejected_prompt": -2.744807720184326, + "logps/chosen": -2.279694080352783, + "logps/chosen_both": -2.2537825107574463, + "logps/chosen_prompt": -0.8054102659225464, + "logps/rejected": -7.658332824707031, + "logps/rejected_both": -7.547041416168213, + "logps/rejected_prompt": -1.0083348751068115, + "loss": 2.1891, + "nll_loss": 2.2532057762145996, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9118776321411133, + "rewards/margins": 2.1514554023742676, + "rewards/rejected": -3.063333034515381, + "step": 1500 + }, + { + "epoch": 1.208, + "grad_norm": 0.2071781414340563, + "learning_rate": 3.2525967279898015e-05, + "log_odds_chosen": 3.779675006866455, + "log_odds_ratio": -0.2771868109703064, + "logits/chosen": -2.9284844398498535, + "logits/chosen_prompt": -2.73115873336792, + "logits/rejected": -2.319711446762085, + "logits/rejected_prompt": -2.7305550575256348, + "logps/chosen": -2.069701910018921, + "logps/chosen_both": -2.0511586666107178, + "logps/chosen_prompt": -0.8410334587097168, + "logps/rejected": -5.765010356903076, + "logps/rejected_both": -5.696343421936035, + "logps/rejected_prompt": -1.032503366470337, + "loss": 2.0199, + "nll_loss": 2.050447940826416, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8278807401657104, + "rewards/margins": 1.4781235456466675, + "rewards/rejected": -2.306004762649536, + "step": 1510 + }, + { + "epoch": 1.216, + "grad_norm": 0.18615530258539528, + "learning_rate": 3.2325981502126433e-05, + "log_odds_chosen": 4.861352443695068, + "log_odds_ratio": -0.14049410820007324, + "logits/chosen": -2.913702964782715, + "logits/chosen_prompt": -2.647313117980957, + "logits/rejected": -2.130164623260498, + "logits/rejected_prompt": -2.638327121734619, + "logps/chosen": -1.9652678966522217, + "logps/chosen_both": -1.948897361755371, + "logps/chosen_prompt": -0.8634968996047974, + "logps/rejected": -6.705462455749512, + "logps/rejected_both": -6.624319553375244, + "logps/rejected_prompt": -1.035592794418335, + "loss": 2.042, + "nll_loss": 1.948264718055725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7861071825027466, + "rewards/margins": 1.896078109741211, + "rewards/rejected": -2.682184934616089, + "step": 1520 + }, + { + "epoch": 1.224, + "grad_norm": 0.3295494652465448, + "learning_rate": 3.21254815617494e-05, + "log_odds_chosen": 5.780041694641113, + "log_odds_ratio": -0.004303447902202606, + "logits/chosen": -2.9733996391296387, + "logits/chosen_prompt": -2.7753407955169678, + "logits/rejected": -2.149972438812256, + "logits/rejected_prompt": -2.7639622688293457, + "logps/chosen": -1.992742896080017, + "logps/chosen_both": -1.975515604019165, + "logps/chosen_prompt": -0.8223434686660767, + "logps/rejected": -7.623780727386475, + "logps/rejected_both": -7.520285606384277, + "logps/rejected_prompt": -0.9390355348587036, + "loss": 2.0442, + "nll_loss": 1.974700689315796, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7970971465110779, + "rewards/margins": 2.252415180206299, + "rewards/rejected": -3.0495123863220215, + "step": 1530 + }, + { + "epoch": 1.232, + "grad_norm": 0.19966280929549698, + "learning_rate": 3.192448153054306e-05, + "log_odds_chosen": 3.838728427886963, + "log_odds_ratio": -0.14647504687309265, + "logits/chosen": -2.9667465686798096, + "logits/chosen_prompt": -2.8230855464935303, + "logits/rejected": -2.5847840309143066, + "logits/rejected_prompt": -2.822601795196533, + "logps/chosen": -2.122664451599121, + "logps/chosen_both": -2.0995185375213623, + "logps/chosen_prompt": -0.9422351717948914, + "logps/rejected": -5.859042644500732, + "logps/rejected_both": -5.767674446105957, + "logps/rejected_prompt": -1.1390842199325562, + "loss": 2.095, + "nll_loss": 2.0988729000091553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8490656614303589, + "rewards/margins": 1.494551420211792, + "rewards/rejected": -2.3436172008514404, + "step": 1540 + }, + { + "epoch": 1.24, + "grad_norm": 0.21556247694383007, + "learning_rate": 3.172299551538164e-05, + "log_odds_chosen": 4.561056137084961, + "log_odds_ratio": -0.07612424343824387, + "logits/chosen": -2.8919880390167236, + "logits/chosen_prompt": -2.808797836303711, + "logits/rejected": -2.5644102096557617, + "logits/rejected_prompt": -2.802969455718994, + "logps/chosen": -1.9356054067611694, + "logps/chosen_both": -1.9162133932113647, + "logps/chosen_prompt": -0.7942633032798767, + "logps/rejected": -6.3366522789001465, + "logps/rejected_both": -6.251999855041504, + "logps/rejected_prompt": -0.9459937810897827, + "loss": 2.1408, + "nll_loss": 1.915776252746582, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7742422223091125, + "rewards/margins": 1.7604186534881592, + "rewards/rejected": -2.534660816192627, + "step": 1550 + }, + { + "epoch": 1.248, + "grad_norm": 0.19312969446080464, + "learning_rate": 3.152103765724743e-05, + "log_odds_chosen": 3.9786903858184814, + "log_odds_ratio": -0.10893861204385757, + "logits/chosen": -3.0307998657226562, + "logits/chosen_prompt": -2.7775232791900635, + "logits/rejected": -2.6540513038635254, + "logits/rejected_prompt": -2.7630362510681152, + "logps/chosen": -1.9151197671890259, + "logps/chosen_both": -1.8977829217910767, + "logps/chosen_prompt": -0.8471347689628601, + "logps/rejected": -5.757778644561768, + "logps/rejected_both": -5.6885457038879395, + "logps/rejected_prompt": -1.02475106716156, + "loss": 1.9805, + "nll_loss": 1.8967196941375732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7660478949546814, + "rewards/margins": 1.537063717842102, + "rewards/rejected": -2.3031115531921387, + "step": 1560 + }, + { + "epoch": 1.256, + "grad_norm": 0.19646035725215968, + "learning_rate": 3.1318622130238236e-05, + "log_odds_chosen": 4.679540157318115, + "log_odds_ratio": -0.07853083312511444, + "logits/chosen": -2.9802026748657227, + "logits/chosen_prompt": -2.761209011077881, + "logits/rejected": -2.5600242614746094, + "logits/rejected_prompt": -2.7424654960632324, + "logps/chosen": -1.7784169912338257, + "logps/chosen_both": -1.7646305561065674, + "logps/chosen_prompt": -0.7139529585838318, + "logps/rejected": -6.263562202453613, + "logps/rejected_both": -6.190931797027588, + "logps/rejected_prompt": -0.9958028793334961, + "loss": 1.9559, + "nll_loss": 1.76325261592865, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7113668322563171, + "rewards/margins": 1.794058084487915, + "rewards/rejected": -2.505424976348877, + "step": 1570 + }, + { + "epoch": 1.264, + "grad_norm": 0.3866885307142738, + "learning_rate": 3.111576314057268e-05, + "log_odds_chosen": 3.801389694213867, + "log_odds_ratio": -0.20994290709495544, + "logits/chosen": -2.9368879795074463, + "logits/chosen_prompt": -2.7586987018585205, + "logits/rejected": -2.599658966064453, + "logits/rejected_prompt": -2.743234157562256, + "logps/chosen": -1.9905316829681396, + "logps/chosen_both": -1.9739116430282593, + "logps/chosen_prompt": -0.779675304889679, + "logps/rejected": -5.689120292663574, + "logps/rejected_both": -5.620154857635498, + "logps/rejected_prompt": -1.0595715045928955, + "loss": 2.0955, + "nll_loss": 1.9729188680648804, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7962126731872559, + "rewards/margins": 1.4794353246688843, + "rewards/rejected": -2.275648355484009, + "step": 1580 + }, + { + "epoch": 1.272, + "grad_norm": 0.2325643788869979, + "learning_rate": 3.091247492559312e-05, + "log_odds_chosen": 4.095303058624268, + "log_odds_ratio": -0.1479816436767578, + "logits/chosen": -2.9735686779022217, + "logits/chosen_prompt": -2.8000283241271973, + "logits/rejected": -2.558763027191162, + "logits/rejected_prompt": -2.7583069801330566, + "logps/chosen": -1.8590002059936523, + "logps/chosen_both": -1.8417994976043701, + "logps/chosen_prompt": -0.7681006193161011, + "logps/rejected": -5.769632816314697, + "logps/rejected_both": -5.685044288635254, + "logps/rejected_prompt": -0.97776859998703, + "loss": 2.1087, + "nll_loss": 1.8410179615020752, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.743600070476532, + "rewards/margins": 1.5642529726028442, + "rewards/rejected": -2.3078532218933105, + "step": 1590 + }, + { + "epoch": 1.28, + "grad_norm": 0.19381282768297478, + "learning_rate": 3.0708771752766394e-05, + "log_odds_chosen": 4.324513912200928, + "log_odds_ratio": -0.0676613599061966, + "logits/chosen": -2.9503540992736816, + "logits/chosen_prompt": -2.7982351779937744, + "logits/rejected": -2.5562634468078613, + "logits/rejected_prompt": -2.7812817096710205, + "logps/chosen": -1.9172391891479492, + "logps/chosen_both": -1.9022390842437744, + "logps/chosen_prompt": -0.7488449811935425, + "logps/rejected": -6.095970630645752, + "logps/rejected_both": -6.031794548034668, + "logps/rejected_prompt": -0.9277693033218384, + "loss": 1.9931, + "nll_loss": 1.9018001556396484, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7668957114219666, + "rewards/margins": 1.671492338180542, + "rewards/rejected": -2.4383881092071533, + "step": 1600 + }, + { + "epoch": 1.288, + "grad_norm": 0.23915750516620793, + "learning_rate": 3.050466791868254e-05, + "log_odds_chosen": 5.146353721618652, + "log_odds_ratio": -0.07113925367593765, + "logits/chosen": -3.0021820068359375, + "logits/chosen_prompt": -2.7346436977386475, + "logits/rejected": -2.400503635406494, + "logits/rejected_prompt": -2.715362071990967, + "logps/chosen": -1.8657314777374268, + "logps/chosen_both": -1.847728967666626, + "logps/chosen_prompt": -0.8974820375442505, + "logps/rejected": -6.850257873535156, + "logps/rejected_both": -6.73916482925415, + "logps/rejected_prompt": -0.9878479838371277, + "loss": 2.0166, + "nll_loss": 1.8474719524383545, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7462925910949707, + "rewards/margins": 1.9938108921051025, + "rewards/rejected": -2.7401034832000732, + "step": 1610 + }, + { + "epoch": 1.296, + "grad_norm": 0.25793388819398966, + "learning_rate": 3.0300177748051373e-05, + "log_odds_chosen": 5.57846212387085, + "log_odds_ratio": -0.0040098619647324085, + "logits/chosen": -2.921875476837158, + "logits/chosen_prompt": -2.7485337257385254, + "logits/rejected": -2.2575137615203857, + "logits/rejected_prompt": -2.729705333709717, + "logps/chosen": -2.0379016399383545, + "logps/chosen_both": -2.023336410522461, + "logps/chosen_prompt": -0.8523913621902466, + "logps/rejected": -7.4703474044799805, + "logps/rejected_both": -7.384527683258057, + "logps/rejected_prompt": -1.0912959575653076, + "loss": 2.1288, + "nll_loss": 2.021984100341797, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8151607513427734, + "rewards/margins": 2.172978639602661, + "rewards/rejected": -2.9881393909454346, + "step": 1620 + }, + { + "epoch": 1.304, + "grad_norm": 0.22406539118846014, + "learning_rate": 3.0095315592697126e-05, + "log_odds_chosen": 4.797575950622559, + "log_odds_ratio": -0.07414670288562775, + "logits/chosen": -2.9373860359191895, + "logits/chosen_prompt": -2.7567806243896484, + "logits/rejected": -2.339370012283325, + "logits/rejected_prompt": -2.738049030303955, + "logps/chosen": -2.040771961212158, + "logps/chosen_both": -2.022752523422241, + "logps/chosen_prompt": -0.8437407612800598, + "logps/rejected": -6.715930938720703, + "logps/rejected_both": -6.622492790222168, + "logps/rejected_prompt": -1.1104066371917725, + "loss": 2.0022, + "nll_loss": 2.021770715713501, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8163086771965027, + "rewards/margins": 1.8700635433197021, + "rewards/rejected": -2.6863722801208496, + "step": 1630 + }, + { + "epoch": 1.312, + "grad_norm": 0.19146540891141792, + "learning_rate": 2.9890095830551207e-05, + "log_odds_chosen": 5.205162525177002, + "log_odds_ratio": -0.015068802051246166, + "logits/chosen": -2.9850218296051025, + "logits/chosen_prompt": -2.7482991218566895, + "logits/rejected": -2.2866098880767822, + "logits/rejected_prompt": -2.7363736629486084, + "logps/chosen": -1.9450336694717407, + "logps/chosen_both": -1.9250189065933228, + "logps/chosen_prompt": -0.8316828012466431, + "logps/rejected": -6.989903450012207, + "logps/rejected_both": -6.88253927230835, + "logps/rejected_prompt": -0.9859585762023926, + "loss": 2.088, + "nll_loss": 1.924430251121521, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7780135273933411, + "rewards/margins": 2.0179476737976074, + "rewards/rejected": -2.7959611415863037, + "step": 1640 + }, + { + "epoch": 1.32, + "grad_norm": 0.22495066893496063, + "learning_rate": 2.9684532864643122e-05, + "log_odds_chosen": 5.308048725128174, + "log_odds_ratio": -0.00845087319612503, + "logits/chosen": -2.9742932319641113, + "logits/chosen_prompt": -2.7849392890930176, + "logits/rejected": -2.2982254028320312, + "logits/rejected_prompt": -2.7615458965301514, + "logps/chosen": -1.9874608516693115, + "logps/chosen_both": -1.9658311605453491, + "logps/chosen_prompt": -0.7408405542373657, + "logps/rejected": -7.14414119720459, + "logps/rejected_both": -7.038477897644043, + "logps/rejected_prompt": -1.045243501663208, + "loss": 2.0386, + "nll_loss": 1.9651544094085693, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7949844002723694, + "rewards/margins": 2.0626721382141113, + "rewards/rejected": -2.857656478881836, + "step": 1650 + }, + { + "epoch": 1.328, + "grad_norm": 0.2286734318135687, + "learning_rate": 2.9478641122089562e-05, + "log_odds_chosen": 4.840089797973633, + "log_odds_ratio": -0.07564349472522736, + "logits/chosen": -3.008890151977539, + "logits/chosen_prompt": -2.8013384342193604, + "logits/rejected": -2.394143581390381, + "logits/rejected_prompt": -2.77929425239563, + "logps/chosen": -1.9756405353546143, + "logps/chosen_both": -1.9581083059310913, + "logps/chosen_prompt": -0.7473115921020508, + "logps/rejected": -6.674158573150635, + "logps/rejected_both": -6.598573207855225, + "logps/rejected_prompt": -0.996438205242157, + "loss": 2.0632, + "nll_loss": 1.9576594829559326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.790256142616272, + "rewards/margins": 1.8794071674346924, + "rewards/rejected": -2.669663429260254, + "step": 1660 + }, + { + "epoch": 1.336, + "grad_norm": 1.6039791025981895, + "learning_rate": 2.9272435053081922e-05, + "log_odds_chosen": 4.911754131317139, + "log_odds_ratio": -0.08321253210306168, + "logits/chosen": -2.912379741668701, + "logits/chosen_prompt": -2.7961792945861816, + "logits/rejected": -2.264275312423706, + "logits/rejected_prompt": -2.7643306255340576, + "logps/chosen": -1.951281189918518, + "logps/chosen_both": -1.9351087808609009, + "logps/chosen_prompt": -0.7827764749526978, + "logps/rejected": -6.725755214691162, + "logps/rejected_both": -6.646947383880615, + "logps/rejected_prompt": -1.0157705545425415, + "loss": 2.1063, + "nll_loss": 1.9346641302108765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7805125713348389, + "rewards/margins": 1.9097894430160522, + "rewards/rejected": -2.6903018951416016, + "step": 1670 + }, + { + "epoch": 1.3439999999999999, + "grad_norm": 0.3698076131375805, + "learning_rate": 2.9065929129872094e-05, + "log_odds_chosen": 4.74294376373291, + "log_odds_ratio": -0.08516435325145721, + "logits/chosen": -2.9431169033050537, + "logits/chosen_prompt": -2.7804017066955566, + "logits/rejected": -2.2715518474578857, + "logits/rejected_prompt": -2.7543439865112305, + "logps/chosen": -2.047203779220581, + "logps/chosen_both": -2.028724193572998, + "logps/chosen_prompt": -0.8540178537368774, + "logps/rejected": -6.660338401794434, + "logps/rejected_both": -6.572705268859863, + "logps/rejected_prompt": -1.0315988063812256, + "loss": 2.1122, + "nll_loss": 2.0279080867767334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.818881630897522, + "rewards/margins": 1.8452539443969727, + "rewards/rejected": -2.664135456085205, + "step": 1680 + }, + { + "epoch": 1.3519999999999999, + "grad_norm": 14.756635490233291, + "learning_rate": 2.8859137845756784e-05, + "log_odds_chosen": 5.338567733764648, + "log_odds_ratio": -0.07245531678199768, + "logits/chosen": -3.0019686222076416, + "logits/chosen_prompt": -2.7564592361450195, + "logits/rejected": -2.10023832321167, + "logits/rejected_prompt": -2.75854754447937, + "logps/chosen": -1.801944375038147, + "logps/chosen_both": -1.7874317169189453, + "logps/chosen_prompt": -0.7828146815299988, + "logps/rejected": -6.980807304382324, + "logps/rejected_both": -6.885933876037598, + "logps/rejected_prompt": -1.0353758335113525, + "loss": 1.9922, + "nll_loss": 1.7853384017944336, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.720777690410614, + "rewards/margins": 2.071545124053955, + "rewards/rejected": -2.792322874069214, + "step": 1690 + }, + { + "epoch": 1.3599999999999999, + "grad_norm": 1.0205003901521117, + "learning_rate": 2.8652075714060295e-05, + "log_odds_chosen": 4.316029071807861, + "log_odds_ratio": -0.18554985523223877, + "logits/chosen": -2.9789249897003174, + "logits/chosen_prompt": -2.7761483192443848, + "logits/rejected": -2.230045795440674, + "logits/rejected_prompt": -2.7322373390197754, + "logps/chosen": -1.9758758544921875, + "logps/chosen_both": -1.958141565322876, + "logps/chosen_prompt": -0.839580237865448, + "logps/rejected": -6.175426006317139, + "logps/rejected_both": -6.096805572509766, + "logps/rejected_prompt": -1.002239465713501, + "loss": 2.0489, + "nll_loss": 1.957658052444458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7903503179550171, + "rewards/margins": 1.6798200607299805, + "rewards/rejected": -2.470170497894287, + "step": 1700 + }, + { + "epoch": 1.3679999999999999, + "grad_norm": 0.5093034024599485, + "learning_rate": 2.844475726711595e-05, + "log_odds_chosen": 5.062729835510254, + "log_odds_ratio": -0.05383139103651047, + "logits/chosen": -2.9323840141296387, + "logits/chosen_prompt": -2.757789134979248, + "logits/rejected": -2.114853620529175, + "logits/rejected_prompt": -2.740206003189087, + "logps/chosen": -1.9980299472808838, + "logps/chosen_both": -1.9810377359390259, + "logps/chosen_prompt": -0.8025790452957153, + "logps/rejected": -6.92165994644165, + "logps/rejected_both": -6.837998867034912, + "logps/rejected_prompt": -1.0708694458007812, + "loss": 2.02, + "nll_loss": 1.980063796043396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7992119789123535, + "rewards/margins": 1.9694522619247437, + "rewards/rejected": -2.7686638832092285, + "step": 1710 + }, + { + "epoch": 1.376, + "grad_norm": 0.1922091365417996, + "learning_rate": 2.8237197055246172e-05, + "log_odds_chosen": 5.407708644866943, + "log_odds_ratio": -0.07208568602800369, + "logits/chosen": -2.930446147918701, + "logits/chosen_prompt": -2.7493677139282227, + "logits/rejected": -1.8252556324005127, + "logits/rejected_prompt": -2.716831684112549, + "logps/chosen": -1.99956476688385, + "logps/chosen_both": -1.9826438426971436, + "logps/chosen_prompt": -0.8026520609855652, + "logps/rejected": -7.266847133636475, + "logps/rejected_both": -7.16598653793335, + "logps/rejected_prompt": -0.9821138381958008, + "loss": 2.0055, + "nll_loss": 1.9819648265838623, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.79982590675354, + "rewards/margins": 2.1069130897521973, + "rewards/rejected": -2.9067392349243164, + "step": 1720 + }, + { + "epoch": 1.384, + "grad_norm": 0.19884693939871143, + "learning_rate": 2.8029409645741267e-05, + "log_odds_chosen": 5.655479907989502, + "log_odds_ratio": -0.07094166427850723, + "logits/chosen": -2.9133386611938477, + "logits/chosen_prompt": -2.7181575298309326, + "logits/rejected": -1.8967100381851196, + "logits/rejected_prompt": -2.7026288509368896, + "logps/chosen": -2.0701959133148193, + "logps/chosen_both": -2.0524401664733887, + "logps/chosen_prompt": -0.8565284609794617, + "logps/rejected": -7.606234550476074, + "logps/rejected_both": -7.5077009201049805, + "logps/rejected_prompt": -1.0423924922943115, + "loss": 2.1485, + "nll_loss": 2.0521743297576904, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8280783891677856, + "rewards/margins": 2.2144155502319336, + "rewards/rejected": -3.042494058609009, + "step": 1730 + }, + { + "epoch": 1.392, + "grad_norm": 0.22986043369921255, + "learning_rate": 2.782140962183704e-05, + "log_odds_chosen": 6.107487678527832, + "log_odds_ratio": -0.0026633774396032095, + "logits/chosen": -2.98026442527771, + "logits/chosen_prompt": -2.780827522277832, + "logits/rejected": -1.9798576831817627, + "logits/rejected_prompt": -2.7703700065612793, + "logps/chosen": -1.9474899768829346, + "logps/chosen_both": -1.9275726079940796, + "logps/chosen_prompt": -0.7816404700279236, + "logps/rejected": -7.895272731781006, + "logps/rejected_both": -7.769126892089844, + "logps/rejected_prompt": -0.9758648872375488, + "loss": 1.9516, + "nll_loss": 1.925616979598999, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.778995931148529, + "rewards/margins": 2.3791134357452393, + "rewards/rejected": -3.158109188079834, + "step": 1740 + }, + { + "epoch": 1.4, + "grad_norm": 1.3967778423182213, + "learning_rate": 2.761321158169134e-05, + "log_odds_chosen": 5.588977336883545, + "log_odds_ratio": -0.07164627313613892, + "logits/chosen": -2.942800998687744, + "logits/chosen_prompt": -2.765923023223877, + "logits/rejected": -2.1541590690612793, + "logits/rejected_prompt": -2.7391622066497803, + "logps/chosen": -1.8856910467147827, + "logps/chosen_both": -1.8705289363861084, + "logps/chosen_prompt": -0.7254279851913452, + "logps/rejected": -7.315940856933594, + "logps/rejected_both": -7.2315239906311035, + "logps/rejected_prompt": -0.9249277114868164, + "loss": 2.037, + "nll_loss": 1.8701813220977783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7542763948440552, + "rewards/margins": 2.172100305557251, + "rewards/rejected": -2.9263763427734375, + "step": 1750 + }, + { + "epoch": 1.408, + "grad_norm": 0.19174060756423858, + "learning_rate": 2.7404830137359444e-05, + "log_odds_chosen": 5.684497356414795, + "log_odds_ratio": -0.03275999799370766, + "logits/chosen": -2.958325147628784, + "logits/chosen_prompt": -2.728274345397949, + "logits/rejected": -2.046318531036377, + "logits/rejected_prompt": -2.6898844242095947, + "logps/chosen": -2.253990411758423, + "logps/chosen_both": -2.2328062057495117, + "logps/chosen_prompt": -0.8659110069274902, + "logps/rejected": -7.784188747406006, + "logps/rejected_both": -7.674757480621338, + "logps/rejected_prompt": -1.1274776458740234, + "loss": 2.1275, + "nll_loss": 2.2321293354034424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.901596188545227, + "rewards/margins": 2.212078809738159, + "rewards/rejected": -3.1136748790740967, + "step": 1760 + }, + { + "epoch": 1.416, + "grad_norm": 0.1908777514352998, + "learning_rate": 2.7196279913768584e-05, + "log_odds_chosen": 5.167336940765381, + "log_odds_ratio": -0.07482357323169708, + "logits/chosen": -2.9330124855041504, + "logits/chosen_prompt": -2.7444446086883545, + "logits/rejected": -2.105210065841675, + "logits/rejected_prompt": -2.721642255783081, + "logps/chosen": -2.0776610374450684, + "logps/chosen_both": -2.0597071647644043, + "logps/chosen_prompt": -0.8555063009262085, + "logps/rejected": -7.124932765960693, + "logps/rejected_both": -7.030417442321777, + "logps/rejected_prompt": -1.0413535833358765, + "loss": 1.9978, + "nll_loss": 2.058987617492676, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8310644030570984, + "rewards/margins": 2.018908739089966, + "rewards/rejected": -2.84997296333313, + "step": 1770 + }, + { + "epoch": 1.424, + "grad_norm": 0.17855815500184188, + "learning_rate": 2.6987575547691497e-05, + "log_odds_chosen": 4.549686908721924, + "log_odds_ratio": -0.20390887558460236, + "logits/chosen": -2.9623754024505615, + "logits/chosen_prompt": -2.74225115776062, + "logits/rejected": -2.1663219928741455, + "logits/rejected_prompt": -2.7345423698425293, + "logps/chosen": -1.9926074743270874, + "logps/chosen_both": -1.9742103815078735, + "logps/chosen_prompt": -0.7784561514854431, + "logps/rejected": -6.431072235107422, + "logps/rejected_both": -6.3410797119140625, + "logps/rejected_prompt": -0.9243408441543579, + "loss": 2.0508, + "nll_loss": 1.973905324935913, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7970430850982666, + "rewards/margins": 1.775386095046997, + "rewards/rejected": -2.5724291801452637, + "step": 1780 + }, + { + "epoch": 1.432, + "grad_norm": 0.41995614329947717, + "learning_rate": 2.6778731686719178e-05, + "log_odds_chosen": 6.473885536193848, + "log_odds_ratio": -0.0018433562945574522, + "logits/chosen": -2.952514410018921, + "logits/chosen_prompt": -2.7027528285980225, + "logits/rejected": -1.8595733642578125, + "logits/rejected_prompt": -2.6798789501190186, + "logps/chosen": -1.9392732381820679, + "logps/chosen_both": -1.9248685836791992, + "logps/chosen_prompt": -0.931847095489502, + "logps/rejected": -8.25381088256836, + "logps/rejected_both": -8.140459060668945, + "logps/rejected_prompt": -1.0698789358139038, + "loss": 1.9874, + "nll_loss": 1.923288106918335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7757093906402588, + "rewards/margins": 2.5258147716522217, + "rewards/rejected": -3.3015239238739014, + "step": 1790 + }, + { + "epoch": 1.44, + "grad_norm": 0.7745820877648287, + "learning_rate": 2.656976298823284e-05, + "log_odds_chosen": 3.4408886432647705, + "log_odds_ratio": -0.27857550978660583, + "logits/chosen": -2.878281831741333, + "logits/chosen_prompt": -2.734473705291748, + "logits/rejected": -2.3365187644958496, + "logits/rejected_prompt": -2.7160048484802246, + "logps/chosen": -2.0569214820861816, + "logps/chosen_both": -2.0396482944488525, + "logps/chosen_prompt": -0.6810625791549683, + "logps/rejected": -5.414828300476074, + "logps/rejected_both": -5.35118293762207, + "logps/rejected_prompt": -0.8160842061042786, + "loss": 2.0419, + "nll_loss": 2.038651943206787, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.82276850938797, + "rewards/margins": 1.3431627750396729, + "rewards/rejected": -2.165931224822998, + "step": 1800 + }, + { + "epoch": 1.448, + "grad_norm": 0.19675956388988333, + "learning_rate": 2.636068411837523e-05, + "log_odds_chosen": 3.9148197174072266, + "log_odds_ratio": -0.23557178676128387, + "logits/chosen": -3.045487642288208, + "logits/chosen_prompt": -2.759061574935913, + "logits/rejected": -2.4077014923095703, + "logits/rejected_prompt": -2.7576231956481934, + "logps/chosen": -1.8861596584320068, + "logps/chosen_both": -1.8683302402496338, + "logps/chosen_prompt": -0.9071288108825684, + "logps/rejected": -5.683122158050537, + "logps/rejected_both": -5.601851940155029, + "logps/rejected_prompt": -1.0434454679489136, + "loss": 2.0263, + "nll_loss": 1.8671506643295288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7544639110565186, + "rewards/margins": 1.5187851190567017, + "rewards/rejected": -2.2732491493225098, + "step": 1810 + }, + { + "epoch": 1.456, + "grad_norm": 6.189918533614061, + "learning_rate": 2.615150975102131e-05, + "log_odds_chosen": 6.713578701019287, + "log_odds_ratio": -0.0015258995117619634, + "logits/chosen": -3.0059263706207275, + "logits/chosen_prompt": -2.7889323234558105, + "logits/rejected": -1.839082956314087, + "logits/rejected_prompt": -2.7647995948791504, + "logps/chosen": -2.004807233810425, + "logps/chosen_both": -1.98598313331604, + "logps/chosen_prompt": -0.7677423357963562, + "logps/rejected": -8.555073738098145, + "logps/rejected_both": -8.430871963500977, + "logps/rejected_prompt": -1.011725664138794, + "loss": 2.0302, + "nll_loss": 1.9841728210449219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8019229769706726, + "rewards/margins": 2.6201066970825195, + "rewards/rejected": -3.422029495239258, + "step": 1820 + }, + { + "epoch": 1.464, + "grad_norm": 0.21797873657619965, + "learning_rate": 2.594225456674837e-05, + "log_odds_chosen": 5.328610420227051, + "log_odds_ratio": -0.0812341570854187, + "logits/chosen": -2.979506731033325, + "logits/chosen_prompt": -2.792584180831909, + "logits/rejected": -2.08947491645813, + "logits/rejected_prompt": -2.781327962875366, + "logps/chosen": -1.9279931783676147, + "logps/chosen_both": -1.9127006530761719, + "logps/chosen_prompt": -0.7814801335334778, + "logps/rejected": -7.120486259460449, + "logps/rejected_both": -7.026519775390625, + "logps/rejected_prompt": -0.9352282285690308, + "loss": 2.0587, + "nll_loss": 1.9114625453948975, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7711972594261169, + "rewards/margins": 2.0769975185394287, + "rewards/rejected": -2.8481948375701904, + "step": 1830 + }, + { + "epoch": 1.472, + "grad_norm": 0.1871801141599041, + "learning_rate": 2.5732933251805713e-05, + "log_odds_chosen": 5.583043575286865, + "log_odds_ratio": -0.13880962133407593, + "logits/chosen": -2.9580206871032715, + "logits/chosen_prompt": -2.7731950283050537, + "logits/rejected": -2.012089490890503, + "logits/rejected_prompt": -2.75722336769104, + "logps/chosen": -1.855268120765686, + "logps/chosen_both": -1.8423293828964233, + "logps/chosen_prompt": -0.8601115942001343, + "logps/rejected": -7.305128574371338, + "logps/rejected_both": -7.2211809158325195, + "logps/rejected_prompt": -1.0132110118865967, + "loss": 1.9359, + "nll_loss": 1.8416475057601929, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7421072125434875, + "rewards/margins": 2.1799445152282715, + "rewards/rejected": -2.922051191329956, + "step": 1840 + }, + { + "epoch": 1.48, + "grad_norm": 0.22592416955066014, + "learning_rate": 2.5523560497083926e-05, + "log_odds_chosen": 5.949292182922363, + "log_odds_ratio": -0.07134632766246796, + "logits/chosen": -2.956613779067993, + "logits/chosen_prompt": -2.722937822341919, + "logits/rejected": -1.9237785339355469, + "logits/rejected_prompt": -2.704369068145752, + "logps/chosen": -1.9562047719955444, + "logps/chosen_both": -1.9380409717559814, + "logps/chosen_prompt": -0.7973084449768066, + "logps/rejected": -7.771543979644775, + "logps/rejected_both": -7.661837577819824, + "logps/rejected_prompt": -0.9722532033920288, + "loss": 1.9892, + "nll_loss": 1.9374074935913086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7824817895889282, + "rewards/margins": 2.3261356353759766, + "rewards/rejected": -3.1086175441741943, + "step": 1850 + }, + { + "epoch": 1.488, + "grad_norm": 0.19952883102568983, + "learning_rate": 2.531415099708382e-05, + "log_odds_chosen": 5.468968868255615, + "log_odds_ratio": -0.13928017020225525, + "logits/chosen": -2.901470184326172, + "logits/chosen_prompt": -2.7253496646881104, + "logits/rejected": -1.9635553359985352, + "logits/rejected_prompt": -2.721364736557007, + "logps/chosen": -2.024766683578491, + "logps/chosen_both": -2.0091967582702637, + "logps/chosen_prompt": -0.8794494867324829, + "logps/rejected": -7.388121604919434, + "logps/rejected_both": -7.304760932922363, + "logps/rejected_prompt": -1.0697910785675049, + "loss": 2.1409, + "nll_loss": 2.0086288452148438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8099067807197571, + "rewards/margins": 2.1453423500061035, + "rewards/rejected": -2.955249071121216, + "step": 1860 + }, + { + "epoch": 1.496, + "grad_norm": 0.20218369179299622, + "learning_rate": 2.51047194488851e-05, + "log_odds_chosen": 5.442208766937256, + "log_odds_ratio": -0.14097937941551208, + "logits/chosen": -2.9763107299804688, + "logits/chosen_prompt": -2.7768394947052, + "logits/rejected": -2.108531951904297, + "logits/rejected_prompt": -2.7451493740081787, + "logps/chosen": -1.79744553565979, + "logps/chosen_both": -1.7835102081298828, + "logps/chosen_prompt": -0.7872709631919861, + "logps/rejected": -7.031289577484131, + "logps/rejected_both": -6.934246063232422, + "logps/rejected_prompt": -0.9216675758361816, + "loss": 2.1195, + "nll_loss": 1.7827249765396118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.718978226184845, + "rewards/margins": 2.0935378074645996, + "rewards/rejected": -2.8125159740448, + "step": 1870 + }, + { + "epoch": 1.504, + "grad_norm": 0.9652790170177806, + "learning_rate": 2.4895280551114907e-05, + "log_odds_chosen": 5.730778694152832, + "log_odds_ratio": -0.07072736322879791, + "logits/chosen": -2.950146198272705, + "logits/chosen_prompt": -2.7803640365600586, + "logits/rejected": -1.9521598815917969, + "logits/rejected_prompt": -2.764260768890381, + "logps/chosen": -2.0558481216430664, + "logps/chosen_both": -2.0352180004119873, + "logps/chosen_prompt": -0.8978110551834106, + "logps/rejected": -7.663902282714844, + "logps/rejected_both": -7.545947074890137, + "logps/rejected_prompt": -1.037939429283142, + "loss": 2.049, + "nll_loss": 2.0345263481140137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8223392367362976, + "rewards/margins": 2.2432212829589844, + "rewards/rejected": -3.0655605792999268, + "step": 1880 + }, + { + "epoch": 1.512, + "grad_norm": 0.1890875725333666, + "learning_rate": 2.4685849002916183e-05, + "log_odds_chosen": 6.257909297943115, + "log_odds_ratio": -0.00222708098590374, + "logits/chosen": -2.9233384132385254, + "logits/chosen_prompt": -2.7774055004119873, + "logits/rejected": -1.9378130435943604, + "logits/rejected_prompt": -2.751840114593506, + "logps/chosen": -1.9843826293945312, + "logps/chosen_both": -1.9667317867279053, + "logps/chosen_prompt": -0.6825822591781616, + "logps/rejected": -8.092279434204102, + "logps/rejected_both": -7.992387294769287, + "logps/rejected_prompt": -0.9652584195137024, + "loss": 1.9485, + "nll_loss": 1.965959906578064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7937530279159546, + "rewards/margins": 2.4431586265563965, + "rewards/rejected": -3.2369117736816406, + "step": 1890 + }, + { + "epoch": 1.52, + "grad_norm": 0.2373809038859539, + "learning_rate": 2.447643950291608e-05, + "log_odds_chosen": 6.489705562591553, + "log_odds_ratio": -0.0016050601843744516, + "logits/chosen": -2.9970052242279053, + "logits/chosen_prompt": -2.755345106124878, + "logits/rejected": -1.9105993509292603, + "logits/rejected_prompt": -2.7229576110839844, + "logps/chosen": -1.8970317840576172, + "logps/chosen_both": -1.8811533451080322, + "logps/chosen_prompt": -0.7929924726486206, + "logps/rejected": -8.21942138671875, + "logps/rejected_both": -8.108181953430176, + "logps/rejected_prompt": -0.9921186566352844, + "loss": 1.954, + "nll_loss": 1.8801666498184204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7588127851486206, + "rewards/margins": 2.5289556980133057, + "rewards/rejected": -3.287768602371216, + "step": 1900 + }, + { + "epoch": 1.528, + "grad_norm": 0.1741002343821723, + "learning_rate": 2.4267066748194296e-05, + "log_odds_chosen": 5.774570941925049, + "log_odds_ratio": -0.07103729248046875, + "logits/chosen": -2.886838436126709, + "logits/chosen_prompt": -2.7209315299987793, + "logits/rejected": -2.010939836502075, + "logits/rejected_prompt": -2.7094690799713135, + "logps/chosen": -2.068047523498535, + "logps/chosen_both": -2.051417350769043, + "logps/chosen_prompt": -0.7632136940956116, + "logps/rejected": -7.714223384857178, + "logps/rejected_both": -7.628198146820068, + "logps/rejected_prompt": -0.9632788896560669, + "loss": 2.0981, + "nll_loss": 2.051051139831543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8272191286087036, + "rewards/margins": 2.258470296859741, + "rewards/rejected": -3.085689067840576, + "step": 1910 + }, + { + "epoch": 1.536, + "grad_norm": 0.18057749289339498, + "learning_rate": 2.4057745433251635e-05, + "log_odds_chosen": 6.403738498687744, + "log_odds_ratio": -0.0018427784088999033, + "logits/chosen": -2.9575610160827637, + "logits/chosen_prompt": -2.7303547859191895, + "logits/rejected": -1.862630844116211, + "logits/rejected_prompt": -2.71962833404541, + "logps/chosen": -2.0046885013580322, + "logps/chosen_both": -1.9884449243545532, + "logps/chosen_prompt": -0.763080894947052, + "logps/rejected": -8.254236221313477, + "logps/rejected_both": -8.159029960632324, + "logps/rejected_prompt": -1.045041799545288, + "loss": 2.0516, + "nll_loss": 1.9879404306411743, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8018752932548523, + "rewards/margins": 2.49981951713562, + "rewards/rejected": -3.301694869995117, + "step": 1920 + }, + { + "epoch": 1.544, + "grad_norm": 0.20142735097076295, + "learning_rate": 2.384849024897869e-05, + "log_odds_chosen": 5.733250617980957, + "log_odds_ratio": -0.004482199437916279, + "logits/chosen": -2.9741549491882324, + "logits/chosen_prompt": -2.7055163383483887, + "logits/rejected": -2.124002456665039, + "logits/rejected_prompt": -2.688239812850952, + "logps/chosen": -1.9430478811264038, + "logps/chosen_both": -1.926995038986206, + "logps/chosen_prompt": -0.7834355235099792, + "logps/rejected": -7.518483638763428, + "logps/rejected_both": -7.4232635498046875, + "logps/rejected_prompt": -1.0878071784973145, + "loss": 2.1323, + "nll_loss": 1.9260002374649048, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7772191166877747, + "rewards/margins": 2.2301743030548096, + "rewards/rejected": -3.0073933601379395, + "step": 1930 + }, + { + "epoch": 1.552, + "grad_norm": 40.90864961224279, + "learning_rate": 2.3639315881624777e-05, + "log_odds_chosen": 5.306234836578369, + "log_odds_ratio": -0.00918310321867466, + "logits/chosen": -2.9237542152404785, + "logits/chosen_prompt": -2.7105278968811035, + "logits/rejected": -2.2239270210266113, + "logits/rejected_prompt": -2.686476469039917, + "logps/chosen": -1.9409538507461548, + "logps/chosen_both": -1.9275703430175781, + "logps/chosen_prompt": -0.8563373684883118, + "logps/rejected": -7.0894670486450195, + "logps/rejected_both": -7.007052421569824, + "logps/rejected_prompt": -0.9907125234603882, + "loss": 1.9112, + "nll_loss": 1.926429033279419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7763815522193909, + "rewards/margins": 2.059405565261841, + "rewards/rejected": -2.835787296295166, + "step": 1940 + }, + { + "epoch": 1.56, + "grad_norm": 0.21885482692879285, + "learning_rate": 2.3430237011767167e-05, + "log_odds_chosen": 5.6596198081970215, + "log_odds_ratio": -0.023314189165830612, + "logits/chosen": -2.9358747005462646, + "logits/chosen_prompt": -2.727999687194824, + "logits/rejected": -2.0308213233947754, + "logits/rejected_prompt": -2.686753749847412, + "logps/chosen": -1.9377899169921875, + "logps/chosen_both": -1.922545075416565, + "logps/chosen_prompt": -0.8713130950927734, + "logps/rejected": -7.442534446716309, + "logps/rejected_both": -7.339343070983887, + "logps/rejected_prompt": -1.057796835899353, + "loss": 2.0015, + "nll_loss": 1.9221293926239014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7751160264015198, + "rewards/margins": 2.2018978595733643, + "rewards/rejected": -2.9770140647888184, + "step": 1950 + }, + { + "epoch": 1.568, + "grad_norm": 0.5403488938261225, + "learning_rate": 2.3221268313280838e-05, + "log_odds_chosen": 5.778319358825684, + "log_odds_ratio": -0.07066681236028671, + "logits/chosen": -2.954177141189575, + "logits/chosen_prompt": -2.678536891937256, + "logits/rejected": -1.9524621963500977, + "logits/rejected_prompt": -2.6848878860473633, + "logps/chosen": -1.9211227893829346, + "logps/chosen_both": -1.902917504310608, + "logps/chosen_prompt": -0.8868004083633423, + "logps/rejected": -7.527622222900391, + "logps/rejected_both": -7.4302239418029785, + "logps/rejected_prompt": -1.1353758573532104, + "loss": 2.0128, + "nll_loss": 1.9021003246307373, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.768449068069458, + "rewards/margins": 2.2425997257232666, + "rewards/rejected": -3.0110487937927246, + "step": 1960 + }, + { + "epoch": 1.576, + "grad_norm": 6.334469044302015, + "learning_rate": 2.301242445230851e-05, + "log_odds_chosen": 4.549070358276367, + "log_odds_ratio": -0.10954795777797699, + "logits/chosen": -2.9302010536193848, + "logits/chosen_prompt": -2.6880440711975098, + "logits/rejected": -2.190250873565674, + "logits/rejected_prompt": -2.6803054809570312, + "logps/chosen": -2.2468152046203613, + "logps/chosen_both": -2.227410316467285, + "logps/chosen_prompt": -0.7418851852416992, + "logps/rejected": -6.677786827087402, + "logps/rejected_both": -6.601284980773926, + "logps/rejected_prompt": -0.9388518333435059, + "loss": 2.1059, + "nll_loss": 2.226693630218506, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.8987261056900024, + "rewards/margins": 1.7723888158798218, + "rewards/rejected": -2.671114683151245, + "step": 1970 + }, + { + "epoch": 1.584, + "grad_norm": 0.21099709481066398, + "learning_rate": 2.280372008623142e-05, + "log_odds_chosen": 4.277853488922119, + "log_odds_ratio": -0.18287745118141174, + "logits/chosen": -2.989633321762085, + "logits/chosen_prompt": -2.6874613761901855, + "logits/rejected": -2.2610902786254883, + "logits/rejected_prompt": -2.664952516555786, + "logps/chosen": -1.912766695022583, + "logps/chosen_both": -1.8961530923843384, + "logps/chosen_prompt": -0.7984111905097961, + "logps/rejected": -6.0515875816345215, + "logps/rejected_both": -5.97214412689209, + "logps/rejected_prompt": -1.0341233015060425, + "loss": 2.0542, + "nll_loss": 1.894964575767517, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.7651066780090332, + "rewards/margins": 1.6555284261703491, + "rewards/rejected": -2.4206349849700928, + "step": 1980 + }, + { + "epoch": 1.592, + "grad_norm": 0.23272826174313574, + "learning_rate": 2.2595169862640568e-05, + "log_odds_chosen": 6.768258094787598, + "log_odds_ratio": -0.001374961924739182, + "logits/chosen": -2.973562240600586, + "logits/chosen_prompt": -2.686769962310791, + "logits/rejected": -1.666338562965393, + "logits/rejected_prompt": -2.683814764022827, + "logps/chosen": -1.9322917461395264, + "logps/chosen_both": -1.9166603088378906, + "logps/chosen_prompt": -0.8024829626083374, + "logps/rejected": -8.528871536254883, + "logps/rejected_both": -8.413396835327148, + "logps/rejected_prompt": -1.0380266904830933, + "loss": 2.0648, + "nll_loss": 1.9158977270126343, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7729167342185974, + "rewards/margins": 2.63863205909729, + "rewards/rejected": -3.4115490913391113, + "step": 1990 + }, + { + "epoch": 1.6, + "grad_norm": 0.20086785213912312, + "learning_rate": 2.238678841830867e-05, + "log_odds_chosen": 6.327115058898926, + "log_odds_ratio": -0.004725167062133551, + "logits/chosen": -2.966679573059082, + "logits/chosen_prompt": -2.6999356746673584, + "logits/rejected": -1.8506364822387695, + "logits/rejected_prompt": -2.6866955757141113, + "logps/chosen": -1.8783817291259766, + "logps/chosen_both": -1.8609817028045654, + "logps/chosen_prompt": -0.7905829548835754, + "logps/rejected": -8.029566764831543, + "logps/rejected_both": -7.908673286437988, + "logps/rejected_prompt": -1.0723146200180054, + "loss": 1.9398, + "nll_loss": 1.860142469406128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7513527274131775, + "rewards/margins": 2.4604744911193848, + "rewards/rejected": -3.211826801300049, + "step": 2000 + }, + { + "epoch": 1.608, + "grad_norm": 3.6061661350197456, + "learning_rate": 2.217859037816296e-05, + "log_odds_chosen": 4.772618770599365, + "log_odds_ratio": -0.14787371456623077, + "logits/chosen": -2.9939560890197754, + "logits/chosen_prompt": -2.712306499481201, + "logits/rejected": -2.120854139328003, + "logits/rejected_prompt": -2.699389934539795, + "logps/chosen": -2.0005993843078613, + "logps/chosen_both": -1.9795938730239868, + "logps/chosen_prompt": -0.7556421160697937, + "logps/rejected": -6.654515743255615, + "logps/rejected_both": -6.551595211029053, + "logps/rejected_prompt": -0.9516459703445435, + "loss": 1.9737, + "nll_loss": 1.9790796041488647, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8002398610115051, + "rewards/margins": 1.8615667819976807, + "rewards/rejected": -2.661806344985962, + "step": 2010 + }, + { + "epoch": 1.616, + "grad_norm": 0.3283349921571691, + "learning_rate": 2.1970590354258745e-05, + "log_odds_chosen": 6.253961086273193, + "log_odds_ratio": -0.07067908346652985, + "logits/chosen": -2.9472057819366455, + "logits/chosen_prompt": -2.6802945137023926, + "logits/rejected": -1.744763731956482, + "logits/rejected_prompt": -2.6687159538269043, + "logps/chosen": -1.955038070678711, + "logps/chosen_both": -1.9407745599746704, + "logps/chosen_prompt": -1.00258469581604, + "logps/rejected": -8.05742073059082, + "logps/rejected_both": -7.938286781311035, + "logps/rejected_prompt": -1.1584670543670654, + "loss": 2.0349, + "nll_loss": 1.940118432044983, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7820152044296265, + "rewards/margins": 2.440953254699707, + "rewards/rejected": -3.222968339920044, + "step": 2020 + }, + { + "epoch": 1.624, + "grad_norm": 1.8747875530540283, + "learning_rate": 2.176280294475383e-05, + "log_odds_chosen": 6.281460762023926, + "log_odds_ratio": -0.03783145174384117, + "logits/chosen": -3.011366844177246, + "logits/chosen_prompt": -2.6553094387054443, + "logits/rejected": -1.8144845962524414, + "logits/rejected_prompt": -2.649622678756714, + "logps/chosen": -1.9069626331329346, + "logps/chosen_both": -1.8888943195343018, + "logps/chosen_prompt": -0.7433997988700867, + "logps/rejected": -8.008193969726562, + "logps/rejected_both": -7.898676872253418, + "logps/rejected_prompt": -0.9908720254898071, + "loss": 1.9971, + "nll_loss": 1.8877136707305908, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.762785017490387, + "rewards/margins": 2.440492630004883, + "rewards/rejected": -3.203277587890625, + "step": 2030 + }, + { + "epoch": 1.6320000000000001, + "grad_norm": 2.9635896306517915, + "learning_rate": 2.155524273288405e-05, + "log_odds_chosen": 4.7696404457092285, + "log_odds_ratio": -0.2104126662015915, + "logits/chosen": -2.9527573585510254, + "logits/chosen_prompt": -2.6921048164367676, + "logits/rejected": -2.0738635063171387, + "logits/rejected_prompt": -2.67110538482666, + "logps/chosen": -1.996506690979004, + "logps/chosen_both": -1.9748737812042236, + "logps/chosen_prompt": -0.7325566411018372, + "logps/rejected": -6.6651411056518555, + "logps/rejected_both": -6.573362827301025, + "logps/rejected_prompt": -0.9392368197441101, + "loss": 1.9348, + "nll_loss": 1.9730939865112305, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7986027002334595, + "rewards/margins": 1.8674538135528564, + "rewards/rejected": -2.6660561561584473, + "step": 2040 + }, + { + "epoch": 1.6400000000000001, + "grad_norm": 0.19964912068774665, + "learning_rate": 2.1347924285939714e-05, + "log_odds_chosen": 6.8775224685668945, + "log_odds_ratio": -0.008257986977696419, + "logits/chosen": -2.917914867401123, + "logits/chosen_prompt": -2.6802151203155518, + "logits/rejected": -1.6495475769042969, + "logits/rejected_prompt": -2.661830186843872, + "logps/chosen": -2.0301578044891357, + "logps/chosen_both": -2.007798910140991, + "logps/chosen_prompt": -0.8403179049491882, + "logps/rejected": -8.763223648071289, + "logps/rejected_both": -8.611532211303711, + "logps/rejected_prompt": -1.09980046749115, + "loss": 2.1549, + "nll_loss": 2.006844997406006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8120630979537964, + "rewards/margins": 2.6932263374328613, + "rewards/rejected": -3.5052895545959473, + "step": 2050 + }, + { + "epoch": 1.6480000000000001, + "grad_norm": 0.17460562158440138, + "learning_rate": 2.114086215424322e-05, + "log_odds_chosen": 6.110722064971924, + "log_odds_ratio": -0.023483365774154663, + "logits/chosen": -2.909790515899658, + "logits/chosen_prompt": -2.6986935138702393, + "logits/rejected": -1.758716344833374, + "logits/rejected_prompt": -2.6658692359924316, + "logps/chosen": -2.3056933879852295, + "logps/chosen_both": -2.285371780395508, + "logps/chosen_prompt": -0.833857536315918, + "logps/rejected": -8.272215843200684, + "logps/rejected_both": -8.151971817016602, + "logps/rejected_prompt": -1.0774855613708496, + "loss": 2.0557, + "nll_loss": 2.284456968307495, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9222772717475891, + "rewards/margins": 2.3866093158721924, + "rewards/rejected": -3.308886766433716, + "step": 2060 + }, + { + "epoch": 1.6560000000000001, + "grad_norm": 0.19823340696579927, + "learning_rate": 2.0934070870127912e-05, + "log_odds_chosen": 5.7514495849609375, + "log_odds_ratio": -0.13919630646705627, + "logits/chosen": -2.9313971996307373, + "logits/chosen_prompt": -2.690089225769043, + "logits/rejected": -1.7628095149993896, + "logits/rejected_prompt": -2.6867289543151855, + "logps/chosen": -2.0054726600646973, + "logps/chosen_both": -1.9867470264434814, + "logps/chosen_prompt": -0.730907678604126, + "logps/rejected": -7.626162528991699, + "logps/rejected_both": -7.516133785247803, + "logps/rejected_prompt": -0.9458767771720886, + "loss": 2.0384, + "nll_loss": 1.9859052896499634, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8021891713142395, + "rewards/margins": 2.2482759952545166, + "rewards/rejected": -3.0504648685455322, + "step": 2070 + }, + { + "epoch": 1.6640000000000001, + "grad_norm": 0.19392027541652682, + "learning_rate": 2.0727564946918087e-05, + "log_odds_chosen": 7.237205505371094, + "log_odds_ratio": -0.001250033383257687, + "logits/chosen": -2.934305429458618, + "logits/chosen_prompt": -2.7029290199279785, + "logits/rejected": -1.5330889225006104, + "logits/rejected_prompt": -2.6817727088928223, + "logps/chosen": -2.0364651679992676, + "logps/chosen_both": -2.015903949737549, + "logps/chosen_prompt": -0.8590591549873352, + "logps/rejected": -9.122060775756836, + "logps/rejected_both": -8.987492561340332, + "logps/rejected_prompt": -1.0628540515899658, + "loss": 1.9994, + "nll_loss": 2.0151782035827637, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8145861625671387, + "rewards/margins": 2.834237575531006, + "rewards/rejected": -3.6488234996795654, + "step": 2080 + }, + { + "epoch": 1.6720000000000002, + "grad_norm": 0.2855392155807927, + "learning_rate": 2.0521358877910444e-05, + "log_odds_chosen": 6.342986583709717, + "log_odds_ratio": -0.07219625264406204, + "logits/chosen": -2.9752235412597656, + "logits/chosen_prompt": -2.7005088329315186, + "logits/rejected": -1.7442362308502197, + "logits/rejected_prompt": -2.693645477294922, + "logps/chosen": -1.990447759628296, + "logps/chosen_both": -1.970177412033081, + "logps/chosen_prompt": -0.7856583595275879, + "logps/rejected": -8.199989318847656, + "logps/rejected_both": -8.072303771972656, + "logps/rejected_prompt": -0.9411813020706177, + "loss": 2.021, + "nll_loss": 1.9698638916015625, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7961790561676025, + "rewards/margins": 2.483816623687744, + "rewards/rejected": -3.2799954414367676, + "step": 2090 + }, + { + "epoch": 1.6800000000000002, + "grad_norm": 0.36170871833517027, + "learning_rate": 2.031546713535688e-05, + "log_odds_chosen": 5.634890079498291, + "log_odds_ratio": -0.1395900696516037, + "logits/chosen": -2.93391752243042, + "logits/chosen_prompt": -2.718055248260498, + "logits/rejected": -1.7808215618133545, + "logits/rejected_prompt": -2.6867878437042236, + "logps/chosen": -2.3721437454223633, + "logps/chosen_both": -2.3435354232788086, + "logps/chosen_prompt": -0.7950377464294434, + "logps/rejected": -7.916224479675293, + "logps/rejected_both": -7.782776832580566, + "logps/rejected_prompt": -0.9661157727241516, + "loss": 2.1271, + "nll_loss": 2.341766595840454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9488574862480164, + "rewards/margins": 2.21763277053833, + "rewards/rejected": -3.166490077972412, + "step": 2100 + }, + { + "epoch": 1.688, + "grad_norm": 0.3052641697772741, + "learning_rate": 2.01099041694488e-05, + "log_odds_chosen": 5.173205375671387, + "log_odds_ratio": -0.2093629539012909, + "logits/chosen": -2.913505792617798, + "logits/chosen_prompt": -2.695497512817383, + "logits/rejected": -1.9728949069976807, + "logits/rejected_prompt": -2.681952476501465, + "logps/chosen": -1.9676679372787476, + "logps/chosen_both": -1.9531806707382202, + "logps/chosen_prompt": -0.8127241134643555, + "logps/rejected": -7.031458377838135, + "logps/rejected_both": -6.950935363769531, + "logps/rejected_prompt": -0.9248498678207397, + "loss": 2.0659, + "nll_loss": 1.9526466131210327, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.787067174911499, + "rewards/margins": 2.0255160331726074, + "rewards/rejected": -2.8125832080841064, + "step": 2110 + }, + { + "epoch": 1.696, + "grad_norm": 0.1918548604852694, + "learning_rate": 1.9904684407302883e-05, + "log_odds_chosen": 7.995016574859619, + "log_odds_ratio": -0.00040107182576321065, + "logits/chosen": -3.0051703453063965, + "logits/chosen_prompt": -2.7128148078918457, + "logits/rejected": -1.3667514324188232, + "logits/rejected_prompt": -2.695828676223755, + "logps/chosen": -1.9211137294769287, + "logps/chosen_both": -1.9036529064178467, + "logps/chosen_prompt": -0.8414414525032043, + "logps/rejected": -9.738038063049316, + "logps/rejected_both": -9.58409309387207, + "logps/rejected_prompt": -0.957872748374939, + "loss": 1.9882, + "nll_loss": 1.9027389287948608, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7684455513954163, + "rewards/margins": 3.126769781112671, + "rewards/rejected": -3.8952155113220215, + "step": 2120 + }, + { + "epoch": 1.704, + "grad_norm": 25.341642829209718, + "learning_rate": 1.969982225194864e-05, + "log_odds_chosen": 6.443746089935303, + "log_odds_ratio": -0.13866354525089264, + "logits/chosen": -2.8991589546203613, + "logits/chosen_prompt": -2.704436779022217, + "logits/rejected": -1.6840307712554932, + "logits/rejected_prompt": -2.696018695831299, + "logps/chosen": -1.971212387084961, + "logps/chosen_both": -1.9563363790512085, + "logps/chosen_prompt": -0.7664562463760376, + "logps/rejected": -8.291219711303711, + "logps/rejected_both": -8.195323944091797, + "logps/rejected_prompt": -0.8870849609375, + "loss": 2.0512, + "nll_loss": 1.955370545387268, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7884851098060608, + "rewards/margins": 2.5280027389526367, + "rewards/rejected": -3.3164875507354736, + "step": 2130 + }, + { + "epoch": 1.712, + "grad_norm": 0.20382071740750204, + "learning_rate": 1.9495332081317464e-05, + "log_odds_chosen": 6.890301704406738, + "log_odds_ratio": -0.009469824843108654, + "logits/chosen": -2.8794448375701904, + "logits/chosen_prompt": -2.694141387939453, + "logits/rejected": -1.638772964477539, + "logits/rejected_prompt": -2.6982343196868896, + "logps/chosen": -2.006687641143799, + "logps/chosen_both": -1.9925482273101807, + "logps/chosen_prompt": -0.8075912594795227, + "logps/rejected": -8.752016067504883, + "logps/rejected_both": -8.659661293029785, + "logps/rejected_prompt": -1.0454128980636597, + "loss": 1.9488, + "nll_loss": 1.9920895099639893, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8026750683784485, + "rewards/margins": 2.698131561279297, + "rewards/rejected": -3.5008063316345215, + "step": 2140 + }, + { + "epoch": 1.72, + "grad_norm": 3.232652124328266, + "learning_rate": 1.9291228247233605e-05, + "log_odds_chosen": 6.535033226013184, + "log_odds_ratio": -0.0724484771490097, + "logits/chosen": -2.8941891193389893, + "logits/chosen_prompt": -2.70381498336792, + "logits/rejected": -1.799768090248108, + "logits/rejected_prompt": -2.6814205646514893, + "logps/chosen": -1.9803783893585205, + "logps/chosen_both": -1.9626888036727905, + "logps/chosen_prompt": -0.8645817041397095, + "logps/rejected": -8.38414192199707, + "logps/rejected_both": -8.267631530761719, + "logps/rejected_prompt": -0.9822869300842285, + "loss": 1.9512, + "nll_loss": 1.9625753164291382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7921513319015503, + "rewards/margins": 2.5615053176879883, + "rewards/rejected": -3.353656768798828, + "step": 2150 + }, + { + "epoch": 1.728, + "grad_norm": 0.5121046736628673, + "learning_rate": 1.908752507440689e-05, + "log_odds_chosen": 6.229867458343506, + "log_odds_ratio": -0.0752544105052948, + "logits/chosen": -2.935990571975708, + "logits/chosen_prompt": -2.68332576751709, + "logits/rejected": -1.7542794942855835, + "logits/rejected_prompt": -2.6715810298919678, + "logps/chosen": -2.238250732421875, + "logps/chosen_both": -2.217163562774658, + "logps/chosen_prompt": -0.7275692820549011, + "logps/rejected": -8.351387023925781, + "logps/rejected_both": -8.241617202758789, + "logps/rejected_prompt": -0.9444383382797241, + "loss": 2.1639, + "nll_loss": 2.2166025638580322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8953002691268921, + "rewards/margins": 2.445254325866699, + "rewards/rejected": -3.340554714202881, + "step": 2160 + }, + { + "epoch": 1.736, + "grad_norm": 0.18380447787382737, + "learning_rate": 1.888423685942732e-05, + "log_odds_chosen": 7.403123378753662, + "log_odds_ratio": -0.0035772870760411024, + "logits/chosen": -2.9258389472961426, + "logits/chosen_prompt": -2.7035067081451416, + "logits/rejected": -1.6778090000152588, + "logits/rejected_prompt": -2.682382106781006, + "logps/chosen": -1.8578765392303467, + "logps/chosen_both": -1.8427069187164307, + "logps/chosen_prompt": -0.832676887512207, + "logps/rejected": -9.08339786529541, + "logps/rejected_both": -8.959403038024902, + "logps/rejected_prompt": -1.1029479503631592, + "loss": 1.9654, + "nll_loss": 1.8422781229019165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7431506514549255, + "rewards/margins": 2.8902084827423096, + "rewards/rejected": -3.63335919380188, + "step": 2170 + }, + { + "epoch": 1.744, + "grad_norm": 15.42646908452697, + "learning_rate": 1.868137786976177e-05, + "log_odds_chosen": 6.83737325668335, + "log_odds_ratio": -0.09123753756284714, + "logits/chosen": -2.9604616165161133, + "logits/chosen_prompt": -2.6771702766418457, + "logits/rejected": -1.7559928894042969, + "logits/rejected_prompt": -2.6906254291534424, + "logps/chosen": -1.9559208154678345, + "logps/chosen_both": -1.9405914545059204, + "logps/chosen_prompt": -0.7949713468551636, + "logps/rejected": -8.641664505004883, + "logps/rejected_both": -8.521966934204102, + "logps/rejected_prompt": -0.9677802324295044, + "loss": 2.0939, + "nll_loss": 1.938951849937439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7823683619499207, + "rewards/margins": 2.6742970943450928, + "rewards/rejected": -3.4566657543182373, + "step": 2180 + }, + { + "epoch": 1.752, + "grad_norm": 12.062069037613009, + "learning_rate": 1.8478962342752583e-05, + "log_odds_chosen": 6.820882320404053, + "log_odds_ratio": -0.07564956694841385, + "logits/chosen": -2.904177665710449, + "logits/chosen_prompt": -2.666506052017212, + "logits/rejected": -1.7927961349487305, + "logits/rejected_prompt": -2.67189884185791, + "logps/chosen": -2.0425262451171875, + "logps/chosen_both": -2.0270590782165527, + "logps/chosen_prompt": -0.8014975786209106, + "logps/rejected": -8.73670768737793, + "logps/rejected_both": -8.633912086486816, + "logps/rejected_prompt": -1.0191423892974854, + "loss": 2.0463, + "nll_loss": 2.0263657569885254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8170105218887329, + "rewards/margins": 2.6776726245880127, + "rewards/rejected": -3.4946835041046143, + "step": 2190 + }, + { + "epoch": 1.76, + "grad_norm": 1.1920813557914467, + "learning_rate": 1.827700448461836e-05, + "log_odds_chosen": 7.279504299163818, + "log_odds_ratio": -0.13858437538146973, + "logits/chosen": -3.018719434738159, + "logits/chosen_prompt": -2.687682628631592, + "logits/rejected": -1.6826099157333374, + "logits/rejected_prompt": -2.678703784942627, + "logps/chosen": -1.860093355178833, + "logps/chosen_both": -1.8447208404541016, + "logps/chosen_prompt": -0.8991209268569946, + "logps/rejected": -9.011571884155273, + "logps/rejected_both": -8.870678901672363, + "logps/rejected_prompt": -1.096939206123352, + "loss": 2.012, + "nll_loss": 1.84355890750885, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7440372705459595, + "rewards/margins": 2.8605916500091553, + "rewards/rejected": -3.6046290397644043, + "step": 2200 + }, + { + "epoch": 1.768, + "grad_norm": 0.19530589950798477, + "learning_rate": 1.807551846945694e-05, + "log_odds_chosen": 8.2916898727417, + "log_odds_ratio": -0.06947987526655197, + "logits/chosen": -2.939237117767334, + "logits/chosen_prompt": -2.6988303661346436, + "logits/rejected": -1.6200687885284424, + "logits/rejected_prompt": -2.68789005279541, + "logps/chosen": -1.9331436157226562, + "logps/chosen_both": -1.916733741760254, + "logps/chosen_prompt": -0.7277871370315552, + "logps/rejected": -10.084833145141602, + "logps/rejected_both": -9.953168869018555, + "logps/rejected_prompt": -1.032865285873413, + "loss": 1.9735, + "nll_loss": 1.916029691696167, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7732575535774231, + "rewards/margins": 3.260676145553589, + "rewards/rejected": -4.033933639526367, + "step": 2210 + }, + { + "epoch": 1.776, + "grad_norm": 15.17903488212651, + "learning_rate": 1.7874518438250597e-05, + "log_odds_chosen": 9.437470436096191, + "log_odds_ratio": -0.00649250065907836, + "logits/chosen": -2.9586923122406006, + "logits/chosen_prompt": -2.700380802154541, + "logits/rejected": -1.6204473972320557, + "logits/rejected_prompt": -2.668332576751709, + "logps/chosen": -2.0388143062591553, + "logps/chosen_both": -2.017122268676758, + "logps/chosen_prompt": -0.7435789108276367, + "logps/rejected": -11.334449768066406, + "logps/rejected_both": -11.154394149780273, + "logps/rejected_prompt": -0.9411799311637878, + "loss": 2.1772, + "nll_loss": 2.0165975093841553, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8155257105827332, + "rewards/margins": 3.718254566192627, + "rewards/rejected": -4.533780097961426, + "step": 2220 + }, + { + "epoch": 1.784, + "grad_norm": 0.28398933589113434, + "learning_rate": 1.767401849787357e-05, + "log_odds_chosen": 6.384799957275391, + "log_odds_ratio": -0.07637131214141846, + "logits/chosen": -2.9650635719299316, + "logits/chosen_prompt": -2.6936004161834717, + "logits/rejected": -1.797628402709961, + "logits/rejected_prompt": -2.690913438796997, + "logps/chosen": -1.8709478378295898, + "logps/chosen_both": -1.856300950050354, + "logps/chosen_prompt": -0.8806565403938293, + "logps/rejected": -8.102632522583008, + "logps/rejected_both": -7.991517543792725, + "logps/rejected_prompt": -1.0237706899642944, + "loss": 2.1791, + "nll_loss": 1.8553836345672607, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.748379111289978, + "rewards/margins": 2.492673635482788, + "rewards/rejected": -3.2410526275634766, + "step": 2230 + }, + { + "epoch": 1.792, + "grad_norm": 7.344829678329039, + "learning_rate": 1.747403272010199e-05, + "log_odds_chosen": 4.503691673278809, + "log_odds_ratio": -0.44330325722694397, + "logits/chosen": -2.9304556846618652, + "logits/chosen_prompt": -2.7112066745758057, + "logits/rejected": -2.020601749420166, + "logits/rejected_prompt": -2.6991848945617676, + "logps/chosen": -2.2137069702148438, + "logps/chosen_both": -2.192910671234131, + "logps/chosen_prompt": -0.7757335305213928, + "logps/rejected": -6.606595039367676, + "logps/rejected_both": -6.522683143615723, + "logps/rejected_prompt": -1.0225099325180054, + "loss": 2.0432, + "nll_loss": 2.1926403045654297, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.885482668876648, + "rewards/margins": 1.7571556568145752, + "rewards/rejected": -2.6426382064819336, + "step": 2240 + }, + { + "epoch": 1.8, + "grad_norm": 0.2830736721750178, + "learning_rate": 1.7274575140626318e-05, + "log_odds_chosen": 6.729086399078369, + "log_odds_ratio": -0.002848730655387044, + "logits/chosen": -2.9603111743927, + "logits/chosen_prompt": -2.712522268295288, + "logits/rejected": -1.471806287765503, + "logits/rejected_prompt": -2.711698055267334, + "logps/chosen": -1.9502222537994385, + "logps/chosen_both": -1.932050347328186, + "logps/chosen_prompt": -0.7791944146156311, + "logps/rejected": -8.519399642944336, + "logps/rejected_both": -8.40225887298584, + "logps/rejected_prompt": -0.9917134046554565, + "loss": 2.004, + "nll_loss": 1.9314903020858765, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7800888419151306, + "rewards/margins": 2.627671241760254, + "rewards/rejected": -3.4077601432800293, + "step": 2250 + }, + { + "epoch": 1.808, + "grad_norm": 1.3701495350067383, + "learning_rate": 1.7075659758066208e-05, + "log_odds_chosen": 4.735475063323975, + "log_odds_ratio": -0.14837773144245148, + "logits/chosen": -2.9037442207336426, + "logits/chosen_prompt": -2.6904830932617188, + "logits/rejected": -1.9993311166763306, + "logits/rejected_prompt": -2.672048807144165, + "logps/chosen": -2.0128910541534424, + "logps/chosen_both": -1.9941341876983643, + "logps/chosen_prompt": -0.7718429565429688, + "logps/rejected": -6.631512641906738, + "logps/rejected_both": -6.536102294921875, + "logps/rejected_prompt": -0.9579516649246216, + "loss": 2.0311, + "nll_loss": 1.9931504726409912, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.805156409740448, + "rewards/margins": 1.8474489450454712, + "rewards/rejected": -2.6526052951812744, + "step": 2260 + }, + { + "epoch": 1.8159999999999998, + "grad_norm": 21.366222606488684, + "learning_rate": 1.6877300532988094e-05, + "log_odds_chosen": 7.610182762145996, + "log_odds_ratio": -0.0006168467225506902, + "logits/chosen": -2.9680118560791016, + "logits/chosen_prompt": -2.664792776107788, + "logits/rejected": -1.2061169147491455, + "logits/rejected_prompt": -2.642937183380127, + "logps/chosen": -2.109647512435913, + "logps/chosen_both": -2.0934646129608154, + "logps/chosen_prompt": -0.9366092681884766, + "logps/rejected": -9.573705673217773, + "logps/rejected_both": -9.448970794677734, + "logps/rejected_prompt": -1.088648796081543, + "loss": 2.0712, + "nll_loss": 2.0929782390594482, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.843859076499939, + "rewards/margins": 2.9856228828430176, + "rewards/rejected": -3.829482316970825, + "step": 2270 + }, + { + "epoch": 1.8239999999999998, + "grad_norm": 16.630798038144235, + "learning_rate": 1.6679511386925337e-05, + "log_odds_chosen": 7.555551052093506, + "log_odds_ratio": -0.0009092552354559302, + "logits/chosen": -2.9446640014648438, + "logits/chosen_prompt": -2.703678607940674, + "logits/rejected": -1.251961588859558, + "logits/rejected_prompt": -2.686135768890381, + "logps/chosen": -1.9308589696884155, + "logps/chosen_both": -1.9166322946548462, + "logps/chosen_prompt": -0.7264224290847778, + "logps/rejected": -9.319347381591797, + "logps/rejected_both": -9.213998794555664, + "logps/rejected_prompt": -0.9491874575614929, + "loss": 1.9893, + "nll_loss": 1.915757179260254, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7723435759544373, + "rewards/margins": 2.955395221710205, + "rewards/rejected": -3.727738857269287, + "step": 2280 + }, + { + "epoch": 1.8319999999999999, + "grad_norm": 12.241247239412013, + "learning_rate": 1.648230620140121e-05, + "log_odds_chosen": 5.702427864074707, + "log_odds_ratio": -0.08441531658172607, + "logits/chosen": -2.9145102500915527, + "logits/chosen_prompt": -2.7137434482574463, + "logits/rejected": -1.6203930377960205, + "logits/rejected_prompt": -2.7078521251678467, + "logps/chosen": -2.2361724376678467, + "logps/chosen_both": -2.2188587188720703, + "logps/chosen_prompt": -0.8718380928039551, + "logps/rejected": -7.825617790222168, + "logps/rejected_both": -7.731575012207031, + "logps/rejected_prompt": -0.9629694223403931, + "loss": 2.0784, + "nll_loss": 2.2178969383239746, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8944689631462097, + "rewards/margins": 2.2357778549194336, + "rewards/rejected": -3.130246877670288, + "step": 2290 + }, + { + "epoch": 1.8399999999999999, + "grad_norm": 0.2611980916177983, + "learning_rate": 1.6285698816954624e-05, + "log_odds_chosen": 5.886144638061523, + "log_odds_ratio": -0.14016158878803253, + "logits/chosen": -2.961277484893799, + "logits/chosen_prompt": -2.7103641033172607, + "logits/rejected": -1.6664111614227295, + "logits/rejected_prompt": -2.7068681716918945, + "logps/chosen": -1.891758918762207, + "logps/chosen_both": -1.877873182296753, + "logps/chosen_prompt": -0.8406246304512024, + "logps/rejected": -7.652543067932129, + "logps/rejected_both": -7.56333065032959, + "logps/rejected_prompt": -0.9318545460700989, + "loss": 1.9727, + "nll_loss": 1.877637267112732, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7567036151885986, + "rewards/margins": 2.3043136596679688, + "rewards/rejected": -3.0610175132751465, + "step": 2300 + }, + { + "epoch": 1.8479999999999999, + "grad_norm": 0.17316872141044676, + "learning_rate": 1.6089703032168733e-05, + "log_odds_chosen": 6.335439205169678, + "log_odds_ratio": -0.007680490612983704, + "logits/chosen": -2.9618372917175293, + "logits/chosen_prompt": -2.6908061504364014, + "logits/rejected": -1.7726625204086304, + "logits/rejected_prompt": -2.684845447540283, + "logps/chosen": -2.021721839904785, + "logps/chosen_both": -2.0070888996124268, + "logps/chosen_prompt": -0.8626869916915894, + "logps/rejected": -8.212113380432129, + "logps/rejected_both": -8.121031761169434, + "logps/rejected_prompt": -1.1338939666748047, + "loss": 2.086, + "nll_loss": 2.0067009925842285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.808688759803772, + "rewards/margins": 2.4761569499969482, + "rewards/rejected": -3.2848453521728516, + "step": 2310 + }, + { + "epoch": 1.8559999999999999, + "grad_norm": 0.21046741293754637, + "learning_rate": 1.5894332602702545e-05, + "log_odds_chosen": 5.3062238693237305, + "log_odds_ratio": -0.09238220006227493, + "logits/chosen": -2.863762378692627, + "logits/chosen_prompt": -2.698549747467041, + "logits/rejected": -1.7465136051177979, + "logits/rejected_prompt": -2.68521785736084, + "logps/chosen": -2.2504518032073975, + "logps/chosen_both": -2.2304165363311768, + "logps/chosen_prompt": -0.8663703203201294, + "logps/rejected": -7.458860874176025, + "logps/rejected_both": -7.366589546203613, + "logps/rejected_prompt": -1.0120290517807007, + "loss": 2.0757, + "nll_loss": 2.2292349338531494, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9001806974411011, + "rewards/margins": 2.0833640098571777, + "rewards/rejected": -2.9835448265075684, + "step": 2320 + }, + { + "epoch": 1.8639999999999999, + "grad_norm": 0.2325223892090008, + "learning_rate": 1.5699601240325474e-05, + "log_odds_chosen": 5.675802230834961, + "log_odds_ratio": -0.14025500416755676, + "logits/chosen": -2.9541871547698975, + "logits/chosen_prompt": -2.739253520965576, + "logits/rejected": -1.7137792110443115, + "logits/rejected_prompt": -2.7213757038116455, + "logps/chosen": -2.011998176574707, + "logps/chosen_both": -1.9911746978759766, + "logps/chosen_prompt": -0.7685104012489319, + "logps/rejected": -7.561570167541504, + "logps/rejected_both": -7.440642356872559, + "logps/rejected_prompt": -0.9734441041946411, + "loss": 2.1005, + "nll_loss": 1.9904701709747314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8047992587089539, + "rewards/margins": 2.2198290824890137, + "rewards/rejected": -3.024627923965454, + "step": 2330 + }, + { + "epoch": 1.8719999999999999, + "grad_norm": 0.22047561828057208, + "learning_rate": 1.5505522611954975e-05, + "log_odds_chosen": 5.360434532165527, + "log_odds_ratio": -0.015295952558517456, + "logits/chosen": -2.899050235748291, + "logits/chosen_prompt": -2.718276262283325, + "logits/rejected": -2.08345365524292, + "logits/rejected_prompt": -2.6998016834259033, + "logps/chosen": -1.8844950199127197, + "logps/chosen_both": -1.8703863620758057, + "logps/chosen_prompt": -0.851974606513977, + "logps/rejected": -7.060413360595703, + "logps/rejected_both": -6.973315238952637, + "logps/rejected_prompt": -1.0805187225341797, + "loss": 2.1013, + "nll_loss": 1.8690898418426514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7537980675697327, + "rewards/margins": 2.0703673362731934, + "rewards/rejected": -2.8241655826568604, + "step": 2340 + }, + { + "epoch": 1.88, + "grad_norm": 1.1925590095899927, + "learning_rate": 1.5312110338697426e-05, + "log_odds_chosen": 4.792149066925049, + "log_odds_ratio": -0.1287117898464203, + "logits/chosen": -2.9038636684417725, + "logits/chosen_prompt": -2.692437171936035, + "logits/rejected": -1.9894816875457764, + "logits/rejected_prompt": -2.6797823905944824, + "logps/chosen": -1.9725837707519531, + "logps/chosen_both": -1.9566154479980469, + "logps/chosen_prompt": -0.7425985932350159, + "logps/rejected": -6.638279914855957, + "logps/rejected_both": -6.564992427825928, + "logps/rejected_prompt": -0.9972286224365234, + "loss": 1.9786, + "nll_loss": 1.9555227756500244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7890334725379944, + "rewards/margins": 1.8662786483764648, + "rewards/rejected": -2.6553120613098145, + "step": 2350 + }, + { + "epoch": 1.888, + "grad_norm": 0.2631097802741203, + "learning_rate": 1.5119377994892094e-05, + "log_odds_chosen": 7.000193119049072, + "log_odds_ratio": -0.0028563719242811203, + "logits/chosen": -3.0186381340026855, + "logits/chosen_prompt": -2.723498821258545, + "logits/rejected": -1.5227829217910767, + "logits/rejected_prompt": -2.7204127311706543, + "logps/chosen": -1.8698396682739258, + "logps/chosen_both": -1.850454330444336, + "logps/chosen_prompt": -0.7684019207954407, + "logps/rejected": -8.695045471191406, + "logps/rejected_both": -8.554825782775879, + "logps/rejected_prompt": -1.0279042720794678, + "loss": 2.0014, + "nll_loss": 1.8499305248260498, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7479358315467834, + "rewards/margins": 2.7300820350646973, + "rewards/rejected": -3.478017807006836, + "step": 2360 + }, + { + "epoch": 1.896, + "grad_norm": 0.20426857310467877, + "learning_rate": 1.4927339107158437e-05, + "log_odds_chosen": 8.02978515625, + "log_odds_ratio": -0.0003904960467480123, + "logits/chosen": -2.951490879058838, + "logits/chosen_prompt": -2.708991289138794, + "logits/rejected": -1.2117061614990234, + "logits/rejected_prompt": -2.6999001502990723, + "logps/chosen": -1.9645278453826904, + "logps/chosen_both": -1.9457321166992188, + "logps/chosen_prompt": -0.761443018913269, + "logps/rejected": -9.840217590332031, + "logps/rejected_both": -9.700372695922852, + "logps/rejected_prompt": -0.9850748181343079, + "loss": 1.9906, + "nll_loss": 1.9449169635772705, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7858111262321472, + "rewards/margins": 3.1502761840820312, + "rewards/rejected": -3.936087131500244, + "step": 2370 + }, + { + "epoch": 1.904, + "grad_norm": 8.58950626485984, + "learning_rate": 1.4736007153446801e-05, + "log_odds_chosen": 8.620465278625488, + "log_odds_ratio": -0.00021180181647650898, + "logits/chosen": -2.903035879135132, + "logits/chosen_prompt": -2.735071897506714, + "logits/rejected": -1.012452483177185, + "logits/rejected_prompt": -2.7112841606140137, + "logps/chosen": -2.025474786758423, + "logps/chosen_both": -2.007967472076416, + "logps/chosen_prompt": -0.8391423225402832, + "logps/rejected": -10.502188682556152, + "logps/rejected_both": -10.356060028076172, + "logps/rejected_prompt": -0.9537385106086731, + "loss": 2.3368, + "nll_loss": 2.0072412490844727, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8101899027824402, + "rewards/margins": 3.39068603515625, + "rewards/rejected": -4.200875282287598, + "step": 2380 + }, + { + "epoch": 1.912, + "grad_norm": 0.19583726689690906, + "learning_rate": 1.4545395562092468e-05, + "log_odds_chosen": 6.079274654388428, + "log_odds_ratio": -0.4031279981136322, + "logits/chosen": -2.844682455062866, + "logits/chosen_prompt": -2.8039345741271973, + "logits/rejected": -1.3123562335968018, + "logits/rejected_prompt": -2.7909157276153564, + "logps/chosen": -3.1939139366149902, + "logps/chosen_both": -3.164135217666626, + "logps/chosen_prompt": -0.8311759233474731, + "logps/rejected": -9.186834335327148, + "logps/rejected_both": -9.063508033752441, + "logps/rejected_prompt": -1.087949275970459, + "loss": 2.1303, + "nll_loss": 3.163341999053955, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -1.277565598487854, + "rewards/margins": 2.3971686363220215, + "rewards/rejected": -3.674734592437744, + "step": 2390 + }, + { + "epoch": 1.92, + "grad_norm": 4.277812021967688, + "learning_rate": 1.4355517710873184e-05, + "log_odds_chosen": 6.059283256530762, + "log_odds_ratio": -0.09234263747930527, + "logits/chosen": -3.0424129962921143, + "logits/chosen_prompt": -2.83634614944458, + "logits/rejected": -1.6069847345352173, + "logits/rejected_prompt": -2.818171739578247, + "logps/chosen": -1.861696481704712, + "logps/chosen_both": -1.8472903966903687, + "logps/chosen_prompt": -0.783744752407074, + "logps/rejected": -7.765946865081787, + "logps/rejected_both": -7.661751747131348, + "logps/rejected_prompt": -1.0380439758300781, + "loss": 2.0007, + "nll_loss": 1.846143126487732, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.7446784973144531, + "rewards/margins": 2.3617005348205566, + "rewards/rejected": -3.1063787937164307, + "step": 2400 + }, + { + "epoch": 1.928, + "grad_norm": 0.1934446201158471, + "learning_rate": 1.4166386926070322e-05, + "log_odds_chosen": 7.342792510986328, + "log_odds_ratio": -0.005115572828799486, + "logits/chosen": -2.9572060108184814, + "logits/chosen_prompt": -2.7633354663848877, + "logits/rejected": -1.3063112497329712, + "logits/rejected_prompt": -2.7578670978546143, + "logps/chosen": -1.9242970943450928, + "logps/chosen_both": -1.9093306064605713, + "logps/chosen_prompt": -0.8123539686203003, + "logps/rejected": -9.096908569335938, + "logps/rejected_both": -8.975044250488281, + "logps/rejected_prompt": -1.0593974590301514, + "loss": 1.9902, + "nll_loss": 1.9083023071289062, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7697189450263977, + "rewards/margins": 2.869044780731201, + "rewards/rejected": -3.638763904571533, + "step": 2410 + }, + { + "epoch": 1.936, + "grad_norm": 0.216837041093156, + "learning_rate": 1.397801648153354e-05, + "log_odds_chosen": 6.378230094909668, + "log_odds_ratio": -0.07421709597110748, + "logits/chosen": -3.0056633949279785, + "logits/chosen_prompt": -2.768573045730591, + "logits/rejected": -1.5620958805084229, + "logits/rejected_prompt": -2.7487571239471436, + "logps/chosen": -1.9807904958724976, + "logps/chosen_both": -1.9623302221298218, + "logps/chosen_prompt": -0.8482378125190735, + "logps/rejected": -8.202530860900879, + "logps/rejected_both": -8.077143669128418, + "logps/rejected_prompt": -1.0352851152420044, + "loss": 1.9778, + "nll_loss": 1.9611247777938843, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7923161387443542, + "rewards/margins": 2.488696575164795, + "rewards/rejected": -3.2810122966766357, + "step": 2420 + }, + { + "epoch": 1.944, + "grad_norm": 0.21162368892876318, + "learning_rate": 1.3790419597749199e-05, + "log_odds_chosen": 5.369621753692627, + "log_odds_ratio": -0.20802097022533417, + "logits/chosen": -2.925058126449585, + "logits/chosen_prompt": -2.727915048599243, + "logits/rejected": -1.7108662128448486, + "logits/rejected_prompt": -2.729671001434326, + "logps/chosen": -2.030609607696533, + "logps/chosen_both": -2.013143301010132, + "logps/chosen_prompt": -0.7951982021331787, + "logps/rejected": -7.307798862457275, + "logps/rejected_both": -7.217469692230225, + "logps/rejected_prompt": -0.9677292108535767, + "loss": 2.0275, + "nll_loss": 2.0122172832489014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8122437596321106, + "rewards/margins": 2.1108758449554443, + "rewards/rejected": -2.92311954498291, + "step": 2430 + }, + { + "epoch": 1.952, + "grad_norm": 0.1882777054319625, + "learning_rate": 1.3603609440912507e-05, + "log_odds_chosen": 7.206502437591553, + "log_odds_ratio": -0.06993956863880157, + "logits/chosen": -2.9723217487335205, + "logits/chosen_prompt": -2.7605624198913574, + "logits/rejected": -1.3072056770324707, + "logits/rejected_prompt": -2.7452828884124756, + "logps/chosen": -2.0292842388153076, + "logps/chosen_both": -2.0142998695373535, + "logps/chosen_prompt": -0.8006251454353333, + "logps/rejected": -9.103940963745117, + "logps/rejected_both": -8.999374389648438, + "logps/rejected_prompt": -0.9301830530166626, + "loss": 1.9849, + "nll_loss": 2.0136048793792725, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8117138147354126, + "rewards/margins": 2.829862356185913, + "rewards/rejected": -3.6415767669677734, + "step": 2440 + }, + { + "epoch": 1.96, + "grad_norm": 6.854544334281628, + "learning_rate": 1.3417599122003464e-05, + "log_odds_chosen": 5.873773574829102, + "log_odds_ratio": -0.09982452541589737, + "logits/chosen": -2.8911209106445312, + "logits/chosen_prompt": -2.751624584197998, + "logits/rejected": -1.530667781829834, + "logits/rejected_prompt": -2.731210947036743, + "logps/chosen": -2.39859938621521, + "logps/chosen_both": -2.3739638328552246, + "logps/chosen_prompt": -0.818207859992981, + "logps/rejected": -8.159021377563477, + "logps/rejected_both": -8.031126976013184, + "logps/rejected_prompt": -0.9556495547294617, + "loss": 2.0669, + "nll_loss": 2.3730950355529785, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -0.9594398736953735, + "rewards/margins": 2.304168701171875, + "rewards/rejected": -3.263608455657959, + "step": 2450 + }, + { + "epoch": 1.968, + "grad_norm": 0.20527915967987695, + "learning_rate": 1.3232401695866687e-05, + "log_odds_chosen": 6.752752780914307, + "log_odds_ratio": -0.093865767121315, + "logits/chosen": -3.0047717094421387, + "logits/chosen_prompt": -2.7637996673583984, + "logits/rejected": -1.454332947731018, + "logits/rejected_prompt": -2.7430145740509033, + "logps/chosen": -1.9008424282073975, + "logps/chosen_both": -1.8828001022338867, + "logps/chosen_prompt": -0.877086341381073, + "logps/rejected": -8.502935409545898, + "logps/rejected_both": -8.374523162841797, + "logps/rejected_prompt": -1.0814439058303833, + "loss": 2.0633, + "nll_loss": 1.8823230266571045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7603369951248169, + "rewards/margins": 2.6408374309539795, + "rewards/rejected": -3.401175022125244, + "step": 2460 + }, + { + "epoch": 1.976, + "grad_norm": 6.470506820654642, + "learning_rate": 1.3048030160295196e-05, + "log_odds_chosen": 6.849400520324707, + "log_odds_ratio": -0.07237619161605835, + "logits/chosen": -2.963409662246704, + "logits/chosen_prompt": -2.758953094482422, + "logits/rejected": -1.3645999431610107, + "logits/rejected_prompt": -2.7408089637756348, + "logps/chosen": -2.0132029056549072, + "logps/chosen_both": -1.9941928386688232, + "logps/chosen_prompt": -0.7748836874961853, + "logps/rejected": -8.71554183959961, + "logps/rejected_both": -8.596506118774414, + "logps/rejected_prompt": -0.936238169670105, + "loss": 1.9731, + "nll_loss": 1.9934498071670532, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8052810430526733, + "rewards/margins": 2.6809353828430176, + "rewards/rejected": -3.4862167835235596, + "step": 2470 + }, + { + "epoch": 1.984, + "grad_norm": 0.18875404411296617, + "learning_rate": 1.2864497455118152e-05, + "log_odds_chosen": 5.949180603027344, + "log_odds_ratio": -0.20756885409355164, + "logits/chosen": -2.90920352935791, + "logits/chosen_prompt": -2.731333017349243, + "logits/rejected": -1.5196672677993774, + "logits/rejected_prompt": -2.7116055488586426, + "logps/chosen": -2.0656113624572754, + "logps/chosen_both": -2.0481104850769043, + "logps/chosen_prompt": -0.7715897560119629, + "logps/rejected": -7.913069725036621, + "logps/rejected_both": -7.810868263244629, + "logps/rejected_prompt": -1.0343679189682007, + "loss": 2.0494, + "nll_loss": 2.0465188026428223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8262445330619812, + "rewards/margins": 2.3389835357666016, + "rewards/rejected": -3.1652283668518066, + "step": 2480 + }, + { + "epoch": 1.992, + "grad_norm": 0.19965333207670072, + "learning_rate": 1.2681816461292715e-05, + "log_odds_chosen": 6.9041619300842285, + "log_odds_ratio": -0.07076757401227951, + "logits/chosen": -2.9241271018981934, + "logits/chosen_prompt": -2.7164487838745117, + "logits/rejected": -1.2974779605865479, + "logits/rejected_prompt": -2.7119083404541016, + "logps/chosen": -2.164299249649048, + "logps/chosen_both": -2.1463229656219482, + "logps/chosen_prompt": -0.8179939389228821, + "logps/rejected": -8.948786735534668, + "logps/rejected_both": -8.831026077270508, + "logps/rejected_prompt": -1.014527678489685, + "loss": 2.069, + "nll_loss": 2.1452174186706543, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8657197952270508, + "rewards/margins": 2.713794469833374, + "rewards/rejected": -3.579514265060425, + "step": 2490 + }, + { + "epoch": 2.0, + "grad_norm": 7.401684464890164, + "learning_rate": 1.2500000000000006e-05, + "log_odds_chosen": 7.955414772033691, + "log_odds_ratio": -0.004814439453184605, + "logits/chosen": -2.977412700653076, + "logits/chosen_prompt": -2.712825298309326, + "logits/rejected": -1.1496913433074951, + "logits/rejected_prompt": -2.6892926692962646, + "logps/chosen": -1.8996845483779907, + "logps/chosen_both": -1.8827598094940186, + "logps/chosen_prompt": -0.8927472233772278, + "logps/rejected": -9.687314987182617, + "logps/rejected_both": -9.540821075439453, + "logps/rejected_prompt": -1.00954270362854, + "loss": 2.0789, + "nll_loss": 1.882759690284729, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7598739266395569, + "rewards/margins": 3.1150519847869873, + "rewards/rejected": -3.8749260902404785, + "step": 2500 + } + ], + "logging_steps": 10, + "max_steps": 3750, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 3, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}